From aa384702dbd881e0d9eac28246454492b42d3586 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 13 Dec 2024 10:43:27 -0500 Subject: [PATCH 01/45] committing progress --- experimental/learn_descriptors/BUILD | 28 +- .../structure_from_motion.cc | 250 ++++++++++++++++++ .../structure_from_motion.hh | 124 +++++++++ ..._test.cc => structure_from_motion_test.cc} | 43 ++- .../learn_descriptors/symphony_lake_parser.cc | 3 +- .../learn_descriptors/symphony_lake_parser.hh | 75 +++++- .../symphony_lake_parser_test.cc | 45 ++-- .../learn_descriptors/visual_odometry.cc | 118 --------- .../learn_descriptors/visual_odometry.hh | 58 ---- 9 files changed, 530 insertions(+), 214 deletions(-) create mode 100644 experimental/learn_descriptors/structure_from_motion.cc create mode 100644 experimental/learn_descriptors/structure_from_motion.hh rename experimental/learn_descriptors/{visual_odometry_test.cc => structure_from_motion_test.cc} (77%) delete mode 100644 experimental/learn_descriptors/visual_odometry.cc delete mode 100644 experimental/learn_descriptors/visual_odometry.hh diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index e0f417bd7..91088abdf 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -17,19 +17,25 @@ cc_test( ) cc_library( - name = "visual_odometry", - hdrs = ["visual_odometry.hh"], + name = "structure_from_motion", + hdrs = ["structure_from_motion.hh"], visibility = ["//visibility:public"], - srcs = ["visual_odometry.cc"], + srcs = ["structure_from_motion.cc"], deps = [ - "@opencv//:opencv" + "@opencv//:opencv", + "@gtsam//:gtsam", + "@eigen" ] ) cc_library( name = "symphony_lake_parser", hdrs = ["symphony_lake_parser.hh"], - copts = ["-Wno-unused-parameter"], + copts = [ + "-Wno-unused-parameter", + "-std=c++20", + ], + data = ["@symphony_lake_snippet"], visibility = ["//visibility:public"], srcs = ["symphony_lake_parser.cc"], deps = [ @@ -39,11 +45,15 @@ cc_library( ) cc_test( - name = "visual_odometry_test", - srcs = ["visual_odometry_test.cc"], + name = "structure_from_motion_test", + srcs = ["structure_from_motion_test.cc"], + copts = [ + "-Wno-unused-parameter", + ], deps = [ "@com_google_googletest//:gtest_main", - ":visual_odometry" + ":symphony_lake_parser", + ":structure_from_motion" ] ) @@ -51,7 +61,7 @@ cc_test( name = "symphony_lake_parser_test", srcs = ["symphony_lake_parser_test.cc"], copts = ["-Wno-unused-parameter"], - data = ["@symphony_lake_snippet"], + # data = ["@symphony_lake_snippet"], deps = [ "@com_google_googletest//:gtest_main", ":symphony_lake_parser", diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc new file mode 100644 index 000000000..544b0728e --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -0,0 +1,250 @@ +#include "experimental/learn_descriptors/structure_from_motion.hh" + +namespace robot::experimental::learn_descriptors { +StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose, + Frontend::MatcherType frontend_matcher) { + frontend_ = Frontend(frontend_extractor, frontend_matcher); + set_initial_pose(initial_pose); + backend_ = Backend(K); +} + +void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { + backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose); +} + +void StructureFromMotion::add_image(const cv::Mat &img) { + std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); + landmarks_.push_back(std::vector()); + if (get_num_images_added() > 0) { + std::vector matches = frontend_.get_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); + Frontend::enforce_bijective_matches(matches); + + matches_.push_back(matches); + backend_.add_between_factor( + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), + gtsam::Pose3::Identity()); + for (const cv::DMatch match : matches) { + Backend::Landmark landmark_cam_1( + gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), + gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()-1), + gtsam::Point2( + static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), + static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) + ) + ); + Backend::Landmark landmark_cam_2( + gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), + gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()), + gtsam::Point2( + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y) + ) + ); + landmark_count_++; + landmarks_[get_num_images_added()-1].push_back(landmark_cam_1); + landmarks_[get_num_images_added()].push_back(landmark_cam_2); + backend_.add_landmark(landmark_cam_1); + backend_.add_landmark(landmark_cam_2); + } + } + img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); +} + +Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { + extractor_type_ = frontend_algorithm; + matcher_type_ = frontend_matcher; + + switch (extractor_type_) { + case ExtractorType::SIFT: + feature_extractor_ = cv::SIFT::create(); + break; + case ExtractorType::ORB: + feature_extractor_ = cv::ORB::create(); + break; + default: + // Error handling needed? + break; + } + switch (matcher_type_) { + case MatcherType::BRUTE_FORCE: + descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); + break; + case MatcherType::KNN: + descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); + break; + case MatcherType::FLANN: + if (frontend_algorithm == ExtractorType::ORB) { + throw std::invalid_argument("FLANN can not be used with ORB."); + } + descriptor_matcher_ = cv::FlannBasedMatcher::create(); + break; + default: + // Error handling needed? + break; + } +} + +std::pair, cv::Mat> Frontend::get_keypoints_and_descriptors( + const cv::Mat &img) const { + std::vector keypoints; + cv::Mat descriptors; + switch (extractor_type_) { + default: // the opencv extractors have the same function signature + feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); + break; + } + return std::pair, cv::Mat>(keypoints, descriptors); +} + +std::vector Frontend::get_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const { + std::vector matches; + switch (matcher_type_) { + case MatcherType::BRUTE_FORCE: + get_brute_matches(descriptors1, descriptors2, matches); + break; + case MatcherType::KNN: + get_KNN_matches(descriptors1, descriptors2, matches); + break; + case MatcherType::FLANN: + get_FLANN_matches(descriptors1, descriptors2, matches); + break; + default: + break; + } + std::sort(matches.begin(), matches.end()); + return matches; +} + +void Frontend::threshold_matches(std::vector &matches, float dist_threshhold) { + matches.erase( + std::remove_if(matches.begin(), matches.end(), [dist_threshhold](const cv::DMatch& match) { + return match.distance > dist_threshhold; + }), + matches.end()); +} + +void Frontend::enforce_bijective_matches(std::vector& matches) { + std::unordered_map bestQueryMatch; + std::unordered_map bestTrainMatch; + + for (const auto& match : matches) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || + match.distance < bestQueryMatch[queryIdx].distance) { + bestQueryMatch[queryIdx] = match; + } + + if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || + match.distance < bestTrainMatch[trainIdx].distance) { + bestTrainMatch[trainIdx] = match; + } + } + + matches.erase( + std::remove_if( + matches.begin(), matches.end(), + [&bestQueryMatch, &bestTrainMatch](const cv::DMatch& match) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + return bestQueryMatch[queryIdx].trainIdx != trainIdx || + bestTrainMatch[trainIdx].queryIdx != queryIdx; + }), + matches.end()); +} + + +bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (matcher_type_ != MatcherType::BRUTE_FORCE) { + return false; + } + matches_out.clear(); + descriptor_matcher_->match(descriptors1, descriptors2, matches_out); + return true; +} + +bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (matcher_type_ != MatcherType::KNN) { + return false; + } + std::vector> knn_matches; + descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); + const float ratio_thresh = 0.7f; + matches_out.clear(); + // Lowe's Ratio Test + for (size_t i = 0; i < knn_matches.size(); i++) { + if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { + matches_out.push_back(knn_matches[i][0]); + } + } + return true; +} + +bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (matcher_type_ != MatcherType::FLANN) { + return false; + } + std::vector> knn_matches; + descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); + const float ratio_thresh = 0.7f; + matches_out.clear(); + for (size_t i = 0; i < knn_matches.size(); i++) { + if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { + matches_out.push_back(knn_matches[i][0]); + } + } + return true; +} + +Backend::Backend() { + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + + initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); +} + +Backend::Backend(gtsam::Cal3_S2 K) { + initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); +} + +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) { + graph_.emplace_shared>(symbol, value, pose_noise_); + initial_estimate_.insert(symbol, value); +} + +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value) { + graph_.emplace_shared>(symbol_1, symbol_2, value, pose_noise_); + initial_estimate_.insert(symbol_2, initial_estimate_.at(symbol_2).compose(value)); +} + +void Backend::add_landmarks(const std::vector &landmarks) { + for (const Landmark &landmark : landmarks) { + add_landmark(landmark); + } +} + +void Backend::add_landmark(const Landmark &landmark) { + graph_.emplace_shared>( + landmark.projection, measurement_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0) + ); + initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess); +} + +void Backend::solve_graph() { + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); + result_ = optimizer.optimize(); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh new file mode 100644 index 000000000..a2185a909 --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -0,0 +1,124 @@ +#pragma once + +#include + +#include "Eigen/Core" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/GeneralSFMFactor.h" +#include "gtsam/slam/PriorFactor.h" +#include "gtsam/slam/BetweenFactor.h" +#include "gtsam/slam/ProjectionFactor.h" + +#include +#include +#include +#include + +namespace robot::experimental::learn_descriptors { +class Frontend { + public: + enum class ExtractorType { SIFT, ORB }; + enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; + + Frontend(){}; + Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); + ~Frontend(){}; + + const ExtractorType& get_extractor_type() const { return extractor_type_; }; + const MatcherType& get_matcher_type() const { return matcher_type_; }; + + std::pair, cv::Mat> get_keypoints_and_descriptors( + const cv::Mat &img) const; + std::vector get_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const; + static void threshold_matches(std::vector &matches, float dist_threshhold); + static void enforce_bijective_matches(std::vector &matches); + static void draw_keypoints(const cv::Mat &img, std::vector keypoints, + cv::Mat img_keypoints_out) { + cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), + cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + } + static void draw_matches(const cv::Mat &img1, std::vector keypoints1, + const cv::Mat &img2, std::vector keypoints2, + std::vector matches, cv::Mat img_matches_out) { + cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); + } + + private: + bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + ExtractorType extractor_type_; + MatcherType matcher_type_; + + cv::Ptr feature_extractor_; + cv::Ptr descriptor_matcher_; +}; +class Backend { + public: + static const char pose_symbol_char = 'x'; + static const char landmark_symbol_char = 'l'; + static const char camera_symbol_char = 'k'; + + struct Landmark { + Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, const gtsam::Point2 &projection, const gtsam::Point3 &initial_guess=gtsam::Point3::Identity()) + : lmk_factor_symbol(lmk_factor_symbol), cam_pose_symbol(cam_pose_symbol), projection(projection), initial_guess(initial_guess) {}; + const gtsam::Symbol lmk_factor_symbol; + const gtsam::Symbol cam_pose_symbol; + const gtsam::Point2 projection; + const gtsam::Point3 initial_guess; + }; + + Backend(); + Backend(gtsam::Cal3_S2 K); + ~Backend(){}; + + void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value); + void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value); + + void add_landmarks(const std::vector &landmarks); + void add_landmark(const Landmark &landmark); + + void solve_graph(); + const gtsam::Values& get_result() const { + return result_; + }; + private: + gtsam::Values initial_estimate_; + gtsam::Values result_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); +}; +class StructureFromMotion { + public: + StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= gtsam::Pose3::Identity(), + Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); + ~StructureFromMotion(){}; + + void set_initial_pose(gtsam::Pose3 initial_pose); + void add_image(const cv::Mat &img); + void solve_structure() { backend_.get_result(); }; + const gtsam::Values& get_structure_result() { return backend_.get_result(); }; + + Frontend get_frontend() { return frontend_; }; + Backend get_backend() { return backend_; } + size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); }; + private: + std::vector, cv::Mat>> img_keypoints_and_descriptors_; + std::vector> matches_; + std::vector> landmarks_; + size_t landmark_count_; + + Frontend frontend_; + Backend backend_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/visual_odometry_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc similarity index 77% rename from experimental/learn_descriptors/visual_odometry_test.cc rename to experimental/learn_descriptors/structure_from_motion_test.cc index b1803317e..e7b4488fc 100644 --- a/experimental/learn_descriptors/visual_odometry_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -1,4 +1,5 @@ -#include "experimental/learn_descriptors/visual_odometry.hh" +#include "experimental/learn_descriptors/structure_from_motion.hh" +#include "experimental/learn_descriptors/symphony_lake_parser.hh" #include #include @@ -6,7 +7,7 @@ #include "gtest/gtest.h" namespace robot::experimental::learn_descriptors { -TEST(VIO_TEST, frontend_pipeline_sweep) { +TEST(SFM_TEST, frontend_pipeline_sweep) { const size_t width = 640; const size_t height = 480; @@ -109,4 +110,42 @@ TEST(VIO_TEST, frontend_pipeline_sweep) { } } } +TEST(SFM_TEST, structure_from_motion) { + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + DataParser::Generator generator = data_parser.create_img_generator(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + + cv::Mat image; + for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { + const symphony_lake_dataset::Survey &survey = survey_vector.get(i); + for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + image = survey.loadImageByImageIndex(j); + sfm.add_image(image); + } + } + // DataParser::Generator::iterator img_itr = generator.begin(); + // while (img_itr != generator.end()) { + // if ((*img_itr).empty()) { + // continue; + // } + // std::cout << "ooga booga" << std::endl; + // std::cout << "image is empty: " << (*img_itr).empty() << std::endl; + // cv::imshow("ooga", *img_itr); + // cv::waitKey(1000); + // sfm.add_image(*img_itr); + // ++img_itr; + // } + sfm.solve_structure(); + gtsam::Values output = sfm.get_structure_result(); +} } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 5f93ab174..790932fab 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -6,7 +6,8 @@ namespace robot::experimental::learn_descriptors { DataParser::DataParser(const std::filesystem::path &image_root_dir, - const std::vector &survey_list) { + const std::vector &survey_list) : + image_root_dir_(image_root_dir) , survey_list_(survey_list) { ROBOT_CHECK(std::filesystem::exists(image_root_dir), "Image root dir does not exist!", image_root_dir); surveys_.load(image_root_dir.string(), survey_list); diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index 021ebd3f2..a2aa67add 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -1,21 +1,88 @@ #pragma once -#pragma once -#include - #include +#include +#include +#include +#include #include "symphony_lake_dataset/SurveyVector.h" namespace robot::experimental::learn_descriptors { class DataParser { public: + template + struct Generator { + struct promise_type { + T value; + std::suspend_always yield_value(T v) { + value = v; + return {}; + } + std::suspend_always initial_suspend() { return {}; } + std::suspend_always final_suspend() noexcept { return {}; } + Generator get_return_object() { return Generator{handle_type::from_promise(*this)}; } + void return_void() {} + void unhandled_exception() { std::terminate(); } + }; + + using handle_type = std::coroutine_handle; + + Generator(handle_type h) : handle(h) {} + ~Generator() { if (handle) handle.destroy(); } + + struct iterator { + handle_type handle; + bool operator!=(std::default_sentinel_t) const { return !handle.done(); } + iterator& operator++() { handle.resume(); return *this; } + T operator*() const { return handle.promise().value; } + }; + + iterator begin() { return iterator{handle}; } + std::default_sentinel_t end() { return {}; } + + private: + handle_type handle; + }; + + Generator image_generator( + const symphony_lake_dataset::SurveyVector& survey_vector) { + + for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { + const symphony_lake_dataset::Survey &survey = survey_vector.get(i); + for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + co_yield survey.loadImageByImageIndex(j); + } + } + } + DataParser(const std::filesystem::path &image_root_dir, const std::vector &survey_list); ~DataParser(); const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; - + Generator create_img_generator() { return image_generator(surveys_); }; private: + std::filesystem::path image_root_dir_; + std::vector survey_list_; symphony_lake_dataset::SurveyVector surveys_; }; +class SymphonyLakeDatasetTestHelper { + public: + static constexpr const char* test_image_root_dir = "external/symphony_lake_snippet/symphony_lake"; + static constexpr std::array test_survey_list = {"140106_snippet"}; + static bool images_equal(cv::Mat img1, cv::Mat img2) { + if (img1.size() != img2.size() || img1.type() != img2.type()) { + return false; + } + cv::Mat diff; + cv::absdiff(img1, img2, diff); + diff = diff.reshape(1); + return cv::countNonZero(diff) == 0; + } + static DataParser get_test_parser() { return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); }; + static std::filesystem::path get_test_iamge_root_dir() { return std::filesystem::path(test_image_root_dir); }; + static std::vector get_test_survey_list() { + return std::vector(test_survey_list.begin(), test_survey_list.end()); + }; +}; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index e17ff8022..e8169b8da 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -14,24 +14,8 @@ namespace robot::experimental::learn_descriptors { namespace { bool is_test() { return std::getenv("BAZEL_TEST") != nullptr; } } // namespace - -class SymphonyLakeDatasetTestHelper { - public: - static bool images_equal(cv::Mat img1, cv::Mat img2) { - if (img1.size() != img2.size() || img1.type() != img2.type()) { - return false; - } - cv::Mat diff; - cv::absdiff(img1, img2, diff); - diff = diff.reshape(1); - return cv::countNonZero(diff) == 0; - } -}; TEST(SymphonyLakeParserTest, snippet_140106) { - const std::filesystem::path image_root_dir = "external/symphony_lake_snippet/symphony_lake"; - const std::vector survey_list{"140106_snippet"}; - - DataParser data_parser = DataParser(image_root_dir, survey_list); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); @@ -53,15 +37,32 @@ TEST(SymphonyLakeParserTest, snippet_140106) { std::string target_img_name_str = target_img_name.str(); target_img_name_str.replace(0, target_img_name_str.size() - target_img_name_length, ""); std::filesystem::path target_img_dir = - image_root_dir / survey_list[i] / "0027" / target_img_name_str; + SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() / SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" / target_img_name_str; target_img = cv::imread(target_img_dir.string()); EXPECT_TRUE(SymphonyLakeDatasetTestHelper::images_equal(image, target_img)); - if (!is_test()) { - cv::imshow("Symphony Dataset Image", image); - cv::waitKey(2); - } + // if (!is_test()) { + // cv::imshow("Symphony Dataset Image", image); + // cv::waitKey(2); + // } + // cv::imshow("Symphony Dataset Image", image); + // cv::waitKey(200); } } } +TEST(SymphonyLakeParserTest, snippet_140106_generator_test) { + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + DataParser::Generator generator = (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator(); + DataParser::Generator::iterator img_iter = generator.begin(); + while (img_iter != generator.end()) { + cv::Mat img = *img_iter; + // if (!is_test()) { + // cv::imshow("Symphony Dataset Image", img); + // cv::waitKey(200); + // } + // cv::imshow("Symphony Dataset Image", img); + // cv::waitKey(200); + ++img_iter; + } +} } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/visual_odometry.cc b/experimental/learn_descriptors/visual_odometry.cc deleted file mode 100644 index 360473208..000000000 --- a/experimental/learn_descriptors/visual_odometry.cc +++ /dev/null @@ -1,118 +0,0 @@ -#include "experimental/learn_descriptors/visual_odometry.hh" - -namespace robot::experimental::learn_descriptors { -VisualOdometry::VisualOdometry(Frontend::ExtractorType frontend_extractor, - Frontend::MatcherType frontend_matcher) { - frontend_ = Frontend(frontend_extractor, frontend_matcher); -} - -Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { - extractor_type_ = frontend_algorithm; - matcher_type_ = frontend_matcher; - - switch (extractor_type_) { - case ExtractorType::SIFT: - feature_extractor_ = cv::SIFT::create(); - break; - case ExtractorType::ORB: - feature_extractor_ = cv::ORB::create(); - break; - default: - // Error handling needed? - break; - } - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); - break; - case MatcherType::KNN: - descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); - break; - case MatcherType::FLANN: - if (frontend_algorithm == ExtractorType::ORB) { - throw std::invalid_argument("FLANN can not be used with ORB."); - } - descriptor_matcher_ = cv::FlannBasedMatcher::create(); - break; - default: - // Error handling needed? - break; - } -} - -std::pair, cv::Mat> Frontend::get_keypoints_and_descriptors( - const cv::Mat &img) const { - std::vector keypoints; - cv::Mat descriptors; - switch (extractor_type_) { - default: // the opencv extractors have the same function signature - feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); - break; - } - return std::pair, cv::Mat>(keypoints, descriptors); -} - -std::vector Frontend::get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const { - std::vector matches; - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - get_brute_matches(descriptors1, descriptors2, matches); - break; - case MatcherType::KNN: - get_KNN_matches(descriptors1, descriptors2, matches); - break; - case MatcherType::FLANN: - get_FLANN_matches(descriptors1, descriptors2, matches); - break; - default: - break; - } - std::sort(matches.begin(), matches.end()); - return matches; -} - -bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::BRUTE_FORCE) { - return false; - } - matches_out.clear(); - descriptor_matcher_->match(descriptors1, descriptors2, matches_out); - return true; -} - -bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::KNN) { - return false; - } - std::vector> knn_matches; - descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); - const float ratio_thresh = 0.7f; - matches_out.clear(); - for (size_t i = 0; i < knn_matches.size(); i++) { - if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { - matches_out.push_back(knn_matches[i][0]); - } - } - return true; -} - -bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::FLANN) { - return false; - } - std::vector> knn_matches; - descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); - const float ratio_thresh = 0.7f; - matches_out.clear(); - for (size_t i = 0; i < knn_matches.size(); i++) { - if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { - matches_out.push_back(knn_matches[i][0]); - } - } - return true; -} -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/visual_odometry.hh b/experimental/learn_descriptors/visual_odometry.hh deleted file mode 100644 index b64434d2f..000000000 --- a/experimental/learn_descriptors/visual_odometry.hh +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include - -namespace robot::experimental::learn_descriptors { -class Frontend { - public: - enum class ExtractorType { SIFT, ORB }; - enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; - - Frontend(){}; - Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); - ~Frontend(){}; - - ExtractorType get_extractor_type() const { return extractor_type_; }; - MatcherType get_matcher_type() const { return matcher_type_; }; - - std::pair, cv::Mat> get_keypoints_and_descriptors( - const cv::Mat &img) const; - std::vector get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const; - - static void draw_keypoints(const cv::Mat &img, std::vector keypoints, - cv::Mat img_keypoints_out) { - cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), - cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - } - static void draw_matches(const cv::Mat &img1, std::vector keypoints1, - const cv::Mat &img2, std::vector keypoints2, - std::vector matches, cv::Mat img_matches_out) { - cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); - } - - private: - bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - ExtractorType extractor_type_; - MatcherType matcher_type_; - - cv::Ptr feature_extractor_; - cv::Ptr descriptor_matcher_; -}; -class VisualOdometry { - public: - VisualOdometry(Frontend::ExtractorType frontend_extractor, - Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); - ~VisualOdometry(){}; - - private: - cv::Mat prev_image_; - - Frontend frontend_; -}; -} // namespace robot::experimental::learn_descriptors \ No newline at end of file From fc37811b061bbee9cd22e51f66babeba54470e02 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 13 Dec 2024 18:51:21 -0500 Subject: [PATCH 02/45] Uploading progress. Need to approach initial values better to avoid landmark not in camera error --- .../structure_from_motion.cc | 20 ++++++++++++++----- .../structure_from_motion.hh | 3 ++- .../structure_from_motion_test.cc | 8 ++++++-- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 544b0728e..76142ada2 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -4,8 +4,10 @@ namespace robot::experimental::learn_descriptors { StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose, Frontend::MatcherType frontend_matcher) { frontend_ = Frontend(frontend_extractor, frontend_matcher); - set_initial_pose(initial_pose); backend_ = Backend(K); + + set_initial_pose(initial_pose); + // backend_.get_current_initial_values().print("Ooga: "); } void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { @@ -27,7 +29,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) { for (const cv::DMatch match : matches) { Backend::Landmark landmark_cam_1( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()-1), + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), gtsam::Point2( static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) @@ -35,7 +37,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) { ); Backend::Landmark landmark_cam_2( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()), + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y) @@ -223,11 +225,15 @@ Backend::Backend(gtsam::Cal3_S2 K) { void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) { graph_.emplace_shared>(symbol, value, pose_noise_); initial_estimate_.insert(symbol, value); + // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; + // initial_estimate_.print("values after adding prior: "); } void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value) { graph_.emplace_shared>(symbol_1, symbol_2, value, pose_noise_); - initial_estimate_.insert(symbol_2, initial_estimate_.at(symbol_2).compose(value)); + // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << std::endl; + // initial_estimate_.print("values when adding between factor: "); + initial_estimate_.insert(symbol_2, initial_estimate_.at(symbol_1).compose(value)); } void Backend::add_landmarks(const std::vector &landmarks) { @@ -240,7 +246,11 @@ void Backend::add_landmark(const Landmark &landmark) { graph_.emplace_shared>( landmark.projection, measurement_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0) ); - initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess); + if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) { + initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess); + } else { + initial_estimate_.update(landmark.lmk_factor_symbol, landmark.initial_guess); + } } void Backend::solve_graph() { diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index a2185a909..7c2c859aa 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -87,6 +87,7 @@ class Backend { void add_landmark(const Landmark &landmark); void solve_graph(); + const gtsam::Values& get_current_initial_values() const { return initial_estimate_; }; const gtsam::Values& get_result() const { return result_; }; @@ -106,7 +107,7 @@ class StructureFromMotion { void set_initial_pose(gtsam::Pose3 initial_pose); void add_image(const cv::Mat &img); - void solve_structure() { backend_.get_result(); }; + void solve_structure() { backend_.solve_graph(); }; const gtsam::Values& get_structure_result() { return backend_.get_result(); }; Frontend get_frontend() { return frontend_; }; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index e7b4488fc..b7d617b59 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -122,7 +122,7 @@ TEST(SFM_TEST, structure_from_motion) { StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - DataParser::Generator generator = data_parser.create_img_generator(); + // DataParser::Generator generator = data_parser.create_img_generator(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); cv::Mat image; @@ -146,6 +146,10 @@ TEST(SFM_TEST, structure_from_motion) { // ++img_itr; // } sfm.solve_structure(); - gtsam::Values output = sfm.get_structure_result(); + // gtsam::Values output = sfm.get_structure_result(); + // output.print("result values: "); + // for (size_t i = 0; i < sfm.get_num_images_added(); i++) { + + // } } } // namespace robot::experimental::learn_descriptors From 166d40d824698b16a5e766f7a931d0c06c2f6edc Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Mon, 23 Dec 2024 14:54:08 -0500 Subject: [PATCH 03/45] uploading progress --- experimental/learn_descriptors/BUILD | 3 ++- .../learn_descriptors/symphony_lake_parser.cc | 19 +++++++++++++++++++ .../learn_descriptors/symphony_lake_parser.hh | 9 +++++++-- .../symphony_lake_parser_test.cc | 5 ++++- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 91088abdf..4c203ade2 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -40,7 +40,8 @@ cc_library( srcs = ["symphony_lake_parser.cc"], deps = [ "@symphony_lake_parser", - "//common:check" + "//common:check", + "@eigen" ] ) diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 790932fab..c2c134273 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -13,4 +13,23 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir, surveys_.load(image_root_dir.string(), survey_list); } DataParser::~DataParser() {} + +Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, bool use_compass) { + const symphony_lake_dataset::ImagePoint img_point = surveys_.get(survey_idx).getImagePoint(img_idx); + Eigen::Vector3d t; + Eigen::Matrix3d R; + Eigen::Affine3d T_world_camera; + if (use_gps) { + t[0] = img_point.x; + t[1] = img_point.y; + } + // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core) + double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan; + Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY())); + Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity(); + Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ())); + R = R_x * R_y * R_z; + T_world_camera = t * R; + return T_world_camera; +} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index a2aa67add..6dc74c7d3 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -5,8 +5,11 @@ #include #include +#include "symphony_lake_dataset/ImagePoint.h" #include "symphony_lake_dataset/SurveyVector.h" +#include "Eigen/Dense" + namespace robot::experimental::learn_descriptors { class DataParser { public: @@ -53,12 +56,14 @@ class DataParser { co_yield survey.loadImageByImageIndex(j); } } - } + } DataParser(const std::filesystem::path &image_root_dir, const std::vector &survey_list); ~DataParser(); + Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, bool use_compass = false); + const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; Generator create_img_generator() { return image_generator(surveys_); }; private: @@ -78,7 +83,7 @@ class SymphonyLakeDatasetTestHelper { cv::absdiff(img1, img2, diff); diff = diff.reshape(1); return cv::countNonZero(diff) == 0; - } + }; static DataParser get_test_parser() { return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); }; static std::filesystem::path get_test_iamge_root_dir() { return std::filesystem::path(test_image_root_dir); }; static std::vector get_test_survey_list() { diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index e8169b8da..5ede85062 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -27,7 +27,10 @@ TEST(SymphonyLakeParserTest, snippet_140106) { printf("Press 'q' in graphic window to quit\n"); for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { const symphony_lake_dataset::Survey &survey = survey_vector.get(i); - for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + // img_point contains all the csv data entries + const symphony_lake_dataset::ImagePoint img_point = survey.getImagePoint(j); + (void)img_point; // suppress unused variable image = survey.loadImageByImageIndex(j); // get the target image From aa4a77a74e8ee7e83befacc960a861b4ca80af2e Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 21 Jan 2025 17:38:03 -0800 Subject: [PATCH 04/45] Uploading progress --- experimental/learn_descriptors/BUILD | 3 +- .../structure_from_motion.cc | 120 +++++++++++++++++- .../structure_from_motion.hh | 24 +++- .../structure_from_motion_test.cc | 85 ++++++++++++- .../learn_descriptors/symphony_lake_parser.cc | 5 +- 5 files changed, 223 insertions(+), 14 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 4c203ade2..a9d8e2224 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -24,7 +24,8 @@ cc_library( deps = [ "@opencv//:opencv", "@gtsam//:gtsam", - "@eigen" + "@eigen", + "@nlohmann_json//:json", ] ) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 76142ada2..87c82864b 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -1,6 +1,54 @@ #include "experimental/learn_descriptors/structure_from_motion.hh" +#include +#include + +namespace fs = std::filesystem; namespace robot::experimental::learn_descriptors { + +const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() { + Eigen::Affine3d transform = Eigen::Affine3d::Identity(); + transform.translate(Eigen::Vector3d::Zero()); + transform.rotate(Eigen::Matrix3d( + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1,0,0)) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0,1,0))) + ); + + // std::stringstream ss; + // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()), gtsam::Point3(transform.translation())); + // json json_obj; + // json_obj["T_symlake_boat_cam"] = ss.str(); + // fs::path output_dir = "output"; + // fs::path output_file = output_dir / "output_sfm.json"; + // if (!fs::exists(output_dir)) { + // fs::create_directory(output_dir); + // } + // std::ofstream file(output_file); + // if (file.is_open()) { + // file << json_obj.dump(4); + // file.close(); + // } + + return transform; +}(); + +std::string pose_to_string(gtsam::Pose3 pose) { + std::stringstream ss; + ss << pose; + return ss.str(); +} + +const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3( + gtsam::Rot3(T_symlake_boat_cam.rotation()), + T_symlake_boat_cam.translation()); +// gtsam::Pose3( +// gtsam::Rot3(Eigen::Matrix3d( +// Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() * +// Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 1, 0)).toRotationMatrix() +// )), +// gtsam::Point3::Identity() +// ); + StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose, Frontend::MatcherType frontend_matcher) { frontend_ = Frontend(frontend_extractor, frontend_matcher); @@ -22,18 +70,46 @@ void StructureFromMotion::add_image(const cv::Mat &img) { Frontend::enforce_bijective_matches(matches); matches_.push_back(matches); + gtsam::Pose3 between_value = gtsam::Pose3::Identity(); backend_.add_between_factor( gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), - gtsam::Pose3::Identity()); + between_value); + gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0,0,1)); + gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1)); + gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added())); + int count = 0; for (const cv::DMatch match : matches) { + if (count < 1) { + std::cout << "hello???" << std::endl; + json json_obj; + json_obj["add_image_1"] = { + {"T_world_cam1", pose_to_string(T_world_cam1)}, + {"T_world_cam2", pose_to_string(T_world_cam1)}, + {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)}, + {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)} + }; + fs::path output_dir = "output"; + fs::path output_file = output_dir / "output_sfm.json"; + if (!fs::exists(output_dir)) { + fs::create_directory(output_dir); + } + std::ofstream file(output_file); + if (file.is_open()) { + file << json_obj.dump(4); + file.close(); + } + } + count++; + std::cout << "T_world_landmark" << T_world_cam1 * T_cam_landmark << std::endl; Backend::Landmark landmark_cam_1( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), gtsam::Point2( static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) - ) + ), + (T_world_cam1 * T_cam_landmark).translation() ); Backend::Landmark landmark_cam_2( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), @@ -41,14 +117,19 @@ void StructureFromMotion::add_image(const cv::Mat &img) { gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y) - ) + ), + (T_world_cam2 * T_cam_landmark).translation() ); - landmark_count_++; landmarks_[get_num_images_added()-1].push_back(landmark_cam_1); landmarks_[get_num_images_added()].push_back(landmark_cam_2); backend_.add_landmark(landmark_cam_1); backend_.add_landmark(landmark_cam_2); - } + landmark_count_++; + } + std::cout << "number of landmarks: " << count << std::endl; + std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl; + backend_.get_current_initial_values().print("Current initial values: "); + std::cout << "landmark_count_: " << landmark_count_ << std::endl; } img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); } @@ -257,4 +338,33 @@ void Backend::solve_graph() { gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); result_ = optimizer.optimize(); } + +// json SFM_Logger::gtsam_pose3_to_json(const gtsam::Pose3 &pose) { +// json json_obj; +// json_obj = { +// {}, + +// }; + +// return json_obj; +// } + +// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path &output_path) { +// json json_obj; +// json_obj["add_image_1"] = { +// {"T_world_cam1", pose_to_string(T_world_cam1)}, +// {"T_world_cam2", pose_to_string(T_world_cam1)}, +// {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)}, +// {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)} +// }; +// if (!fs::exists(output_path)) { +// fs::create_directory(output_path); +// } +// std::ofstream file(output_file); +// if (file.is_open()) { +// file << json_obj.dump(4); +// file.close(); +// } +// } + } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 7c2c859aa..b2bbfdefa 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -18,6 +18,10 @@ #include #include +#include "nlohmann/json.hpp" + +using json = nlohmann::json; + namespace robot::experimental::learn_descriptors { class Frontend { public: @@ -63,9 +67,9 @@ class Frontend { }; class Backend { public: - static const char pose_symbol_char = 'x'; - static const char landmark_symbol_char = 'l'; - static const char camera_symbol_char = 'k'; + static constexpr char pose_symbol_char = 'x'; + static constexpr char landmark_symbol_char = 'l'; + static constexpr char camera_symbol_char = 'k'; struct Landmark { Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, const gtsam::Point2 &projection, const gtsam::Point3 &initial_guess=gtsam::Point3::Identity()) @@ -101,7 +105,9 @@ class Backend { }; class StructureFromMotion { public: - StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= gtsam::Pose3::Identity(), + static const Eigen::Affine3d T_symlake_boat_cam; + static const gtsam::Pose3 default_initial_pose; + StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= default_initial_pose, Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); ~StructureFromMotion(){}; @@ -113,13 +119,21 @@ class StructureFromMotion { Frontend get_frontend() { return frontend_; }; Backend get_backend() { return backend_; } size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); }; + size_t get_landmark_count() { return landmark_count_; }; private: std::vector, cv::Mat>> img_keypoints_and_descriptors_; std::vector> matches_; std::vector> landmarks_; - size_t landmark_count_; + size_t landmark_count_ = 0; Frontend frontend_; Backend backend_; }; +// class SFM_Logger { +// public: +// // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose); +// static const json eigen_mat_to_json(const Eigen::Matrix &mat); +// static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose); +// static const void values_to_json(const gtsam::Values &values, const std::filesystem::path &file); +// }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index b7d617b59..c1e1266ec 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -110,6 +110,85 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { } } } + +TEST(SFM_TEST, frontend_snippet_two) { + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180); + const cv::Mat image_1 = survey.loadImageByImageIndex(180); + const cv::Mat image_2 = survey.loadImageByImageIndex(185); + + const size_t img_width = image_point.width, img_height = image_point.height; + const double fx = image_point.fx, fy = image_point.fy; + const double cx = image_point.cx, cy = image_point.cy; + gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); + Frontend frontend = sfm.get_frontend(); + + cv::Mat img_keypoints_out_1(img_height, img_width, CV_8UC3), + img_keypoints_out_2(img_height, img_width, CV_8UC3), img_matches_out(img_height, 2 * img_width, CV_8UC3); + cv::Mat img_display_test; + + std::pair, cv::Mat>keypts_descriptors_1 = frontend.get_keypoints_and_descriptors(image_1); + std::pair, cv::Mat>keypts_descriptors_2 = frontend.get_keypoints_and_descriptors(image_2); + std::cout << "num of keypoints " << keypts_descriptors_1.first.size() << std::endl; + frontend.draw_keypoints(image_1, keypts_descriptors_1.first, + img_keypoints_out_1); + frontend.draw_keypoints(image_2, keypts_descriptors_2.first, + img_keypoints_out_2); + std::vector matches = frontend.get_matches(keypts_descriptors_1.second, keypts_descriptors_2.second); + frontend.draw_matches(image_1, keypts_descriptors_1.first, image_2, + keypts_descriptors_2.first, matches, img_matches_out); + + cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + cv::vconcat(img_display_test, img_matches_out, img_display_test); + cv::Mat og_images; + cv::hconcat(image_1, image_2, og_images); + cv::vconcat(og_images, img_display_test, img_display_test); + std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl; + cv::imshow("Keypoints and Matches Output.", img_display_test); + std::cout << "Press spacebar to pause." << std::endl; + while (cv::waitKey(1000) != 32) {} +} + +TEST(SFM_TEST, sfm_snippet_two) { + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180); + const std::vector images = { + survey.loadImageByImageIndex(180), + survey.loadImageByImageIndex(185) + }; + + // const size_t img_width = image_point.width, img_height = image_point.height; + const double fx = image_point.fx, fy = image_point.fy; + const double cx = image_point.cx, cy = image_point.cy; + gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); + + std::cout << "sfm.landmark_count_: " << sfm.get_landmark_count() << std::endl; + + for (const cv::Mat &image : images) { + sfm.add_image(image); + } + + // sfm.solve_structure(); + + // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + // cv::vconcat(img_display_test, img_matches_out, img_display_test); + // cv::Mat og_images; + // cv::hconcat(image_1, image_2, og_images); + // cv::vconcat(og_images, img_display_test, img_display_test); + // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl; + // cv::imshow("Keypoints and Matches Output.", img_display_test); + // std::cout << "Press spacebar to pause." << std::endl; + // while (cv::waitKey(1000) != 32) {} +} + TEST(SFM_TEST, structure_from_motion) { const size_t img_width = 640; const size_t img_height = 480; @@ -128,7 +207,11 @@ TEST(SFM_TEST, structure_from_motion) { cv::Mat image; for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { const symphony_lake_dataset::Survey &survey = survey_vector.get(i); - for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + // for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + // image = survey.loadImageByImageIndex(j); + // sfm.add_image(image); + // } + for (int j = 0; j < 2; j++) { image = survey.loadImageByImageIndex(j); sfm.add_image(image); } diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index c2c134273..5f210578b 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -28,8 +28,9 @@ Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY())); Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity(); Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ())); - R = R_x * R_y * R_z; - T_world_camera = t * R; + R = R_x * R_y * R_z; + T_world_camera.translate(t); + T_world_camera.rotate(R); return T_world_camera; } } // namespace robot::experimental::learn_descriptors \ No newline at end of file From d14b504a4235ee5a938f0e92d9a7d51af5ef9fd0 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 23 Jan 2025 12:10:31 -0800 Subject: [PATCH 05/45] including matrix_to_proto as a dependency doesn't work for some reason --- experimental/learn_descriptors/BUILD | 11 +- experimental/learn_descriptors/gtsam_test.cc | 17 ++- experimental/learn_descriptors/opencv_viz.cc | 119 +++++++++++++++ .../learn_descriptors/opencv_viz_example.cc | 83 +++++++++++ .../structure_from_motion.cc | 12 +- .../structure_from_motion_test.cc | 139 +++++++++++------- 6 files changed, 322 insertions(+), 59 deletions(-) create mode 100644 experimental/learn_descriptors/opencv_viz.cc create mode 100644 experimental/learn_descriptors/opencv_viz_example.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index a9d8e2224..83a43f8d1 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -55,7 +55,8 @@ cc_test( deps = [ "@com_google_googletest//:gtest_main", ":symphony_lake_parser", - ":structure_from_motion" + ":structure_from_motion", + "//common/math:matrix_to_proto" ] ) @@ -80,3 +81,11 @@ cc_test( ":learn_descriptors" ] ) + +cc_binary( + name = "opencv_viz_example", + srcs = ["opencv_viz_example.cc"], + deps = [ + "@opencv//:opencv" + ] +) diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc index 53ee67b49..82ac8bf6a 100644 --- a/experimental/learn_descriptors/gtsam_test.cc +++ b/experimental/learn_descriptors/gtsam_test.cc @@ -87,7 +87,7 @@ TEST(GtsamTesting, gtsam_simple_cube) { gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); gtsam::Values result = optimizer.optimize(); - // result.print("Optimized Results:\n"); + result.print("Optimized Results:\n"); constexpr double TOL = 1e-3; for (size_t i = 0; i < poses.size(); i++) { @@ -103,5 +103,20 @@ TEST(GtsamTesting, gtsam_simple_cube) { EXPECT_NEAR(cube_W[i][1], result_point[1], TOL); EXPECT_NEAR(cube_W[i][2], result_point[2], TOL); } + + // // Setup + // const Eigen::Matrix3d in{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}; + + // // Action + // proto::Matrix proto; + // pack_into(in, &proto); + // const Eigen::Matrix3d out = unpack_from(proto); + + // // Verification + // EXPECT_EQ(in.rows(), out.rows()); + // EXPECT_EQ(in.cols(), out.cols()); + // for (int i = 0; i < in.rows(); i++) { + // EXPECT_EQ(in(i), out(i)); + // } } } // namespace robot::experimental::gtsam_testing \ No newline at end of file diff --git a/experimental/learn_descriptors/opencv_viz.cc b/experimental/learn_descriptors/opencv_viz.cc new file mode 100644 index 000000000..f61dd508d --- /dev/null +++ b/experimental/learn_descriptors/opencv_viz.cc @@ -0,0 +1,119 @@ +#include + +#include "eigen3/Eigen/Core" +#include "opencv2/viz.hpp" + +namespace opencv_viz { +template +cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase& eigenMatrix) { + int cvType; + if constexpr (std::is_same_v) { + cvType = CV_64F; // 64-bit floating point + } else if constexpr (std::is_same_v) { + cvType = CV_32F; // 32-bit floating point + } else if constexpr (std::is_same_v) { + cvType = CV_32S; // 32-bit signed integer + } else { + static_assert(!std::is_same_v, "Unsupported data type in Eigen matrix"); + } + return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, const_cast(static_cast(eigenMatrix.derived().data()))).clone(); +} +cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { + // Ensure R is a valid rotation matrix + CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001); + + double trace = R(0, 0) + R(1, 1) + R(2, 2); + double theta = std::acos((trace - 1) / 2.0); + + if (std::abs(theta) < 1e-6) { + return cv::Vec3d(0, 0, 0); + } + + double sinTheta = std::sin(theta); + cv::Vec3d axis( + (R(2, 1) - R(1, 2)) / (2.0 * sinTheta), + (R(0, 2) - R(2, 0)) / (2.0 * sinTheta), + (R(1, 0) - R(0, 1)) / (2.0 * sinTheta) + ); + + return axis * theta; +} +void viz_scene(const std::vector &poses, const std::vector &points) { + cv::viz::Viz3d window("Viz Scene"); + + std::vector cv_poses; + constexpr double pose_size = 0.2; + for (unsigned int i = 0; i < poses.size(); i++) { + const Eigen::MatrixXd &pose = poses[i]; + cv::Affine3d cv_pose( + eigen_to_cv_mat(pose.block(0,0,3,3)) , + cv::Affine3d::Vec3{pose(0,3), pose(1,3), pose(2,3)} + ); + window.showWidget( + "pose_" + std::to_string(i), + cv::viz::WCoordinateSystem(pose_size), + cv_pose + ); + } + constexpr double point_radius = 0.2; + constexpr int sphere_res = 10; + const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); + for (unsigned int i = 0; i < points.size(); i++) { + const Eigen::Vector3d &point = points[i]; + window.showWidget( + "translation_" + std::to_string(i), + cv::viz::WSphere( + cv::Point3d(point[0], point[1], point[2]), + point_radius, + sphere_res, + point_color + ) + ); + } + + window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + + constexpr unsigned int num_cells = 6; + constexpr double units = 1.0; + const cv::viz::Color color = cv::viz::Color::white(); + const cv::Vec2i cells = cv::Vec2i::all(num_cells); + const cv::Vec2d cell_sizes = cv::Vec2d::all(units); + window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color)); + window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(1,0,0), cv::Point3d(0,0,1), cells, cell_sizes, color)); + window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(0,1,0), cv::Point3d(0,0,1), cells, cell_sizes, color)); + window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50))); + + constexpr bool ALWAYS_FACE_CAMERA = true; + constexpr double TEXT_SCALE = 0.1; + window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3), + TEXT_SCALE, ALWAYS_FACE_CAMERA)); + + constexpr bool FIXED_TEXT = false; + cv::Affine3d world_from_fixed_text( + cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation + {0.2, 0.4, 0.6} // translation + ); + + window.showWidget( + "text_3d_fixed", + cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT), + world_from_fixed_text + ); + constexpr double COORD_SCALE = 0.2; + window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), world_from_fixed_text); + + constexpr double CIRCLE_RADIUS_M = 0.5; + cv::Affine3d world_from_circle( + cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation + {0.0, 0.0, 1.5} // translation + ); + window.showWidget( + "circle", + cv::viz::WCircle(CIRCLE_RADIUS_M), + world_from_circle + ); + + window.spin(); +} +} + diff --git a/experimental/learn_descriptors/opencv_viz_example.cc b/experimental/learn_descriptors/opencv_viz_example.cc new file mode 100644 index 000000000..c37c3ba4f --- /dev/null +++ b/experimental/learn_descriptors/opencv_viz_example.cc @@ -0,0 +1,83 @@ +#include "opencv2/viz.hpp" +#include "opencv_viz.cc" +#include "eigen/Eigen/Geometry" + +void demo() { + cv::viz::Viz3d window("My Window"); + + window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + + constexpr unsigned int num_cells = 6; + constexpr double units = 1.0; + const cv::viz::Color color = cv::viz::Color::white(); + const cv::Vec2i cells = cv::Vec2i::all(num_cells); + const cv::Vec2d cell_sizes = cv::Vec2d::all(units); + window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color)); + window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(1,0,0), cv::Point3d(0,0,1), cells, cell_sizes, color)); + window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(0,1,0), cv::Point3d(0,0,1), cells, cell_sizes, color)); + window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50))); + constexpr bool ALWAYS_FACE_CAMERA = true; + constexpr double TEXT_SCALE = 0.1; + window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3), + TEXT_SCALE, ALWAYS_FACE_CAMERA)); + + constexpr bool FIXED_TEXT = false; + cv::Affine3d world_from_fixed_text( + cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation + {0.2, 0.4, 0.6} // translation + ); + + window.showWidget( + "text_3d_fixed", + cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT), + world_from_fixed_text + ); + constexpr double COORD_SCALE = 0.2; + window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), world_from_fixed_text); + + constexpr double CIRCLE_RADIUS_M = 0.5; + cv::Affine3d world_from_circle( + cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation + {0.0, 0.0, 1.5} // translation + ); + window.showWidget( + "circle", + cv::viz::WCircle(CIRCLE_RADIUS_M), + world_from_circle + ); + + window.spin(); +} + +void demo_cube() { + std::vector cube_W; + float cube_size = 1.0f; + cube_W.push_back(Eigen::Vector3d(0, 0, 0)); + cube_W.push_back(Eigen::Vector3d(cube_size, 0, 0)); + cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, 0)); + cube_W.push_back(Eigen::Vector3d(0, cube_size, 0)); + cube_W.push_back(Eigen::Vector3d(0, 0, cube_size)); + cube_W.push_back(Eigen::Vector3d(cube_size, 0, cube_size)); + cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); + cube_W.push_back(Eigen::Vector3d(0, cube_size, cube_size)); + + std::vector poses; + + Eigen::Matrix3d rotation0( + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + cv::Affine3d pose0(opencv_viz::eigen_to_cv_mat(rotation0), cv::Affine3d::Vec3{4, 0, 0}); + + // Eigen::Matrix3d + // cv::Affine3d pose1( + // opencv_viz::eigen_to_cv_mat(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0), + // Eigen::Vector3d(0, 4, 0)); + // gtsam::PinholeCamera camera1(pose1, *K); + // graph.emplace_shared>(gtsam::Symbol('x', 1), pose1, poseNoise); + // poses.push_back(pose1); + // cameras.push_back(camera1); +} + +int main() { + demo(); +} diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 87c82864b..d19634ef1 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -81,7 +81,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) { int count = 0; for (const cv::DMatch match : matches) { if (count < 1) { - std::cout << "hello???" << std::endl; + // std::cout << "hello???" << std::endl; json json_obj; json_obj["add_image_1"] = { {"T_world_cam1", pose_to_string(T_world_cam1)}, @@ -101,7 +101,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) { } } count++; - std::cout << "T_world_landmark" << T_world_cam1 * T_cam_landmark << std::endl; + // std::cout << "T_world_landmark" << T_world_cam1 * T_cam_landmark << std::endl; Backend::Landmark landmark_cam_1( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), @@ -126,10 +126,10 @@ void StructureFromMotion::add_image(const cv::Mat &img) { backend_.add_landmark(landmark_cam_2); landmark_count_++; } - std::cout << "number of landmarks: " << count << std::endl; - std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl; - backend_.get_current_initial_values().print("Current initial values: "); - std::cout << "landmark_count_: " << landmark_count_ << std::endl; + // std::cout << "number of landmarks: " << count << std::endl; + // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl; + // backend_.get_current_initial_values().print("Current initial values: "); + // std::cout << "landmark_count_: " << landmark_count_ << std::endl; } img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); } diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index c1e1266ec..20a034e0d 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -1,5 +1,6 @@ #include "experimental/learn_descriptors/structure_from_motion.hh" #include "experimental/learn_descriptors/symphony_lake_parser.hh" +#include "common/math/matrix_to_proto.hh" #include #include @@ -142,15 +143,15 @@ TEST(SFM_TEST, frontend_snippet_two) { frontend.draw_matches(image_1, keypts_descriptors_1.first, image_2, keypts_descriptors_2.first, matches, img_matches_out); - cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - cv::vconcat(img_display_test, img_matches_out, img_display_test); - cv::Mat og_images; - cv::hconcat(image_1, image_2, og_images); - cv::vconcat(og_images, img_display_test, img_display_test); - std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl; - cv::imshow("Keypoints and Matches Output.", img_display_test); - std::cout << "Press spacebar to pause." << std::endl; - while (cv::waitKey(1000) != 32) {} + // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + // cv::vconcat(img_display_test, img_matches_out, img_display_test); + // cv::Mat og_images; + // cv::hconcat(image_1, image_2, og_images); + // cv::vconcat(og_images, img_display_test, img_display_test); + // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl; + // cv::imshow("Keypoints and Matches Output.", img_display_test); + // std::cout << "Press spacebar to pause." << std::endl; + // while (cv::waitKey(1000) != 32) {} } TEST(SFM_TEST, sfm_snippet_two) { @@ -176,6 +177,42 @@ TEST(SFM_TEST, sfm_snippet_two) { sfm.add_image(image); } + gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); + std::vector poses; + std::vector protos; + for (size_t i = 0; i < images.size(); i++) { + gtsam::Pose3 pose = initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); + Eigen::MatrixXd eigen_affine(4,4); + gtsam::Matrix3 rot_mat = pose.rotation().matrix(); + gtsam::Point3 translation = pose.translation(); + for (int j = 0; j < 3; j++) { + for (int k = 0; k < 3; k++) { + eigen_affine(j,k) = rot_mat(j,k); + } + eigen_affine(3,j) = translation(j); + } + eigen_affine(3,3) = 1; + poses.push_back(eigen_affine); + protos.emplace_shared(proto::Matrix()); + pack_into(eigen_affine, &protos.back()); + } + std::cout << poses.front() << std::endl; + + + // const Eigen::Vector3d in(1.0, 2.0, 3.0); + // // Action + + // // proto::Matrix proto; + // // pack_into(in, &proto); + // // const Eigen::Vector3d out = unpack_from(proto); + + // // const Eigen::VectorXd in{{1.0, 2.0, 3.0}}; + + // // // Action + // // proto::Matrix proto; + // // pack_into(in, &proto); + // // const Eigen::VectorXd out = unpack_from(proto); + // sfm.solve_structure(); // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); @@ -189,50 +226,50 @@ TEST(SFM_TEST, sfm_snippet_two) { // while (cv::waitKey(1000) != 32) {} } -TEST(SFM_TEST, structure_from_motion) { - const size_t img_width = 640; - const size_t img_height = 480; - const double fx = 500.0; - const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; +// TEST(SFM_TEST, structure_from_motion) { +// const size_t img_width = 640; +// const size_t img_height = 480; +// const double fx = 500.0; +// const double fy = fx; +// const double cx = img_width / 2.0; +// const double cy = img_height / 2.0; - gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); +// gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - // DataParser::Generator generator = data_parser.create_img_generator(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); +// StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); +// DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); +// // DataParser::Generator generator = data_parser.create_img_generator(); +// const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - cv::Mat image; - for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { - const symphony_lake_dataset::Survey &survey = survey_vector.get(i); - // for (int j = 0; j < static_cast(survey.getNumImages()); j++) { - // image = survey.loadImageByImageIndex(j); - // sfm.add_image(image); - // } - for (int j = 0; j < 2; j++) { - image = survey.loadImageByImageIndex(j); - sfm.add_image(image); - } - } - // DataParser::Generator::iterator img_itr = generator.begin(); - // while (img_itr != generator.end()) { - // if ((*img_itr).empty()) { - // continue; - // } - // std::cout << "ooga booga" << std::endl; - // std::cout << "image is empty: " << (*img_itr).empty() << std::endl; - // cv::imshow("ooga", *img_itr); - // cv::waitKey(1000); - // sfm.add_image(*img_itr); - // ++img_itr; - // } - sfm.solve_structure(); - // gtsam::Values output = sfm.get_structure_result(); - // output.print("result values: "); - // for (size_t i = 0; i < sfm.get_num_images_added(); i++) { +// cv::Mat image; +// for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { +// const symphony_lake_dataset::Survey &survey = survey_vector.get(i); +// // for (int j = 0; j < static_cast(survey.getNumImages()); j++) { +// // image = survey.loadImageByImageIndex(j); +// // sfm.add_image(image); +// // } +// for (int j = 0; j < 2; j++) { +// image = survey.loadImageByImageIndex(j); +// sfm.add_image(image); +// } +// } +// // DataParser::Generator::iterator img_itr = generator.begin(); +// // while (img_itr != generator.end()) { +// // if ((*img_itr).empty()) { +// // continue; +// // } +// // std::cout << "ooga booga" << std::endl; +// // std::cout << "image is empty: " << (*img_itr).empty() << std::endl; +// // cv::imshow("ooga", *img_itr); +// // cv::waitKey(1000); +// // sfm.add_image(*img_itr); +// // ++img_itr; +// // } +// sfm.solve_structure(); +// // gtsam::Values output = sfm.get_structure_result(); +// // output.print("result values: "); +// // for (size_t i = 0; i < sfm.get_num_images_added(); i++) { - // } -} +// // } +// } } // namespace robot::experimental::learn_descriptors From da30081faaa1cc33193310c4b25b793d0d3f0305 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Wed, 5 Feb 2025 15:03:16 -0500 Subject: [PATCH 06/45] progress --- common/geometry/BUILD | 28 +++ common/geometry/opencv_viz.cc | 79 +++++++ common/geometry/opencv_viz.hh | 11 + common/geometry/opencv_viz_example.cc | 46 ++++ common/geometry/opencv_viz_test.cc | 89 ++++++++ experimental/learn_descriptors/BUILD | 3 +- experimental/learn_descriptors/gtsam_test.cc | 94 +++++++- experimental/learn_descriptors/opencv_viz.cc | 84 +++---- .../learn_descriptors/opencv_viz_example.cc | 45 ++-- .../structure_from_motion.cc | 176 ++++++++------- .../structure_from_motion.hh | 120 +++++----- .../structure_from_motion_test.cc | 210 ++++++++++++------ .../learn_descriptors/symphony_lake_parser.cc | 18 +- .../learn_descriptors/symphony_lake_parser.hh | 45 ++-- .../symphony_lake_parser_test.cc | 13 +- 15 files changed, 742 insertions(+), 319 deletions(-) create mode 100644 common/geometry/opencv_viz.cc create mode 100644 common/geometry/opencv_viz.hh create mode 100644 common/geometry/opencv_viz_example.cc create mode 100644 common/geometry/opencv_viz_test.cc diff --git a/common/geometry/BUILD b/common/geometry/BUILD index 8468ebe13..0a28b1e20 100644 --- a/common/geometry/BUILD +++ b/common/geometry/BUILD @@ -18,3 +18,31 @@ cc_test( "@com_google_googletest//:gtest_main", ] ) + +cc_library( + name = "opencv_viz", + hdrs = ["opencv_viz.hh"], + srcs = ["opencv_viz.cc"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + "@opencv//:opencv" + ] +) + +cc_test( + name = "opencv_viz_test", + srcs = ["opencv_viz_test.cc"], + deps = [ + ":opencv_viz", + "@com_google_googletest//:gtest_main", + ] +) + +cc_binary( + name = "opencv_viz_example", + srcs = ["opencv_viz_example.cc"], + deps = [ + "@opencv//:opencv" + ] +) diff --git a/common/geometry/opencv_viz.cc b/common/geometry/opencv_viz.cc new file mode 100644 index 000000000..48675e239 --- /dev/null +++ b/common/geometry/opencv_viz.cc @@ -0,0 +1,79 @@ +#include "common/geometry/opencv_viz.hh" + +#include + +namespace robot::geometry::opencv_viz { +template +cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase &eigenMatrix) { + int cvType; + if constexpr (std::is_same_v) { + cvType = CV_64F; + } else if constexpr (std::is_same_v) { + cvType = CV_32F; + } else if constexpr (std::is_same_v) { + cvType = CV_32S; + } else { + static_assert(!std::is_same_v, "Unsupported data type in Eigen matrix"); + } + return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, + const_cast(static_cast(eigenMatrix.derived().data()))) + .clone(); +} +cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { + // Ensure R is a valid rotation matrix + CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001); + + double trace = R(0, 0) + R(1, 1) + R(2, 2); + double theta = std::acos((trace - 1) / 2.0); + + if (std::abs(theta) < 1e-6) { + return cv::Vec3d(0, 0, 0); + } + + double sinTheta = std::sin(theta); + cv::Vec3d axis((R(2, 1) - R(1, 2)) / (2.0 * sinTheta), (R(0, 2) - R(2, 0)) / (2.0 * sinTheta), + (R(1, 0) - R(0, 1)) / (2.0 * sinTheta)); + + return axis * theta; +} + +void viz_scene(const std::vector &poses, + const std::vector &points, const bool show_grid) { + cv::viz::Viz3d window("Viz Scene"); + + constexpr double pose_size = .5; + for (unsigned int i = 0; i < poses.size(); i++) { + cv::Affine3d cv_pose(eigen_to_cv_mat(Eigen::Matrix4d(poses[i].matrix().transpose()))); + window.showWidget("rigid_transform_" + std::to_string(i), + cv::viz::WCoordinateSystem(pose_size), cv_pose); + } + constexpr double point_radius = 0.08; + constexpr int sphere_res = 10; + const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); + for (unsigned int i = 0; i < points.size(); i++) { + const Eigen::Vector3d &point = points[i]; + window.showWidget("point_" + std::to_string(i), + cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius, + sphere_res, point_color)); + } + + window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + + if (show_grid) { + constexpr unsigned int num_cells = 6; + constexpr double units = 1.0; + const cv::viz::Color color = cv::viz::Color::gray(); + const cv::Vec2i cells = cv::Vec2i::all(num_cells); + const cv::Vec2d cell_sizes = cv::Vec2d::all(units); + window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color)); + window.showWidget("yz_grid", + cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0), + cv::Point3d(0, 0, 1), cells, cell_sizes, color)); + window.showWidget("xz_grid", + cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0), + cv::Point3d(0, 0, 1), cells, cell_sizes, color)); + } + + window.spin(); +} +} // namespace robot::geometry::opencv_viz diff --git a/common/geometry/opencv_viz.hh b/common/geometry/opencv_viz.hh new file mode 100644 index 000000000..d5530ecef --- /dev/null +++ b/common/geometry/opencv_viz.hh @@ -0,0 +1,11 @@ +#pragma once +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "opencv2/viz.hpp" + +namespace robot::geometry::opencv_viz { +void viz_scene(const std::vector &poses, + const std::vector &points, const bool show_grid = true); +} \ No newline at end of file diff --git a/common/geometry/opencv_viz_example.cc b/common/geometry/opencv_viz_example.cc new file mode 100644 index 000000000..6a045b0bc --- /dev/null +++ b/common/geometry/opencv_viz_example.cc @@ -0,0 +1,46 @@ +#include "opencv2/viz.hpp" + +void demo() { + cv::viz::Viz3d window("My Window"); + + window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + + constexpr unsigned int num_cells = 6; + constexpr double units = 1.0; + const cv::viz::Color color = cv::viz::Color::white(); + const cv::Vec2i cells = cv::Vec2i::all(num_cells); + const cv::Vec2d cell_sizes = cv::Vec2d::all(units); + window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color)); + window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0), + cv::Point3d(0, 0, 1), cells, cell_sizes, color)); + window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0), + cv::Point3d(0, 0, 1), cells, cell_sizes, color)); + window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50))); + constexpr bool ALWAYS_FACE_CAMERA = true; + constexpr double TEXT_SCALE = 0.1; + window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3), + TEXT_SCALE, ALWAYS_FACE_CAMERA)); + + constexpr bool FIXED_TEXT = false; + cv::Affine3d world_from_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation + {0.2, 0.4, 0.6} // translation + ); + + window.showWidget( + "text_3d_fixed", + cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT), + world_from_fixed_text); + constexpr double COORD_SCALE = 0.2; + window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), + world_from_fixed_text); + + constexpr double CIRCLE_RADIUS_M = 0.5; + cv::Affine3d world_from_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation + {0.0, 0.0, 1.5} // translation + ); + window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_from_circle); + + window.spin(); +} + +int main() { demo(); } diff --git a/common/geometry/opencv_viz_test.cc b/common/geometry/opencv_viz_test.cc new file mode 100644 index 000000000..e5e8265af --- /dev/null +++ b/common/geometry/opencv_viz_test.cc @@ -0,0 +1,89 @@ +#include "common/geometry/opencv_viz.hh" + +#include "gtest/gtest.h" + +namespace robot::geometry::opencv_viz { +namespace { +bool is_test() { + return std::getenv("BAZEL_TEST") != nullptr && + std::getenv("BUILD_WORKSPACE_DIRECTORY") == nullptr; +} +} // namespace + +TEST(OpencvVizTest, demo) { + cv::viz::Viz3d window("My Window"); + + window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + + constexpr unsigned int num_cells = 6; + constexpr double units = 1.0; + const cv::viz::Color color = cv::viz::Color::white(); + const cv::Vec2i cells = cv::Vec2i::all(num_cells); + const cv::Vec2d cell_sizes = cv::Vec2d::all(units); + window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color)); + window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0), + cv::Point3d(0, 0, 1), cells, cell_sizes, color)); + window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0), + cv::Point3d(0, 0, 1), cells, cell_sizes, color)); + window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50))); + constexpr bool ALWAYS_FACE_CAMERA = true; + constexpr double TEXT_SCALE = 0.1; + window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3), + TEXT_SCALE, ALWAYS_FACE_CAMERA)); + + constexpr bool FIXED_TEXT = false; + cv::Affine3d world_to_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation + {0.2, 0.4, 0.6} // translation + ); + + window.showWidget( + "text_3d_fixed", + cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT), + world_to_fixed_text); + constexpr double COORD_SCALE = 0.2; + window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), + world_to_fixed_text); + + constexpr double CIRCLE_RADIUS_M = 0.5; + cv::Affine3d world_to_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation + {0.0, 0.0, 1.5} // translation + ); + window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_to_circle); + if (!is_test()) { + window.spin(); + } +} + +TEST(OpencvVizTest, cube_test) { + std::vector cube_W; + float cube_size = 1.0f; + cube_W.push_back(Eigen::Vector3d(0, 0, 0)); + cube_W.push_back(Eigen::Vector3d(cube_size, 0, 0)); + cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, 0)); + cube_W.push_back(Eigen::Vector3d(0, cube_size, 0)); + cube_W.push_back(Eigen::Vector3d(0, 0, cube_size)); + cube_W.push_back(Eigen::Vector3d(cube_size, 0, cube_size)); + cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); + cube_W.push_back(Eigen::Vector3d(0, cube_size, cube_size)); + + std::vector poses; + + Eigen::Matrix3d rotation0( + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + Eigen::Isometry3d pose0; + pose0.translation() = Eigen::Vector3d(4, 0, 0); + pose0.linear() = rotation0; + poses.push_back(pose0); + + Eigen::Isometry3d pose1; + pose1.linear() = + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0; + pose1.translation() = Eigen::Vector3d(0, 4, 0); + poses.push_back(pose1); + + if (!is_test()) { + viz_scene(poses, cube_W); + } +} +} // namespace robot::geometry::opencv_viz \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 83a43f8d1..da4ec0163 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -56,7 +56,8 @@ cc_test( "@com_google_googletest//:gtest_main", ":symphony_lake_parser", ":structure_from_motion", - "//common/math:matrix_to_proto" + "//common/math:matrix_to_proto", + "//common/geometry:opencv_viz" ] ) diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc index 82ac8bf6a..96b643d03 100644 --- a/experimental/learn_descriptors/gtsam_test.cc +++ b/experimental/learn_descriptors/gtsam_test.cc @@ -8,6 +8,7 @@ #include "gtsam/inference/Symbol.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/GeneralSFMFactor.h" #include "gtsam/slam/PriorFactor.h" #include "gtsam/slam/ProjectionFactor.h" @@ -103,20 +104,89 @@ TEST(GtsamTesting, gtsam_simple_cube) { EXPECT_NEAR(cube_W[i][1], result_point[1], TOL); EXPECT_NEAR(cube_W[i][2], result_point[2], TOL); } +} +TEST(GtsamTesting, gtsam_simple_cube_2) { + std::vector cube_W; + float cube_size = 1.0f; + cube_W.push_back(gtsam::Point3(0, 0, 0)); + cube_W.push_back(gtsam::Point3(cube_size, 0, 0)); + cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0)); + cube_W.push_back(gtsam::Point3(0, cube_size, 0)); + cube_W.push_back(gtsam::Point3(0, 0, cube_size)); + cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size)); + cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size)); + cube_W.push_back(gtsam::Point3(0, cube_size, cube_size)); + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Values initial; + gtsam::NonlinearFactorGraph graph; + + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + initial.insert(gtsam::Symbol('K', 0), *K); + + auto poseNoise = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); + auto measurementNoise = gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + std::vector poses; + std::vector> cameras; + + Eigen::Matrix3d rotation0( + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0)); + gtsam::PinholeCamera camera0(pose0, *K); + graph.emplace_shared>(gtsam::Symbol('x', 0), pose0, poseNoise); + poses.push_back(pose0); + cameras.push_back(camera0); + + gtsam::Pose3 pose1( + gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + rotation0), + gtsam::Point3(0, 4, 0)); + gtsam::PinholeCamera camera1(pose1, *K); + graph.emplace_shared>( + gtsam::Symbol('x', 0), gtsam::Symbol('x', 1), pose0.between(pose1), poseNoise); + poses.push_back(pose1); + cameras.push_back(camera1); - // // Setup - // const Eigen::Matrix3d in{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}}; + for (size_t i = 0; i < poses.size(); i++) { + initial.insert(gtsam::Symbol('x', i), poses[i]); + } + for (size_t i = 0; i < cube_W.size(); i++) { + initial.insert(gtsam::Symbol('L', i), gtsam::Point3(0, 0, 0)); + for (size_t j = 0; j < poses.size(); j++) { + gtsam::Point2 pixel_p_cube_C = cameras[j].project(cube_W[i]); + if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C, img_width, img_height)) { + graph.emplace_shared>( + pixel_p_cube_C, measurementNoise, gtsam::Symbol('x', j), gtsam::Symbol('L', i), + gtsam::Symbol('K', 0)); + } + } + } - // // Action - // proto::Matrix proto; - // pack_into(in, &proto); - // const Eigen::Matrix3d out = unpack_from(proto); + gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + gtsam::Values result = optimizer.optimize(); + + result.print("Optimized Results:\n"); - // // Verification - // EXPECT_EQ(in.rows(), out.rows()); - // EXPECT_EQ(in.cols(), out.cols()); - // for (int i = 0; i < in.rows(); i++) { - // EXPECT_EQ(in(i), out(i)); - // } + constexpr double TOL = 1e-3; + for (size_t i = 0; i < poses.size(); i++) { + gtsam::Pose3 result_pose = result.at(gtsam::Symbol('x', i)); + EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL); + EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL); + EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL); + EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL)); + } + for (size_t i = 0; i < cube_W.size(); i++) { + gtsam::Point3 result_point = result.at(gtsam::Symbol('L', i)); + EXPECT_NEAR(cube_W[i][0], result_point[0], TOL); + EXPECT_NEAR(cube_W[i][1], result_point[1], TOL); + EXPECT_NEAR(cube_W[i][2], result_point[2], TOL); + } } } // namespace robot::experimental::gtsam_testing \ No newline at end of file diff --git a/experimental/learn_descriptors/opencv_viz.cc b/experimental/learn_descriptors/opencv_viz.cc index f61dd508d..f24726e92 100644 --- a/experimental/learn_descriptors/opencv_viz.cc +++ b/experimental/learn_descriptors/opencv_viz.cc @@ -5,23 +5,25 @@ namespace opencv_viz { template -cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase& eigenMatrix) { +cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase &eigenMatrix) { int cvType; if constexpr (std::is_same_v) { - cvType = CV_64F; // 64-bit floating point + cvType = CV_64F; // 64-bit floating point } else if constexpr (std::is_same_v) { - cvType = CV_32F; // 32-bit floating point + cvType = CV_32F; // 32-bit floating point } else if constexpr (std::is_same_v) { - cvType = CV_32S; // 32-bit signed integer + cvType = CV_32S; // 32-bit signed integer } else { static_assert(!std::is_same_v, "Unsupported data type in Eigen matrix"); } - return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, const_cast(static_cast(eigenMatrix.derived().data()))).clone(); + return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, + const_cast(static_cast(eigenMatrix.derived().data()))) + .clone(); } cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { // Ensure R is a valid rotation matrix CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001); - + double trace = R(0, 0) + R(1, 1) + R(2, 2); double theta = std::acos((trace - 1) / 2.0); @@ -30,45 +32,32 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { } double sinTheta = std::sin(theta); - cv::Vec3d axis( - (R(2, 1) - R(1, 2)) / (2.0 * sinTheta), - (R(0, 2) - R(2, 0)) / (2.0 * sinTheta), - (R(1, 0) - R(0, 1)) / (2.0 * sinTheta) - ); + cv::Vec3d axis((R(2, 1) - R(1, 2)) / (2.0 * sinTheta), (R(0, 2) - R(2, 0)) / (2.0 * sinTheta), + (R(1, 0) - R(0, 1)) / (2.0 * sinTheta)); return axis * theta; } -void viz_scene(const std::vector &poses, const std::vector &points) { +void viz_scene(const std::vector &poses, + const std::vector &points) { cv::viz::Viz3d window("Viz Scene"); std::vector cv_poses; - constexpr double pose_size = 0.2; + constexpr double pose_size = 0.2; for (unsigned int i = 0; i < poses.size(); i++) { const Eigen::MatrixXd &pose = poses[i]; - cv::Affine3d cv_pose( - eigen_to_cv_mat(pose.block(0,0,3,3)) , - cv::Affine3d::Vec3{pose(0,3), pose(1,3), pose(2,3)} - ); - window.showWidget( - "pose_" + std::to_string(i), - cv::viz::WCoordinateSystem(pose_size), - cv_pose - ); + cv::Affine3d cv_pose(eigen_to_cv_mat(pose.block(0, 0, 3, 3)), + cv::Affine3d::Vec3{pose(0, 3), pose(1, 3), pose(2, 3)}); + window.showWidget("pose_" + std::to_string(i), cv::viz::WCoordinateSystem(pose_size), + cv_pose); } constexpr double point_radius = 0.2; constexpr int sphere_res = 10; const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); for (unsigned int i = 0; i < points.size(); i++) { const Eigen::Vector3d &point = points[i]; - window.showWidget( - "translation_" + std::to_string(i), - cv::viz::WSphere( - cv::Point3d(point[0], point[1], point[2]), - point_radius, - sphere_res, - point_color - ) - ); + window.showWidget("translation_" + std::to_string(i), + cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius, + sphere_res, point_color)); } window.showWidget("world_frame", cv::viz::WCoordinateSystem()); @@ -79,41 +68,36 @@ void viz_scene(const std::vector &poses, const std::vector camera1(pose1, *K); - // graph.emplace_shared>(gtsam::Symbol('x', 1), pose1, poseNoise); - // poses.push_back(pose1); - // cameras.push_back(camera1); + // graph.emplace_shared>(gtsam::Symbol('x', 1), pose1, + // poseNoise); poses.push_back(pose1); cameras.push_back(camera1); } -int main() { - demo(); -} +int main() { demo(); } diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index d19634ef1..92e9bc0a7 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -1,4 +1,5 @@ #include "experimental/learn_descriptors/structure_from_motion.hh" + #include #include @@ -9,18 +10,14 @@ namespace robot::experimental::learn_descriptors { const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() { Eigen::Affine3d transform = Eigen::Affine3d::Identity(); transform.translate(Eigen::Vector3d::Zero()); - transform.rotate(Eigen::Matrix3d( - Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1,0,0)) * - Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0,1,0))) - ); + transform.rotate(Eigen::Matrix3d(Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1, 0, 0)) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)))); // std::stringstream ss; - // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()), gtsam::Point3(transform.translation())); - // json json_obj; - // json_obj["T_symlake_boat_cam"] = ss.str(); - // fs::path output_dir = "output"; - // fs::path output_file = output_dir / "output_sfm.json"; - // if (!fs::exists(output_dir)) { + // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()), + // gtsam::Point3(transform.translation())); json json_obj; json_obj["T_symlake_boat_cam"] = + // ss.str(); fs::path output_dir = "output"; fs::path output_file = output_dir / + // "output_sfm.json"; if (!fs::exists(output_dir)) { // fs::create_directory(output_dir); // } // std::ofstream file(output_file); @@ -38,9 +35,8 @@ std::string pose_to_string(gtsam::Pose3 pose) { return ss.str(); } -const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3( - gtsam::Rot3(T_symlake_boat_cam.rotation()), - T_symlake_boat_cam.translation()); +const gtsam::Pose3 StructureFromMotion::default_initial_pose = + gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation()); // gtsam::Pose3( // gtsam::Rot3(Eigen::Matrix3d( // Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() * @@ -49,12 +45,13 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3( // gtsam::Point3::Identity() // ); -StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose, - Frontend::MatcherType frontend_matcher) { +StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, + gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose, + Frontend::MatcherType frontend_matcher) { frontend_ = Frontend(frontend_extractor, frontend_matcher); backend_ = Backend(K); - set_initial_pose(initial_pose); + set_initial_pose(initial_pose); // backend_.get_current_initial_values().print("Ooga: "); } @@ -63,21 +60,26 @@ void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { } void StructureFromMotion::add_image(const cv::Mat &img) { - std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); + std::pair, cv::Mat> keypoints_and_descriptors = + frontend_.get_keypoints_and_descriptors(img); landmarks_.push_back(std::vector()); if (get_num_images_added() > 0) { - std::vector matches = frontend_.get_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); + std::vector matches = frontend_.get_matches( + img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); Frontend::enforce_bijective_matches(matches); matches_.push_back(matches); - gtsam::Pose3 between_value = gtsam::Pose3::Identity(); + gtsam::Pose3 between_value = get_backend().estimate_pose( + img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, matches, + get_backend().get_K()); backend_.add_between_factor( - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), - between_value); - gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0,0,1)); - gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1)); - gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added())); + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value); + gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0, 0, 1)); + gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at( + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1)); + gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at( + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added())); int count = 0; for (const cv::DMatch match : matches) { if (count < 1) { @@ -87,8 +89,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) { {"T_world_cam1", pose_to_string(T_world_cam1)}, {"T_world_cam2", pose_to_string(T_world_cam1)}, {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)}, - {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)} - }; + {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}}; fs::path output_dir = "output"; fs::path output_file = output_dir / "output_sfm.json"; if (!fs::exists(output_dir)) { @@ -104,30 +105,29 @@ void StructureFromMotion::add_image(const cv::Mat &img) { // std::cout << "T_world_landmark" << T_world_cam1 * T_cam_landmark << std::endl; Backend::Landmark landmark_cam_1( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1), + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), gtsam::Point2( - static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), - static_cast(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) - ), - (T_world_cam1 * T_cam_landmark).translation() - ); + static_cast( + img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), + static_cast( + img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), + (T_world_cam1 * T_cam_landmark).translation()); Backend::Landmark landmark_cam_2( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y) - ), - (T_world_cam2 * T_cam_landmark).translation() - ); - landmarks_[get_num_images_added()-1].push_back(landmark_cam_1); + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), + (T_world_cam2 * T_cam_landmark).translation()); + landmarks_[get_num_images_added() - 1].push_back(landmark_cam_1); landmarks_[get_num_images_added()].push_back(landmark_cam_2); backend_.add_landmark(landmark_cam_1); backend_.add_landmark(landmark_cam_2); landmark_count_++; - } - // std::cout << "number of landmarks: " << count << std::endl; - // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl; + } + // std::cout << "number of landmarks: " << count << std::endl; + // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const + // auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl; // backend_.get_current_initial_values().print("Current initial values: "); // std::cout << "landmark_count_: " << landmark_count_ << std::endl; } @@ -201,45 +201,42 @@ std::vector Frontend::get_matches(const cv::Mat &descriptors1, } void Frontend::threshold_matches(std::vector &matches, float dist_threshhold) { - matches.erase( - std::remove_if(matches.begin(), matches.end(), [dist_threshhold](const cv::DMatch& match) { - return match.distance > dist_threshhold; - }), - matches.end()); + matches.erase(std::remove_if(matches.begin(), matches.end(), + [dist_threshhold](const cv::DMatch &match) { + return match.distance > dist_threshhold; + }), + matches.end()); } -void Frontend::enforce_bijective_matches(std::vector& matches) { +void Frontend::enforce_bijective_matches(std::vector &matches) { std::unordered_map bestQueryMatch; std::unordered_map bestTrainMatch; - - for (const auto& match : matches) { + + for (const auto &match : matches) { int queryIdx = match.queryIdx; int trainIdx = match.trainIdx; - - if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || + + if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || match.distance < bestQueryMatch[queryIdx].distance) { bestQueryMatch[queryIdx] = match; } - - if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || + + if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || match.distance < bestTrainMatch[trainIdx].distance) { bestTrainMatch[trainIdx] = match; } } - - matches.erase( - std::remove_if( - matches.begin(), matches.end(), - [&bestQueryMatch, &bestTrainMatch](const cv::DMatch& match) { - int queryIdx = match.queryIdx; - int trainIdx = match.trainIdx; - - return bestQueryMatch[queryIdx].trainIdx != trainIdx || - bestTrainMatch[trainIdx].queryIdx != queryIdx; - }), - matches.end()); -} + matches.erase(std::remove_if(matches.begin(), matches.end(), + [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + return bestQueryMatch[queryIdx].trainIdx != trainIdx || + bestTrainMatch[trainIdx].queryIdx != queryIdx; + }), + matches.end()); +} bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, std::vector &matches_out) const { @@ -295,7 +292,7 @@ Backend::Backend() { const double cy = img_height / 2.0; gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - + initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); } @@ -310,10 +307,12 @@ void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 & // initial_estimate_.print("values after adding prior: "); } -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value) { - graph_.emplace_shared>(symbol_1, symbol_2, value, pose_noise_); - // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << std::endl; - // initial_estimate_.print("values when adding between factor: "); +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, + const gtsam::Pose3 &value) { + graph_.emplace_shared>(symbol_1, symbol_2, value, + pose_noise_); + // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << + // std::endl; initial_estimate_.print("values when adding between factor: "); initial_estimate_.insert(symbol_2, initial_estimate_.at(symbol_1).compose(value)); } @@ -325,8 +324,8 @@ void Backend::add_landmarks(const std::vector &landmarks) { void Backend::add_landmark(const Landmark &landmark) { graph_.emplace_shared>( - landmark.projection, measurement_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0) - ); + landmark.projection, measurement_noise_, landmark.cam_pose_symbol, + landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0)); if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) { initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess); } else { @@ -334,13 +333,39 @@ void Backend::add_landmark(const Landmark &landmark) { } } +gtsam::Pose3 Backend::estimate_pose(const std::vector &kpts1, + const std::vector &kpts2, + const std::vector &matches, + const gtsam::Cal3_S2 &K) { + std::vector pts1; + std::vector pts2; + for (const cv::DMatch &match : matches) { + std::cout << "query point: " << kpts1[match.queryIdx].pt << std::endl; + std::cout << "trian point: " << kpts2[match.trainIdx].pt << std::endl; + pts1.push_back(kpts1[match.queryIdx].pt); + pts2.push_back(kpts2[match.trainIdx].pt); + } + cv::Mat cv_K = (cv::Mat_(3, 3) << K.fx(), K.skew(), K.px(), 0, K.fy(), K.py(), 0, 0, 1); + cv::Mat E = cv::findEssentialMat(pts1, pts2, cv_K, cv::RANSAC, 0.999, 1.0); + cv::Mat R, t; + cv::recoverPose(E, pts1, pts2, cv_K, R, t); + gtsam::Point3 translation(t.at(0, 0), t.at(1, 0), t.at(2, 0)); + gtsam::Matrix3 mat_rot; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + mat_rot(i, j) = R.at(i, j); + } + } + return gtsam::Pose3(gtsam::Rot3(mat_rot), translation); +} + void Backend::solve_graph() { gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); result_ = optimizer.optimize(); } // json SFM_Logger::gtsam_pose3_to_json(const gtsam::Pose3 &pose) { -// json json_obj; +// json json_obj; // json_obj = { // {}, @@ -349,7 +374,8 @@ void Backend::solve_graph() { // return json_obj; // } -// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path &output_path) { +// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path +// &output_path) { // json json_obj; // json_obj["add_image_1"] = { // {"T_world_cam1", pose_to_string(T_world_cam1)}, diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index b2bbfdefa..991aa2f68 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -1,6 +1,9 @@ #pragma once -#include +#include +#include +#include +#include #include "Eigen/Core" #include "gtsam/geometry/Point3.h" @@ -8,17 +11,12 @@ #include "gtsam/inference/Symbol.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/GeneralSFMFactor.h" #include "gtsam/slam/PriorFactor.h" -#include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/ProjectionFactor.h" - -#include -#include -#include -#include - #include "nlohmann/json.hpp" +#include "opencv2/opencv.hpp" using json = nlohmann::json; @@ -32,8 +30,8 @@ class Frontend { Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); ~Frontend(){}; - const ExtractorType& get_extractor_type() const { return extractor_type_; }; - const MatcherType& get_matcher_type() const { return matcher_type_; }; + const ExtractorType &get_extractor_type() const { return extractor_type_; }; + const MatcherType &get_matcher_type() const { return matcher_type_; }; std::pair, cv::Mat> get_keypoints_and_descriptors( const cv::Mat &img) const; @@ -50,7 +48,7 @@ class Frontend { const cv::Mat &img2, std::vector keypoints2, std::vector matches, cv::Mat img_matches_out) { cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); - } + } private: bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, @@ -66,60 +64,75 @@ class Frontend { cv::Ptr descriptor_matcher_; }; class Backend { - public: - static constexpr char pose_symbol_char = 'x'; - static constexpr char landmark_symbol_char = 'l'; - static constexpr char camera_symbol_char = 'k'; - - struct Landmark { - Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, const gtsam::Point2 &projection, const gtsam::Point3 &initial_guess=gtsam::Point3::Identity()) - : lmk_factor_symbol(lmk_factor_symbol), cam_pose_symbol(cam_pose_symbol), projection(projection), initial_guess(initial_guess) {}; - const gtsam::Symbol lmk_factor_symbol; - const gtsam::Symbol cam_pose_symbol; - const gtsam::Point2 projection; - const gtsam::Point3 initial_guess; - }; - - Backend(); - Backend(gtsam::Cal3_S2 K); - ~Backend(){}; - - void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value); - void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value); - - void add_landmarks(const std::vector &landmarks); - void add_landmark(const Landmark &landmark); - - void solve_graph(); - const gtsam::Values& get_current_initial_values() const { return initial_estimate_; }; - const gtsam::Values& get_result() const { - return result_; - }; - private: - gtsam::Values initial_estimate_; - gtsam::Values result_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); + public: + static constexpr char pose_symbol_char = 'x'; + static constexpr char landmark_symbol_char = 'l'; + static constexpr char camera_symbol_char = 'k'; + + struct Landmark { + Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, + const gtsam::Point2 &projection, + const gtsam::Point3 &initial_guess = gtsam::Point3::Identity()) + : lmk_factor_symbol(lmk_factor_symbol), + cam_pose_symbol(cam_pose_symbol), + projection(projection), + initial_guess(initial_guess){}; + const gtsam::Symbol lmk_factor_symbol; + const gtsam::Symbol cam_pose_symbol; + const gtsam::Point2 projection; + const gtsam::Point3 initial_guess; + }; + + Backend(); + Backend(gtsam::Cal3_S2 K); + ~Backend(){}; + + void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value); + void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, + const gtsam::Pose3 &value); + + void add_landmarks(const std::vector &landmarks); + void add_landmark(const Landmark &landmark); + gtsam::Pose3 estimate_pose(const std::vector &kpts1, + const std::vector &kpts2, + const std::vector &matches, const gtsam::Cal3_S2 &K); + + void solve_graph(); + const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; + const gtsam::Values &get_result() const { return result_; }; + const gtsam::Cal3_S2 &get_K() const { return K_; }; + + private: + gtsam::Cal3_S2 K_; + + gtsam::Values initial_estimate_; + gtsam::Values result_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = + gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); }; class StructureFromMotion { public: static const Eigen::Affine3d T_symlake_boat_cam; - static const gtsam::Pose3 default_initial_pose; - StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= default_initial_pose, - Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); + static const gtsam::Pose3 default_initial_pose; + StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, + gtsam::Pose3 initial_pose = default_initial_pose, + Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); ~StructureFromMotion(){}; - + void set_initial_pose(gtsam::Pose3 initial_pose); void add_image(const cv::Mat &img); void solve_structure() { backend_.solve_graph(); }; - const gtsam::Values& get_structure_result() { return backend_.get_result(); }; + const gtsam::Values &get_structure_result() { return backend_.get_result(); }; Frontend get_frontend() { return frontend_; }; Backend get_backend() { return backend_; } size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); }; size_t get_landmark_count() { return landmark_count_; }; + private: std::vector, cv::Mat>> img_keypoints_and_descriptors_; std::vector> matches_; @@ -134,6 +147,7 @@ class StructureFromMotion { // // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose); // static const json eigen_mat_to_json(const Eigen::Matrix &mat); // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose); -// static const void values_to_json(const gtsam::Values &values, const std::filesystem::path &file); +// static const void values_to_json(const gtsam::Values &values, const std::filesystem::path +// &file); // }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 20a034e0d..80ae60737 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -1,12 +1,21 @@ #include "experimental/learn_descriptors/structure_from_motion.hh" -#include "experimental/learn_descriptors/symphony_lake_parser.hh" -#include "common/math/matrix_to_proto.hh" #include #include +#include "Eigen/Geometry" +#include "common/geometry/opencv_viz.hh" +#include "common/math/matrix_to_proto.hh" +#include "experimental/learn_descriptors/symphony_lake_parser.hh" #include "gtest/gtest.h" +class GtsamTestHelper { + public: + static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { + return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height; + } +}; + namespace robot::experimental::learn_descriptors { TEST(SFM_TEST, frontend_pipeline_sweep) { const size_t width = 640; @@ -21,7 +30,7 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { cv::Mat translation_mat = (cv::Mat_(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0); cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}}; float com_x = 0.0f, com_y = 0.0f; - for (const cv::Point& rect_point : rect_points) { + for (const cv::Point &rect_point : rect_points) { com_x += rect_point.x; com_y += rect_point.y; } @@ -71,7 +80,7 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { static_cast(matcher_type)); try { frontend = Frontend(extractor_type, matcher_type); - } catch (const std::invalid_argument& e) { + } catch (const std::invalid_argument &e) { assert(std::string(e.what()) == "FLANN can not be used with ORB."); // very jank... continue; } @@ -112,46 +121,105 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { } } -TEST(SFM_TEST, frontend_snippet_two) { - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180); - const cv::Mat image_1 = survey.loadImageByImageIndex(180); - const cv::Mat image_2 = survey.loadImageByImageIndex(185); - - const size_t img_width = image_point.width, img_height = image_point.height; - const double fx = image_point.fx, fy = image_point.fy; - const double cx = image_point.cx, cy = image_point.cy; - gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); +TEST(GtsamTesting, sfm_test_estimate_pose) { + std::vector cube_W; + float cube_size = 1.0f; + cube_W.push_back(gtsam::Point3(0, 0, 0)); + cube_W.push_back(gtsam::Point3(cube_size, 0, 0)); + cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0)); + cube_W.push_back(gtsam::Point3(0, cube_size, 0)); + cube_W.push_back(gtsam::Point3(0, 0, cube_size)); + cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size)); + cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size)); + cube_W.push_back(gtsam::Point3(0, cube_size, cube_size)); + + Eigen::Matrix3d R_new_points( + Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + + const int initial_size = cube_W.size(); + const gtsam::Point3 p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2); + for (int i = 0; i < initial_size; i++) { + cube_W.emplace_back( + R_new_points * (cube_W[i] - p_world_cube_center) + p_world_cube_center); + } - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); - Frontend frontend = sfm.get_frontend(); + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, *K); + + std::vector poses; + std::vector> cameras; + + Eigen::Matrix3d R_world_pose0( + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + gtsam::Pose3 T_world_pose0(gtsam::Rot3(R_world_pose0), + gtsam::Point3(4, cube_size / 2, cube_size / 2)); + gtsam::PinholeCamera camera0(T_world_pose0, *K); + poses.push_back(T_world_pose0); + cameras.push_back(camera0); + + Eigen::Matrix3d R_world_pose0_to_pose1( + Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix()); + gtsam::Pose3 T_world_pose1( + gtsam::Rot3(R_world_pose0_to_pose1) * T_world_pose0.rotation(), + (R_world_pose0_to_pose1 * (T_world_pose0.translation() - p_world_cube_center) + + p_world_cube_center)); + gtsam::PinholeCamera camera1(T_world_pose1, *K); + poses.push_back(T_world_pose1); + cameras.push_back(camera1); + + std::vector isometries; + for (const gtsam::Pose3 &pose : poses) { + isometries.emplace_back(pose.matrix()); + } - cv::Mat img_keypoints_out_1(img_height, img_width, CV_8UC3), - img_keypoints_out_2(img_height, img_width, CV_8UC3), img_matches_out(img_height, 2 * img_width, CV_8UC3); - cv::Mat img_display_test; + std::vector kpts1; + std::vector kpts2; + std::vector matches; - std::pair, cv::Mat>keypts_descriptors_1 = frontend.get_keypoints_and_descriptors(image_1); - std::pair, cv::Mat>keypts_descriptors_2 = frontend.get_keypoints_and_descriptors(image_2); - std::cout << "num of keypoints " << keypts_descriptors_1.first.size() << std::endl; - frontend.draw_keypoints(image_1, keypts_descriptors_1.first, - img_keypoints_out_1); - frontend.draw_keypoints(image_2, keypts_descriptors_2.first, - img_keypoints_out_2); - std::vector matches = frontend.get_matches(keypts_descriptors_1.second, keypts_descriptors_2.second); - frontend.draw_matches(image_1, keypts_descriptors_1.first, image_2, - keypts_descriptors_2.first, matches, img_matches_out); + for (size_t i = 0; i < cube_W.size(); i++) { + gtsam::Point2 pixel_p_cube_C0 = cameras[0].project(cube_W[i]); + gtsam::Point2 pixel_p_cube_C1 = cameras[1].project(cube_W[i]); + if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C0, img_width, img_height) && + GtsamTestHelper::pixel_in_range(pixel_p_cube_C1, img_width, img_height)) { + kpts1.push_back(cv::KeyPoint(pixel_p_cube_C0[0], pixel_p_cube_C0[1], 3)); + kpts2.push_back(cv::KeyPoint(pixel_p_cube_C1[0], pixel_p_cube_C1[1], 3)); + matches.emplace_back(cv::DMatch(kpts1.size() - 1, kpts2.size() - 1, 0)); + } + } - // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - // cv::vconcat(img_display_test, img_matches_out, img_display_test); - // cv::Mat og_images; - // cv::hconcat(image_1, image_2, og_images); - // cv::vconcat(og_images, img_display_test, img_display_test); - // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl; - // cv::imshow("Keypoints and Matches Output.", img_display_test); - // std::cout << "Press spacebar to pause." << std::endl; - // while (cv::waitKey(1000) != 32) {} + std::cout << "kpts1.size(): " << kpts1.size() << std::endl; + std::cout << "kpts2.size(): " << kpts2.size() << std::endl; + + gtsam::Pose3 pose_diff_estimate = sfm.get_backend().estimate_pose(kpts1, kpts2, matches, *K); + gtsam::Pose3 pose_diff_groundtruth = T_world_pose0.between(T_world_pose1); + std::vector result_poses = {T_world_pose0, T_world_pose0 * pose_diff_estimate}; + std::cout << "pose_diff_estimate: " << pose_diff_estimate << std::endl; + std::cout << "pose_diff_ground_truth: " << pose_diff_groundtruth << std::endl; + + isometries.emplace_back(result_poses.back().matrix()); + robot::geometry::opencv_viz::viz_scene(isometries, cube_W); + + // std::cout << result_poses[1] << std::endl; + // std::cout << pose1 << std::endl; + // std::cout << pose0 << std::endl; + + constexpr double TOL = 1e-3; + for (size_t i = 0; i < poses.size(); i++) { + gtsam::Pose3 result_pose = result_poses[i]; + EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL); + EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL); + EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL); + EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL)); + } } TEST(SFM_TEST, sfm_snippet_two) { @@ -159,59 +227,59 @@ TEST(SFM_TEST, sfm_snippet_two) { const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); const symphony_lake_dataset::Survey &survey = survey_vector.get(0); const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180); - const std::vector images = { - survey.loadImageByImageIndex(180), - survey.loadImageByImageIndex(185) - }; + const std::vector images = {survey.loadImageByImageIndex(180), + survey.loadImageByImageIndex(185)}; // const size_t img_width = image_point.width, img_height = image_point.height; - const double fx = image_point.fx, fy = image_point.fy; - const double cx = image_point.cx, cy = image_point.cy; + const double fx = image_point.fx, fy = image_point.fy; + const double cx = image_point.cx, cy = image_point.cy; gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); std::cout << "sfm.landmark_count_: " << sfm.get_landmark_count() << std::endl; for (const cv::Mat &image : images) { - sfm.add_image(image); + sfm.add_image( + image); //, gtsam::Pose3(gtsam::Rot3::Identity(), gtsam::Point3(0.05, 0, 0))); } gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); std::vector poses; - std::vector protos; + std::vector protos; for (size_t i = 0; i < images.size(); i++) { - gtsam::Pose3 pose = initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); - Eigen::MatrixXd eigen_affine(4,4); + gtsam::Pose3 pose = + initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); + Eigen::MatrixXd eigen_affine(4, 4); gtsam::Matrix3 rot_mat = pose.rotation().matrix(); gtsam::Point3 translation = pose.translation(); for (int j = 0; j < 3; j++) { for (int k = 0; k < 3; k++) { - eigen_affine(j,k) = rot_mat(j,k); + eigen_affine(j, k) = rot_mat(j, k); } - eigen_affine(3,j) = translation(j); + eigen_affine(3, j) = translation(j); } - eigen_affine(3,3) = 1; + eigen_affine(3, 3) = 1; poses.push_back(eigen_affine); - protos.emplace_shared(proto::Matrix()); + protos.emplace_back(); pack_into(eigen_affine, &protos.back()); } std::cout << poses.front() << std::endl; - // const Eigen::Vector3d in(1.0, 2.0, 3.0); // // Action - // // proto::Matrix proto; - // // pack_into(in, &proto); - // // const Eigen::Vector3d out = unpack_from(proto); + // robot::math::proto::Matrix proto; + // pack_into(in, &proto); + // const Eigen::Vector3d out = unpack_from(proto); + // std::cout << "vector out " << out << std::endl; - // // const Eigen::VectorXd in{{1.0, 2.0, 3.0}}; + // const Eigen::VectorXd in{{1.0, 2.0, 3.0}}; - // // // Action - // // proto::Matrix proto; - // // pack_into(in, &proto); - // // const Eigen::VectorXd out = unpack_from(proto); + // // Action + // robot::math::proto::Matrix proto; + // pack_into(in, &proto); + // const Eigen::VectorXd out = unpack_from(proto); // sfm.solve_structure(); @@ -220,10 +288,10 @@ TEST(SFM_TEST, sfm_snippet_two) { // cv::Mat og_images; // cv::hconcat(image_1, image_2, og_images); // cv::vconcat(og_images, img_display_test, img_display_test); - // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl; - // cv::imshow("Keypoints and Matches Output.", img_display_test); - // std::cout << "Press spacebar to pause." << std::endl; - // while (cv::waitKey(1000) != 32) {} + // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << + // img_display_test.cols << std::endl; cv::imshow("Keypoints and Matches Output.", + // img_display_test); std::cout << "Press spacebar to pause." << std::endl; while + // (cv::waitKey(1000) != 32) {} } // TEST(SFM_TEST, structure_from_motion) { @@ -236,7 +304,7 @@ TEST(SFM_TEST, sfm_snippet_two) { // gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); -// StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); +// StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); // DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); // // DataParser::Generator generator = data_parser.create_img_generator(); // const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); @@ -266,10 +334,10 @@ TEST(SFM_TEST, sfm_snippet_two) { // // ++img_itr; // // } // sfm.solve_structure(); -// // gtsam::Values output = sfm.get_structure_result(); +// // gtsam::Values output = sfm.get_structure_result(); // // output.print("result values: "); // // for (size_t i = 0; i < sfm.get_num_images_added(); i++) { - + // // } // } } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 5f210578b..377e367cb 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -6,31 +6,33 @@ namespace robot::experimental::learn_descriptors { DataParser::DataParser(const std::filesystem::path &image_root_dir, - const std::vector &survey_list) : - image_root_dir_(image_root_dir) , survey_list_(survey_list) { + const std::vector &survey_list) + : image_root_dir_(image_root_dir), survey_list_(survey_list) { ROBOT_CHECK(std::filesystem::exists(image_root_dir), "Image root dir does not exist!", image_root_dir); surveys_.load(image_root_dir.string(), survey_list); } DataParser::~DataParser() {} -Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, bool use_compass) { - const symphony_lake_dataset::ImagePoint img_point = surveys_.get(survey_idx).getImagePoint(img_idx); +Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, + bool use_compass) { + const symphony_lake_dataset::ImagePoint img_point = + surveys_.get(survey_idx).getImagePoint(img_idx); Eigen::Vector3d t; - Eigen::Matrix3d R; + Eigen::Matrix3d R; Eigen::Affine3d T_world_camera; if (use_gps) { t[0] = img_point.x; t[1] = img_point.y; } // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core) - double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan; + double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan; Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY())); Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity(); Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ())); - R = R_x * R_y * R_z; + R = R_x * R_y * R_z; T_world_camera.translate(t); - T_world_camera.rotate(R); + T_world_camera.rotate(R); return T_world_camera; } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index 6dc74c7d3..dd8db32be 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -1,19 +1,18 @@ #pragma once +#include #include -#include #include -#include +#include #include +#include "Eigen/Dense" #include "symphony_lake_dataset/ImagePoint.h" #include "symphony_lake_dataset/SurveyVector.h" -#include "Eigen/Dense" - namespace robot::experimental::learn_descriptors { class DataParser { public: - template + template struct Generator { struct promise_type { T value; @@ -31,41 +30,46 @@ class DataParser { using handle_type = std::coroutine_handle; Generator(handle_type h) : handle(h) {} - ~Generator() { if (handle) handle.destroy(); } + ~Generator() { + if (handle) handle.destroy(); + } struct iterator { handle_type handle; bool operator!=(std::default_sentinel_t) const { return !handle.done(); } - iterator& operator++() { handle.resume(); return *this; } + iterator &operator++() { + handle.resume(); + return *this; + } T operator*() const { return handle.promise().value; } }; iterator begin() { return iterator{handle}; } std::default_sentinel_t end() { return {}; } - private: + private: handle_type handle; }; - Generator image_generator( - const symphony_lake_dataset::SurveyVector& survey_vector) { - + Generator image_generator(const symphony_lake_dataset::SurveyVector &survey_vector) { for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { const symphony_lake_dataset::Survey &survey = survey_vector.get(i); - for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + for (int j = 0; j < static_cast(survey.getNumImages()); j++) { co_yield survey.loadImageByImageIndex(j); } } - } + } DataParser(const std::filesystem::path &image_root_dir, const std::vector &survey_list); ~DataParser(); - Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, bool use_compass = false); + Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, + bool use_compass = false); const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; Generator create_img_generator() { return image_generator(surveys_); }; + private: std::filesystem::path image_root_dir_; std::vector survey_list_; @@ -73,8 +77,9 @@ class DataParser { }; class SymphonyLakeDatasetTestHelper { public: - static constexpr const char* test_image_root_dir = "external/symphony_lake_snippet/symphony_lake"; - static constexpr std::array test_survey_list = {"140106_snippet"}; + static constexpr const char *test_image_root_dir = + "external/symphony_lake_snippet/symphony_lake"; + static constexpr std::array test_survey_list = {"140106_snippet"}; static bool images_equal(cv::Mat img1, cv::Mat img2) { if (img1.size() != img2.size() || img1.type() != img2.type()) { return false; @@ -84,8 +89,12 @@ class SymphonyLakeDatasetTestHelper { diff = diff.reshape(1); return cv::countNonZero(diff) == 0; }; - static DataParser get_test_parser() { return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); }; - static std::filesystem::path get_test_iamge_root_dir() { return std::filesystem::path(test_image_root_dir); }; + static DataParser get_test_parser() { + return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); + }; + static std::filesystem::path get_test_iamge_root_dir() { + return std::filesystem::path(test_image_root_dir); + }; static std::vector get_test_survey_list() { return std::vector(test_survey_list.begin(), test_survey_list.end()); }; diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index 5ede85062..0b22cdd3c 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -27,10 +27,10 @@ TEST(SymphonyLakeParserTest, snippet_140106) { printf("Press 'q' in graphic window to quit\n"); for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { const symphony_lake_dataset::Survey &survey = survey_vector.get(i); - for (int j = 0; j < static_cast(survey.getNumImages()); j++) { - // img_point contains all the csv data entries + for (int j = 0; j < static_cast(survey.getNumImages()); j++) { + // img_point contains all the csv data entries const symphony_lake_dataset::ImagePoint img_point = survey.getImagePoint(j); - (void)img_point; // suppress unused variable + (void)img_point; // suppress unused variable image = survey.loadImageByImageIndex(j); // get the target image @@ -40,7 +40,9 @@ TEST(SymphonyLakeParserTest, snippet_140106) { std::string target_img_name_str = target_img_name.str(); target_img_name_str.replace(0, target_img_name_str.size() - target_img_name_length, ""); std::filesystem::path target_img_dir = - SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() / SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" / target_img_name_str; + SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() / + SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" / + target_img_name_str; target_img = cv::imread(target_img_dir.string()); EXPECT_TRUE(SymphonyLakeDatasetTestHelper::images_equal(image, target_img)); @@ -55,7 +57,8 @@ TEST(SymphonyLakeParserTest, snippet_140106) { } TEST(SymphonyLakeParserTest, snippet_140106_generator_test) { DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - DataParser::Generator generator = (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator(); + DataParser::Generator generator = + (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator(); DataParser::Generator::iterator img_iter = generator.begin(); while (img_iter != generator.end()) { cv::Mat img = *img_iter; From 5366913a7a979030ff93c9ca2722896d18f09ce9 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 6 Feb 2025 12:34:39 -0500 Subject: [PATCH 07/45] progress --- .../structure_from_motion.cc | 30 ++++++++++++------- .../structure_from_motion.hh | 18 ++++++++--- .../structure_from_motion_test.cc | 14 ++++----- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 92e9bc0a7..64fa76e1f 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -69,7 +69,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) { Frontend::enforce_bijective_matches(matches); matches_.push_back(matches); - gtsam::Pose3 between_value = get_backend().estimate_pose( + gtsam::Pose3 between_value = get_backend().estimate_c0_c1( img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, matches, get_backend().get_K()); backend_.add_between_factor( @@ -307,15 +307,23 @@ void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 & // initial_estimate_.print("values after adding prior: "); } +template <> void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, - const gtsam::Pose3 &value) { + const gtsam::Pose3 &value, const gtsam::SharedNoiseModel model) { graph_.emplace_shared>(symbol_1, symbol_2, value, - pose_noise_); + model); // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << // std::endl; initial_estimate_.print("values when adding between factor: "); initial_estimate_.insert(symbol_2, initial_estimate_.at(symbol_1).compose(value)); } +template <> +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, + const gtsam::Rot3 &value, const gtsam::SharedNoiseModel &model) { + graph_.empalce_shared>(symbol_1, symbol_2, value, model); + initial_estimate_.insert() +} + void Backend::add_landmarks(const std::vector &landmarks) { for (const Landmark &landmark : landmarks) { add_landmark(landmark); @@ -324,7 +332,7 @@ void Backend::add_landmarks(const std::vector &landmarks) { void Backend::add_landmark(const Landmark &landmark) { graph_.emplace_shared>( - landmark.projection, measurement_noise_, landmark.cam_pose_symbol, + landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0)); if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) { initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess); @@ -333,7 +341,7 @@ void Backend::add_landmark(const Landmark &landmark) { } } -gtsam::Pose3 Backend::estimate_pose(const std::vector &kpts1, +gtsam::Pose3 Backend::estimate_c0_c1(const std::vector &kpts1, const std::vector &kpts2, const std::vector &matches, const gtsam::Cal3_S2 &K) { @@ -347,16 +355,16 @@ gtsam::Pose3 Backend::estimate_pose(const std::vector &kpts1, } cv::Mat cv_K = (cv::Mat_(3, 3) << K.fx(), K.skew(), K.px(), 0, K.fy(), K.py(), 0, 0, 1); cv::Mat E = cv::findEssentialMat(pts1, pts2, cv_K, cv::RANSAC, 0.999, 1.0); - cv::Mat R, t; - cv::recoverPose(E, pts1, pts2, cv_K, R, t); - gtsam::Point3 translation(t.at(0, 0), t.at(1, 0), t.at(2, 0)); - gtsam::Matrix3 mat_rot; + cv::Mat R_c1_c0, t_c1_c0; + cv::recoverPose(E, pts1, pts2, cv_K, R_c1_c0, t_c1_c0); + gtsam::Point3 t_c1_c0_eigen(t_c1_c0.at(0, 0), t_c1_c0.at(1, 0), t_c1_c0.at(2, 0)); + gtsam::Matrix3 R_c1_c0_eigen; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { - mat_rot(i, j) = R.at(i, j); + R_c1_c0_eigen(i, j) = R_c1_c0.at(i, j); } } - return gtsam::Pose3(gtsam::Rot3(mat_rot), translation); + return gtsam::Pose3(gtsam::Rot3(R_c1_c0_eigen), t_c1_c0_eigen).inverse(); } void Backend::solve_graph() { diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 991aa2f68..8c98e5b0e 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -66,6 +66,8 @@ class Frontend { class Backend { public: static constexpr char pose_symbol_char = 'x'; + static constexpr char pose_rot_symbol_char = 'r'; + static constexpr char pose_bearing_symbol_char = 'b'; static constexpr char landmark_symbol_char = 'l'; static constexpr char camera_symbol_char = 'k'; @@ -88,16 +90,20 @@ class Backend { ~Backend(){}; void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value); + + template void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, - const gtsam::Pose3 &value); + const T &value, const gtsam::SharedNoiseModel &model); + void add_landmarks(const std::vector &landmarks); void add_landmark(const Landmark &landmark); - gtsam::Pose3 estimate_pose(const std::vector &kpts1, + gtsam::Pose3 estimate_c0_c1(const std::vector &kpts1, const std::vector &kpts2, const std::vector &matches, const gtsam::Cal3_S2 &K); - void solve_graph(); + void solve_graph(); + const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; const gtsam::Values &get_result() const { return result_; }; const gtsam::Cal3_S2 &get_K() const { return K_; }; @@ -109,10 +115,14 @@ class Backend { gtsam::Values result_; gtsam::NonlinearFactorGraph graph_; - gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ = + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 1.0); gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); + gtsam::noiseModel::Isotropic::shared_ptr rotation_noise_ = + gtsam::noiseModel::Isotropic::Sigma(3, 0.1); + gtsam::noiseModel::Isotropic::shared_ptr pose_bearing_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 0.1); }; class StructureFromMotion { public: diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 80ae60737..170debd8e 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -199,19 +199,15 @@ TEST(GtsamTesting, sfm_test_estimate_pose) { std::cout << "kpts1.size(): " << kpts1.size() << std::endl; std::cout << "kpts2.size(): " << kpts2.size() << std::endl; - gtsam::Pose3 pose_diff_estimate = sfm.get_backend().estimate_pose(kpts1, kpts2, matches, *K); - gtsam::Pose3 pose_diff_groundtruth = T_world_pose0.between(T_world_pose1); - std::vector result_poses = {T_world_pose0, T_world_pose0 * pose_diff_estimate}; - std::cout << "pose_diff_estimate: " << pose_diff_estimate << std::endl; - std::cout << "pose_diff_ground_truth: " << pose_diff_groundtruth << std::endl; + gtsam::Pose3 T_pose0_pose1_estimate = sfm.get_backend().estimate_c0_c1(kpts1, kpts2, matches, *K); + gtsam::Pose3 T_pose0_pose1 = T_world_pose0.between(T_world_pose1); + std::vector result_poses = {T_world_pose0, T_world_pose0 * T_pose0_pose1_estimate}; + std::cout << "pose_diff_estimate: " << T_pose0_pose1_estimate << std::endl; + std::cout << "pose_diff_ground_truth: " << T_pose0_pose1 << std::endl; isometries.emplace_back(result_poses.back().matrix()); robot::geometry::opencv_viz::viz_scene(isometries, cube_W); - // std::cout << result_poses[1] << std::endl; - // std::cout << pose1 << std::endl; - // std::cout << pose0 << std::endl; - constexpr double TOL = 1e-3; for (size_t i = 0; i < poses.size(); i++) { gtsam::Pose3 result_pose = result_poses[i]; From 6313e8750f8ee494cafbfae931d052dd35ea3c19 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 7 Mar 2025 01:54:20 -0500 Subject: [PATCH 08/45] hard coded symphony intrinsics and distortion, added some commons, did some refactoring, debugged some Backend issues. Next step: properly put cameras in graph in the world frame. --- common/geometry/BUILD | 50 ++++- common/geometry/camera.cc | 45 ++++ common/geometry/camera.hh | 27 +++ common/geometry/camera_test.cc | 131 +++++++++++ common/geometry/opencv_viz.cc | 39 ++-- common/geometry/opencv_viz.hh | 7 +- common/geometry/opencv_viz_test.cc | 28 +-- common/geometry/translate_types.cc | 34 +++ common/geometry/translate_types.hh | 10 + common/geometry/translate_types_test.cc | 37 ++++ experimental/learn_descriptors/BUILD | 33 ++- .../structure_from_motion.cc | 204 ++++++++---------- .../structure_from_motion.hh | 65 +++--- .../structure_from_motion_test.cc | 172 +++------------ .../learn_descriptors/symphony_lake_parser.hh | 8 + 15 files changed, 531 insertions(+), 359 deletions(-) create mode 100644 common/geometry/camera.cc create mode 100644 common/geometry/camera.hh create mode 100644 common/geometry/camera_test.cc create mode 100644 common/geometry/translate_types.cc create mode 100644 common/geometry/translate_types.hh create mode 100644 common/geometry/translate_types_test.cc diff --git a/common/geometry/BUILD b/common/geometry/BUILD index 0a28b1e20..319c5d930 100644 --- a/common/geometry/BUILD +++ b/common/geometry/BUILD @@ -21,12 +21,13 @@ cc_test( cc_library( name = "opencv_viz", - hdrs = ["opencv_viz.hh"], + hdrs = ["opencv_viz.hh"], srcs = ["opencv_viz.cc"], visibility = ["//visibility:public"], deps = [ "@eigen", - "@opencv//:opencv" + "@opencv//:opencv", + ":translate_types" ] ) @@ -46,3 +47,48 @@ cc_binary( "@opencv//:opencv" ] ) + +cc_library( + name = "camera", + hdrs = ["camera.hh"], + srcs = ["camera.cc"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + "@gtsam//:gtsam", + "@opencv//:opencv", + ":translate_types" + ] +) + +cc_test( + name = "camera_test", + srcs = ["camera_test.cc"], + deps = [ + ":camera", + "@com_google_googletest//:gtest_main", + ":translate_types", + ":opencv_viz" + ] +) + +cc_library( + name = "translate_types", + hdrs = ["translate_types.hh"], + srcs = ["translate_types.cc"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + "@opencv//:opencv" + ] +) + +cc_test( + name = "translate_types_test", + srcs = ["translate_types_test.cc"], + visibility = ["//visibility:public"], + deps = [ + ":translate_types", + "@com_google_googletest//:gtest_main" + ] +) \ No newline at end of file diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc new file mode 100644 index 000000000..b13718570 --- /dev/null +++ b/common/geometry/camera.cc @@ -0,0 +1,45 @@ +#include "common/geometry/camera.hh" + +#include "common/geometry/translate_types.hh" + +namespace robot::geometry { +Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic) { + Eigen::Matrix3d K; + K << intrinsic.fx(), intrinsic.skew(), intrinsic.px(), 0, intrinsic.fy(), intrinsic.py(), 0, 0, + 1; + return K; +} + +Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point) { + return K * p_cam_point; + // return (K * p_cam_point).head<2>(); +} + +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog) { + return K.inverse() * pixel_homog; +} + +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, + const double depth) { + return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.); +} + +Eigen::Isometry3d estimate_c0_c1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, const cv::Mat &K) { + Eigen::Isometry3d result; + std::vector pts1; + std::vector pts2; + for (const cv::DMatch &match : matches) { + pts1.push_back(kpts0[match.queryIdx].pt); + pts2.push_back(kpts1[match.trainIdx].pt); + } + cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0); + cv::Mat R_c1_c0, t_c1_c0; + cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0); + result.linear() = cv_to_eigen_mat(R_c1_c0); + result.translation() = cv_to_eigen_mat(t_c1_c0); + result = result.inverse(); + return result; +} +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh new file mode 100644 index 000000000..82b74c818 --- /dev/null +++ b/common/geometry/camera.hh @@ -0,0 +1,27 @@ +#pragma once +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "gtsam/geometry/Cal3_S2.h" +#include "opencv2/opencv.hpp" + +namespace robot::geometry { +Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic); + +/// @return Homogeneous pixel projection coordinates +Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point); +/** + * @param pixel_homog homogeneous pixel cordinates 3x1 + * @return 3d coordinate in camera frame + */ +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog); +/** + * @param pixel_inhomog inhomogeneous pixel cordinates 2x1 + * @return 3d coordinate in camera frame + */ +Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, + const double depth); + +Eigen::Isometry3d estimate_c0_c1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, const cv::Mat &K); +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc new file mode 100644 index 000000000..1c4923b82 --- /dev/null +++ b/common/geometry/camera_test.cc @@ -0,0 +1,131 @@ +#include "common/geometry/camera.hh" + +#include "common/geometry/opencv_viz.hh" +#include "common/geometry/translate_types.hh" +#include "gtest/gtest.h" + +class CameraTestHelper { + public: + static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { + return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height; + } +}; + +namespace robot::geometry { +TEST(CameraTest, test_get_intrinsics) { + const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5; + const gtsam::Cal3_S2 K(fx, fy, s, px, py); + const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K); + + EXPECT_FLOAT_EQ(fx, intrinsic_mat(0, 0)); + EXPECT_FLOAT_EQ(fy, intrinsic_mat(1, 1)); + EXPECT_FLOAT_EQ(s, intrinsic_mat(0, 1)); + EXPECT_FLOAT_EQ(px, intrinsic_mat(0, 2)); + EXPECT_FLOAT_EQ(py, intrinsic_mat(1, 2)); +} + +TEST(CameraTest, test_projection) { + const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5; + const gtsam::Cal3_S2 K(fx, fy, s, px, py); + const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K); + + const Eigen::Vector3d p_cam_lmk1(0.1, 0.15, 1.2); + const Eigen::Vector3d pxl_homog_cam_lmk1 = project(intrinsic_mat, p_cam_lmk1); + const Eigen::Vector2d pxl_inhomog_cam_lmk1 = + pxl_homog_cam_lmk1.head<2>() / pxl_homog_cam_lmk1(2); + const Eigen::Vector3d p_cam_lmk1_deproject = deproject(intrinsic_mat, pxl_homog_cam_lmk1); + const Eigen::Vector3d p_cam_lmk1_deproject_inhomog = + deproject(intrinsic_mat, pxl_inhomog_cam_lmk1, p_cam_lmk1(2)); + + EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject)); + EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject_inhomog)); +} + +TEST(CameraTest, test_estimate_pose) { + std::vector p_W_cube; + float cube_size = 1.0f; + p_W_cube.push_back(Eigen::Vector3d(0, 0, 0)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, 0)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, 0)); + p_W_cube.push_back(Eigen::Vector3d(0, cube_size, 0)); + p_W_cube.push_back(Eigen::Vector3d(0, 0, cube_size)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, cube_size)); + p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); + p_W_cube.push_back(Eigen::Vector3d(0, cube_size, cube_size)); + + Eigen::Matrix3d R_new_points( + Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + + const int initial_size = p_W_cube.size(); + const Eigen::Vector3d p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2); + for (const Eigen::Vector3d &point_W_cube : p_W_cube) { + p_W_cube.emplace_back(R_new_points * (point_W_cube - p_world_cube_center) + + p_world_cube_center); + } + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + cv::Mat K = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + + Eigen::Matrix3d K_eig = cv_to_eigen_mat(K); + std::vector poses; + + Eigen::Matrix3d R_world_cam0( + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + Eigen::Isometry3d T_world_cam0 = Eigen::Isometry3d::Identity(); + T_world_cam0.linear() = R_world_cam0; + T_world_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2); + poses.push_back(T_world_cam0); + + Eigen::Matrix3d R_world_cam0_to_cam1( + Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix()); + Eigen::Isometry3d T_world_cam1; + T_world_cam1.linear() = R_world_cam0_to_cam1 * T_world_cam0.linear(); + T_world_cam1.translation() = + R_world_cam0_to_cam1 * (T_world_cam0.translation() - p_world_cube_center) + + p_world_cube_center; + poses.push_back(T_world_cam1); + + std::vector kpts0; + std::vector kpts1; + std::vector matches; + + for (const Eigen::Vector3d &point_W_cube : p_W_cube) { + Eigen::Vector3d p_cam0_cubep = + (T_world_cam0.inverse() * + Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.)) + .head<3>(); + Eigen::Vector3d p_cam1_cubep = + (T_world_cam1.inverse() * + Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.)) + .head<3>(); + Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cam0_cubep; + Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cam1_cubep; + Eigen::Vector2d pxl_c0_pcube = pxl_c0_pcube_homog.head<2>() / pxl_c0_pcube_homog(2); + Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2); + if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) && + CameraTestHelper::pixel_in_range(pxl_c1_pcube, img_width, img_height)) { + kpts0.push_back(cv::KeyPoint(pxl_c0_pcube[0], pxl_c0_pcube[1], 3)); + kpts1.push_back(cv::KeyPoint(pxl_c1_pcube[0], pxl_c1_pcube[1], 3)); + matches.emplace_back(cv::DMatch(kpts0.size() - 1, kpts1.size() - 1, 0)); + } + } + + Eigen::Isometry3d T_cam0_cam1_estimate = estimate_c0_c1(kpts0, kpts1, matches, K); + Eigen::Isometry3d T_cam0_cam1_scale = T_world_cam0.inverse() * T_world_cam1; + T_cam0_cam1_scale.translation() /= T_cam0_cam1_scale.translation().norm(); + EXPECT_TRUE( + T_cam0_cam1_estimate.translation().isApprox(T_cam0_cam1_scale.translation(), 0.000001)); + EXPECT_TRUE(T_cam0_cam1_estimate.linear().isApprox(T_cam0_cam1_scale.linear(), 0.001)); + + poses.emplace_back(T_world_cam0 * T_cam0_cam1_estimate); + // viz_scene(poses, p_W_cube); +} +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/opencv_viz.cc b/common/geometry/opencv_viz.cc index 48675e239..66df62a9a 100644 --- a/common/geometry/opencv_viz.cc +++ b/common/geometry/opencv_viz.cc @@ -2,23 +2,9 @@ #include -namespace robot::geometry::opencv_viz { -template -cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase &eigenMatrix) { - int cvType; - if constexpr (std::is_same_v) { - cvType = CV_64F; - } else if constexpr (std::is_same_v) { - cvType = CV_32F; - } else if constexpr (std::is_same_v) { - cvType = CV_32S; - } else { - static_assert(!std::is_same_v, "Unsupported data type in Eigen matrix"); - } - return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, - const_cast(static_cast(eigenMatrix.derived().data()))) - .clone(); -} +#include "common/geometry/translate_types.hh" + +namespace robot::geometry { cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { // Ensure R is a valid rotation matrix CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001); @@ -37,27 +23,30 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { return axis * theta; } -void viz_scene(const std::vector &poses, - const std::vector &points, const bool show_grid) { +void viz_scene(const std::vector &poses_world, + const std::vector &points_world, const bool show_grid, + const bool show_origin) { cv::viz::Viz3d window("Viz Scene"); constexpr double pose_size = .5; - for (unsigned int i = 0; i < poses.size(); i++) { - cv::Affine3d cv_pose(eigen_to_cv_mat(Eigen::Matrix4d(poses[i].matrix().transpose()))); + for (unsigned int i = 0; i < poses_world.size(); i++) { + cv::Affine3d cv_pose(eigen_mat_to_cv(Eigen::Matrix4d(poses_world[i].matrix()))); window.showWidget("rigid_transform_" + std::to_string(i), cv::viz::WCoordinateSystem(pose_size), cv_pose); } constexpr double point_radius = 0.08; constexpr int sphere_res = 10; const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); - for (unsigned int i = 0; i < points.size(); i++) { - const Eigen::Vector3d &point = points[i]; + for (unsigned int i = 0; i < points_world.size(); i++) { + const Eigen::Vector3d &point = points_world[i]; window.showWidget("point_" + std::to_string(i), cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius, sphere_res, point_color)); } - window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + if (show_origin) { + window.showWidget("world_frame", cv::viz::WCoordinateSystem()); + } if (show_grid) { constexpr unsigned int num_cells = 6; @@ -76,4 +65,4 @@ void viz_scene(const std::vector &poses, window.spin(); } -} // namespace robot::geometry::opencv_viz +} // namespace robot::geometry diff --git a/common/geometry/opencv_viz.hh b/common/geometry/opencv_viz.hh index d5530ecef..cdfcfe857 100644 --- a/common/geometry/opencv_viz.hh +++ b/common/geometry/opencv_viz.hh @@ -5,7 +5,8 @@ #include "Eigen/Geometry" #include "opencv2/viz.hpp" -namespace robot::geometry::opencv_viz { -void viz_scene(const std::vector &poses, - const std::vector &points, const bool show_grid = true); +namespace robot::geometry { +void viz_scene(const std::vector &poses_world, + const std::vector &points_world, const bool show_grid = true, + const bool show_origin = true); } \ No newline at end of file diff --git a/common/geometry/opencv_viz_test.cc b/common/geometry/opencv_viz_test.cc index e5e8265af..3d618cc21 100644 --- a/common/geometry/opencv_viz_test.cc +++ b/common/geometry/opencv_viz_test.cc @@ -2,7 +2,7 @@ #include "gtest/gtest.h" -namespace robot::geometry::opencv_viz { +namespace robot::geometry { namespace { bool is_test() { return std::getenv("BAZEL_TEST") != nullptr && @@ -68,22 +68,22 @@ TEST(OpencvVizTest, cube_test) { std::vector poses; - Eigen::Matrix3d rotation0( - Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * - Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - Eigen::Isometry3d pose0; - pose0.translation() = Eigen::Vector3d(4, 0, 0); - pose0.linear() = rotation0; - poses.push_back(pose0); + Eigen::Matrix3d R_W_cam0( + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + Eigen::Isometry3d T_W_cam0; + T_W_cam0.translation() = Eigen::Vector3d(4, 0, 0); + T_W_cam0.linear() = R_W_cam0; + poses.push_back(T_W_cam0); - Eigen::Isometry3d pose1; - pose1.linear() = - Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0; - pose1.translation() = Eigen::Vector3d(0, 4, 0); - poses.push_back(pose1); + Eigen::Isometry3d T_W_cam1; + T_W_cam1.linear() = + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * R_W_cam0; + T_W_cam1.translation() = Eigen::Vector3d(0, 4, 0); + poses.push_back(T_W_cam1); if (!is_test()) { viz_scene(poses, cube_W); } } -} // namespace robot::geometry::opencv_viz \ No newline at end of file +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/translate_types.cc b/common/geometry/translate_types.cc new file mode 100644 index 000000000..8f940c348 --- /dev/null +++ b/common/geometry/translate_types.cc @@ -0,0 +1,34 @@ +#include "common/geometry/translate_types.hh" + +namespace robot::geometry { + +cv::Mat eigen_mat_to_cv(const Eigen::MatrixXd &matrix) { + cv::Mat result(matrix.rows(), matrix.cols(), CV_64F); // Ensure correct type (double precision) + for (int i = 0; i < matrix.rows(); i++) { + for (int j = 0; j < matrix.cols(); j++) { + result.at(i, j) = matrix(i, j); + } + } + return result; +} + +cv::Mat eigen_vec_to_cv(const Eigen::VectorXd &vector) { + // cols should always be 1 + cv::Mat result(vector.rows(), vector.cols(), CV_64F); + for (int i = 0; i < vector.rows(); i++) { + result.at(i, 0) = vector(i); + } + return result; +} + +Eigen::MatrixXd cv_to_eigen_mat(const cv::Mat &matrix) { + Eigen::MatrixXd result = Eigen::MatrixXd::Zero(matrix.rows, matrix.cols); + for (int i = 0; i < matrix.rows; i++) { + for (int j = 0; j < matrix.cols; j++) { + result(i, j) = matrix.at(i, j); + } + } + return result; +} + +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/translate_types.hh b/common/geometry/translate_types.hh new file mode 100644 index 000000000..6be817c13 --- /dev/null +++ b/common/geometry/translate_types.hh @@ -0,0 +1,10 @@ +#pragma once +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "opencv2/opencv.hpp" + +namespace robot::geometry { +cv::Mat eigen_mat_to_cv(const Eigen::MatrixXd &matrix); +cv::Mat eigen_vec_to_cv(const Eigen::VectorXd &vector); +Eigen::MatrixXd cv_to_eigen_mat(const cv::Mat &matrix); +} // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/translate_types_test.cc b/common/geometry/translate_types_test.cc new file mode 100644 index 000000000..67342ba21 --- /dev/null +++ b/common/geometry/translate_types_test.cc @@ -0,0 +1,37 @@ +#include "common/geometry/translate_types.hh" + +#include "gtest/gtest.h" + +namespace robot::geometry { +TEST(TranslateTypesTest, eigen_mat_and_cv) { + int rows = 5, cols = 5; + Eigen::MatrixXd mat_eig = Eigen::MatrixXd::Random(rows, cols); + cv::Mat mat_cv_from_eig = eigen_mat_to_cv(mat_eig); + Eigen::MatrixXd mat_eig_from_cv = cv_to_eigen_mat(mat_cv_from_eig); + EXPECT_TRUE(static_cast(mat_eig.rows()) == mat_cv_from_eig.rows); + EXPECT_TRUE(static_cast(mat_eig.cols()) == mat_cv_from_eig.cols); + EXPECT_TRUE(static_cast(mat_eig.rows()) == static_cast(mat_eig_from_cv.rows())); + EXPECT_TRUE(static_cast(mat_eig.cols()) == static_cast(mat_eig_from_cv.cols())); + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + EXPECT_DOUBLE_EQ(mat_eig(i, j), mat_cv_from_eig.at(i, j)); + EXPECT_DOUBLE_EQ(mat_eig(i, j), mat_eig_from_cv(i, j)); + } + } +} + +TEST(TranslateTypesTest, eigen_vec_and_cv) { + int cols = 5, rows = 1; + Eigen::VectorXd vec_eig = Eigen::VectorXd::Random(5); + cv::Mat mat_cv_from_eig = eigen_vec_to_cv(vec_eig); + Eigen::VectorXd vec_eig_from_cv = Eigen::VectorXd(cv_to_eigen_mat(mat_cv_from_eig)); + EXPECT_TRUE(static_cast(vec_eig.rows()) == mat_cv_from_eig.rows); + EXPECT_TRUE(static_cast(vec_eig.cols()) == mat_cv_from_eig.cols); + EXPECT_TRUE(static_cast(vec_eig.rows()) == static_cast(vec_eig_from_cv.rows())); + EXPECT_TRUE(static_cast(vec_eig.cols()) == static_cast(vec_eig_from_cv.cols())); + for (int i = 0; i < cols; i++) { + EXPECT_DOUBLE_EQ(vec_eig(i), mat_cv_from_eig.at(i, 0)); + EXPECT_DOUBLE_EQ(vec_eig(i), vec_eig_from_cv(i, 0)); + } +} +} // namespace robot::geometry \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index da4ec0163..2a2e1b14b 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -25,7 +25,22 @@ cc_library( "@opencv//:opencv", "@gtsam//:gtsam", "@eigen", - "@nlohmann_json//:json", + "//common/geometry:camera", + "//common/geometry:translate_types" + ] +) + +cc_test( + name = "structure_from_motion_test", + srcs = ["structure_from_motion_test.cc"], + copts = [ + "-Wno-unused-parameter", + ], + deps = [ + "@com_google_googletest//:gtest_main", + ":symphony_lake_parser", + ":structure_from_motion", + "//common/geometry:opencv_viz" ] ) @@ -46,26 +61,10 @@ cc_library( ] ) -cc_test( - name = "structure_from_motion_test", - srcs = ["structure_from_motion_test.cc"], - copts = [ - "-Wno-unused-parameter", - ], - deps = [ - "@com_google_googletest//:gtest_main", - ":symphony_lake_parser", - ":structure_from_motion", - "//common/math:matrix_to_proto", - "//common/geometry:opencv_viz" - ] -) - cc_test( name = "symphony_lake_parser_test", srcs = ["symphony_lake_parser_test.cc"], copts = ["-Wno-unused-parameter"], - # data = ["@symphony_lake_snippet"], deps = [ "@com_google_googletest//:gtest_main", ":symphony_lake_parser", diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 64fa76e1f..a70df5c03 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -2,9 +2,19 @@ #include #include +#include + +#include "common/geometry/camera.hh" +#include "common/geometry/translate_types.hh" +#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/slam/BetweenFactor.h" +#include "gtsam/slam/PriorFactor.h" +#include "gtsam/slam/ProjectionFactor.h" namespace fs = std::filesystem; +namespace geom = robot::geometry; + namespace robot::experimental::learn_descriptors { const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() { @@ -12,20 +22,6 @@ const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() { transform.translate(Eigen::Vector3d::Zero()); transform.rotate(Eigen::Matrix3d(Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1, 0, 0)) * Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)))); - - // std::stringstream ss; - // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()), - // gtsam::Point3(transform.translation())); json json_obj; json_obj["T_symlake_boat_cam"] = - // ss.str(); fs::path output_dir = "output"; fs::path output_file = output_dir / - // "output_sfm.json"; if (!fs::exists(output_dir)) { - // fs::create_directory(output_dir); - // } - // std::ofstream file(output_file); - // if (file.is_open()) { - // file << json_obj.dump(4); - // file.close(); - // } - return transform; }(); @@ -35,6 +31,9 @@ std::string pose_to_string(gtsam::Pose3 pose) { return ss.str(); } +// const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() { +// Eigen::Isometry3d +// }(); const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation()); // gtsam::Pose3( @@ -46,13 +45,16 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose = // ); StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, - gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose, + gtsam::Cal3_S2 K, Eigen::Matrix D, + gtsam::Pose3 initial_pose, Frontend::MatcherType frontend_matcher) { frontend_ = Frontend(frontend_extractor, frontend_matcher); backend_ = Backend(K); + K_ = (cv::Mat_(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1); + D_ = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); + set_initial_pose(initial_pose); - // backend_.get_current_initial_values().print("Ooga: "); } void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { @@ -60,49 +62,36 @@ void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { } void StructureFromMotion::add_image(const cv::Mat &img) { + cv::Mat img_undistorted; + cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); landmarks_.push_back(std::vector()); if (get_num_images_added() > 0) { - std::vector matches = frontend_.get_matches( - img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); - Frontend::enforce_bijective_matches(matches); - - matches_.push_back(matches); - gtsam::Pose3 between_value = get_backend().estimate_c0_c1( - img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, matches, - get_backend().get_K()); + std::vector matches = + get_matches(img_keypoints_and_descriptors_.back().second, + keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); + // std::vector matches = frontend_.get_matches( + // img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); + // Frontend::enforce_bijective_matches(matches); + // matches_.push_back(matches); + gtsam::SharedNoiseModel pose_noise_model = gtsam::noiseModel::Diagonal::Sigmas( + (gtsam::Vector6() << 0.1, 0.1, 0.1, 0.01, 0.01, 0.01).finished()); + gtsam::Pose3 between_value( + geom::estimate_c0_c1( + img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, + matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))) + .matrix()); backend_.add_between_factor( gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value); + gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value, + pose_noise_model); gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0, 0, 1)); gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at( gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1)); gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at( gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added())); - int count = 0; for (const cv::DMatch match : matches) { - if (count < 1) { - // std::cout << "hello???" << std::endl; - json json_obj; - json_obj["add_image_1"] = { - {"T_world_cam1", pose_to_string(T_world_cam1)}, - {"T_world_cam2", pose_to_string(T_world_cam1)}, - {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)}, - {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}}; - fs::path output_dir = "output"; - fs::path output_file = output_dir / "output_sfm.json"; - if (!fs::exists(output_dir)) { - fs::create_directory(output_dir); - } - std::ofstream file(output_file); - if (file.is_open()) { - file << json_obj.dump(4); - file.close(); - } - } - count++; - // std::cout << "T_world_landmark" << T_world_cam1 * T_cam_landmark << std::endl; Backend::Landmark landmark_cam_1( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), @@ -111,14 +100,14 @@ void StructureFromMotion::add_image(const cv::Mat &img) { img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), static_cast( img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), - (T_world_cam1 * T_cam_landmark).translation()); + backend_.get_K(), 3.0); Backend::Landmark landmark_cam_2( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), - (T_world_cam2 * T_cam_landmark).translation()); + backend_.get_K(), 3.0); landmarks_[get_num_images_added() - 1].push_back(landmark_cam_1); landmarks_[get_num_images_added()].push_back(landmark_cam_2); backend_.add_landmark(landmark_cam_1); @@ -134,6 +123,17 @@ void StructureFromMotion::add_image(const cv::Mat &img) { img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); } +std::vector StructureFromMotion::get_matches( + const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, + std::optional post_process_func) { + std::vector matches = frontend_.get_matches(descriptors_1, descriptors_2); + if (post_process_func) { + (*post_process_func)(matches); + } + matches_.push_back(matches); + return matches; +} + Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { extractor_type_ = frontend_algorithm; matcher_type_ = frontend_matcher; @@ -292,36 +292,42 @@ Backend::Backend() { const double cy = img_height / 2.0; gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - + K_ = boost::make_shared(K); initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); } Backend::Backend(gtsam::Cal3_S2 K) { + K_ = boost::make_shared(K); initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); } void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) { graph_.emplace_shared>(symbol, value, pose_noise_); - initial_estimate_.insert(symbol, value); + initial_estimate_.insert_or_assign(symbol, value); // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; // initial_estimate_.print("values after adding prior: "); } template <> -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, - const gtsam::Pose3 &value, const gtsam::SharedNoiseModel model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, - model); +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, + const gtsam::Symbol &symbol_2, + const gtsam::Pose3 &value, + const gtsam::SharedNoiseModel &model) { + graph_.emplace_shared>(symbol_1, symbol_2, value, model); // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << // std::endl; initial_estimate_.print("values when adding between factor: "); - initial_estimate_.insert(symbol_2, initial_estimate_.at(symbol_1).compose(value)); + initial_estimate_.insert_or_assign(symbol_2, + initial_estimate_.at(symbol_1).compose(value)); } template <> -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, - const gtsam::Rot3 &value, const gtsam::SharedNoiseModel &model) { - graph_.empalce_shared>(symbol_1, symbol_2, value, model); - initial_estimate_.insert() +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, + const gtsam::Symbol &symbol_2, + const gtsam::Rot3 &value, + const gtsam::SharedNoiseModel &model) { + graph_.emplace_shared>(symbol_1, symbol_2, value, model); + initial_estimate_.insert_or_assign(symbol_2, + initial_estimate_.at(symbol_1).compose(value)); } void Backend::add_landmarks(const std::vector &landmarks) { @@ -331,74 +337,32 @@ void Backend::add_landmarks(const std::vector &landmarks) { } void Backend::add_landmark(const Landmark &landmark) { - graph_.emplace_shared>( - landmark.projection, landmark_noise_, landmark.cam_pose_symbol, - landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0)); + // graph_.emplace_shared>( + // landmark.projection, landmark_noise_, landmark.cam_pose_symbol, + // landmark.lmk_factor_symbol, K_); + auto factor = boost::make_shared>( + landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, + K_); + // Add the factor to the graph. + graph_.add(factor); + if (!initial_estimate_.exists(landmark.cam_pose_symbol)) { + throw std::runtime_error( + "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before " + "add_landmark is called."); + } + gtsam::Point3 p_world_lmk_estimate = + initial_estimate_.at(landmark.cam_pose_symbol) + .transformTo(landmark.p_cam_lmk_guess); if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) { - initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess); + initial_estimate_.insert(landmark.lmk_factor_symbol, p_world_lmk_estimate); } else { - initial_estimate_.update(landmark.lmk_factor_symbol, landmark.initial_guess); + initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); } } -gtsam::Pose3 Backend::estimate_c0_c1(const std::vector &kpts1, - const std::vector &kpts2, - const std::vector &matches, - const gtsam::Cal3_S2 &K) { - std::vector pts1; - std::vector pts2; - for (const cv::DMatch &match : matches) { - std::cout << "query point: " << kpts1[match.queryIdx].pt << std::endl; - std::cout << "trian point: " << kpts2[match.trainIdx].pt << std::endl; - pts1.push_back(kpts1[match.queryIdx].pt); - pts2.push_back(kpts2[match.trainIdx].pt); - } - cv::Mat cv_K = (cv::Mat_(3, 3) << K.fx(), K.skew(), K.px(), 0, K.fy(), K.py(), 0, 0, 1); - cv::Mat E = cv::findEssentialMat(pts1, pts2, cv_K, cv::RANSAC, 0.999, 1.0); - cv::Mat R_c1_c0, t_c1_c0; - cv::recoverPose(E, pts1, pts2, cv_K, R_c1_c0, t_c1_c0); - gtsam::Point3 t_c1_c0_eigen(t_c1_c0.at(0, 0), t_c1_c0.at(1, 0), t_c1_c0.at(2, 0)); - gtsam::Matrix3 R_c1_c0_eigen; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - R_c1_c0_eigen(i, j) = R_c1_c0.at(i, j); - } - } - return gtsam::Pose3(gtsam::Rot3(R_c1_c0_eigen), t_c1_c0_eigen).inverse(); -} - void Backend::solve_graph() { gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); result_ = optimizer.optimize(); } -// json SFM_Logger::gtsam_pose3_to_json(const gtsam::Pose3 &pose) { -// json json_obj; -// json_obj = { -// {}, - -// }; - -// return json_obj; -// } - -// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path -// &output_path) { -// json json_obj; -// json_obj["add_image_1"] = { -// {"T_world_cam1", pose_to_string(T_world_cam1)}, -// {"T_world_cam2", pose_to_string(T_world_cam1)}, -// {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)}, -// {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)} -// }; -// if (!fs::exists(output_path)) { -// fs::create_directory(output_path); -// } -// std::ofstream file(output_file); -// if (file.is_open()) { -// file << json_obj.dump(4); -// file.close(); -// } -// } - } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 8c98e5b0e..a1fb2ce27 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -6,20 +6,15 @@ #include #include "Eigen/Core" +#include "common/geometry/camera.hh" #include "gtsam/geometry/Point3.h" #include "gtsam/geometry/Pose3.h" #include "gtsam/inference/Symbol.h" -#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/linear/NoiseModel.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" #include "gtsam/nonlinear/Values.h" -#include "gtsam/slam/BetweenFactor.h" -#include "gtsam/slam/GeneralSFMFactor.h" -#include "gtsam/slam/PriorFactor.h" -#include "gtsam/slam/ProjectionFactor.h" -#include "nlohmann/json.hpp" #include "opencv2/opencv.hpp" -using json = nlohmann::json; - namespace robot::experimental::learn_descriptors { class Frontend { public: @@ -73,16 +68,17 @@ class Backend { struct Landmark { Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, - const gtsam::Point2 &projection, - const gtsam::Point3 &initial_guess = gtsam::Point3::Identity()) + const gtsam::Point2 &projection, const gtsam::Cal3_S2 K, + float initial_depth_guess = 5.0) : lmk_factor_symbol(lmk_factor_symbol), cam_pose_symbol(cam_pose_symbol), projection(projection), - initial_guess(initial_guess){}; + p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K), + projection, initial_depth_guess)){}; const gtsam::Symbol lmk_factor_symbol; const gtsam::Symbol cam_pose_symbol; const gtsam::Point2 projection; - const gtsam::Point3 initial_guess; + const gtsam::Point3 p_cam_lmk_guess; }; Backend(); @@ -95,21 +91,17 @@ class Backend { void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const T &value, const gtsam::SharedNoiseModel &model); - void add_landmarks(const std::vector &landmarks); void add_landmark(const Landmark &landmark); - gtsam::Pose3 estimate_c0_c1(const std::vector &kpts1, - const std::vector &kpts2, - const std::vector &matches, const gtsam::Cal3_S2 &K); - void solve_graph(); + void solve_graph(); const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; const gtsam::Values &get_result() const { return result_; }; - const gtsam::Cal3_S2 &get_K() const { return K_; }; + const gtsam::Cal3_S2 &get_K() const { return *K_; }; private: - gtsam::Cal3_S2 K_; + gtsam::Cal3_S2::shared_ptr K_; gtsam::Values initial_estimate_; gtsam::Values result_; @@ -118,17 +110,26 @@ class Backend { gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 1.0); gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = - gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1)); - gtsam::noiseModel::Isotropic::shared_ptr rotation_noise_ = - gtsam::noiseModel::Isotropic::Sigma(3, 0.1); - gtsam::noiseModel::Isotropic::shared_ptr pose_bearing_noise_ = - gtsam::noiseModel::Isotropic::Sigma(2, 0.1); + gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); }; + +template <> +void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, + const gtsam::Pose3 &, + const gtsam::SharedNoiseModel &); +template <> +void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, + const gtsam::Rot3 &, const gtsam::SharedNoiseModel &); + class StructureFromMotion { public: static const Eigen::Affine3d T_symlake_boat_cam; static const gtsam::Pose3 default_initial_pose; + /** + * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3) + */ StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, + Eigen::Matrix D, gtsam::Pose3 initial_pose = default_initial_pose, Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); ~StructureFromMotion(){}; @@ -137,11 +138,16 @@ class StructureFromMotion { void add_image(const cv::Mat &img); void solve_structure() { backend_.solve_graph(); }; const gtsam::Values &get_structure_result() { return backend_.get_result(); }; + using MatchFunction = std::function &)>; + std::vector get_matches( + const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, + std::optional post_process_func = std::nullopt); Frontend get_frontend() { return frontend_; }; Backend get_backend() { return backend_; } size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); }; size_t get_landmark_count() { return landmark_count_; }; + const std::vector> get_matches() { return matches_; }; private: std::vector, cv::Mat>> img_keypoints_and_descriptors_; @@ -149,15 +155,10 @@ class StructureFromMotion { std::vector> landmarks_; size_t landmark_count_ = 0; + cv::Mat K_; + cv::Mat D_; + Frontend frontend_; Backend backend_; }; -// class SFM_Logger { -// public: -// // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose); -// static const json eigen_mat_to_json(const Eigen::Matrix &mat); -// static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose); -// static const void values_to_json(const gtsam::Values &values, const std::filesystem::path -// &file); -// }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 170debd8e..82e4b84da 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -5,7 +5,6 @@ #include "Eigen/Geometry" #include "common/geometry/opencv_viz.hh" -#include "common/math/matrix_to_proto.hh" #include "experimental/learn_descriptors/symphony_lake_parser.hh" #include "gtest/gtest.h" @@ -76,8 +75,8 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { // cv::Mat img_display_test; for (Frontend::ExtractorType extractor_type : extractor_types) { for (Frontend::MatcherType matcher_type : matcher_types) { - printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), - static_cast(matcher_type)); + // printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), + // static_cast(matcher_type)); try { frontend = Frontend(extractor_type, matcher_type); } catch (const std::invalid_argument &e) { @@ -105,8 +104,9 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { // std::cout << "Press spacebar to pause." << std::endl; // while (cv::waitKey(1000) == 32) { // } - printf("completed frontend combination: (%d, %d)\n", static_cast(extractor_type), - static_cast(matcher_type)); + // printf("completed frontend combination: (%d, %d)\n", + // static_cast(extractor_type), + // static_cast(matcher_type)); if (extractor_type != Frontend::ExtractorType::ORB) { // don't check ORB for now for (const cv::DMatch match : matches) { EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - @@ -121,103 +121,6 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { } } -TEST(GtsamTesting, sfm_test_estimate_pose) { - std::vector cube_W; - float cube_size = 1.0f; - cube_W.push_back(gtsam::Point3(0, 0, 0)); - cube_W.push_back(gtsam::Point3(cube_size, 0, 0)); - cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0)); - cube_W.push_back(gtsam::Point3(0, cube_size, 0)); - cube_W.push_back(gtsam::Point3(0, 0, cube_size)); - cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size)); - cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size)); - cube_W.push_back(gtsam::Point3(0, cube_size, cube_size)); - - Eigen::Matrix3d R_new_points( - Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * - Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - - const int initial_size = cube_W.size(); - const gtsam::Point3 p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2); - for (int i = 0; i < initial_size; i++) { - cube_W.emplace_back( - R_new_points * (cube_W[i] - p_world_cube_center) + p_world_cube_center); - } - - const size_t img_width = 640; - const size_t img_height = 480; - const double fx = 500.0; - const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; - - gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, *K); - - std::vector poses; - std::vector> cameras; - - Eigen::Matrix3d R_world_pose0( - Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * - Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - gtsam::Pose3 T_world_pose0(gtsam::Rot3(R_world_pose0), - gtsam::Point3(4, cube_size / 2, cube_size / 2)); - gtsam::PinholeCamera camera0(T_world_pose0, *K); - poses.push_back(T_world_pose0); - cameras.push_back(camera0); - - Eigen::Matrix3d R_world_pose0_to_pose1( - Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix()); - gtsam::Pose3 T_world_pose1( - gtsam::Rot3(R_world_pose0_to_pose1) * T_world_pose0.rotation(), - (R_world_pose0_to_pose1 * (T_world_pose0.translation() - p_world_cube_center) + - p_world_cube_center)); - gtsam::PinholeCamera camera1(T_world_pose1, *K); - poses.push_back(T_world_pose1); - cameras.push_back(camera1); - - std::vector isometries; - for (const gtsam::Pose3 &pose : poses) { - isometries.emplace_back(pose.matrix()); - } - - std::vector kpts1; - std::vector kpts2; - std::vector matches; - - for (size_t i = 0; i < cube_W.size(); i++) { - gtsam::Point2 pixel_p_cube_C0 = cameras[0].project(cube_W[i]); - gtsam::Point2 pixel_p_cube_C1 = cameras[1].project(cube_W[i]); - if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C0, img_width, img_height) && - GtsamTestHelper::pixel_in_range(pixel_p_cube_C1, img_width, img_height)) { - kpts1.push_back(cv::KeyPoint(pixel_p_cube_C0[0], pixel_p_cube_C0[1], 3)); - kpts2.push_back(cv::KeyPoint(pixel_p_cube_C1[0], pixel_p_cube_C1[1], 3)); - matches.emplace_back(cv::DMatch(kpts1.size() - 1, kpts2.size() - 1, 0)); - } - } - - std::cout << "kpts1.size(): " << kpts1.size() << std::endl; - std::cout << "kpts2.size(): " << kpts2.size() << std::endl; - - gtsam::Pose3 T_pose0_pose1_estimate = sfm.get_backend().estimate_c0_c1(kpts1, kpts2, matches, *K); - gtsam::Pose3 T_pose0_pose1 = T_world_pose0.between(T_world_pose1); - std::vector result_poses = {T_world_pose0, T_world_pose0 * T_pose0_pose1_estimate}; - std::cout << "pose_diff_estimate: " << T_pose0_pose1_estimate << std::endl; - std::cout << "pose_diff_ground_truth: " << T_pose0_pose1 << std::endl; - - isometries.emplace_back(result_poses.back().matrix()); - robot::geometry::opencv_viz::viz_scene(isometries, cube_W); - - constexpr double TOL = 1e-3; - for (size_t i = 0; i < poses.size(); i++) { - gtsam::Pose3 result_pose = result_poses[i]; - EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL); - EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL); - EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL); - EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL)); - } -} - TEST(SFM_TEST, sfm_snippet_two) { DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); @@ -230,64 +133,41 @@ TEST(SFM_TEST, sfm_snippet_two) { const double fx = image_point.fx, fy = image_point.fy; const double cx = image_point.cx, cy = image_point.cy; gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + Eigen::Matrix D = + (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, + SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) + .finished(); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); - - std::cout << "sfm.landmark_count_: " << sfm.get_landmark_count() << std::endl; + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D); for (const cv::Mat &image : images) { - sfm.add_image( - image); //, gtsam::Pose3(gtsam::Rot3::Identity(), gtsam::Point3(0.05, 0, 0))); + sfm.add_image(image); } - gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); - std::vector poses; - std::vector protos; + const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); + std::vector poses_world; for (size_t i = 0; i < images.size(); i++) { gtsam::Pose3 pose = initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); - Eigen::MatrixXd eigen_affine(4, 4); - gtsam::Matrix3 rot_mat = pose.rotation().matrix(); - gtsam::Point3 translation = pose.translation(); - for (int j = 0; j < 3; j++) { - for (int k = 0; k < 3; k++) { - eigen_affine(j, k) = rot_mat(j, k); + poses_world.emplace_back(pose.matrix()); + } + std::vector points_world; + for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { + // NOTE: this j for lmk_symbol is wrong. + for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { + gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); + if (initial_values.exists(lmk_symbol)) { + points_world.emplace_back(initial_values.at(lmk_symbol)); + } else { + std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; } - eigen_affine(3, j) = translation(j); } - eigen_affine(3, 3) = 1; - poses.push_back(eigen_affine); - protos.emplace_back(); - pack_into(eigen_affine, &protos.back()); } - std::cout << poses.front() << std::endl; - - // const Eigen::Vector3d in(1.0, 2.0, 3.0); - // // Action + // std::cout << poses_world.front() << std::endl; - // robot::math::proto::Matrix proto; - // pack_into(in, &proto); - // const Eigen::Vector3d out = unpack_from(proto); - // std::cout << "vector out " << out << std::endl; - - // const Eigen::VectorXd in{{1.0, 2.0, 3.0}}; - - // // Action - // robot::math::proto::Matrix proto; - // pack_into(in, &proto); - // const Eigen::VectorXd out = unpack_from(proto); + robot::geometry::viz_scene(poses_world, points_world, true, true); // sfm.solve_structure(); - - // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - // cv::vconcat(img_display_test, img_matches_out, img_display_test); - // cv::Mat og_images; - // cv::hconcat(image_1, image_2, og_images); - // cv::vconcat(og_images, img_display_test, img_display_test); - // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << - // img_display_test.cols << std::endl; cv::imshow("Keypoints and Matches Output.", - // img_display_test); std::cout << "Press spacebar to pause." << std::endl; while - // (cv::waitKey(1000) != 32) {} } // TEST(SFM_TEST, structure_from_motion) { diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index dd8db32be..fd361013b 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -10,6 +10,14 @@ #include "symphony_lake_dataset/SurveyVector.h" namespace robot::experimental::learn_descriptors { +class SymphonyLakeCamParams { + public: + constexpr static double fx = 759.308, fy = 690.44; + constexpr static double px = 370.915, py = 250.909; + + constexpr static double k1 = -0.302805, k2 = 0.171088, k3 = 0.0; + constexpr static double p1 = 0.001151, p2 = -0.00038; +}; class DataParser { public: template From 4fc09c583f0bfdc44c54674fb4799a125e66f0ce Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 7 Mar 2025 14:19:04 -0500 Subject: [PATCH 09/45] fixed up poses in graph. need to figure out triangulation and maybe tranlsation scaling next. --- experimental/learn_descriptors/BUILD | 5 +- .../structure_from_motion.cc | 64 +++++--- .../structure_from_motion.hh | 3 +- .../structure_from_motion_test.cc | 140 ++++++++++++------ 4 files changed, 149 insertions(+), 63 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 2a2e1b14b..7220c019b 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -21,6 +21,9 @@ cc_library( hdrs = ["structure_from_motion.hh"], visibility = ["//visibility:public"], srcs = ["structure_from_motion.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], deps = [ "@opencv//:opencv", "@gtsam//:gtsam", @@ -34,7 +37,7 @@ cc_test( name = "structure_from_motion_test", srcs = ["structure_from_motion_test.cc"], copts = [ - "-Wno-unused-parameter", + "-Wno-error=unused-parameter", ], deps = [ "@com_google_googletest//:gtest_main", diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index a70df5c03..d18db1b58 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -6,6 +6,7 @@ #include "common/geometry/camera.hh" #include "common/geometry/translate_types.hh" +#include "gtsam/geometry/triangulation.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/PriorFactor.h" @@ -17,11 +18,10 @@ namespace geom = robot::geometry; namespace robot::experimental::learn_descriptors { -const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() { - Eigen::Affine3d transform = Eigen::Affine3d::Identity(); - transform.translate(Eigen::Vector3d::Zero()); - transform.rotate(Eigen::Matrix3d(Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1, 0, 0)) * - Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)))); +const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() { + Eigen::Isometry3d transform = Eigen::Isometry3d::Identity(); + transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix(); return transform; }(); @@ -34,8 +34,9 @@ std::string pose_to_string(gtsam::Pose3 pose) { // const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() { // Eigen::Isometry3d // }(); -const gtsam::Pose3 StructureFromMotion::default_initial_pose = - gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation()); +const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion::T_symlake_boat_cam.matrix()); +// const gtsam::Pose3 StructureFromMotion::default_initial_pose = +// gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation()); // gtsam::Pose3( // gtsam::Rot3(Eigen::Matrix3d( // Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() * @@ -86,7 +87,6 @@ void StructureFromMotion::add_image(const cv::Mat &img) { gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value, pose_noise_model); - gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0, 0, 1)); gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at( gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1)); gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at( @@ -293,12 +293,12 @@ Backend::Backend() { gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); K_ = boost::make_shared(K); - initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); + // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); } Backend::Backend(gtsam::Cal3_S2 K) { K_ = boost::make_shared(K); - initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); + // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); } void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) { @@ -330,6 +330,35 @@ void Backend::add_between_factor(const gtsam::Symbol &symbol_1, initial_estimate_.at(symbol_1).compose(value)); } +std::pair, std::vector> Backend::get_obs_for_lmk(const gtsam::Symbol &lmk_symbol) { + std::vector cam_poses; + std::vector observations; + + // Iterate over all factors in the graph + for (const auto& factor : graph_) { + auto projFactor = boost::dynamic_pointer_cast< + gtsam::GenericProjectionFactor>(factor); + + if (projFactor && projFactor->keys().at(1) == lmk_symbol) { + // Get the camera pose symbol + gtsam::Symbol cameraSymbol = projFactor->keys().at(0); + + // Retrieve the camera pose from values + if (!initial_estimate_.exists(cameraSymbol)) continue; + gtsam::Pose3 cameraPose = initial_estimate_.at(cameraSymbol); + + // Get the 2D observation (keypoint measurement) + gtsam::Point2 observation = projFactor->measured(); + + // Store the pose and corresponding observation + observations.push_back(observation); + cam_poses.push_back(cameraPose); + } + } + return std::pair, std::vector>(cam_poses, observations); +} + + void Backend::add_landmarks(const std::vector &landmarks) { for (const Landmark &landmark : landmarks) { add_landmark(landmark); @@ -337,14 +366,8 @@ void Backend::add_landmarks(const std::vector &landmarks) { } void Backend::add_landmark(const Landmark &landmark) { - // graph_.emplace_shared>( - // landmark.projection, landmark_noise_, landmark.cam_pose_symbol, - // landmark.lmk_factor_symbol, K_); - auto factor = boost::make_shared>( - landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, + graph_.emplace_shared>(landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, K_); - // Add the factor to the graph. - graph_.add(factor); if (!initial_estimate_.exists(landmark.cam_pose_symbol)) { throw std::runtime_error( "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before " @@ -352,12 +375,17 @@ void Backend::add_landmark(const Landmark &landmark) { } gtsam::Point3 p_world_lmk_estimate = initial_estimate_.at(landmark.cam_pose_symbol) - .transformTo(landmark.p_cam_lmk_guess); + * landmark.p_cam_lmk_guess; if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) { initial_estimate_.insert(landmark.lmk_factor_symbol, p_world_lmk_estimate); } else { initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); } + // std::pair, std::vector> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol); + // if (lmk_obs.first.size() >= 2) { + // p_world_lmk_estimate = gtsam::triangulatePoint3(lmk_obs.first, K_, gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); + // initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); + // } } void Backend::solve_graph() { diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index a1fb2ce27..e7d0c8e7e 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -91,6 +91,7 @@ class Backend { void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const T &value, const gtsam::SharedNoiseModel &model); + std::pair, std::vector> get_obs_for_lmk(const gtsam::Symbol &lmk_symbol); void add_landmarks(const std::vector &landmarks); void add_landmark(const Landmark &landmark); @@ -123,7 +124,7 @@ void Backend::add_between_factor(const gtsam::Symbol &, const gtsam class StructureFromMotion { public: - static const Eigen::Affine3d T_symlake_boat_cam; + static const Eigen::Isometry3d T_symlake_boat_cam; static const gtsam::Pose3 default_initial_pose; /** * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3) diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 82e4b84da..a9494c5fb 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -121,13 +121,27 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { } } -TEST(SFM_TEST, sfm_snippet_two) { +// TEST(SFM_TEST, boat_and_cam_frames) { +// // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the water/hull +// Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam; +// T_boat_cam.translation() = Eigen::Vector3d::Ones(); +// geometry::viz_scene(std::vector{T_boat_cam}, std::vector()); +// } + +TEST(SFM_TEST, sfm_snippet_small) { + const std::vector indices {120, 190}; // 0-199 DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180); - const std::vector images = {survey.loadImageByImageIndex(180), - survey.loadImageByImageIndex(185)}; + const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(indices.front()); + std::vector images; + + for (const int &idx : indices) { + images.push_back(survey.loadImageByImageIndex(idx)); + } + + // = {survey.loadImageByImageIndex(idx_1), + // survey.loadImageByImageIndex(idx_2)}; // const size_t img_width = image_point.width, img_height = image_point.height; const double fx = image_point.fx, fy = image_point.fy; @@ -165,55 +179,95 @@ TEST(SFM_TEST, sfm_snippet_two) { } // std::cout << poses_world.front() << std::endl; - robot::geometry::viz_scene(poses_world, points_world, true, true); + geometry::viz_scene(poses_world, points_world, true, false); - // sfm.solve_structure(); + sfm.solve_structure(); + + const gtsam::Values result_values = sfm.get_structure_result(); + std::vector final_poses; + std::vector final_lmks; + for (size_t i = 0; i < images.size(); i++) { + final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); + } + for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { + // NOTE: this j for lmk_symbol is wrong. + for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { + gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); + if (initial_values.exists(lmk_symbol)) { + final_lmks.emplace_back(initial_values.at(lmk_symbol)); + } else { + std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; + } + } + } + geometry::viz_scene(final_poses, final_lmks, true, false); } -// TEST(SFM_TEST, structure_from_motion) { -// const size_t img_width = 640; -// const size_t img_height = 480; -// const double fx = 500.0; -// const double fy = fx; -// const double cx = img_width / 2.0; -// const double cy = img_height / 2.0; +// TEST(SFM_TEST, sfm_building) { +// const int idx_start = 50, idx_end = 120, idx_skip = 2; +// DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); +// const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); +// const symphony_lake_dataset::Survey &survey = survey_vector.get(0); +// const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(idx_start); +// // const size_t img_width = image_point.width, img_height = image_point.height; +// const double fx = image_point.fx, fy = image_point.fy; +// const double cx = image_point.cx, cy = image_point.cy; // gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); +// Eigen::Matrix D = +// (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, +// SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) +// .finished(); -// StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K); -// DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); -// // DataParser::Generator generator = data_parser.create_img_generator(); -// const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); +// StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D); +// cv::Mat image; +// for (int j = idx_start; j < idx_end; j+=idx_skip) { +// image = survey.loadImageByImageIndex(j); +// sfm.add_image(image); +// } -// cv::Mat image; -// for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { -// const symphony_lake_dataset::Survey &survey = survey_vector.get(i); -// // for (int j = 0; j < static_cast(survey.getNumImages()); j++) { -// // image = survey.loadImageByImageIndex(j); -// // sfm.add_image(image); -// // } -// for (int j = 0; j < 2; j++) { -// image = survey.loadImageByImageIndex(j); -// sfm.add_image(image); +// const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); +// std::vector poses_world; +// for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) { +// gtsam::Pose3 pose = +// initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); +// poses_world.emplace_back(pose.matrix()); +// } +// std::vector points_world; +// for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { +// // NOTE: this j for lmk_symbol is wrong. +// for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { +// gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); +// if (initial_values.exists(lmk_symbol)) { +// points_world.emplace_back(initial_values.at(lmk_symbol)); +// } else { +// std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; +// } // } // } -// // DataParser::Generator::iterator img_itr = generator.begin(); -// // while (img_itr != generator.end()) { -// // if ((*img_itr).empty()) { -// // continue; -// // } -// // std::cout << "ooga booga" << std::endl; -// // std::cout << "image is empty: " << (*img_itr).empty() << std::endl; -// // cv::imshow("ooga", *img_itr); -// // cv::waitKey(1000); -// // sfm.add_image(*img_itr); -// // ++img_itr; -// // } +// // std::cout << poses_world.front() << std::endl; + +// geometry::viz_scene(poses_world, points_world, true, false); + // sfm.solve_structure(); -// // gtsam::Values output = sfm.get_structure_result(); -// // output.print("result values: "); -// // for (size_t i = 0; i < sfm.get_num_images_added(); i++) { -// // } +// const gtsam::Values result_values = sfm.get_structure_result(); +// std::vector final_poses; +// std::vector final_lmks; +// for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) { +// final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); +// } +// for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { +// // NOTE: this j for lmk_symbol is wrong. +// for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { +// gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); +// if (initial_values.exists(lmk_symbol)) { +// final_lmks.emplace_back(initial_values.at(lmk_symbol)); +// } else { +// std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; +// } +// } +// } +// geometry::viz_scene(final_poses, final_lmks, true, false); // } } // namespace robot::experimental::learn_descriptors From 697f16d9ee06630588993a28220577160fabd0e2 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Sun, 16 Mar 2025 22:17:10 -0400 Subject: [PATCH 10/45] MVP GPSFactor + GenericProjectionFactor sfm. Also figured out Transforms. Next: map lmk_symbols to landmarks to handle more than 2 images properly. --- experimental/learn_descriptors/BUILD | 6 +- .../structure_from_motion.cc | 128 ++++++++++++------ .../structure_from_motion.hh | 26 +++- .../structure_from_motion_test.cc | 60 ++++---- .../learn_descriptors/symphony_lake_parser.cc | 52 +++++++ .../learn_descriptors/symphony_lake_parser.hh | 14 ++ .../symphony_lake_parser_test.cc | 46 +++++-- 7 files changed, 249 insertions(+), 83 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 7220c019b..977347d42 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -43,7 +43,8 @@ cc_test( "@com_google_googletest//:gtest_main", ":symphony_lake_parser", ":structure_from_motion", - "//common/geometry:opencv_viz" + "//common/geometry:opencv_viz", + "//common/geometry:translate_types" ] ) @@ -71,7 +72,8 @@ cc_test( deps = [ "@com_google_googletest//:gtest_main", ":symphony_lake_parser", - "//common:check" + "//common:check", + "//common/geometry:opencv_viz" ] ) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index d18db1b58..1c3d2bd43 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -21,7 +21,7 @@ namespace robot::experimental::learn_descriptors { const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() { Eigen::Isometry3d transform = Eigen::Isometry3d::Identity(); transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) * - Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix(); + Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix(); return transform; }(); @@ -29,7 +29,7 @@ std::string pose_to_string(gtsam::Pose3 pose) { std::stringstream ss; ss << pose; return ss.str(); -} +}; // const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() { // Eigen::Isometry3d @@ -48,7 +48,7 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, Eigen::Matrix D, gtsam::Pose3 initial_pose, - Frontend::MatcherType frontend_matcher) { + Frontend::MatcherType frontend_matcher) : initial_pose_(initial_pose) { frontend_ = Frontend(frontend_extractor, frontend_matcher); backend_ = Backend(K); @@ -59,10 +59,10 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac } void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { - backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose); + backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose, gtsam::noiseModel::Isotropic::Sigma(6, 0)); } -void StructureFromMotion::add_image(const cv::Mat &img) { +void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam) { cv::Mat img_undistorted; cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = @@ -76,42 +76,49 @@ void StructureFromMotion::add_image(const cv::Mat &img) { // img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); // Frontend::enforce_bijective_matches(matches); // matches_.push_back(matches); - gtsam::SharedNoiseModel pose_noise_model = gtsam::noiseModel::Diagonal::Sigmas( - (gtsam::Vector6() << 0.1, 0.1, 0.1, 0.01, 0.01, 0.01).finished()); - gtsam::Pose3 between_value( - geom::estimate_c0_c1( - img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, - matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))) - .matrix()); - backend_.add_between_factor( - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value, - pose_noise_model); - gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at( - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1)); - gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at( - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added())); + // gtsam::Rot3 R_c0_c1( + // geom::estimate_c0_c1( + // img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, + // matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear()); + + const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, get_num_images_added() - 1); + const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, get_num_images_added()); + // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gps_prior, backend_.get_translation_noise()); + // gtsam::Pose3 T_w_c0 = backend_.get_current_initial_values().at(sym_T_w_c0); + // gtsam::Pose3 T_c0_c1( + // R_c0_c1, + // t_world_gps - T_w_c0.translation()); + // backend_.add_between_factor( + // sym_T_w_c0, + // sym_T_w_c1, + // T_c0_c1, + // backend_.get_pose_noise()); + backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation()); + for (const cv::DMatch match : matches) { - Backend::Landmark landmark_cam_1( + // indexing each landmark symbol with landmark_count_ could create issues later on. + // lmks indices will be duplicated with greater than 2 images + // make map from lmk to idx? + Backend::Landmark landmark_cam_0( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1), + sym_T_w_c0, gtsam::Point2( static_cast( img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), static_cast( img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), backend_.get_K(), 3.0); - Backend::Landmark landmark_cam_2( + Backend::Landmark landmark_cam_1( gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), + sym_T_w_c1, gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), backend_.get_K(), 3.0); - landmarks_[get_num_images_added() - 1].push_back(landmark_cam_1); - landmarks_[get_num_images_added()].push_back(landmark_cam_2); + landmarks_[get_num_images_added() - 1].push_back(landmark_cam_0); + landmarks_[get_num_images_added()].push_back(landmark_cam_1); + backend_.add_landmark(landmark_cam_0); backend_.add_landmark(landmark_cam_1); - backend_.add_landmark(landmark_cam_2); landmark_count_++; } // std::cout << "number of landmarks: " << count << std::endl; @@ -301,13 +308,24 @@ Backend::Backend(gtsam::Cal3_S2 K) { // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); } -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) { - graph_.emplace_shared>(symbol, value, pose_noise_); +template <> +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, const gtsam::SharedNoiseModel &noise) { + graph_.emplace_shared>(symbol, value, noise); initial_estimate_.insert_or_assign(symbol, value); // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; // initial_estimate_.print("values after adding prior: "); } +template <> +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, const gtsam::SharedNoiseModel &noise) { + graph_.emplace_shared>(symbol, value, noise); + gtsam::Rot3 R; + if (initial_estimate_.exists(symbol)) { + R = initial_estimate_.at(symbol).rotation(); + } + initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value)); +} + template <> void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, @@ -325,9 +343,16 @@ void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Rot3 &value, const gtsam::SharedNoiseModel &model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, model); + graph_.emplace_shared>(symbol_1, symbol_2, value, model); initial_estimate_.insert_or_assign(symbol_2, - initial_estimate_.at(symbol_1).compose(value)); + initial_estimate_.at(symbol_1).compose(gtsam::Pose3(value, gtsam::Point3::Zero()))); +} + +void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, const gtsam::SharedNoiseModel &model, + const gtsam::Rot3 & R_world_cam) { + + graph_.emplace_shared(symbol, t_world_cam, model); + initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam)); } std::pair, std::vector> Backend::get_obs_for_lmk(const gtsam::Symbol &lmk_symbol) { @@ -376,16 +401,39 @@ void Backend::add_landmark(const Landmark &landmark) { gtsam::Point3 p_world_lmk_estimate = initial_estimate_.at(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess; - if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) { - initial_estimate_.insert(landmark.lmk_factor_symbol, p_world_lmk_estimate); - } else { - initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); + initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate); + + std::pair, std::vector> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol); + if (lmk_obs.first.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_world_lmk_estimate = gtsam::triangulatePoint3( + lmk_obs.first, K_, + gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); + + // Optional: perform an explicit cheirality check + bool valid = true; + for (const auto &pose : lmk_obs.first) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate); + if (p_cam.z() <= 0) { // Check that the depth is positive + valid = false; + break; + } + } + + if (valid) { + initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); + } else { + std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." << std::endl; + } + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous estimate. + std::cerr << "Triangulation Cheirality Exception: " << e.what() + << ". Keeping initial landmark estimate." << std::endl; + } } - // std::pair, std::vector> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol); - // if (lmk_obs.first.size() >= 2) { - // p_world_lmk_estimate = gtsam::triangulatePoint3(lmk_obs.first, K_, gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); - // initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); - // } } void Backend::solve_graph() { diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index e7d0c8e7e..86c9ce42c 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -13,6 +13,7 @@ #include "gtsam/linear/NoiseModel.h" #include "gtsam/nonlinear/NonlinearFactorGraph.h" #include "gtsam/nonlinear/Values.h" +#include "gtsam/navigation/GPSFactor.h" #include "opencv2/opencv.hpp" namespace robot::experimental::learn_descriptors { @@ -62,6 +63,7 @@ class Backend { public: static constexpr char pose_symbol_char = 'x'; static constexpr char pose_rot_symbol_char = 'r'; + static constexpr char pose_translation_symbol_char = 't'; static constexpr char pose_bearing_symbol_char = 'b'; static constexpr char landmark_symbol_char = 'l'; static constexpr char camera_symbol_char = 'k'; @@ -85,11 +87,16 @@ class Backend { Backend(gtsam::Cal3_S2 K); ~Backend(){}; - void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value); + template + void add_prior_factor(const gtsam::Symbol &symbol, const T &value, const gtsam::SharedNoiseModel & model); template void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const T &value, const gtsam::SharedNoiseModel &model); + + + void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, const gtsam::SharedNoiseModel& model, + const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity()); std::pair, std::vector> get_obs_for_lmk(const gtsam::Symbol &lmk_symbol); void add_landmarks(const std::vector &landmarks); @@ -101,6 +108,11 @@ class Backend { const gtsam::Values &get_result() const { return result_; }; const gtsam::Cal3_S2 &get_K() const { return *K_; }; + const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; }; + const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; }; + const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; }; + const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; }; + private: gtsam::Cal3_S2::shared_ptr K_; @@ -112,8 +124,17 @@ class Backend { gtsam::noiseModel::Isotropic::Sigma(2, 1.0); gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); + gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 0.1); + gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = gtsam::noiseModel::Isotropic::Sigma(3, 2.); }; +template<> +void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Pose3 &, const gtsam::SharedNoiseModel &); + +template <> +void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Point3 &, const gtsam::SharedNoiseModel &); + template <> void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, const gtsam::Pose3 &, @@ -136,7 +157,7 @@ class StructureFromMotion { ~StructureFromMotion(){}; void set_initial_pose(gtsam::Pose3 initial_pose); - void add_image(const cv::Mat &img); + void add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam); void solve_structure() { backend_.solve_graph(); }; const gtsam::Values &get_structure_result() { return backend_.get_result(); }; using MatchFunction = std::function &)>; @@ -151,6 +172,7 @@ class StructureFromMotion { const std::vector> get_matches() { return matches_; }; private: + gtsam::Pose3 initial_pose_; std::vector, cv::Mat>> img_keypoints_and_descriptors_; std::vector> matches_; std::vector> landmarks_; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index a9494c5fb..3bbf1c9fa 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -5,6 +5,7 @@ #include "Eigen/Geometry" #include "common/geometry/opencv_viz.hh" +#include "common/geometry/translate_types.hh" #include "experimental/learn_descriptors/symphony_lake_parser.hh" #include "gtest/gtest.h" @@ -129,38 +130,43 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { // } TEST(SFM_TEST, sfm_snippet_small) { - const std::vector indices {120, 190}; // 0-199 + const std::vector indices {120, 130}; // 0-199 DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(indices.front()); - std::vector images; + const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - for (const int &idx : indices) { - images.push_back(survey.loadImageByImageIndex(idx)); - } - - // = {survey.loadImageByImageIndex(idx_1), - // survey.loadImageByImageIndex(idx_2)}; - - // const size_t img_width = image_point.width, img_height = image_point.height; - const double fx = image_point.fx, fy = image_point.fy; - const double cx = image_point.cx, cy = image_point.cy; + // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; + const double fx = img_pt_first.fx, fy = img_pt_first.fy; + const double cx = img_pt_first.cx, cy = img_pt_first.cy; gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); Eigen::Matrix D = (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) .finished(); + + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera + // T_earth_boat0 = + Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first); + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D); - - for (const cv::Mat &image : images) { - sfm.add_image(image); + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); + Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + + sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } + // for (const cv::Mat &image : images) { + // sfm.add_image(image); + // } const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); std::vector poses_world; - for (size_t i = 0; i < images.size(); i++) { + for (size_t i = 0; i < indices.size(); i++) { gtsam::Pose3 pose = initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); poses_world.emplace_back(pose.matrix()); @@ -179,14 +185,18 @@ TEST(SFM_TEST, sfm_snippet_small) { } // std::cout << poses_world.front() << std::endl; - geometry::viz_scene(poses_world, points_world, true, false); + geometry::viz_scene(poses_world, points_world, true, true); + + std::cout << "Solving for structure!" << std::endl; sfm.solve_structure(); + std::cout << "Solution complete." << std::endl; + const gtsam::Values result_values = sfm.get_structure_result(); std::vector final_poses; std::vector final_lmks; - for (size_t i = 0; i < images.size(); i++) { + for (size_t i = 0; i < indices.size(); i++) { final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); } for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { @@ -200,7 +210,7 @@ TEST(SFM_TEST, sfm_snippet_small) { } } } - geometry::viz_scene(final_poses, final_lmks, true, false); + geometry::viz_scene(final_poses, final_lmks, true, true); } // TEST(SFM_TEST, sfm_building) { @@ -208,11 +218,11 @@ TEST(SFM_TEST, sfm_snippet_small) { // DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); // const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); // const symphony_lake_dataset::Survey &survey = survey_vector.get(0); -// const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(idx_start); +// const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(idx_start); -// // const size_t img_width = image_point.width, img_height = image_point.height; -// const double fx = image_point.fx, fy = image_point.fy; -// const double cx = image_point.cx, cy = image_point.cy; +// // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; +// const double fx = img_pt_first.fx, fy = img_pt_first.fy; +// const double cx = img_pt_first.cx, cy = img_pt_first.cy; // gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); // Eigen::Matrix D = // (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 377e367cb..8146c6a36 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -5,6 +5,22 @@ #include "common/check.hh" namespace robot::experimental::learn_descriptors { +const Eigen::Vector3d DataParser::t_boat_cam(-0.420, 0, 0.255); + +const Eigen::Isometry3d DataParser::T_boat_gps((Eigen::Matrix4d() << + 1, 0, 0, 0.080, + 0, 1, 0, -0.118, + 0, 0, 1, 0.010, + 0, 0, 0, 1 +).finished()); + +const Eigen::Isometry3d DataParser::T_boat_imu = []() { + Eigen::Isometry3d T_boat_imu; + T_boat_imu.translation() = Eigen::Vector3d(-0.494, 0, -0.089); + T_boat_imu.linear() = Eigen::AngleAxisd(M_PI, Eigen::Vector3d::UnitX()).matrix(); + return T_boat_imu; +}(); + DataParser::DataParser(const std::filesystem::path &image_root_dir, const std::vector &survey_list) : image_root_dir_(image_root_dir), survey_list_(survey_list) { @@ -35,4 +51,40 @@ Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx T_world_camera.rotate(R); return T_world_camera; } + +const Eigen::Isometry3d DataParser::get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt) { + return get_T_boat_camera(img_pt.pan, img_pt.tilt); +} +const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double theta_tilt) { + Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan + Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt + Eigen::Matrix3d R_boat_gimbal(R_z * R_y); + + Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX()) + * Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY())).matrix(); + Eigen::Matrix3d R_boat_camera(R_boat_gimbal * R_gimbal_camera); + + Eigen::Isometry3d T_boat_cam; + T_boat_cam.translation() = t_boat_cam; + T_boat_cam.linear() = R_boat_camera; + return T_boat_cam; +} + +const Eigen::Matrix3d DataParser::get_R_world_boat(double theta_compass) { + Eigen::Matrix3d R_world_imu = Eigen::AngleAxisd(theta_compass, Eigen::Vector3d::UnitZ()).matrix(); + Eigen::Matrix3d R_imu_boat = T_boat_imu.inverse().linear(); + Eigen::Matrix3d R_world_boat = R_world_imu * R_imu_boat; + return R_world_boat; +} + +const Eigen::Isometry3d DataParser::get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt) { + Eigen::Matrix3d R_world_boat = get_R_world_boat(img_pt.theta); + + Eigen::Isometry3d T_world_gps; + T_world_gps.linear() = R_world_boat * T_boat_gps.linear(); + T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + + Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse(); + return T_world_boat; +} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index fd361013b..664f5ccdb 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -68,12 +68,26 @@ class DataParser { } } + static const Eigen::Vector3d t_boat_cam; + static const Eigen::Isometry3d T_boat_gps; + static const Eigen::Isometry3d T_boat_imu; + DataParser(const std::filesystem::path &image_root_dir, const std::vector &survey_list); ~DataParser(); Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, bool use_compass = false); + static const Eigen::Isometry3d get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt); + static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt); + /// @brief get_R_world_boat assuming z_axis_boat dot z_axis_world ~ -1 + /// @param theta_compass in radians + /// @return R_world_boat + static const Eigen::Matrix3d get_R_world_boat(double theta_compass); + /// @brief get_T_world_boat assuming z_axis_boat dot z_axis_world ~ -1 + /// @param img_pt + /// @return T_world_boat + static const Eigen::Isometry3d get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt); const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; Generator create_img_generator() { return image_generator(surveys_); }; diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index 0b22cdd3c..c7c2329ef 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -1,4 +1,5 @@ #include "experimental/learn_descriptors/symphony_lake_parser.hh" +#include "common/geometry/opencv_viz.hh" #include #include @@ -55,20 +56,37 @@ TEST(SymphonyLakeParserTest, snippet_140106) { } } } -TEST(SymphonyLakeParserTest, snippet_140106_generator_test) { +// TEST(SymphonyLakeParserTest, snippet_140106_generator_test) { +// DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); +// DataParser::Generator generator = +// (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator(); +// DataParser::Generator::iterator img_iter = generator.begin(); +// while (img_iter != generator.end()) { +// cv::Mat img = *img_iter; +// // if (!is_test()) { +// // cv::imshow("Symphony Dataset Image", img); +// // cv::waitKey(200); +// // } +// // cv::imshow("Symphony Dataset Image", img); +// // cv::waitKey(200); +// ++img_iter; +// } +// } + +TEST(SymphonyLakeParserTest, test_frames) { + const std::vector indices {120, 190}; // 0-199 DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - DataParser::Generator generator = - (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator(); - DataParser::Generator::iterator img_iter = generator.begin(); - while (img_iter != generator.end()) { - cv::Mat img = *img_iter; - // if (!is_test()) { - // cv::imshow("Symphony Dataset Image", img); - // cv::waitKey(200); - // } - // cv::imshow("Symphony Dataset Image", img); - // cv::waitKey(200); - ++img_iter; - } + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint image_point_first = survey.getImagePoint(indices.front()); + + std::vector poses_viz; + poses_viz.push_back(DataParser::get_T_boat_camera(image_point_first)); + poses_viz.push_back(DataParser::T_boat_gps); + poses_viz.push_back(DataParser::T_boat_imu); + + std::cout << "pan: " << image_point_first.pan << "\ntilt: " << image_point_first.tilt << std::endl; + + geometry::viz_scene(poses_viz, std::vector(), true, true); } } // namespace robot::experimental::learn_descriptors From 3919f8af60d571c0e1d52af8707cd5c06a0f8f69 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Mon, 17 Mar 2025 23:56:06 -0400 Subject: [PATCH 11/45] Can now add N cameras and landmarks should correspond correctly in gtsam graph. TODO: Transforms seem off. --- .../structure_from_motion.cc | 70 ++++---- .../structure_from_motion.hh | 25 ++- .../structure_from_motion_test.cc | 159 ++++++++++-------- 3 files changed, 155 insertions(+), 99 deletions(-) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 1c3d2bd43..b0ef8ae75 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -67,8 +67,9 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); - landmarks_.push_back(std::vector()); - if (get_num_images_added() > 0) { + keypoint_to_landmarks_.push_back(std::unordered_map()); + const size_t idx_img_current = get_num_images_added(); + if (idx_img_current > 0) { std::vector matches = get_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); @@ -81,8 +82,8 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo // img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, // matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear()); - const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, get_num_images_added() - 1); - const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, get_num_images_added()); + const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1); + const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current); // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gps_prior, backend_.get_translation_noise()); // gtsam::Pose3 T_w_c0 = backend_.get_current_initial_values().at(sym_T_w_c0); // gtsam::Pose3 T_c0_c1( @@ -96,36 +97,43 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation()); for (const cv::DMatch match : matches) { - // indexing each landmark symbol with landmark_count_ could create issues later on. - // lmks indices will be duplicated with greater than 2 images - // make map from lmk to idx? - Backend::Landmark landmark_cam_0( - gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - sym_T_w_c0, - gtsam::Point2( - static_cast( - img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), - static_cast( - img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), - backend_.get_K(), 3.0); - Backend::Landmark landmark_cam_1( - gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - sym_T_w_c1, - gtsam::Point2( - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), - backend_.get_K(), 3.0); - landmarks_[get_num_images_added() - 1].push_back(landmark_cam_0); - landmarks_[get_num_images_added()].push_back(landmark_cam_1); + const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx]; + const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx]; + + if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) { + const Backend::Landmark landmark_cam_0( + gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), + sym_T_w_c0, + gtsam::Point2( + static_cast( + img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), + static_cast( + img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), + backend_.get_K(), 3.0); + keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0); + landmark_count_++; + } + // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " << + // (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) + // << std::endl; + const Backend::Landmark landmark_cam_0 = keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0); + + // technically don't need this conditional when enforcing bijectivity + if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == keypoint_to_landmarks_[idx_img_current].end()) { + const Backend::Landmark landmark_cam_1( + landmark_cam_0.lmk_factor_symbol, + sym_T_w_c1, + gtsam::Point2( + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), + backend_.get_K(), 3.0); + keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1); + } + const Backend::Landmark landmark_cam_1 = keypoint_to_landmarks_[idx_img_current].at(kpt_cam1); + backend_.add_landmark(landmark_cam_0); backend_.add_landmark(landmark_cam_1); - landmark_count_++; } - // std::cout << "number of landmarks: " << count << std::endl; - // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const - // auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl; - // backend_.get_current_initial_values().print("Current initial values: "); - // std::cout << "landmark_count_: " << landmark_count_ << std::endl; } img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); } diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 86c9ce42c..2f7112293 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -16,6 +16,28 @@ #include "gtsam/navigation/GPSFactor.h" #include "opencv2/opencv.hpp" +namespace std { + template <> + struct hash { + size_t operator()(const cv::KeyPoint& kp) const { + size_t h1 = hash()(kp.pt.x); + size_t h2 = hash()(kp.pt.y); + size_t h3 = hash()(kp.size); + size_t h4 = hash()(kp.angle); + size_t h5 = hash()(kp.response); + size_t h6 = hash()(kp.octave); + size_t h7 = hash()(kp.class_id); + + return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); + } + }; +} +namespace cv{ + inline bool operator==(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) { + return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && + kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id; + } +} namespace robot::experimental::learn_descriptors { class Frontend { public: @@ -146,7 +168,7 @@ void Backend::add_between_factor(const gtsam::Symbol &, const gtsam class StructureFromMotion { public: static const Eigen::Isometry3d T_symlake_boat_cam; - static const gtsam::Pose3 default_initial_pose; + static const gtsam::Pose3 default_initial_pose; /** * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3) */ @@ -176,6 +198,7 @@ class StructureFromMotion { std::vector, cv::Mat>> img_keypoints_and_descriptors_; std::vector> matches_; std::vector> landmarks_; + std::vector> keypoint_to_landmarks_; size_t landmark_count_ = 0; cv::Mat K_; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 3bbf1c9fa..546827e9d 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -151,8 +151,10 @@ TEST(SFM_TEST, sfm_snippet_small) { Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first); StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); + std::vector img_vector; for (const int &idx : indices) { const cv::Mat img = survey.loadImageByImageIndex(idx); + img_vector.push_back(img); const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; @@ -213,71 +215,94 @@ TEST(SFM_TEST, sfm_snippet_small) { geometry::viz_scene(final_poses, final_lmks, true, true); } -// TEST(SFM_TEST, sfm_building) { -// const int idx_start = 50, idx_end = 120, idx_skip = 2; -// DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); -// const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); -// const symphony_lake_dataset::Survey &survey = survey_vector.get(0); -// const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(idx_start); - -// // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; -// const double fx = img_pt_first.fx, fy = img_pt_first.fy; -// const double cx = img_pt_first.cx, cy = img_pt_first.cy; -// gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); -// Eigen::Matrix D = -// (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, -// SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) -// .finished(); - -// StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D); -// cv::Mat image; -// for (int j = idx_start; j < idx_end; j+=idx_skip) { -// image = survey.loadImageByImageIndex(j); -// sfm.add_image(image); -// } - -// const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); -// std::vector poses_world; -// for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) { -// gtsam::Pose3 pose = -// initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); -// poses_world.emplace_back(pose.matrix()); -// } -// std::vector points_world; -// for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { -// // NOTE: this j for lmk_symbol is wrong. -// for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { -// gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); -// if (initial_values.exists(lmk_symbol)) { -// points_world.emplace_back(initial_values.at(lmk_symbol)); -// } else { -// std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; -// } -// } -// } -// // std::cout << poses_world.front() << std::endl; - -// geometry::viz_scene(poses_world, points_world, true, false); - -// sfm.solve_structure(); - -// const gtsam::Values result_values = sfm.get_structure_result(); -// std::vector final_poses; -// std::vector final_lmks; -// for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) { -// final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); -// } -// for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { -// // NOTE: this j for lmk_symbol is wrong. -// for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { -// gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); -// if (initial_values.exists(lmk_symbol)) { -// final_lmks.emplace_back(initial_values.at(lmk_symbol)); -// } else { -// std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; -// } -// } -// } -// geometry::viz_scene(final_poses, final_lmks, true, false); -// } +TEST(SFM_TEST, sfm_building) { + // indices 0-199 + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); + } + return tmp; + }(); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); + + // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; + const double fx = img_pt_first.fx, fy = img_pt_first.fy; + const double cx = img_pt_first.cx, cy = img_pt_first.cy; + gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + Eigen::Matrix D = + (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, + SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) + .finished(); + + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera + Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first); + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); + + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + + Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); + Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + + sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); + } + // for (const cv::Mat &image : images) { + // sfm.add_image(image); + // } + + const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); + std::vector poses_world; + for (size_t i = 0; i < indices.size(); i++) { + gtsam::Pose3 pose = + initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); + poses_world.emplace_back(pose.matrix()); + } + std::vector points_world; + for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { + // NOTE: this j for lmk_symbol is wrong. + for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { + gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); + if (initial_values.exists(lmk_symbol)) { + points_world.emplace_back(initial_values.at(lmk_symbol)); + } else { + std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; + } + } + } + // std::cout << poses_world.front() << std::endl; + + geometry::viz_scene(poses_world, points_world, true, true); + + std::cout << "Solving for structure!" << std::endl; + + sfm.solve_structure(); + + std::cout << "Solution complete." << std::endl; + + const gtsam::Values result_values = sfm.get_structure_result(); + std::vector final_poses; + std::vector final_lmks; + for (size_t i = 0; i < indices.size(); i++) { + final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); + } + for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { + // NOTE: this j for lmk_symbol is wrong. + for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { + gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); + if (initial_values.exists(lmk_symbol)) { + final_lmks.emplace_back(initial_values.at(lmk_symbol)); + } else { + std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; + } + } + } + geometry::viz_scene(final_poses, final_lmks, true, true); +} } // namespace robot::experimental::learn_descriptors From f05522a5b3e7892845a0305572d084408e4a7204 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 18 Mar 2025 00:54:46 -0400 Subject: [PATCH 12/45] commented out bad function T_world_camera --- .../structure_from_motion.cc | 1 + .../structure_from_motion_test.cc | 2 + .../learn_descriptors/symphony_lake_parser.cc | 42 +++++++++---------- .../learn_descriptors/symphony_lake_parser.hh | 4 +- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index b0ef8ae75..fd7d0e4ef 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -95,6 +95,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo // T_c0_c1, // backend_.get_pose_noise()); backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation()); + // backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise()); for (const cv::DMatch match : matches) { const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx]; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 546827e9d..61c816444 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -159,6 +159,8 @@ TEST(SFM_TEST, sfm_snippet_small) { Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + + // T_world_cam.linear() = T_world_camera0.linear().matrix(); sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 8146c6a36..467c57160 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -30,27 +30,27 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir, } DataParser::~DataParser() {} -Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, - bool use_compass) { - const symphony_lake_dataset::ImagePoint img_point = - surveys_.get(survey_idx).getImagePoint(img_idx); - Eigen::Vector3d t; - Eigen::Matrix3d R; - Eigen::Affine3d T_world_camera; - if (use_gps) { - t[0] = img_point.x; - t[1] = img_point.y; - } - // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core) - double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan; - Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY())); - Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity(); - Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ())); - R = R_x * R_y * R_z; - T_world_camera.translate(t); - T_world_camera.rotate(R); - return T_world_camera; -} +// Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, +// bool use_compass) { +// const symphony_lake_dataset::ImagePoint img_point = +// surveys_.get(survey_idx).getImagePoint(img_idx); +// Eigen::Vector3d t; +// Eigen::Matrix3d R; +// Eigen::Affine3d T_world_camera; +// if (use_gps) { +// t[0] = img_point.x; +// t[1] = img_point.y; +// } +// // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core) +// double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan; +// Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY())); +// Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity(); +// Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ())); +// R = R_x * R_y * R_z; +// T_world_camera.translate(t); +// T_world_camera.rotate(R); +// return T_world_camera; +// } const Eigen::Isometry3d DataParser::get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt) { return get_T_boat_camera(img_pt.pan, img_pt.tilt); diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index 664f5ccdb..5311a29f5 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -76,8 +76,8 @@ class DataParser { const std::vector &survey_list); ~DataParser(); - Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, - bool use_compass = false); + // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, + // bool use_compass = false); static const Eigen::Isometry3d get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt); static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt); /// @brief get_R_world_boat assuming z_axis_boat dot z_axis_world ~ -1 From 81c3b4562f66ef0285248f3c75599eef7546d47b Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 4 Apr 2025 12:51:48 -0400 Subject: [PATCH 13/45] Uploading progress --- common/geometry/opencv_viz.cc | 4 +- common/geometry/opencv_viz.hh | 3 +- experimental/learn_descriptors/BUILD | 3 +- .../structure_from_motion.cc | 172 ++++++++++++------ .../structure_from_motion.hh | 80 ++++---- .../structure_from_motion_test.cc | 111 +++++------ .../learn_descriptors/symphony_lake_parser.cc | 82 ++++----- .../learn_descriptors/symphony_lake_parser.hh | 64 +------ .../symphony_lake_parser_test.cc | 84 ++++++--- 9 files changed, 317 insertions(+), 286 deletions(-) diff --git a/common/geometry/opencv_viz.cc b/common/geometry/opencv_viz.cc index 66df62a9a..6d11aca22 100644 --- a/common/geometry/opencv_viz.cc +++ b/common/geometry/opencv_viz.cc @@ -25,8 +25,8 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { void viz_scene(const std::vector &poses_world, const std::vector &points_world, const bool show_grid, - const bool show_origin) { - cv::viz::Viz3d window("Viz Scene"); + const bool show_origin, const std::string &window_name) { + cv::viz::Viz3d window(window_name); constexpr double pose_size = .5; for (unsigned int i = 0; i < poses_world.size(); i++) { diff --git a/common/geometry/opencv_viz.hh b/common/geometry/opencv_viz.hh index cdfcfe857..8d2b3a506 100644 --- a/common/geometry/opencv_viz.hh +++ b/common/geometry/opencv_viz.hh @@ -1,4 +1,5 @@ #pragma once +#include #include #include "Eigen/Core" @@ -8,5 +9,5 @@ namespace robot::geometry { void viz_scene(const std::vector &poses_world, const std::vector &points_world, const bool show_grid = true, - const bool show_origin = true); + const bool show_origin = true, const std::string &window_name = "Viz Window"); } \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 977347d42..dab2c184e 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -29,7 +29,8 @@ cc_library( "@gtsam//:gtsam", "@eigen", "//common/geometry:camera", - "//common/geometry:translate_types" + "//common/geometry:translate_types", + "//common/geometry:opencv_viz" ] ) diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index fd7d0e4ef..d9b145a18 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -5,6 +5,7 @@ #include #include "common/geometry/camera.hh" +#include "common/geometry/opencv_viz.hh" #include "common/geometry/translate_types.hh" #include "gtsam/geometry/triangulation.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" @@ -19,9 +20,10 @@ namespace geom = robot::geometry; namespace robot::experimental::learn_descriptors { const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() { - Eigen::Isometry3d transform = Eigen::Isometry3d::Identity(); + Eigen::Isometry3d transform = Eigen::Isometry3d::Identity(); transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) * - Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix(); + Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1))) + .toRotationMatrix(); return transform; }(); @@ -34,7 +36,8 @@ std::string pose_to_string(gtsam::Pose3 pose) { // const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() { // Eigen::Isometry3d // }(); -const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion::T_symlake_boat_cam.matrix()); +const gtsam::Pose3 StructureFromMotion::default_initial_pose( + StructureFromMotion::T_symlake_boat_cam.matrix()); // const gtsam::Pose3 StructureFromMotion::default_initial_pose = // gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation()); // gtsam::Pose3( @@ -48,7 +51,8 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, Eigen::Matrix D, gtsam::Pose3 initial_pose, - Frontend::MatcherType frontend_matcher) : initial_pose_(initial_pose) { + Frontend::MatcherType frontend_matcher) + : initial_pose_(initial_pose) { frontend_ = Frontend(frontend_extractor, frontend_matcher); backend_ = Backend(K); @@ -59,7 +63,8 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac } void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { - backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose, gtsam::noiseModel::Isotropic::Sigma(6, 0)); + backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose, + gtsam::noiseModel::Isotropic::Sigma(6, 0)); } void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam) { @@ -80,31 +85,35 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo // gtsam::Rot3 R_c0_c1( // geom::estimate_c0_c1( // img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, - // matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear()); + // matches, + // geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear()); const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1); const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current); - // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gps_prior, backend_.get_translation_noise()); - // gtsam::Pose3 T_w_c0 = backend_.get_current_initial_values().at(sym_T_w_c0); - // gtsam::Pose3 T_c0_c1( - // R_c0_c1, + // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, + // get_num_images_added()), gps_prior, backend_.get_translation_noise()); gtsam::Pose3 + // T_w_c0 = backend_.get_current_initial_values().at(sym_T_w_c0); gtsam::Pose3 + // T_c0_c1( + // R_c0_c1, // t_world_gps - T_w_c0.translation()); // backend_.add_between_factor( // sym_T_w_c0, - // sym_T_w_c1, + // sym_T_w_c1, // T_c0_c1, - // backend_.get_pose_noise()); - backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation()); + // backend_.get_pose_noise()); + backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), + T_world_cam.rotation()); // backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise()); for (const cv::DMatch match : matches) { - const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx]; + const cv::KeyPoint kpt_cam0 = + img_keypoints_and_descriptors_.back().first[match.queryIdx]; const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx]; - if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) { + if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == + keypoint_to_landmarks_[idx_img_current - 1].end()) { const Backend::Landmark landmark_cam_0( - gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), - sym_T_w_c0, + gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0, gtsam::Point2( static_cast( img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), @@ -113,24 +122,27 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo backend_.get_K(), 3.0); keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0); landmark_count_++; - } - // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " << - // (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) - // << std::endl; - const Backend::Landmark landmark_cam_0 = keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0); + } + // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " << + // (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == + // keypoint_to_landmarks_[idx_img_current - 1].end()) + // << std::endl; + const Backend::Landmark landmark_cam_0 = + keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0); // technically don't need this conditional when enforcing bijectivity - if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == keypoint_to_landmarks_[idx_img_current].end()) { + if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == + keypoint_to_landmarks_[idx_img_current].end()) { const Backend::Landmark landmark_cam_1( - landmark_cam_0.lmk_factor_symbol, - sym_T_w_c1, + landmark_cam_0.lmk_factor_symbol, sym_T_w_c1, gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), backend_.get_K(), 3.0); - keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1); - } - const Backend::Landmark landmark_cam_1 = keypoint_to_landmarks_[idx_img_current].at(kpt_cam1); + keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1); + } + const Backend::Landmark landmark_cam_1 = + keypoint_to_landmarks_[idx_img_current].at(kpt_cam1); backend_.add_landmark(landmark_cam_0); backend_.add_landmark(landmark_cam_1); @@ -141,7 +153,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo std::vector StructureFromMotion::get_matches( const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, - std::optional post_process_func) { + std::optional post_process_func) { std::vector matches = frontend_.get_matches(descriptors_1, descriptors_2); if (post_process_func) { (*post_process_func)(matches); @@ -318,7 +330,8 @@ Backend::Backend(gtsam::Cal3_S2 K) { } template <> -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, const gtsam::SharedNoiseModel &noise) { +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, + const gtsam::SharedNoiseModel &noise) { graph_.emplace_shared>(symbol, value, noise); initial_estimate_.insert_or_assign(symbol, value); // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; @@ -326,12 +339,13 @@ void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 & } template <> -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, const gtsam::SharedNoiseModel &noise) { +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, + const gtsam::SharedNoiseModel &noise) { graph_.emplace_shared>(symbol, value, noise); gtsam::Rot3 R; - if (initial_estimate_.exists(symbol)) { - R = initial_estimate_.at(symbol).rotation(); - } + if (initial_estimate_.exists(symbol)) { + R = initial_estimate_.at(symbol).rotation(); + } initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value)); } @@ -352,27 +366,28 @@ void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Rot3 &value, const gtsam::SharedNoiseModel &model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, model); + graph_.emplace_shared>(symbol_1, symbol_2, value, model); initial_estimate_.insert_or_assign(symbol_2, - initial_estimate_.at(symbol_1).compose(gtsam::Pose3(value, gtsam::Point3::Zero()))); + initial_estimate_.at(symbol_1).compose( + gtsam::Pose3(value, gtsam::Point3::Zero()))); } -void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, const gtsam::SharedNoiseModel &model, - const gtsam::Rot3 & R_world_cam) { - +void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, + const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) { graph_.emplace_shared(symbol, t_world_cam, model); initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam)); } -std::pair, std::vector> Backend::get_obs_for_lmk(const gtsam::Symbol &lmk_symbol) { +std::pair, std::vector> Backend::get_obs_for_lmk( + const gtsam::Symbol &lmk_symbol) { std::vector cam_poses; std::vector observations; // Iterate over all factors in the graph - for (const auto& factor : graph_) { + for (const auto &factor : graph_) { auto projFactor = boost::dynamic_pointer_cast< gtsam::GenericProjectionFactor>(factor); - + if (projFactor && projFactor->keys().at(1) == lmk_symbol) { // Get the camera pose symbol gtsam::Symbol cameraSymbol = projFactor->keys().at(0); @@ -389,10 +404,10 @@ std::pair, std::vector> Backend::get_ob cam_poses.push_back(cameraPose); } } - return std::pair, std::vector>(cam_poses, observations); + return std::pair, std::vector>(cam_poses, + observations); } - void Backend::add_landmarks(const std::vector &landmarks) { for (const Landmark &landmark : landmarks) { add_landmark(landmark); @@ -400,7 +415,8 @@ void Backend::add_landmarks(const std::vector &landmarks) { } void Backend::add_landmark(const Landmark &landmark) { - graph_.emplace_shared>(landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, + graph_.emplace_shared>( + landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, K_); if (!initial_estimate_.exists(landmark.cam_pose_symbol)) { throw std::runtime_error( @@ -408,18 +424,18 @@ void Backend::add_landmark(const Landmark &landmark) { "add_landmark is called."); } gtsam::Point3 p_world_lmk_estimate = - initial_estimate_.at(landmark.cam_pose_symbol) - * landmark.p_cam_lmk_guess; + initial_estimate_.at(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess; initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate); - std::pair, std::vector> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol); + std::pair, std::vector> lmk_obs = + get_obs_for_lmk(landmark.lmk_factor_symbol); if (lmk_obs.first.size() >= 2) { try { // Attempt triangulation using DLT (or the GTSAM provided method) p_world_lmk_estimate = gtsam::triangulatePoint3( - lmk_obs.first, K_, + lmk_obs.first, K_, gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); - + // Optional: perform an explicit cheirality check bool valid = true; for (const auto &pose : lmk_obs.first) { @@ -431,15 +447,16 @@ void Backend::add_landmark(const Landmark &landmark) { break; } } - + if (valid) { initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); } else { - std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." << std::endl; + std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." + << std::endl; } } catch (const gtsam::TriangulationCheiralityException &e) { // Handle the exception gracefully by logging and retaining the previous estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() + std::cerr << "Triangulation Cheirality Exception: " << e.what() << ". Keeping initial landmark estimate." << std::endl; } } @@ -450,4 +467,53 @@ void Backend::solve_graph() { result_ = optimizer.optimize(); } +void Backend::solve_graph(const int num_steps, + std::optional inter_debug_func) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; // We'll manually step it + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); + + double prev_error = optimizer.error(); + for (int i = 0; i < num_steps; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (inter_debug_func) { + (*inter_debug_func)(optimizer.values(), i); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + result_ = optimizer.values(); +} + +void StructureFromMotion::graph_values(const gtsam::Values &values, + const std::string &window_name) { + std::vector final_poses; + std::vector final_lmks; + for (size_t i = 0; i < img_keypoints_and_descriptors_.size(); i++) { + final_poses.emplace_back( + values.at(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix()); + } + for (int i = 0; i < static_cast(get_matches().size()); i++) { + // NOTE: this j for lmk_symbol is wrong. + for (int j = 0; j < static_cast(get_matches()[i].size()); j++) { + gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); + if (values.exists(lmk_symbol)) { + final_lmks.emplace_back(values.at(lmk_symbol)); + } else { + std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; + } + } + } + geometry::viz_scene(final_poses, final_lmks, true, true, window_name); +} + +void StructureFromMotion::solve_structure( + const int num_steps, std::optional inter_debug_func) { + backend_.solve_graph(num_steps, inter_debug_func); +} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 2f7112293..7cc454558 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -11,33 +12,33 @@ #include "gtsam/geometry/Pose3.h" #include "gtsam/inference/Symbol.h" #include "gtsam/linear/NoiseModel.h" +#include "gtsam/navigation/GPSFactor.h" #include "gtsam/nonlinear/NonlinearFactorGraph.h" #include "gtsam/nonlinear/Values.h" -#include "gtsam/navigation/GPSFactor.h" #include "opencv2/opencv.hpp" namespace std { - template <> - struct hash { - size_t operator()(const cv::KeyPoint& kp) const { - size_t h1 = hash()(kp.pt.x); - size_t h2 = hash()(kp.pt.y); - size_t h3 = hash()(kp.size); - size_t h4 = hash()(kp.angle); - size_t h5 = hash()(kp.response); - size_t h6 = hash()(kp.octave); - size_t h7 = hash()(kp.class_id); - - return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); - } - }; -} -namespace cv{ - inline bool operator==(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) { - return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && - kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id; +template <> +struct hash { + size_t operator()(const cv::KeyPoint &kp) const { + size_t h1 = hash()(kp.pt.x); + size_t h2 = hash()(kp.pt.y); + size_t h3 = hash()(kp.size); + size_t h4 = hash()(kp.angle); + size_t h5 = hash()(kp.response); + size_t h6 = hash()(kp.octave); + size_t h7 = hash()(kp.class_id); + + return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); } +}; +} // namespace std +namespace cv { +inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) { + return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && + kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id; } +} // namespace cv namespace robot::experimental::learn_descriptors { class Frontend { public: @@ -110,21 +111,27 @@ class Backend { ~Backend(){}; template - void add_prior_factor(const gtsam::Symbol &symbol, const T &value, const gtsam::SharedNoiseModel & model); + void add_prior_factor(const gtsam::Symbol &symbol, const T &value, + const gtsam::SharedNoiseModel &model); template void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const T &value, const gtsam::SharedNoiseModel &model); - - void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, const gtsam::SharedNoiseModel& model, - const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity()); + void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, + const gtsam::SharedNoiseModel &model, + const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity()); - std::pair, std::vector> get_obs_for_lmk(const gtsam::Symbol &lmk_symbol); + std::pair, std::vector> get_obs_for_lmk( + const gtsam::Symbol &lmk_symbol); void add_landmarks(const std::vector &landmarks); void add_landmark(const Landmark &landmark); void solve_graph(); + typedef int epoch; + using graph_step_debug_func = std::function; + void solve_graph(const int num_steps, + std::optional inter_debug_func = std::nullopt); const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; const gtsam::Values &get_result() const { return result_; }; @@ -148,14 +155,17 @@ class Backend { gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 0.1); - gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = gtsam::noiseModel::Isotropic::Sigma(3, 2.); + gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = + gtsam::noiseModel::Isotropic::Sigma(3, 2.); }; -template<> -void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Pose3 &, const gtsam::SharedNoiseModel &); +template <> +void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Pose3 &, + const gtsam::SharedNoiseModel &); template <> -void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Point3 &, const gtsam::SharedNoiseModel &); +void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Point3 &, + const gtsam::SharedNoiseModel &); template <> void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, @@ -167,8 +177,8 @@ void Backend::add_between_factor(const gtsam::Symbol &, const gtsam class StructureFromMotion { public: - static const Eigen::Isometry3d T_symlake_boat_cam; - static const gtsam::Pose3 default_initial_pose; + static const Eigen::Isometry3d T_symlake_boat_cam; + static const gtsam::Pose3 default_initial_pose; /** * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3) */ @@ -181,11 +191,15 @@ class StructureFromMotion { void set_initial_pose(gtsam::Pose3 initial_pose); void add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam); void solve_structure() { backend_.solve_graph(); }; + void solve_structure( + const int num_steps, + std::optional inter_debug_func = std::nullopt); const gtsam::Values &get_structure_result() { return backend_.get_result(); }; - using MatchFunction = std::function &)>; + using match_function = std::function &)>; std::vector get_matches( const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, - std::optional post_process_func = std::nullopt); + std::optional post_process_func = std::nullopt); + void graph_values(const gtsam::Values &values, const std::string &window_name = "graph values"); Frontend get_frontend() { return frontend_; }; Backend get_backend() { return backend_; } diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 61c816444..c856abd71 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -2,6 +2,7 @@ #include #include +#include #include "Eigen/Geometry" #include "common/geometry/opencv_viz.hh" @@ -122,15 +123,16 @@ TEST(SFM_TEST, frontend_pipeline_sweep) { } } -// TEST(SFM_TEST, boat_and_cam_frames) { -// // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the water/hull -// Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam; +// TEST(SFM_TEST, boat_and_cam_frames) { +// // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the +// water/hull Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam; // T_boat_cam.translation() = Eigen::Vector3d::Ones(); -// geometry::viz_scene(std::vector{T_boat_cam}, std::vector()); +// geometry::viz_scene(std::vector{T_boat_cam}, +// std::vector()); // } TEST(SFM_TEST, sfm_snippet_small) { - const std::vector indices {120, 130}; // 0-199 + const std::vector indices{120, 130}; // 0-199 DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); const symphony_lake_dataset::Survey &survey = survey_vector.get(0); @@ -144,24 +146,25 @@ TEST(SFM_TEST, sfm_snippet_small) { (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) .finished(); - + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera - // T_earth_boat0 = + // T_earth_boat0 = Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first); Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + gtsam::Pose3(T_world_camera0.matrix())); std::vector img_vector; - for (const int &idx : indices) { + for (const int &idx : indices) { const cv::Mat img = survey.loadImageByImageIndex(idx); img_vector.push_back(img); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); // T_world_cam.linear() = T_world_camera0.linear().matrix(); - + sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } // for (const cv::Mat &image : images) { @@ -201,7 +204,9 @@ TEST(SFM_TEST, sfm_snippet_small) { std::vector final_poses; std::vector final_lmks; for (size_t i = 0; i < indices.size(); i++) { - final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); + final_poses.emplace_back( + result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)) + .matrix()); } for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { // NOTE: this j for lmk_symbol is wrong. @@ -239,72 +244,50 @@ TEST(SFM_TEST, sfm_building) { (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) .finished(); - - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera - Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first); - Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); - for (const int &idx : indices) { + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera + Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_boat0; + T_world_boat0.linear() = T_earth_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + gtsam::Pose3(T_world_camera0.matrix())); + + for (const int &idx : indices) { const cv::Mat img = survey.loadImageByImageIndex(idx); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + + Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); + T_world_boat.translation() -= T_earth_boat0.translation(); - Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); - Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); - + sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } - // for (const cv::Mat &image : images) { - // sfm.add_image(image); - // } const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); - std::vector poses_world; - for (size_t i = 0; i < indices.size(); i++) { - gtsam::Pose3 pose = - initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); - poses_world.emplace_back(pose.matrix()); - } - std::vector points_world; - for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { - // NOTE: this j for lmk_symbol is wrong. - for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { - gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); - if (initial_values.exists(lmk_symbol)) { - points_world.emplace_back(initial_values.at(lmk_symbol)); - } else { - std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; - } - } - } - // std::cout << poses_world.front() << std::endl; - - geometry::viz_scene(poses_world, points_world, true, true); + sfm.graph_values(initial_values, "initial values"); std::cout << "Solving for structure!" << std::endl; - sfm.solve_structure(); + Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals, + const Backend::epoch iter) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + sfm.graph_values(vals, window_name); + }; + sfm.solve_structure(5, solve_iter_debug_func); std::cout << "Solution complete." << std::endl; const gtsam::Values result_values = sfm.get_structure_result(); - std::vector final_poses; - std::vector final_lmks; - for (size_t i = 0; i < indices.size(); i++) { - final_poses.emplace_back(result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix()); - } - for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { - // NOTE: this j for lmk_symbol is wrong. - for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { - gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); - if (initial_values.exists(lmk_symbol)) { - final_lmks.emplace_back(initial_values.at(lmk_symbol)); - } else { - std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; - } - } - } - geometry::viz_scene(final_poses, final_lmks, true, true); + sfm.graph_values(result_values, "optimized values"); +} + +TEST(SFM_TEST, random_test) { + std::cout << DataParser::T_boat_gps.matrix() << std::endl; + geometry::viz_scene( + std::vector{DataParser::T_boat_imu, DataParser::T_boat_gps}, + std::vector{DataParser::t_boat_cam}); } } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 467c57160..4740deb1e 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -5,14 +5,10 @@ #include "common/check.hh" namespace robot::experimental::learn_descriptors { -const Eigen::Vector3d DataParser::t_boat_cam(-0.420, 0, 0.255); +const Eigen::Vector3d DataParser::t_boat_cam(-0.430, 0, 0.255); -const Eigen::Isometry3d DataParser::T_boat_gps((Eigen::Matrix4d() << - 1, 0, 0, 0.080, - 0, 1, 0, -0.118, - 0, 0, 1, 0.010, - 0, 0, 0, 1 -).finished()); +const Eigen::Isometry3d DataParser::T_boat_gps( + (Eigen::Matrix4d() << 1, 0, 0, 0.080, 0, 1, 0, -0.1183, 0, 0, 1, 0.010, 0, 0, 0, 1).finished()); const Eigen::Isometry3d DataParser::T_boat_imu = []() { Eigen::Isometry3d T_boat_imu; @@ -30,60 +26,50 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir, } DataParser::~DataParser() {} -// Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, -// bool use_compass) { -// const symphony_lake_dataset::ImagePoint img_point = -// surveys_.get(survey_idx).getImagePoint(img_idx); -// Eigen::Vector3d t; -// Eigen::Matrix3d R; -// Eigen::Affine3d T_world_camera; -// if (use_gps) { -// t[0] = img_point.x; -// t[1] = img_point.y; -// } -// // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core) -// double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan; -// Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY())); -// Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity(); -// Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ())); -// R = R_x * R_y * R_z; -// T_world_camera.translate(t); -// T_world_camera.rotate(R); -// return T_world_camera; -// } - -const Eigen::Isometry3d DataParser::get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt) { +const Eigen::Isometry3d DataParser::get_T_boat_camera( + const symphony_lake_dataset::ImagePoint &img_pt) { return get_T_boat_camera(img_pt.pan, img_pt.tilt); } const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double theta_tilt) { - Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan - Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt - Eigen::Matrix3d R_boat_gimbal(R_z * R_y); - - Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX()) - * Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY())).matrix(); + Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan + Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt + Eigen::Matrix3d R_boat_gimbal(R_z * R_y); + + Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX()) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY())) + .matrix(); Eigen::Matrix3d R_boat_camera(R_boat_gimbal * R_gimbal_camera); - + Eigen::Isometry3d T_boat_cam; T_boat_cam.translation() = t_boat_cam; T_boat_cam.linear() = R_boat_camera; return T_boat_cam; } -const Eigen::Matrix3d DataParser::get_R_world_boat(double theta_compass) { - Eigen::Matrix3d R_world_imu = Eigen::AngleAxisd(theta_compass, Eigen::Vector3d::UnitZ()).matrix(); - Eigen::Matrix3d R_imu_boat = T_boat_imu.inverse().linear(); - Eigen::Matrix3d R_world_boat = R_world_imu * R_imu_boat; - return R_world_boat; -} - -const Eigen::Isometry3d DataParser::get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt) { - Eigen::Matrix3d R_world_boat = get_R_world_boat(img_pt.theta); - +const Eigen::Isometry3d DataParser::get_T_world_gps( + const symphony_lake_dataset::ImagePoint &img_pt) { Eigen::Isometry3d T_world_gps; - T_world_gps.linear() = R_world_boat * T_boat_gps.linear(); + // img_pt.theta is in the North East Down frame, not lattitude longitude coords. However, + // because of the alignment of the imu and gps, this rotation math holds. + T_world_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix(); T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + return T_world_gps; +} + +const Eigen::Isometry3d DataParser::get_T_world_boat( + const symphony_lake_dataset::ImagePoint &img_pt) { + // Eigen::Isometry3d T_world_gps; + + // Eigen::Matrix3d R_world_imu = get_R_world_imu(img_pt.theta); + // Eigen::Isometry3d T_imu_gps = T_boat_imu.inverse() * T_boat_gps; + // Eigen::Matrix3d R_world_gps = R_world_imu * T_imu_gps.linear(); + + // T_world_gps.linear() = R_world_gps; + // T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + + // Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse(); + Eigen::Isometry3d T_world_gps = get_T_world_gps(img_pt); Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse(); return T_world_boat; } diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index 5311a29f5..5c6e91b54 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -20,56 +20,8 @@ class SymphonyLakeCamParams { }; class DataParser { public: - template - struct Generator { - struct promise_type { - T value; - std::suspend_always yield_value(T v) { - value = v; - return {}; - } - std::suspend_always initial_suspend() { return {}; } - std::suspend_always final_suspend() noexcept { return {}; } - Generator get_return_object() { return Generator{handle_type::from_promise(*this)}; } - void return_void() {} - void unhandled_exception() { std::terminate(); } - }; - - using handle_type = std::coroutine_handle; - - Generator(handle_type h) : handle(h) {} - ~Generator() { - if (handle) handle.destroy(); - } - - struct iterator { - handle_type handle; - bool operator!=(std::default_sentinel_t) const { return !handle.done(); } - iterator &operator++() { - handle.resume(); - return *this; - } - T operator*() const { return handle.promise().value; } - }; - - iterator begin() { return iterator{handle}; } - std::default_sentinel_t end() { return {}; } - - private: - handle_type handle; - }; - - Generator image_generator(const symphony_lake_dataset::SurveyVector &survey_vector) { - for (int i = 0; i < static_cast(survey_vector.getNumSurveys()); i++) { - const symphony_lake_dataset::Survey &survey = survey_vector.get(i); - for (int j = 0; j < static_cast(survey.getNumImages()); j++) { - co_yield survey.loadImageByImageIndex(j); - } - } - } - static const Eigen::Vector3d t_boat_cam; - static const Eigen::Isometry3d T_boat_gps; + static const Eigen::Isometry3d T_boat_gps; static const Eigen::Isometry3d T_boat_imu; DataParser(const std::filesystem::path &image_root_dir, @@ -78,19 +30,17 @@ class DataParser { // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, // bool use_compass = false); - static const Eigen::Isometry3d get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt); + static const Eigen::Isometry3d get_T_boat_camera( + const symphony_lake_dataset::ImagePoint &img_pt); static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt); - /// @brief get_R_world_boat assuming z_axis_boat dot z_axis_world ~ -1 - /// @param theta_compass in radians - /// @return R_world_boat - static const Eigen::Matrix3d get_R_world_boat(double theta_compass); + static const Eigen::Isometry3d get_T_world_gps(const symphony_lake_dataset::ImagePoint &img_pt); /// @brief get_T_world_boat assuming z_axis_boat dot z_axis_world ~ -1 - /// @param img_pt + /// @param img_pt /// @return T_world_boat - static const Eigen::Isometry3d get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt); + static const Eigen::Isometry3d get_T_world_boat( + const symphony_lake_dataset::ImagePoint &img_pt); const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; - Generator create_img_generator() { return image_generator(surveys_); }; private: std::filesystem::path image_root_dir_; diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index c7c2329ef..05093dedb 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -1,5 +1,4 @@ #include "experimental/learn_descriptors/symphony_lake_parser.hh" -#include "common/geometry/opencv_viz.hh" #include #include @@ -8,6 +7,7 @@ #include #include "common/check.hh" +#include "common/geometry/opencv_viz.hh" #include "gtest/gtest.h" #include "opencv2/opencv.hpp" @@ -56,37 +56,67 @@ TEST(SymphonyLakeParserTest, snippet_140106) { } } } -// TEST(SymphonyLakeParserTest, snippet_140106_generator_test) { -// DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); -// DataParser::Generator generator = -// (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator(); -// DataParser::Generator::iterator img_iter = generator.begin(); -// while (img_iter != generator.end()) { -// cv::Mat img = *img_iter; -// // if (!is_test()) { -// // cv::imshow("Symphony Dataset Image", img); -// // cv::waitKey(200); -// // } -// // cv::imshow("Symphony Dataset Image", img); -// // cv::waitKey(200); -// ++img_iter; -// } -// } - -TEST(SymphonyLakeParserTest, test_frames) { - const std::vector indices {120, 190}; // 0-199 + +TEST(SymphonyLakeParserTest, test_cam_frames) { + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); + } + return tmp; + }(); DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint image_point_first = survey.getImagePoint(indices.front()); + const symphony_lake_dataset::ImagePoint image_point_first = + survey.getImagePoint(indices.front()); + + std::vector cam_frames; - std::vector poses_viz; - poses_viz.push_back(DataParser::get_T_boat_camera(image_point_first)); - poses_viz.push_back(DataParser::T_boat_gps); - poses_viz.push_back(DataParser::T_boat_imu); + // NOTE: the world in these images is east, north, up centered at boat0 translation + Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation(); - std::cout << "pan: " << image_point_first.pan << "\ntilt: " << image_point_first.tilt << std::endl; + for (size_t i = 0; i < indices.size(); i++) { + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); + Eigen::Isometry3d T_world_boatidx = DataParser::get_T_world_boat(img_pt); + Eigen::Isometry3d T_boatidx_camidx = + DataParser::get_T_boat_camera(img_pt); // current boat to current camera + + Eigen::Isometry3d T_world_camidx = T_world_boatidx * T_boatidx_camidx; + T_world_camidx.translation() -= t_world_boat0; + + cam_frames.push_back(T_world_camidx); + } + + geometry::viz_scene(cam_frames, std::vector(), true, true, "test_cam_frames"); +} + +TEST(SymphonyLakeParserTest, test_gps_frames) { + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); + } + return tmp; + }(); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint image_point_first = + survey.getImagePoint(indices.front()); + + std::vector gps_frames; + + // NOTE: the world in these images is east, north, up centered at boat0 translation + Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0); + + for (size_t i = 0; i < indices.size(); i++) { + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); + Eigen::Isometry3d T_world_gpsidx = DataParser::get_T_world_gps(img_pt); + T_world_gpsidx.translation() -= t_world_gps0; + gps_frames.push_back(T_world_gpsidx); + } - geometry::viz_scene(poses_viz, std::vector(), true, true); + geometry::viz_scene(gps_frames, std::vector(), true, true, "test_gps_frames"); } } // namespace robot::experimental::learn_descriptors From f65345f2e39e0184e48b1a7834360745497b44ed Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 4 Apr 2025 13:11:14 -0400 Subject: [PATCH 14/45] made world fram for SymphonyLakeParserTest.test_gps_frames the North, East, Up frame with translation at boat0 --- experimental/learn_descriptors/symphony_lake_parser_test.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index 05093dedb..2a800db9b 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -108,12 +108,13 @@ TEST(SymphonyLakeParserTest, test_gps_frames) { std::vector gps_frames; // NOTE: the world in these images is east, north, up centered at boat0 translation - Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0); + Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation(); + // Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0); for (size_t i = 0; i < indices.size(); i++) { const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); Eigen::Isometry3d T_world_gpsidx = DataParser::get_T_world_gps(img_pt); - T_world_gpsidx.translation() -= t_world_gps0; + T_world_gpsidx.translation() -= t_world_boat0; gps_frames.push_back(T_world_gpsidx); } From 3ef706c77b66df983b7fa92ed5ceb6c2f672ec9e Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 8 Apr 2025 10:59:30 -0400 Subject: [PATCH 15/45] created a spatial scene helper to create test scenes. did first pass at refactoring feature handling in SFM, SFM is currently broken. --- experimental/learn_descriptors/BUILD | 60 +++++++- .../learn_descriptors/feature_manager.hh | 30 ++++ .../learn_descriptors/feature_manager_test.cc | 65 +++++++++ experimental/learn_descriptors/feature_set.hh | 68 +++++++++ experimental/learn_descriptors/gtsam_test.cc | 4 + .../learn_descriptors/spatial_scene_test.hh | 53 +++++++ .../spatial_scene_test_cube.hh | 21 +++ .../structure_from_motion.cc | 136 +++++++++--------- .../structure_from_motion.hh | 48 ++++--- 9 files changed, 395 insertions(+), 90 deletions(-) create mode 100644 experimental/learn_descriptors/feature_manager.hh create mode 100644 experimental/learn_descriptors/feature_manager_test.cc create mode 100644 experimental/learn_descriptors/feature_set.hh create mode 100644 experimental/learn_descriptors/spatial_scene_test.hh create mode 100644 experimental/learn_descriptors/spatial_scene_test_cube.hh diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index dab2c184e..87936dbdb 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -30,7 +30,8 @@ cc_library( "@eigen", "//common/geometry:camera", "//common/geometry:translate_types", - "//common/geometry:opencv_viz" + "//common/geometry:opencv_viz", + ":feature_manager" ] ) @@ -54,7 +55,6 @@ cc_library( hdrs = ["symphony_lake_parser.hh"], copts = [ "-Wno-unused-parameter", - "-std=c++20", ], data = ["@symphony_lake_snippet"], visibility = ["//visibility:public"], @@ -95,3 +95,59 @@ cc_binary( "@opencv//:opencv" ] ) + +cc_library( + name = "feature_set", + hdrs = ["feature_set.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv" + ] +) + +cc_library( + name = "feature_manager", + hdrs = ["feature_manager.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv", + ":feature_set", + ] +) + +cc_test( + name = "feature_manager_test", + srcs = ["feature_manager_test.cc"], + deps = [ + ":feature_manager", + "@com_google_googletest//:gtest_main", + "@gtsam//:gtsam", + "@opencv//:opencv", + "@eigen", + ":spatial_scene_test_cube" + ] +) + + +cc_library( + name = "spatial_scene_test", + hdrs = ["spatial_scene_test.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv", + "@eigen" + ] +) + +cc_library( + name = "spatial_scene_test_cube", + hdrs = ["spatial_scene_test_cube.hh"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + ":spatial_scene_test" + ] +) \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_manager.hh b/experimental/learn_descriptors/feature_manager.hh new file mode 100644 index 000000000..0c66d514d --- /dev/null +++ b/experimental/learn_descriptors/feature_manager.hh @@ -0,0 +1,30 @@ +#pragma once +#include +#include +#include +#include + +#include "experimental/learn_descriptors/feature_set.hh" +#include "gtsam/inference/Symbol.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class FeatureManager { + public: + void append_img_data(const std::vector &kpts, const cv::Mat &descriptors) { + feature_sets_.push_back(FeatureSet(kpts, descriptors)); + }; + void insert_symbol(const size_t idx_img, const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) { + feature_sets_[idx_img].insert_symbol(kpt, symbol); + }; + + std::optional get_symbol(const size_t idx_img, const cv::KeyPoint &kpt) { + return feature_sets_[idx_img].get_symbol(kpt); + }; + + size_t get_num_images_added() { return feature_sets_.size(); }; + + private: + std::vector feature_sets_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_manager_test.cc b/experimental/learn_descriptors/feature_manager_test.cc new file mode 100644 index 000000000..73f198011 --- /dev/null +++ b/experimental/learn_descriptors/feature_manager_test.cc @@ -0,0 +1,65 @@ +#include "experimental/learn_descriptors/feature_manager.hh" + +#include "Eigen/Dense" +#include "experimental/learn_descriptors/spatial_scene_test_cube.hh" +#include "gtest/gtest.h" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +TEST(feature_manager_test, test) { + FeatureManager feature_manager; + SpatialSceneTestCube test_cube(1.f); + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + + Eigen::Matrix3d rotation0( + Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0)); + gtsam::PinholeCamera camera0(pose0, *K); + + gtsam::Pose3 pose1( + gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + rotation0), + gtsam::Point3(0, 4, 0)); + gtsam::PinholeCamera camera1(pose1, *K); + + gtsam::Pose3 pose2( + gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + rotation0), + gtsam::Point3(0, 4, 0)); + gtsam::PinholeCamera camera2(pose1, *K); + + std::vector> cameras{camera0, camera1}; + + std::vector projected_pixels = + test_cube.get_projected_pixels(camera0, cv::Size(img_width, img_height)); + cv::Mat image(480, 640, CV_8UC3, cv::Scalar(255, 255, 255)); + for (const Eigen::Vector2d &pt : projected_pixels) { + cv::circle(image, cv::Point2i(static_cast(pt[0]), static_cast(pt[1])), 4, + cv::Scalar(0, 0, 0)); + } + cv::imshow("projected cubes", image); + cv::waitKey(0); + + std::vector, cv::Size>> cams{ + std::pair, cv::Size>(camera0, + cv::Size(img_width, img_height)), + std::pair, cv::Size>(camera1, + cv::Size(img_width, img_height))}; + std::vector> correspondences = + test_cube.get_corresponding_pixels(cams); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_set.hh b/experimental/learn_descriptors/feature_set.hh new file mode 100644 index 000000000..65099cffe --- /dev/null +++ b/experimental/learn_descriptors/feature_set.hh @@ -0,0 +1,68 @@ +#pragma once +#include +#include +#include +#include + +#include "gtsam/geometry/Pose3.h" +#include "gtsam/inference/Symbol.h" +#include "opencv2/opencv.hpp" + +namespace std { +template <> +struct hash { + size_t operator()(const cv::KeyPoint &kp) const { + size_t h1 = hash()(kp.pt.x); + size_t h2 = hash()(kp.pt.y); + size_t h3 = hash()(kp.size); + size_t h4 = hash()(kp.angle); + size_t h5 = hash()(kp.response); + size_t h6 = hash()(kp.octave); + size_t h7 = hash()(kp.class_id); + + return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); + } +}; +} // namespace std +namespace cv { +inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) { + return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && + kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id; +} +} // namespace cv +namespace robot::experimental::learn_descriptors { +class FeatureSet { + public: + struct keypoints_descriptors { + keypoints_descriptors(const std::vector &keypoints, + const cv::Mat &descriptors) + : kpts(keypoints), descriptors(descriptors) {} + std::vector kpts; + cv::Mat descriptors; + }; + + FeatureSet(const std::vector &kpts, const cv::Mat &descriptors) + : kpts_descriptors_(keypoints_descriptors(kpts, descriptors)){}; + ~FeatureSet() = default; + + void insert_symbol(const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) { + if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) { + std::stringstream error_msg; + error_msg << "Keypoint already has symbol! Keypoint has symbol: " + << kpt_to_symbol_.at(kpt); + throw std::runtime_error(error_msg.str()); + } + kpt_to_symbol_.emplace(kpt, symbol); + }; + std::optional get_symbol(const cv::KeyPoint &kpt) { + if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) { + return kpt_to_symbol_.at(kpt); + } + return std::nullopt; + }; + + private: + keypoints_descriptors kpts_descriptors_; + std::unordered_map kpt_to_symbol_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc index 96b643d03..019d0c7c4 100644 --- a/experimental/learn_descriptors/gtsam_test.cc +++ b/experimental/learn_descriptors/gtsam_test.cc @@ -1,10 +1,14 @@ + #include #include #include "Eigen/Core" #include "gtest/gtest.h" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" #include "gtsam/geometry/Point3.h" #include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" #include "gtsam/inference/Symbol.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/nonlinear/Values.h" diff --git a/experimental/learn_descriptors/spatial_scene_test.hh b/experimental/learn_descriptors/spatial_scene_test.hh new file mode 100644 index 000000000..60daafb28 --- /dev/null +++ b/experimental/learn_descriptors/spatial_scene_test.hh @@ -0,0 +1,53 @@ +#pragma once +#include +#include + +#include "Eigen/Core" +#include "gtsam/geometry/PinholeCamera.h" +#include "opencv2/opencv.hpp" + +class ProjectionHelper { + public: + static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { + return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height; + } +}; +namespace robot::experimental::learn_descriptors { +class SpatialSceneTest { + public: + SpatialSceneTest() = default; + SpatialSceneTest(std::vector points) : points_(points){}; + const std::vector &get_points() { return points_; }; + template + std::vector get_projected_pixels(const gtsam::PinholeCamera &camera, + const cv::Size &img_size) { + std::vector pixels; + for (const Eigen::Vector3d &pt : points_) { + Eigen::Vector2d pixel = camera.project(pt); + if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) { + pixels.push_back(pixel); + } + } + return pixels; + }; + template + const std::vector> get_corresponding_pixels( + const std::vector, cv::Size>> &cam_and_img_size) { + std::vector> corresponding_pixels; + for (const Eigen::Vector3d &pt : points_) { + Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt); + Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt); + if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width, + cam_and_img_size[0].second.height) && + ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width, + cam_and_img_size[1].second.height)) { + corresponding_pixels.emplace_back(pixel_cam0, pixel_cam1); + } + } + return corresponding_pixels; + }; + + protected: + std::vector points_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_scene_test_cube.hh b/experimental/learn_descriptors/spatial_scene_test_cube.hh new file mode 100644 index 000000000..b8ff6aea6 --- /dev/null +++ b/experimental/learn_descriptors/spatial_scene_test_cube.hh @@ -0,0 +1,21 @@ +#pragma once +#include + +#include "Eigen/Core" +#include "experimental/learn_descriptors/spatial_scene_test.hh" + +namespace robot::experimental::learn_descriptors { +class SpatialSceneTestCube : public SpatialSceneTest { + public: + SpatialSceneTestCube(const float cube_size) : SpatialSceneTest() { + points_.push_back(Eigen::Vector3d(0, 0, 0)); + points_.push_back(Eigen::Vector3d(cube_size, 0, 0)); + points_.push_back(Eigen::Vector3d(cube_size, cube_size, 0)); + points_.push_back(Eigen::Vector3d(0, cube_size, 0)); + points_.push_back(Eigen::Vector3d(0, 0, cube_size)); + points_.push_back(Eigen::Vector3d(cube_size, 0, cube_size)); + points_.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); + points_.push_back(Eigen::Vector3d(0, cube_size, cube_size)); + }; +}; +} // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index d9b145a18..cad7869d5 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -7,6 +7,7 @@ #include "common/geometry/camera.hh" #include "common/geometry/opencv_viz.hh" #include "common/geometry/translate_types.hh" +#include "gtsam/geometry/Point2.h" #include "gtsam/geometry/triangulation.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/slam/BetweenFactor.h" @@ -33,20 +34,8 @@ std::string pose_to_string(gtsam::Pose3 pose) { return ss.str(); }; -// const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() { -// Eigen::Isometry3d -// }(); const gtsam::Pose3 StructureFromMotion::default_initial_pose( StructureFromMotion::T_symlake_boat_cam.matrix()); -// const gtsam::Pose3 StructureFromMotion::default_initial_pose = -// gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation()); -// gtsam::Pose3( -// gtsam::Rot3(Eigen::Matrix3d( -// Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() * -// Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 1, 0)).toRotationMatrix() -// )), -// gtsam::Point3::Identity() -// ); StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, Eigen::Matrix D, @@ -72,77 +61,92 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); - keypoint_to_landmarks_.push_back(std::unordered_map()); - const size_t idx_img_current = get_num_images_added(); + feature_manager_.append_img_data(keypoints_and_descriptors.first, + keypoints_and_descriptors.second); + // keypoint_to_landmarks_.push_back(std::unordered_map()); + // const size_t idx_img_current = get_num_images_added(); + const size_t idx_img_current = feature_manager_.get_num_images_added() - 1; + std::cout << "current index " << idx_img_current << std::endl; if (idx_img_current > 0) { std::vector matches = get_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); - // std::vector matches = frontend_.get_matches( - // img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second); - // Frontend::enforce_bijective_matches(matches); - // matches_.push_back(matches); - // gtsam::Rot3 R_c0_c1( - // geom::estimate_c0_c1( - // img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, - // matches, - // geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear()); const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1); const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current); - // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, - // get_num_images_added()), gps_prior, backend_.get_translation_noise()); gtsam::Pose3 - // T_w_c0 = backend_.get_current_initial_values().at(sym_T_w_c0); gtsam::Pose3 - // T_c0_c1( - // R_c0_c1, - // t_world_gps - T_w_c0.translation()); - // backend_.add_between_factor( - // sym_T_w_c0, - // sym_T_w_c1, - // T_c0_c1, - // backend_.get_pose_noise()); + backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation()); - // backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise()); for (const cv::DMatch match : matches) { const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx]; const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx]; - if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == - keypoint_to_landmarks_[idx_img_current - 1].end()) { - const Backend::Landmark landmark_cam_0( - gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0, - gtsam::Point2( - static_cast( - img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), - static_cast( - img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), - backend_.get_K(), 3.0); - keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0); + size_t key; + unsigned char chr; + + auto maybe_symbol0 = feature_manager_.get_symbol(idx_img_current - 1, kpt_cam0); + if (maybe_symbol0) { + key = (*maybe_symbol0).key(); + chr = (*maybe_symbol0).chr(); + } else { + gtsam::Symbol symbol_temp = + gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_); + key = symbol_temp.key(); + chr = symbol_temp.chr(); landmark_count_++; + feature_manager_.insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp); } - // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " << - // (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == - // keypoint_to_landmarks_[idx_img_current - 1].end()) - // << std::endl; - const Backend::Landmark landmark_cam_0 = - keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0); - - // technically don't need this conditional when enforcing bijectivity - if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == - keypoint_to_landmarks_[idx_img_current].end()) { - const Backend::Landmark landmark_cam_1( - landmark_cam_0.lmk_factor_symbol, sym_T_w_c1, - gtsam::Point2( - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), - backend_.get_K(), 3.0); - keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1); - } - const Backend::Landmark landmark_cam_1 = - keypoint_to_landmarks_[idx_img_current].at(kpt_cam1); + gtsam::Symbol symbol = gtsam::Symbol(key, chr); + feature_manager_.insert_symbol(idx_img_current, kpt_cam1, symbol); + + const Backend::Landmark landmark_cam_0( + symbol, sym_T_w_c0, + gtsam::Point2( + static_cast( + img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), + static_cast( + img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), + backend_.get_K(), 3.0); + + const Backend::Landmark landmark_cam_1( + symbol, sym_T_w_c1, + gtsam::Point2( + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), + static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), + backend_.get_K(), 3.0); + + // if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == + // keypoint_to_landmarks_[idx_img_current - 1].end()) { + // const Backend::Landmark landmark_cam_0( + // gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0, + // gtsam::Point2( + // static_cast( + // img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), + // static_cast( + // img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), + // backend_.get_K(), 3.0); + // keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0); + // landmark_count_++; + // } + + // const Backend::Landmark landmark_cam_0 = + // keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0); + + // // technically don't need this conditional when enforcing bijectivity + // if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == + // keypoint_to_landmarks_[idx_img_current].end()) { + // const Backend::Landmark landmark_cam_1( + // landmark_cam_0.lmk_factor_symbol, sym_T_w_c1, + // gtsam::Point2( + // static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), + // static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), + // backend_.get_K(), 3.0); + // keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1); + // } + // const Backend::Landmark landmark_cam_1 = + // keypoint_to_landmarks_[idx_img_current].at(kpt_cam1); backend_.add_landmark(landmark_cam_0); backend_.add_landmark(landmark_cam_1); diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 7cc454558..ada233108 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -8,6 +8,7 @@ #include "Eigen/Core" #include "common/geometry/camera.hh" +#include "experimental/learn_descriptors/feature_manager.hh" #include "gtsam/geometry/Point3.h" #include "gtsam/geometry/Pose3.h" #include "gtsam/inference/Symbol.h" @@ -17,28 +18,29 @@ #include "gtsam/nonlinear/Values.h" #include "opencv2/opencv.hpp" -namespace std { -template <> -struct hash { - size_t operator()(const cv::KeyPoint &kp) const { - size_t h1 = hash()(kp.pt.x); - size_t h2 = hash()(kp.pt.y); - size_t h3 = hash()(kp.size); - size_t h4 = hash()(kp.angle); - size_t h5 = hash()(kp.response); - size_t h6 = hash()(kp.octave); - size_t h7 = hash()(kp.class_id); - - return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); - } -}; -} // namespace std -namespace cv { -inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) { - return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && - kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id; -} -} // namespace cv +// namespace std { +// template <> +// struct hash { +// size_t operator()(const cv::KeyPoint &kp) const { +// size_t h1 = hash()(kp.pt.x); +// size_t h2 = hash()(kp.pt.y); +// size_t h3 = hash()(kp.size); +// size_t h4 = hash()(kp.angle); +// size_t h5 = hash()(kp.response); +// size_t h6 = hash()(kp.octave); +// size_t h7 = hash()(kp.class_id); + +// return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); +// } +// }; +// } // namespace std +// namespace cv { +// inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) { +// return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && +// kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == +// kp2.class_id; +// } +// } // namespace cv namespace robot::experimental::learn_descriptors { class Frontend { public: @@ -208,6 +210,8 @@ class StructureFromMotion { const std::vector> get_matches() { return matches_; }; private: + FeatureManager feature_manager_; + gtsam::Pose3 initial_pose_; std::vector, cv::Mat>> img_keypoints_and_descriptors_; std::vector> matches_; From ef3ec99abb74a2a9675a3c91fb65a82d17f63a4a Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 11 Apr 2025 13:32:05 -0400 Subject: [PATCH 16/45] WIP. refactoring StructureFromMotion. Added some synthetic scenes for testing spatial pipeline components (gtsam stuff). Feature manager implementation and incorporation in StructureFromMotion still needs work. --- experimental/learn_descriptors/BUILD | 84 +++- experimental/learn_descriptors/backend.cc | 192 ++++++++ experimental/learn_descriptors/backend.hh | 114 +++++ .../learn_descriptors/backend_test.cc | 49 +++ .../learn_descriptors/feature_manager.hh | 19 + experimental/learn_descriptors/feature_set.hh | 1 + experimental/learn_descriptors/frontend.cc | 155 +++++++ experimental/learn_descriptors/frontend.hh | 51 +++ .../learn_descriptors/frontend_test.cc | 115 +++++ .../learn_descriptors/spatial_scene_test.hh | 53 --- .../spatial_scene_test_cube.hh | 21 - .../learn_descriptors/spatial_test_scene.cc | 82 ++++ .../learn_descriptors/spatial_test_scene.hh | 50 +++ .../spatial_test_scene_cube.hh | 21 + .../spatial_test_scene_cube_test.cc | 37 ++ .../structure_from_motion.cc | 409 +----------------- .../structure_from_motion.hh | 170 +------- .../structure_from_motion_test.cc | 165 +------ 18 files changed, 992 insertions(+), 796 deletions(-) create mode 100644 experimental/learn_descriptors/backend.cc create mode 100644 experimental/learn_descriptors/backend.hh create mode 100644 experimental/learn_descriptors/backend_test.cc create mode 100644 experimental/learn_descriptors/frontend.cc create mode 100644 experimental/learn_descriptors/frontend.hh create mode 100644 experimental/learn_descriptors/frontend_test.cc delete mode 100644 experimental/learn_descriptors/spatial_scene_test.hh delete mode 100644 experimental/learn_descriptors/spatial_scene_test_cube.hh create mode 100644 experimental/learn_descriptors/spatial_test_scene.cc create mode 100644 experimental/learn_descriptors/spatial_test_scene.hh create mode 100644 experimental/learn_descriptors/spatial_test_scene_cube.hh create mode 100644 experimental/learn_descriptors/spatial_test_scene_cube_test.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 87936dbdb..e6701405d 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -28,10 +28,10 @@ cc_library( "@opencv//:opencv", "@gtsam//:gtsam", "@eigen", - "//common/geometry:camera", - "//common/geometry:translate_types", "//common/geometry:opencv_viz", - ":feature_manager" + ":feature_manager", + ":frontend", + ":backend" ] ) @@ -126,15 +126,16 @@ cc_test( "@gtsam//:gtsam", "@opencv//:opencv", "@eigen", - ":spatial_scene_test_cube" + ":spatial_test_scene_cube" ] ) cc_library( - name = "spatial_scene_test", - hdrs = ["spatial_scene_test.hh"], + name = "spatial_test_scene", + hdrs = ["spatial_test_scene.hh"], visibility = ["//visibility:public"], + srcs = ["spatial_test_scene.cc"], deps = [ "@gtsam//:gtsam", "@opencv//:opencv", @@ -143,11 +144,74 @@ cc_library( ) cc_library( - name = "spatial_scene_test_cube", - hdrs = ["spatial_scene_test_cube.hh"], + name = "spatial_test_scene_cube", + hdrs = ["spatial_test_scene_cube.hh"], visibility = ["//visibility:public"], deps = [ "@eigen", - ":spatial_scene_test" + ":spatial_test_scene" + ] +) + +cc_test( + name = "spatial_test_scene_cube_test", + srcs = ["spatial_test_scene_cube_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + ":spatial_test_scene_cube", + "//common/geometry:opencv_viz", + "@eigen" ] -) \ No newline at end of file +) + +cc_library( + name = "frontend", + hdrs = ["frontend.hh"], + visibility = ["//visibility:public"], + srcs = ["frontend.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "@opencv//:opencv", + ] +) + + +cc_test( + name = "frontend_test", + srcs = ["frontend_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + ":frontend" + ] +) + +cc_library( + name = "backend", + hdrs = ["backend.hh"], + visibility = ["//visibility:public"], + srcs = ["backend.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "@opencv//:opencv", + "@gtsam//:gtsam", + "//common/geometry:camera", + ":feature_manager" + ] +) + + +cc_test( + name = "backend_test", + srcs = ["backend_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + "@opencv//:opencv", + ":backend", + ":spatial_scene_test_cube", + ":feature_manager" + ] +) diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc new file mode 100644 index 000000000..bb601780c --- /dev/null +++ b/experimental/learn_descriptors/backend.cc @@ -0,0 +1,192 @@ +#include "experimental/learn_descriptors/backend.hh" + +#include "gtsam/geometry/triangulation.h" +#include "gtsam/navigation/GPSFactor.h" +#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/slam/BetweenFactor.h" +#include "gtsam/slam/PriorFactor.h" +#include "gtsam/slam/ProjectionFactor.h" + +namespace robot::experimental::learn_descriptors { + +Backend::Backend(std::shared_ptr feature_manager) + : feature_manager_(feature_manager) { + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + + gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); + K_ = boost::make_shared(K); + // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); +} + +Backend::Backend(std::shared_ptr feature_manager, gtsam::Cal3_S2 K) + : feature_manager_(feature_manager) { + K_ = boost::make_shared(K); + // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); +} + +template <> +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, + const gtsam::SharedNoiseModel &noise) { + graph_.emplace_shared>(symbol, value, noise); + initial_estimate_.insert_or_assign(symbol, value); + // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; + // initial_estimate_.print("values after adding prior: "); +} + +template <> +void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, + const gtsam::SharedNoiseModel &noise) { + graph_.emplace_shared>(symbol, value, noise); + gtsam::Rot3 R; + if (initial_estimate_.exists(symbol)) { + R = initial_estimate_.at(symbol).rotation(); + } + initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value)); +} + +template <> +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, + const gtsam::Symbol &symbol_2, + const gtsam::Pose3 &value, + const gtsam::SharedNoiseModel &model) { + graph_.emplace_shared>(symbol_1, symbol_2, value, model); + // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << + // std::endl; initial_estimate_.print("values when adding between factor: "); + initial_estimate_.insert_or_assign(symbol_2, + initial_estimate_.at(symbol_1).compose(value)); +} + +template <> +void Backend::add_between_factor(const gtsam::Symbol &symbol_1, + const gtsam::Symbol &symbol_2, + const gtsam::Rot3 &value, + const gtsam::SharedNoiseModel &model) { + graph_.emplace_shared>(symbol_1, symbol_2, value, model); + initial_estimate_.insert_or_assign(symbol_2, + initial_estimate_.at(symbol_1).compose( + gtsam::Pose3(value, gtsam::Point3::Zero()))); +} + +void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, + const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) { + graph_.emplace_shared(symbol, t_world_cam, model); + initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam)); +} + +std::pair, std::vector> Backend::get_obs_for_lmk( + const gtsam::Symbol &lmk_symbol) { + std::vector cam_poses; + std::vector observations; + + // Iterate over all factors in the graph + for (const auto &factor : graph_) { + auto projFactor = boost::dynamic_pointer_cast< + gtsam::GenericProjectionFactor>(factor); + + if (projFactor && projFactor->keys().at(1) == lmk_symbol) { + // Get the camera pose symbol + gtsam::Symbol cameraSymbol = projFactor->keys().at(0); + + // Retrieve the camera pose from values + if (!initial_estimate_.exists(cameraSymbol)) continue; + gtsam::Pose3 cameraPose = initial_estimate_.at(cameraSymbol); + + // Get the 2D observation (keypoint measurement) + gtsam::Point2 observation = projFactor->measured(); + + // Store the pose and corresponding observation + observations.push_back(observation); + cam_poses.push_back(cameraPose); + } + } + return std::pair, std::vector>(cam_poses, + observations); +} + +void Backend::add_landmarks(const std::vector &landmarks) { + for (const Landmark &landmark : landmarks) { + add_landmark(landmark); + } +} + +void Backend::add_landmark(const Landmark &landmark) { + graph_.emplace_shared>( + landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, + K_); + if (!initial_estimate_.exists(landmark.cam_pose_symbol)) { + throw std::runtime_error( + "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before " + "add_landmark is called."); + } + gtsam::Point3 p_world_lmk_estimate = + initial_estimate_.at(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess; + initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate); + + std::pair, std::vector> lmk_obs = + get_obs_for_lmk(landmark.lmk_factor_symbol); + if (lmk_obs.first.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_world_lmk_estimate = gtsam::triangulatePoint3( + lmk_obs.first, K_, + gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); + + // Optional: perform an explicit cheirality check + bool valid = true; + for (const auto &pose : lmk_obs.first) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate); + if (p_cam.z() <= 0) { // Check that the depth is positive + valid = false; + break; + } + } + + if (valid) { + initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); + } else { + std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." + << std::endl; + } + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous estimate. + std::cerr << "Triangulation Cheirality Exception: " << e.what() + << ". Keeping initial landmark estimate." << std::endl; + } + } +} + +void Backend::solve_graph() { + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); + result_ = optimizer.optimize(); +} + +void Backend::solve_graph(const int num_steps, + std::optional inter_debug_func) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; // We'll manually step it + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); + + double prev_error = optimizer.error(); + for (int i = 0; i < num_steps; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (inter_debug_func) { + (*inter_debug_func)(optimizer.values(), i); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + result_ = optimizer.values(); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh new file mode 100644 index 000000000..fa988b905 --- /dev/null +++ b/experimental/learn_descriptors/backend.hh @@ -0,0 +1,114 @@ +#pragma once + +#include +#include +#include +#include + +#include "common/geometry/camera.hh" +#include "experimental/learn_descriptors/feature_manager.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Point2.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/linear/NoiseModel.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" +#include "gtsam/nonlinear/Values.h" + +namespace robot::experimental::learn_descriptors { +class Backend { + public: + static constexpr char pose_symbol_char = 'x'; + static constexpr char pose_rot_symbol_char = 'r'; + static constexpr char pose_translation_symbol_char = 't'; + static constexpr char pose_bearing_symbol_char = 'b'; + static constexpr char landmark_symbol_char = 'l'; + static constexpr char camera_symbol_char = 'k'; + + struct Landmark { + Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, + const gtsam::Point2 &projection, const gtsam::Cal3_S2 K, + float initial_depth_guess = 5.0) + : lmk_factor_symbol(lmk_factor_symbol), + cam_pose_symbol(cam_pose_symbol), + projection(projection), + p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K), + projection, initial_depth_guess)){}; + const gtsam::Symbol lmk_factor_symbol; + const gtsam::Symbol cam_pose_symbol; + const gtsam::Point2 projection; + const gtsam::Point3 p_cam_lmk_guess; + }; + + Backend(std::shared_ptr feature_manager = nullptr); + Backend(std::shared_ptr feature_manager, gtsam::Cal3_S2 K); + ~Backend(){}; + + template + void add_prior_factor(const gtsam::Symbol &symbol, const T &value, + const gtsam::SharedNoiseModel &model); + + template + void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, + const T &value, const gtsam::SharedNoiseModel &model); + + void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, + const gtsam::SharedNoiseModel &model, + const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity()); + + std::pair, std::vector> get_obs_for_lmk( + const gtsam::Symbol &lmk_symbol); + void add_landmarks(const std::vector &landmarks); + void add_landmark(const Landmark &landmark); + + void solve_graph(); + typedef int epoch; + using graph_step_debug_func = std::function; + void solve_graph(const int num_steps, + std::optional inter_debug_func = std::nullopt); + + const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; + const gtsam::Values &get_result() const { return result_; }; + const gtsam::Cal3_S2 &get_K() const { return *K_; }; + + const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; }; + const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; }; + const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; }; + const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; }; + + private: + std::shared_ptr feature_manager_; + gtsam::Cal3_S2::shared_ptr K_; + + gtsam::Values initial_estimate_; + gtsam::Values result_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = + gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); + gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 0.1); + gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = + gtsam::noiseModel::Isotropic::Sigma(3, 2.); +}; + +template <> +void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Pose3 &, + const gtsam::SharedNoiseModel &); + +template <> +void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Point3 &, + const gtsam::SharedNoiseModel &); + +template <> +void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, + const gtsam::Pose3 &, + const gtsam::SharedNoiseModel &); +template <> +void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, + const gtsam::Rot3 &, const gtsam::SharedNoiseModel &); +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc new file mode 100644 index 000000000..94d7b5067 --- /dev/null +++ b/experimental/learn_descriptors/backend_test.cc @@ -0,0 +1,49 @@ +#include "experimental/learn_descriptors/backend.hh" + +#include "experimental/learn_descriptors/feature_manager.hh" +#include "gtest/gtest.h" +#include "spatial_scene_test_cube.hh" + +namespace robot::experimental::learn_descriptors { +TEST(BackendTest, cube) { + SpatialTestSceneCube test_scene(1.0f); + + const size_t img_width = 640; + const size_t img_height = 480; + const cv::Size img_size(640, 480); + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + + Backend backend(std::make_shared(), *K); + test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K); + + std::vector> cameras = test_scene.get_cameras(); + std::vector camera_poses; + camera_poses.reserve(cameras.size()); + for (const gtsam::PinholeCamera &T_world_cam : cameras) { + camera_poses.emplace_back(T_world_cam.pose().matrix()); + } + + backend.add_prior_factor(gtsam::Symbol(backend.camera_symbol_char, 0), camera_poses.front(), + backend.get_pose_noise()); + for (int i = 1; i < camera_poses.size(); i++) { + backend.add_factor_GPS(gtsam::Symbol(backend.camera_symbol_char, i), camera_poses[i], + backend.get_gps_noise()); + + std::pair, cv::Size> cam_and_img_sizes[2] = { + std::pair, cv::Size>(cameras[i - 1], img_size), + std::pair, cv::Size>(cameras[i], img_size)}; + std::vector> + lmks = test_scene.get_corresponding_pixels(cam_and_img_sizes); + for (const &std::pair < SpatialTestScene::ProjectedPoint, + SpatialTestScene::ProjectedPoint >> lmk : lmks) { + // Backend::Landmark() + } + } + + geometry::viz_scene(camera_poses, test_scene.get_points()); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_manager.hh b/experimental/learn_descriptors/feature_manager.hh index 0c66d514d..d11fb8132 100644 --- a/experimental/learn_descriptors/feature_manager.hh +++ b/experimental/learn_descriptors/feature_manager.hh @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "experimental/learn_descriptors/feature_set.hh" @@ -16,6 +17,11 @@ class FeatureManager { }; void insert_symbol(const size_t idx_img, const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) { feature_sets_[idx_img].insert_symbol(kpt, symbol); + if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) { + symbols_to_idx_img_.at(symbol).push_back(idx_img); + } else { + symbols_to_idx_img_.emplace(symbol, std::vector{idx_img}); + } }; std::optional get_symbol(const size_t idx_img, const cv::KeyPoint &kpt) { @@ -24,7 +30,20 @@ class FeatureManager { size_t get_num_images_added() { return feature_sets_.size(); }; + std::unordered_map> get_added_symbols() { + return symbols_to_idx_img_; + }; + + bool get_idxs_for_symbol(const gtsam::Symbol &symbol, std::vector &out_cam_idxs) { + if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) { + out_cam_idxs = symbols_to_idx_img_.at(symbol); + return true; + } + return false; + } + private: std::vector feature_sets_; + std::unordered_map> symbols_to_idx_img_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_set.hh b/experimental/learn_descriptors/feature_set.hh index 65099cffe..5aa35260c 100644 --- a/experimental/learn_descriptors/feature_set.hh +++ b/experimental/learn_descriptors/feature_set.hh @@ -64,5 +64,6 @@ class FeatureSet { private: keypoints_descriptors kpts_descriptors_; std::unordered_map kpt_to_symbol_; + std::unordered_map symbol_to_kpt_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc new file mode 100644 index 000000000..46368a31e --- /dev/null +++ b/experimental/learn_descriptors/frontend.cc @@ -0,0 +1,155 @@ +#include "experimental/learn_descriptors/frontend.hh" + +#include + +namespace robot::experimental::learn_descriptors { + +Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { + extractor_type_ = frontend_algorithm; + matcher_type_ = frontend_matcher; + + switch (extractor_type_) { + case ExtractorType::SIFT: + feature_extractor_ = cv::SIFT::create(); + break; + case ExtractorType::ORB: + feature_extractor_ = cv::ORB::create(); + break; + default: + // Error handling needed? + break; + } + switch (matcher_type_) { + case MatcherType::BRUTE_FORCE: + descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); + break; + case MatcherType::KNN: + descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); + break; + case MatcherType::FLANN: + if (frontend_algorithm == ExtractorType::ORB) { + throw std::invalid_argument("FLANN can not be used with ORB."); + } + descriptor_matcher_ = cv::FlannBasedMatcher::create(); + break; + default: + // Error handling needed? + break; + } +} + +std::pair, cv::Mat> Frontend::get_keypoints_and_descriptors( + const cv::Mat &img) const { + std::vector keypoints; + cv::Mat descriptors; + switch (extractor_type_) { + default: // the opencv extractors have the same function signature + feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); + break; + } + return std::pair, cv::Mat>(keypoints, descriptors); +} + +std::vector Frontend::get_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const { + std::vector matches; + switch (matcher_type_) { + case MatcherType::BRUTE_FORCE: + get_brute_matches(descriptors1, descriptors2, matches); + break; + case MatcherType::KNN: + get_KNN_matches(descriptors1, descriptors2, matches); + break; + case MatcherType::FLANN: + get_FLANN_matches(descriptors1, descriptors2, matches); + break; + default: + break; + } + std::sort(matches.begin(), matches.end()); + return matches; +} + +void Frontend::threshold_matches(std::vector &matches, float dist_threshhold) { + matches.erase(std::remove_if(matches.begin(), matches.end(), + [dist_threshhold](const cv::DMatch &match) { + return match.distance > dist_threshhold; + }), + matches.end()); +} + +void Frontend::enforce_bijective_matches(std::vector &matches) { + std::unordered_map bestQueryMatch; + std::unordered_map bestTrainMatch; + + for (const auto &match : matches) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || + match.distance < bestQueryMatch[queryIdx].distance) { + bestQueryMatch[queryIdx] = match; + } + + if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || + match.distance < bestTrainMatch[trainIdx].distance) { + bestTrainMatch[trainIdx] = match; + } + } + + matches.erase(std::remove_if(matches.begin(), matches.end(), + [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) { + int queryIdx = match.queryIdx; + int trainIdx = match.trainIdx; + + return bestQueryMatch[queryIdx].trainIdx != trainIdx || + bestTrainMatch[trainIdx].queryIdx != queryIdx; + }), + matches.end()); +} + +bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (matcher_type_ != MatcherType::BRUTE_FORCE) { + return false; + } + matches_out.clear(); + descriptor_matcher_->match(descriptors1, descriptors2, matches_out); + return true; +} + +bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (matcher_type_ != MatcherType::KNN) { + return false; + } + std::vector> knn_matches; + descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); + const float ratio_thresh = 0.7f; + matches_out.clear(); + // Lowe's Ratio Test + for (size_t i = 0; i < knn_matches.size(); i++) { + if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { + matches_out.push_back(knn_matches[i][0]); + } + } + return true; +} + +bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (matcher_type_ != MatcherType::FLANN) { + return false; + } + std::vector> knn_matches; + descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); + const float ratio_thresh = 0.7f; + matches_out.clear(); + for (size_t i = 0; i < knn_matches.size(); i++) { + if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { + matches_out.push_back(knn_matches[i][0]); + } + } + return true; +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh new file mode 100644 index 000000000..9b720cc06 --- /dev/null +++ b/experimental/learn_descriptors/frontend.hh @@ -0,0 +1,51 @@ +#pragma once + +#include +#include + +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class Frontend { + public: + enum class ExtractorType { SIFT, ORB }; + enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; + + Frontend(){}; + Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); + ~Frontend(){}; + + const ExtractorType &get_extractor_type() const { return extractor_type_; }; + const MatcherType &get_matcher_type() const { return matcher_type_; }; + + std::pair, cv::Mat> get_keypoints_and_descriptors( + const cv::Mat &img) const; + std::vector get_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const; + static void threshold_matches(std::vector &matches, float dist_threshhold); + static void enforce_bijective_matches(std::vector &matches); + static void draw_keypoints(const cv::Mat &img, std::vector keypoints, + cv::Mat img_keypoints_out) { + cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), + cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + } + static void draw_matches(const cv::Mat &img1, std::vector keypoints1, + const cv::Mat &img2, std::vector keypoints2, + std::vector matches, cv::Mat img_matches_out) { + cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); + } + + private: + bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + ExtractorType extractor_type_; + MatcherType matcher_type_; + + cv::Ptr feature_extractor_; + cv::Ptr descriptor_matcher_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc new file mode 100644 index 000000000..8ab092d3e --- /dev/null +++ b/experimental/learn_descriptors/frontend_test.cc @@ -0,0 +1,115 @@ +#include "experimental/learn_descriptors/frontend.hh" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +TEST(FrontendTest, pipeline_sweep) { + const size_t width = 640; + const size_t height = 480; + + const size_t pixel_shift_x = 20; + const size_t PIXEL_COMP_TOL = 20; + + cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3); + cv::Mat image_2; + + cv::Mat translation_mat = (cv::Mat_(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0); + cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}}; + float com_x = 0.0f, com_y = 0.0f; + for (const cv::Point &rect_point : rect_points) { + com_x += rect_point.x; + com_y += rect_point.y; + } + com_x *= 0.25; + com_y *= 0.25; + cv::Point rotation_center(com_x, com_y); + cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0); + + const size_t line_spacing = 100; + for (size_t i = 0; i <= width / line_spacing; i++) { + size_t x = i * line_spacing + (width % line_spacing) / 2; + size_t b = i * 255.0 / (width / line_spacing); + size_t g = 255.0 - i * 255.0 / (width / line_spacing); + cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2); + } + for (size_t i = 0; i <= height / line_spacing; i++) { + size_t y = i * line_spacing + (height % line_spacing) / 2; + size_t b = i * 255.0 / (width / line_spacing); + size_t g = 255.0 - i * 255.0 / (width / line_spacing); + cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2); + } + + cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size()); + cv::warpAffine(image_1, image_2, translation_mat, image_1.size()); + + // cv::Mat img_test_disp; + // cv::hconcat(image_1, image_2, img_test_disp); + // cv::imshow("Test", img_test_disp); + // cv::waitKey(1000); + + Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT, + Frontend::ExtractorType::ORB}; + Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE, + Frontend::MatcherType::FLANN, + Frontend::MatcherType::KNN}; + + Frontend frontend; + std::pair, cv::Mat> keypoints_descriptors_pair_1; + std::pair, cv::Mat> keypoints_descriptors_pair_2; + std::vector matches; + cv::Mat img_keypoints_out_1(height, width, CV_8UC3), + img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); + // cv::Mat img_display_test; + for (Frontend::ExtractorType extractor_type : extractor_types) { + for (Frontend::MatcherType matcher_type : matcher_types) { + // printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), + // static_cast(matcher_type)); + try { + frontend = Frontend(extractor_type, matcher_type); + } catch (const std::invalid_argument &e) { + assert(std::string(e.what()) == "FLANN can not be used with ORB."); // very jank... + continue; + } + keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1); + keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2); + matches = frontend.get_matches(keypoints_descriptors_pair_1.second, + keypoints_descriptors_pair_2.second); + frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first, + img_keypoints_out_1); + frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first, + img_keypoints_out_2); + frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2, + keypoints_descriptors_pair_2.first, matches, img_matches_out); + // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + // cv::vconcat(img_display_test, img_matches_out, img_display_test); + // std::stringstream text; + // text << "Extractor " << static_cast(extractor_type) << ", matcher " + // << static_cast(matcher_type); + // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), + // cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); + // cv::imshow("Keypoints and Matches Output.", img_display_test); + // std::cout << "Hold spacebar to pause." << std::endl; + // while (cv::waitKey(1000) == 32) { + // } + // printf("completed frontend combination: (%d, %d)\n", + // static_cast(extractor_type), + // static_cast(matcher_type)); + if (extractor_type != Frontend::ExtractorType::ORB) { // don't check ORB for now + for (const cv::DMatch match : matches) { + EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - + keypoints_descriptors_pair_2.first[match.trainIdx].pt.x, + pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL); + EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y - + keypoints_descriptors_pair_1.first[match.queryIdx].pt.y, + 0, PIXEL_COMP_TOL); + } + } + } + } +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_scene_test.hh b/experimental/learn_descriptors/spatial_scene_test.hh deleted file mode 100644 index 60daafb28..000000000 --- a/experimental/learn_descriptors/spatial_scene_test.hh +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once -#include -#include - -#include "Eigen/Core" -#include "gtsam/geometry/PinholeCamera.h" -#include "opencv2/opencv.hpp" - -class ProjectionHelper { - public: - static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { - return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height; - } -}; -namespace robot::experimental::learn_descriptors { -class SpatialSceneTest { - public: - SpatialSceneTest() = default; - SpatialSceneTest(std::vector points) : points_(points){}; - const std::vector &get_points() { return points_; }; - template - std::vector get_projected_pixels(const gtsam::PinholeCamera &camera, - const cv::Size &img_size) { - std::vector pixels; - for (const Eigen::Vector3d &pt : points_) { - Eigen::Vector2d pixel = camera.project(pt); - if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) { - pixels.push_back(pixel); - } - } - return pixels; - }; - template - const std::vector> get_corresponding_pixels( - const std::vector, cv::Size>> &cam_and_img_size) { - std::vector> corresponding_pixels; - for (const Eigen::Vector3d &pt : points_) { - Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt); - Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt); - if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width, - cam_and_img_size[0].second.height) && - ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width, - cam_and_img_size[1].second.height)) { - corresponding_pixels.emplace_back(pixel_cam0, pixel_cam1); - } - } - return corresponding_pixels; - }; - - protected: - std::vector points_; -}; -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_scene_test_cube.hh b/experimental/learn_descriptors/spatial_scene_test_cube.hh deleted file mode 100644 index b8ff6aea6..000000000 --- a/experimental/learn_descriptors/spatial_scene_test_cube.hh +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once -#include - -#include "Eigen/Core" -#include "experimental/learn_descriptors/spatial_scene_test.hh" - -namespace robot::experimental::learn_descriptors { -class SpatialSceneTestCube : public SpatialSceneTest { - public: - SpatialSceneTestCube(const float cube_size) : SpatialSceneTest() { - points_.push_back(Eigen::Vector3d(0, 0, 0)); - points_.push_back(Eigen::Vector3d(cube_size, 0, 0)); - points_.push_back(Eigen::Vector3d(cube_size, cube_size, 0)); - points_.push_back(Eigen::Vector3d(0, cube_size, 0)); - points_.push_back(Eigen::Vector3d(0, 0, cube_size)); - points_.push_back(Eigen::Vector3d(cube_size, 0, cube_size)); - points_.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); - points_.push_back(Eigen::Vector3d(0, cube_size, cube_size)); - }; -}; -} // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/spatial_test_scene.cc b/experimental/learn_descriptors/spatial_test_scene.cc new file mode 100644 index 000000000..6be58e661 --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene.cc @@ -0,0 +1,82 @@ +#include "experimental/learn_descriptors/spatial_test_scene.hh" + +#include + +namespace robot::experimental::learn_descriptors { +template +std::vector SpatialTestScene::get_projected_pixels( + const gtsam::PinholeCamera &camera, const cv::Size &img_size) { + std::vector pixels; + for (size_t i = 0; i < points_.size(); i++) { + const Eigen::Vector3d pt = points_[i]; + Eigen::Vector2d pixel = camera.project(pt); + if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) { + ProjectedPoint projected_pt(i, pixel); + pixels.push_back(projected_pt); + } + } + return pixels; +}; + +template +std::vector> +SpatialTestScene::get_corresponding_pixels( + const std::pair, cv::Size> (&cam_and_img_size)[2]) { + std::vector> corresponding_pixels; + for (size_t i = 0; i < points_.size(); i++) { + const Eigen::Vector3d pt = points_[i]; + Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt); + Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt); + if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width, + cam_and_img_size[0].second.height) && + ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width, + cam_and_img_size[1].second.height)) { + corresponding_pixels.emplace_back(ProjectedPoint(i, pixel_cam0), + ProjectedPoint(i, pixel_cam1)); + } + } + return corresponding_pixels; +}; + +void SpatialTestScene::add_point(Eigen::Vector3d point) { points_.push_back(point); }; + +void SpatialTestScene::add_points(std::vector points) { + points_.insert(points_.end(), points.begin(), points.end()); +} + +void SpatialTestScene::add_camera(gtsam::PinholeCamera camera) { + cameras_.push_back(camera); +} + +void SpatialTestScene::add_cameras(std::vector> cameras) { + cameras_.insert(cameras_.end(), cameras.begin(), cameras.end()); +} + +void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_radius_origin, + double max_radius_origin, + const gtsam::Cal3_S2 &K) { + std::random_device rd; + std::mt19937 gen(rd()); + + std::uniform_real_distribution dist_radius(min_radius_origin, max_radius_origin); + std::uniform_real_distribution dist_omega(0.0, 2 * M_PI); + + for (int i = 0; i < num_cameras; i++) { + double angle_z = dist_radius(gen); + Eigen::Isometry3d R_z; + R_z.linear() = Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix(); + double angle_x = dist_radius(gen); + Eigen::Isometry3d R_y; + R_y.linear() = Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix(); + + double radius = dist_radius(gen); + Eigen::Isometry3d T_world_cam; + T_world_cam.translation() = Eigen::Vector3d(radius, 0, 0); + T_world_cam.linear() = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ())) + .matrix(); + T_world_cam = R_z * R_y * T_world_cam; + cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(T_world_cam.matrix()), K)); + } +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_test_scene.hh b/experimental/learn_descriptors/spatial_test_scene.hh new file mode 100644 index 000000000..bb7b714c4 --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene.hh @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include + +#include "Eigen/Dense" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" +#include "opencv2/opencv.hpp" + +class ProjectionHelper { + public: + static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { + return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height; + } +}; +namespace robot::experimental::learn_descriptors { +class SpatialTestScene { + public: + struct ProjectedPoint { + ProjectedPoint(const size_t pt_idx, const Eigen::Vector2d &pixel) + : pt_idx(pt_idx), pixel(pixel){}; + size_t pt_idx; + Eigen::Vector2d pixel; + }; + SpatialTestScene() = default; + SpatialTestScene(std::vector points) : points_(points){}; + template + std::vector get_projected_pixels(const gtsam::PinholeCamera &camera, + const cv::Size &img_size); + template + std::vector> get_corresponding_pixels( + const std::pair, cv::Size> (&cam_and_img_size)[2]); + + void add_point(Eigen::Vector3d point); + void add_points(std::vector points); + void add_camera(gtsam::PinholeCamera camera); + void add_cameras(std::vector> cameras); + void add_rand_cameras_face_origin(int num_cameras, double min_radius_origin, + double max_radius_origin, const gtsam::Cal3_S2 &K); + + std::vector &get_points() { return points_; }; + std::vector> &get_cameras() { return cameras_; }; + + protected: + std::vector points_; + std::vector> cameras_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh new file mode 100644 index 000000000..306afae4b --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh @@ -0,0 +1,21 @@ +#pragma once +#include + +#include "Eigen/Core" +#include "experimental/learn_descriptors/spatial_test_scene.hh" + +namespace robot::experimental::learn_descriptors { +class SpatialTestSceneCube : public SpatialTestScene { + public: + SpatialTestSceneCube(const float cube_size) : SpatialTestScene() { + points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2)); + points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2)); + points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2)); + points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2)); + points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2)); + points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2)); + points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2)); + points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2)); + }; +}; +} // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_test.cc b/experimental/learn_descriptors/spatial_test_scene_cube_test.cc new file mode 100644 index 000000000..0a244081e --- /dev/null +++ b/experimental/learn_descriptors/spatial_test_scene_cube_test.cc @@ -0,0 +1,37 @@ +#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" + +#include + +#include "Eigen/Dense" +#include "common/geometry/opencv_viz.hh" +#include "gtest/gtest.h" + +namespace robot::experimental::learn_descriptors { +TEST(SpatialTestSceneCubeTest, viz_cube) { + SpatialTestSceneCube test_scene(1.0f); + geometry::viz_scene(std::vector(), test_scene.get_points()); +} + +TEST(SpatialTestSceneCubeTest, viz_cube_with_cameras) { + SpatialTestSceneCube test_scene(1.0f); + + const size_t img_width = 640; + const size_t img_height = 480; + const double fx = 500.0; + const double fy = fx; + const double cx = img_width / 2.0; + const double cy = img_height / 2.0; + gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + + test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K); + + std::vector> cameras = test_scene.get_cameras(); + std::vector camera_poses; + camera_poses.reserve(cameras.size()); + for (const gtsam::PinholeCamera &T_world_cam : cameras) { + camera_poses.emplace_back(T_world_cam.pose().matrix()); + } + + geometry::viz_scene(camera_poses, test_scene.get_points()); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index cad7869d5..b28d8f009 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -8,11 +8,6 @@ #include "common/geometry/opencv_viz.hh" #include "common/geometry/translate_types.hh" #include "gtsam/geometry/Point2.h" -#include "gtsam/geometry/triangulation.h" -#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" -#include "gtsam/slam/BetweenFactor.h" -#include "gtsam/slam/PriorFactor.h" -#include "gtsam/slam/ProjectionFactor.h" namespace fs = std::filesystem; @@ -41,9 +36,9 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac gtsam::Cal3_S2 K, Eigen::Matrix D, gtsam::Pose3 initial_pose, Frontend::MatcherType frontend_matcher) - : initial_pose_(initial_pose) { + : feature_manager_(std::make_shared()), initial_pose_(initial_pose) { frontend_ = Frontend(frontend_extractor, frontend_matcher); - backend_ = Backend(K); + backend_ = Backend(feature_manager_, K); K_ = (cv::Mat_(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1); D_ = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); @@ -61,11 +56,11 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); - feature_manager_.append_img_data(keypoints_and_descriptors.first, - keypoints_and_descriptors.second); + feature_manager_->append_img_data(keypoints_and_descriptors.first, + keypoints_and_descriptors.second); // keypoint_to_landmarks_.push_back(std::unordered_map()); // const size_t idx_img_current = get_num_images_added(); - const size_t idx_img_current = feature_manager_.get_num_images_added() - 1; + const size_t idx_img_current = feature_manager_->get_num_images_added() - 1; std::cout << "current index " << idx_img_current << std::endl; if (idx_img_current > 0) { std::vector matches = @@ -83,26 +78,28 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo img_keypoints_and_descriptors_.back().first[match.queryIdx]; const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx]; - size_t key; + size_t idx; unsigned char chr; - auto maybe_symbol0 = feature_manager_.get_symbol(idx_img_current - 1, kpt_cam0); + auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0); if (maybe_symbol0) { - key = (*maybe_symbol0).key(); + idx = (*maybe_symbol0).index(); chr = (*maybe_symbol0).chr(); } else { gtsam::Symbol symbol_temp = gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_); - key = symbol_temp.key(); + idx = symbol_temp.index(); chr = symbol_temp.chr(); landmark_count_++; - feature_manager_.insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp); + feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp); } - gtsam::Symbol symbol = gtsam::Symbol(key, chr); - feature_manager_.insert_symbol(idx_img_current, kpt_cam1, symbol); + gtsam::Symbol symbol_lmk = gtsam::Symbol(chr, idx); + std::cout << "value: char: " << chr << " , idx: " << idx << ". " << symbol_lmk + << std::endl; + feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk); const Backend::Landmark landmark_cam_0( - symbol, sym_T_w_c0, + symbol_lmk, sym_T_w_c0, gtsam::Point2( static_cast( img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), @@ -111,43 +108,12 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo backend_.get_K(), 3.0); const Backend::Landmark landmark_cam_1( - symbol, sym_T_w_c1, + symbol_lmk, sym_T_w_c1, gtsam::Point2( static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), backend_.get_K(), 3.0); - // if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == - // keypoint_to_landmarks_[idx_img_current - 1].end()) { - // const Backend::Landmark landmark_cam_0( - // gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0, - // gtsam::Point2( - // static_cast( - // img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), - // static_cast( - // img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), - // backend_.get_K(), 3.0); - // keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0); - // landmark_count_++; - // } - - // const Backend::Landmark landmark_cam_0 = - // keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0); - - // // technically don't need this conditional when enforcing bijectivity - // if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == - // keypoint_to_landmarks_[idx_img_current].end()) { - // const Backend::Landmark landmark_cam_1( - // landmark_cam_0.lmk_factor_symbol, sym_T_w_c1, - // gtsam::Point2( - // static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), - // static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), - // backend_.get_K(), 3.0); - // keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1); - // } - // const Backend::Landmark landmark_cam_1 = - // keypoint_to_landmarks_[idx_img_current].at(kpt_cam1); - backend_.add_landmark(landmark_cam_0); backend_.add_landmark(landmark_cam_1); } @@ -166,352 +132,19 @@ std::vector StructureFromMotion::get_matches( return matches; } -Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { - extractor_type_ = frontend_algorithm; - matcher_type_ = frontend_matcher; - - switch (extractor_type_) { - case ExtractorType::SIFT: - feature_extractor_ = cv::SIFT::create(); - break; - case ExtractorType::ORB: - feature_extractor_ = cv::ORB::create(); - break; - default: - // Error handling needed? - break; - } - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); - break; - case MatcherType::KNN: - descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); - break; - case MatcherType::FLANN: - if (frontend_algorithm == ExtractorType::ORB) { - throw std::invalid_argument("FLANN can not be used with ORB."); - } - descriptor_matcher_ = cv::FlannBasedMatcher::create(); - break; - default: - // Error handling needed? - break; - } -} - -std::pair, cv::Mat> Frontend::get_keypoints_and_descriptors( - const cv::Mat &img) const { - std::vector keypoints; - cv::Mat descriptors; - switch (extractor_type_) { - default: // the opencv extractors have the same function signature - feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); - break; - } - return std::pair, cv::Mat>(keypoints, descriptors); -} - -std::vector Frontend::get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const { - std::vector matches; - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - get_brute_matches(descriptors1, descriptors2, matches); - break; - case MatcherType::KNN: - get_KNN_matches(descriptors1, descriptors2, matches); - break; - case MatcherType::FLANN: - get_FLANN_matches(descriptors1, descriptors2, matches); - break; - default: - break; - } - std::sort(matches.begin(), matches.end()); - return matches; -} - -void Frontend::threshold_matches(std::vector &matches, float dist_threshhold) { - matches.erase(std::remove_if(matches.begin(), matches.end(), - [dist_threshhold](const cv::DMatch &match) { - return match.distance > dist_threshhold; - }), - matches.end()); -} - -void Frontend::enforce_bijective_matches(std::vector &matches) { - std::unordered_map bestQueryMatch; - std::unordered_map bestTrainMatch; - - for (const auto &match : matches) { - int queryIdx = match.queryIdx; - int trainIdx = match.trainIdx; - - if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || - match.distance < bestQueryMatch[queryIdx].distance) { - bestQueryMatch[queryIdx] = match; - } - - if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || - match.distance < bestTrainMatch[trainIdx].distance) { - bestTrainMatch[trainIdx] = match; - } - } - - matches.erase(std::remove_if(matches.begin(), matches.end(), - [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) { - int queryIdx = match.queryIdx; - int trainIdx = match.trainIdx; - - return bestQueryMatch[queryIdx].trainIdx != trainIdx || - bestTrainMatch[trainIdx].queryIdx != queryIdx; - }), - matches.end()); -} - -bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::BRUTE_FORCE) { - return false; - } - matches_out.clear(); - descriptor_matcher_->match(descriptors1, descriptors2, matches_out); - return true; -} - -bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::KNN) { - return false; - } - std::vector> knn_matches; - descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); - const float ratio_thresh = 0.7f; - matches_out.clear(); - // Lowe's Ratio Test - for (size_t i = 0; i < knn_matches.size(); i++) { - if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { - matches_out.push_back(knn_matches[i][0]); - } - } - return true; -} - -bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::FLANN) { - return false; - } - std::vector> knn_matches; - descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2); - const float ratio_thresh = 0.7f; - matches_out.clear(); - for (size_t i = 0; i < knn_matches.size(); i++) { - if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { - matches_out.push_back(knn_matches[i][0]); - } - } - return true; -} - -Backend::Backend() { - const size_t img_width = 640; - const size_t img_height = 480; - const double fx = 500.0; - const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; - - gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - K_ = boost::make_shared(K); - // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); -} - -Backend::Backend(gtsam::Cal3_S2 K) { - K_ = boost::make_shared(K); - // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); -} - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, - const gtsam::SharedNoiseModel &noise) { - graph_.emplace_shared>(symbol, value, noise); - initial_estimate_.insert_or_assign(symbol, value); - // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; - // initial_estimate_.print("values after adding prior: "); -} - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, - const gtsam::SharedNoiseModel &noise) { - graph_.emplace_shared>(symbol, value, noise); - gtsam::Rot3 R; - if (initial_estimate_.exists(symbol)) { - R = initial_estimate_.at(symbol).rotation(); - } - initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value)); -} - -template <> -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, - const gtsam::Symbol &symbol_2, - const gtsam::Pose3 &value, - const gtsam::SharedNoiseModel &model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, model); - // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << - // std::endl; initial_estimate_.print("values when adding between factor: "); - initial_estimate_.insert_or_assign(symbol_2, - initial_estimate_.at(symbol_1).compose(value)); -} - -template <> -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, - const gtsam::Symbol &symbol_2, - const gtsam::Rot3 &value, - const gtsam::SharedNoiseModel &model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, model); - initial_estimate_.insert_or_assign(symbol_2, - initial_estimate_.at(symbol_1).compose( - gtsam::Pose3(value, gtsam::Point3::Zero()))); -} - -void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, - const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) { - graph_.emplace_shared(symbol, t_world_cam, model); - initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam)); -} - -std::pair, std::vector> Backend::get_obs_for_lmk( - const gtsam::Symbol &lmk_symbol) { - std::vector cam_poses; - std::vector observations; - - // Iterate over all factors in the graph - for (const auto &factor : graph_) { - auto projFactor = boost::dynamic_pointer_cast< - gtsam::GenericProjectionFactor>(factor); - - if (projFactor && projFactor->keys().at(1) == lmk_symbol) { - // Get the camera pose symbol - gtsam::Symbol cameraSymbol = projFactor->keys().at(0); - - // Retrieve the camera pose from values - if (!initial_estimate_.exists(cameraSymbol)) continue; - gtsam::Pose3 cameraPose = initial_estimate_.at(cameraSymbol); - - // Get the 2D observation (keypoint measurement) - gtsam::Point2 observation = projFactor->measured(); - - // Store the pose and corresponding observation - observations.push_back(observation); - cam_poses.push_back(cameraPose); - } - } - return std::pair, std::vector>(cam_poses, - observations); -} - -void Backend::add_landmarks(const std::vector &landmarks) { - for (const Landmark &landmark : landmarks) { - add_landmark(landmark); - } -} - -void Backend::add_landmark(const Landmark &landmark) { - graph_.emplace_shared>( - landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, - K_); - if (!initial_estimate_.exists(landmark.cam_pose_symbol)) { - throw std::runtime_error( - "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before " - "add_landmark is called."); - } - gtsam::Point3 p_world_lmk_estimate = - initial_estimate_.at(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess; - initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate); - - std::pair, std::vector> lmk_obs = - get_obs_for_lmk(landmark.lmk_factor_symbol); - if (lmk_obs.first.size() >= 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - p_world_lmk_estimate = gtsam::triangulatePoint3( - lmk_obs.first, K_, - gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); - - // Optional: perform an explicit cheirality check - bool valid = true; - for (const auto &pose : lmk_obs.first) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate); - if (p_cam.z() <= 0) { // Check that the depth is positive - valid = false; - break; - } - } - - if (valid) { - initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); - } else { - std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." - << std::endl; - } - } catch (const gtsam::TriangulationCheiralityException &e) { - // Handle the exception gracefully by logging and retaining the previous estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() - << ". Keeping initial landmark estimate." << std::endl; - } - } -} - -void Backend::solve_graph() { - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); - result_ = optimizer.optimize(); -} - -void Backend::solve_graph(const int num_steps, - std::optional inter_debug_func) { - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; // We'll manually step it - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); - - double prev_error = optimizer.error(); - for (int i = 0; i < num_steps; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (inter_debug_func) { - (*inter_debug_func)(optimizer.values(), i); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } - } - result_ = optimizer.values(); -} - void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name) { std::vector final_poses; std::vector final_lmks; - for (size_t i = 0; i < img_keypoints_and_descriptors_.size(); i++) { + for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) { final_poses.emplace_back( values.at(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix()); } - for (int i = 0; i < static_cast(get_matches().size()); i++) { - // NOTE: this j for lmk_symbol is wrong. - for (int j = 0; j < static_cast(get_matches()[i].size()); j++) { - gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); - if (values.exists(lmk_symbol)) { - final_lmks.emplace_back(values.at(lmk_symbol)); - } else { - std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; - } + for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) { + if (!values.exists(lmk_symbol)) { + std::cout << "WTF " << lmk_symbol << std::endl; } + final_lmks.emplace_back(values.at(lmk_symbol)); } geometry::viz_scene(final_poses, final_lmks, true, true, window_name); } diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index ada233108..a4e3850d2 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -2,181 +2,23 @@ #include #include +#include +#include #include #include #include -#include "Eigen/Core" +#include "Eigen/Dense" #include "common/geometry/camera.hh" +#include "experimental/learn_descriptors/backend.hh" #include "experimental/learn_descriptors/feature_manager.hh" -#include "gtsam/geometry/Point3.h" +#include "experimental/learn_descriptors/frontend.hh" #include "gtsam/geometry/Pose3.h" #include "gtsam/inference/Symbol.h" -#include "gtsam/linear/NoiseModel.h" -#include "gtsam/navigation/GPSFactor.h" -#include "gtsam/nonlinear/NonlinearFactorGraph.h" #include "gtsam/nonlinear/Values.h" #include "opencv2/opencv.hpp" -// namespace std { -// template <> -// struct hash { -// size_t operator()(const cv::KeyPoint &kp) const { -// size_t h1 = hash()(kp.pt.x); -// size_t h2 = hash()(kp.pt.y); -// size_t h3 = hash()(kp.size); -// size_t h4 = hash()(kp.angle); -// size_t h5 = hash()(kp.response); -// size_t h6 = hash()(kp.octave); -// size_t h7 = hash()(kp.class_id); - -// return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); -// } -// }; -// } // namespace std -// namespace cv { -// inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) { -// return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && -// kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == -// kp2.class_id; -// } -// } // namespace cv namespace robot::experimental::learn_descriptors { -class Frontend { - public: - enum class ExtractorType { SIFT, ORB }; - enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; - - Frontend(){}; - Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); - ~Frontend(){}; - - const ExtractorType &get_extractor_type() const { return extractor_type_; }; - const MatcherType &get_matcher_type() const { return matcher_type_; }; - - std::pair, cv::Mat> get_keypoints_and_descriptors( - const cv::Mat &img) const; - std::vector get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const; - static void threshold_matches(std::vector &matches, float dist_threshhold); - static void enforce_bijective_matches(std::vector &matches); - static void draw_keypoints(const cv::Mat &img, std::vector keypoints, - cv::Mat img_keypoints_out) { - cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), - cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - } - static void draw_matches(const cv::Mat &img1, std::vector keypoints1, - const cv::Mat &img2, std::vector keypoints2, - std::vector matches, cv::Mat img_matches_out) { - cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); - } - - private: - bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - ExtractorType extractor_type_; - MatcherType matcher_type_; - - cv::Ptr feature_extractor_; - cv::Ptr descriptor_matcher_; -}; -class Backend { - public: - static constexpr char pose_symbol_char = 'x'; - static constexpr char pose_rot_symbol_char = 'r'; - static constexpr char pose_translation_symbol_char = 't'; - static constexpr char pose_bearing_symbol_char = 'b'; - static constexpr char landmark_symbol_char = 'l'; - static constexpr char camera_symbol_char = 'k'; - - struct Landmark { - Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, - const gtsam::Point2 &projection, const gtsam::Cal3_S2 K, - float initial_depth_guess = 5.0) - : lmk_factor_symbol(lmk_factor_symbol), - cam_pose_symbol(cam_pose_symbol), - projection(projection), - p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K), - projection, initial_depth_guess)){}; - const gtsam::Symbol lmk_factor_symbol; - const gtsam::Symbol cam_pose_symbol; - const gtsam::Point2 projection; - const gtsam::Point3 p_cam_lmk_guess; - }; - - Backend(); - Backend(gtsam::Cal3_S2 K); - ~Backend(){}; - - template - void add_prior_factor(const gtsam::Symbol &symbol, const T &value, - const gtsam::SharedNoiseModel &model); - - template - void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, - const T &value, const gtsam::SharedNoiseModel &model); - - void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, - const gtsam::SharedNoiseModel &model, - const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity()); - - std::pair, std::vector> get_obs_for_lmk( - const gtsam::Symbol &lmk_symbol); - void add_landmarks(const std::vector &landmarks); - void add_landmark(const Landmark &landmark); - - void solve_graph(); - typedef int epoch; - using graph_step_debug_func = std::function; - void solve_graph(const int num_steps, - std::optional inter_debug_func = std::nullopt); - - const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; - const gtsam::Values &get_result() const { return result_; }; - const gtsam::Cal3_S2 &get_K() const { return *K_; }; - - const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; }; - const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; }; - const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; }; - const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; }; - - private: - gtsam::Cal3_S2::shared_ptr K_; - - gtsam::Values initial_estimate_; - gtsam::Values result_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = - gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); - gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ = - gtsam::noiseModel::Isotropic::Sigma(2, 0.1); - gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = - gtsam::noiseModel::Isotropic::Sigma(3, 2.); -}; - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Pose3 &, - const gtsam::SharedNoiseModel &); - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Point3 &, - const gtsam::SharedNoiseModel &); - -template <> -void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, - const gtsam::Pose3 &, - const gtsam::SharedNoiseModel &); -template <> -void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, - const gtsam::Rot3 &, const gtsam::SharedNoiseModel &); - class StructureFromMotion { public: static const Eigen::Isometry3d T_symlake_boat_cam; @@ -210,7 +52,7 @@ class StructureFromMotion { const std::vector> get_matches() { return matches_; }; private: - FeatureManager feature_manager_; + std::shared_ptr feature_manager_; gtsam::Pose3 initial_pose_; std::vector, cv::Mat>> img_keypoints_and_descriptors_; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index c856abd71..3da914853 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -18,120 +18,7 @@ class GtsamTestHelper { }; namespace robot::experimental::learn_descriptors { -TEST(SFM_TEST, frontend_pipeline_sweep) { - const size_t width = 640; - const size_t height = 480; - - const size_t pixel_shift_x = 20; - const size_t PIXEL_COMP_TOL = 20; - - cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3); - cv::Mat image_2; - - cv::Mat translation_mat = (cv::Mat_(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0); - cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}}; - float com_x = 0.0f, com_y = 0.0f; - for (const cv::Point &rect_point : rect_points) { - com_x += rect_point.x; - com_y += rect_point.y; - } - com_x *= 0.25; - com_y *= 0.25; - cv::Point rotation_center(com_x, com_y); - cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0); - - const size_t line_spacing = 100; - for (size_t i = 0; i <= width / line_spacing; i++) { - size_t x = i * line_spacing + (width % line_spacing) / 2; - size_t b = i * 255.0 / (width / line_spacing); - size_t g = 255.0 - i * 255.0 / (width / line_spacing); - cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2); - } - for (size_t i = 0; i <= height / line_spacing; i++) { - size_t y = i * line_spacing + (height % line_spacing) / 2; - size_t b = i * 255.0 / (width / line_spacing); - size_t g = 255.0 - i * 255.0 / (width / line_spacing); - cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2); - } - - cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size()); - cv::warpAffine(image_1, image_2, translation_mat, image_1.size()); - - // cv::Mat img_test_disp; - // cv::hconcat(image_1, image_2, img_test_disp); - // cv::imshow("Test", img_test_disp); - // cv::waitKey(1000); - - Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT, - Frontend::ExtractorType::ORB}; - Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE, - Frontend::MatcherType::FLANN, - Frontend::MatcherType::KNN}; - - Frontend frontend; - std::pair, cv::Mat> keypoints_descriptors_pair_1; - std::pair, cv::Mat> keypoints_descriptors_pair_2; - std::vector matches; - cv::Mat img_keypoints_out_1(height, width, CV_8UC3), - img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); - // cv::Mat img_display_test; - for (Frontend::ExtractorType extractor_type : extractor_types) { - for (Frontend::MatcherType matcher_type : matcher_types) { - // printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), - // static_cast(matcher_type)); - try { - frontend = Frontend(extractor_type, matcher_type); - } catch (const std::invalid_argument &e) { - assert(std::string(e.what()) == "FLANN can not be used with ORB."); // very jank... - continue; - } - keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1); - keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2); - matches = frontend.get_matches(keypoints_descriptors_pair_1.second, - keypoints_descriptors_pair_2.second); - frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first, - img_keypoints_out_1); - frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first, - img_keypoints_out_2); - frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2, - keypoints_descriptors_pair_2.first, matches, img_matches_out); - // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - // cv::vconcat(img_display_test, img_matches_out, img_display_test); - // std::stringstream text; - // text << "Extractor " << static_cast(extractor_type) << ", matcher " - // << static_cast(matcher_type); - // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), - // cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); - // cv::imshow("Keypoints and Matches Output.", img_display_test); - // std::cout << "Press spacebar to pause." << std::endl; - // while (cv::waitKey(1000) == 32) { - // } - // printf("completed frontend combination: (%d, %d)\n", - // static_cast(extractor_type), - // static_cast(matcher_type)); - if (extractor_type != Frontend::ExtractorType::ORB) { // don't check ORB for now - for (const cv::DMatch match : matches) { - EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - - keypoints_descriptors_pair_2.first[match.trainIdx].pt.x, - pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL); - EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y - - keypoints_descriptors_pair_1.first[match.queryIdx].pt.y, - 0, PIXEL_COMP_TOL); - } - } - } - } -} - -// TEST(SFM_TEST, boat_and_cam_frames) { -// // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the -// water/hull Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam; -// T_boat_cam.translation() = Eigen::Vector3d::Ones(); -// geometry::viz_scene(std::vector{T_boat_cam}, -// std::vector()); -// } - -TEST(SFM_TEST, sfm_snippet_small) { +TEST(StructureFromMotiontest, sfm_snippet_small) { const std::vector indices{120, 130}; // 0-199 DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); @@ -163,36 +50,12 @@ TEST(SFM_TEST, sfm_snippet_small) { Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); - // T_world_cam.linear() = T_world_camera0.linear().matrix(); - sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } - // for (const cv::Mat &image : images) { - // sfm.add_image(image); - // } const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); - std::vector poses_world; - for (size_t i = 0; i < indices.size(); i++) { - gtsam::Pose3 pose = - initial_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)); - poses_world.emplace_back(pose.matrix()); - } - std::vector points_world; - for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { - // NOTE: this j for lmk_symbol is wrong. - for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { - gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); - if (initial_values.exists(lmk_symbol)) { - points_world.emplace_back(initial_values.at(lmk_symbol)); - } else { - std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; - } - } - } - // std::cout << poses_world.front() << std::endl; - geometry::viz_scene(poses_world, points_world, true, true); + sfm.graph_values(initial_values, "initial_values"); std::cout << "Solving for structure!" << std::endl; @@ -201,28 +64,10 @@ TEST(SFM_TEST, sfm_snippet_small) { std::cout << "Solution complete." << std::endl; const gtsam::Values result_values = sfm.get_structure_result(); - std::vector final_poses; - std::vector final_lmks; - for (size_t i = 0; i < indices.size(); i++) { - final_poses.emplace_back( - result_values.at(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)) - .matrix()); - } - for (int i = 0; i < static_cast(sfm.get_matches().size()); i++) { - // NOTE: this j for lmk_symbol is wrong. - for (int j = 0; j < static_cast(sfm.get_matches()[i].size()); j++) { - gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j); - if (initial_values.exists(lmk_symbol)) { - final_lmks.emplace_back(initial_values.at(lmk_symbol)); - } else { - std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl; - } - } - } - geometry::viz_scene(final_poses, final_lmks, true, true); + sfm.graph_values(result_values, "result_values"); } -TEST(SFM_TEST, sfm_building) { +TEST(StructureFromMotiontest, sfm_building) { // indices 0-199 const std::vector indices = []() { std::vector tmp; @@ -284,7 +129,7 @@ TEST(SFM_TEST, sfm_building) { sfm.graph_values(result_values, "optimized values"); } -TEST(SFM_TEST, random_test) { +TEST(StructureFromMotiontest, random_test) { std::cout << DataParser::T_boat_gps.matrix() << std::endl; geometry::viz_scene( std::vector{DataParser::T_boat_imu, DataParser::T_boat_gps}, From d586265c7d2d211cc16cde25c4e3f6fb7c7c83f7 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 18 Apr 2025 13:52:49 -0400 Subject: [PATCH 17/45] WIP. Focus on sfm_mvp.cc first, then fill out object-oriented structure for pipeline --- experimental/learn_descriptors/BUILD | 60 +++++- experimental/learn_descriptors/backend.hh | 9 +- experimental/learn_descriptors/frame.cc | 9 + experimental/learn_descriptors/frame.hh | 27 +++ .../learn_descriptors/frontend_definitions.hh | 42 ++++ experimental/learn_descriptors/sfm_mvp.cc | 197 ++++++++++++++++++ .../structure_from_motion.cc | 39 ++-- .../structure_from_motion.hh | 4 +- .../structure_from_motion_test.cc | 1 + .../structure_from_motion_types.hh | 19 ++ 10 files changed, 379 insertions(+), 28 deletions(-) create mode 100644 experimental/learn_descriptors/frame.cc create mode 100644 experimental/learn_descriptors/frame.hh create mode 100644 experimental/learn_descriptors/frontend_definitions.hh create mode 100644 experimental/learn_descriptors/sfm_mvp.cc create mode 100644 experimental/learn_descriptors/structure_from_motion_types.hh diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index e6701405d..9ebb247ec 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -29,7 +29,7 @@ cc_library( "@gtsam//:gtsam", "@eigen", "//common/geometry:opencv_viz", - ":feature_manager", + # ":feature_manager", ":frontend", ":backend" ] @@ -46,7 +46,8 @@ cc_test( ":symphony_lake_parser", ":structure_from_motion", "//common/geometry:opencv_viz", - "//common/geometry:translate_types" + "//common/geometry:translate_types", + ":frame" ] ) @@ -96,6 +97,59 @@ cc_binary( ] ) +cc_library( + name = "frontend_definitions", + hdrs = ["frontend_definitions.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + ":structure_from_motion_types" + ] +) + +cc_library( + name = "structure_from_motion_types", + hdrs = ["structure_from_motion_types.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv" + ] +) + +cc_library( + name = "frame", + hdrs = ["frame.hh"], + srcs = ["frame.cc"], + visibility = ["//visibility:public"], + deps = [ + "@opencv//:opencv", + ":frontend_definitions", + "@gtsam//:gtsam" + ] +) + +cc_test( + name = "sfm_mvp", + srcs = ["sfm_mvp.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-variable", + ], + deps = [ + ":frontend", + "@com_google_googletest//:gtest_main", + "@gtsam//:gtsam", + "@opencv//:opencv", + "@eigen", + ":spatial_test_scene_cube", + ":symphony_lake_parser", + ":frame", + ":structure_from_motion_types" + ] +) + + cc_library( name = "feature_set", hdrs = ["feature_set.hh"], @@ -199,7 +253,7 @@ cc_library( "@opencv//:opencv", "@gtsam//:gtsam", "//common/geometry:camera", - ":feature_manager" + # ":feature_manager" ] ) diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh index fa988b905..968bb26aa 100644 --- a/experimental/learn_descriptors/backend.hh +++ b/experimental/learn_descriptors/backend.hh @@ -6,7 +6,7 @@ #include #include "common/geometry/camera.hh" -#include "experimental/learn_descriptors/feature_manager.hh" +// #include "experimental/learn_descriptors/feature_manager.hh" #include "gtsam/geometry/Cal3_S2.h" #include "gtsam/geometry/Point2.h" #include "gtsam/geometry/Point3.h" @@ -42,8 +42,9 @@ class Backend { const gtsam::Point3 p_cam_lmk_guess; }; - Backend(std::shared_ptr feature_manager = nullptr); - Backend(std::shared_ptr feature_manager, gtsam::Cal3_S2 K); + // Backend(std::shared_ptr feature_manager = nullptr); + // Backend(std::shared_ptr feature_manager, gtsam::Cal3_S2 K); + Backend(gtsam::Cal3_S2::shared_ptr K) : K_(K){}; ~Backend(){}; template @@ -79,7 +80,7 @@ class Backend { const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; }; private: - std::shared_ptr feature_manager_; + // std::shared_ptr feature_manager_; gtsam::Cal3_S2::shared_ptr K_; gtsam::Values initial_estimate_; diff --git a/experimental/learn_descriptors/frame.cc b/experimental/learn_descriptors/frame.cc new file mode 100644 index 000000000..139c27858 --- /dev/null +++ b/experimental/learn_descriptors/frame.cc @@ -0,0 +1,9 @@ +#include "experimental/learn_descriptors/frame.hh" + +namespace robot::experimental::learn_descriptors { +void Frame::add_keypoints(const KeypointsCV& kpts) { + kpts_.insert(kpts_.end(), kpts.begin(), kpts.end()); +}; + +void Frame::assign_descriptors(const cv::Mat& descriptors) { descriptors_ = descriptors; }; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh new file mode 100644 index 000000000..720aa1f59 --- /dev/null +++ b/experimental/learn_descriptors/frame.hh @@ -0,0 +1,27 @@ +#pragma once +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Pose3.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class Frame { + public: + Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K, + const gtsam::Pose3& T_wrld_grnd_truth) + : id_(id), img_(img_undistorted), K_(K), groundtruth_(T_wrld_grnd_truth) {} + + void add_keypoints(const KeypointsCV& kpts); + void assign_descriptors(const cv::Mat& descriptors); + + const KeypointsCV get_keypoints() { return kpts_; }; + const cv::Mat get_descriptors() { return descriptors_; }; + + const FrameId id_; + const cv::Mat img_; + gtsam::Cal3_S2::shared_ptr K_; + KeypointsCV kpts_; + cv::Mat descriptors_; + gtsam::Pose3 groundtruth_; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh new file mode 100644 index 000000000..9883230b3 --- /dev/null +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "opencv2/opencv.hpp" + +namespace std { +template <> +struct hash> { + size_t operator()(const std::pair& p) const { + size_t h1 = std::hash()(p.first); + size_t h2 = std::hash()(p.second.x) ^ (std::hash()(p.second.y) << 1); + return h1 ^ (h2 << 1); // combine hashes + } +}; +} // namespace std + +namespace robot::experimental::learn_descriptors { +class FeatureTrack { + public: + std::vector> obs_; + + bool in_ba_graph_; + + FeatureTrack(FrameId frame_id, const KeypointCV& px) { obs_.emplace_back(frame_id, px); } + + void print() const { + std::cout << "Feature track with cameras: "; + for (size_t i = 0u; i < obs_.size(); i++) { + std::cout << " " << obs_[i].first << " "; + } + std::cout << std::endl; + } +}; + +using FeatureTracks = std::unordered_map; +using LandmarkIdMap = std::unordered_map, LandmarkId>; + +} // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc new file mode 100644 index 000000000..3df433238 --- /dev/null +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -0,0 +1,197 @@ +#include + +#include "Eigen/Dense" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "experimental/learn_descriptors/symphony_lake_parser.hh" +#include "gtest/gtest.h" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Point2.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "gtsam/geometry/triangulation.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/linear/NoiseModel.h" +#include "gtsam/navigation/GPSFactor.h" +#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" +#include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/BetweenFactor.h" +#include "gtsam/slam/PriorFactor.h" +#include "gtsam/slam/ProjectionFactor.h" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +TEST(SFMMvp, sfm_building_manual) { + // indices 0-199 + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); + } + return tmp; + }(); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); + + // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; + const double fx = img_pt_first.fx, fy = img_pt_first.fy; + const double cx = img_pt_first.cx, cy = img_pt_first.cy; + gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); + Eigen::Matrix D = + (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, + SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) + .finished(); + cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); + cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); + + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera + Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_boat0; + T_world_boat0.linear() = T_earth_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + // gtsam::Pose3(T_world_camera0.matrix())); + Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); + + gtsam::Values initial_estimate_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Diagonal::shared_ptr pose_noise = + gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); + gtsam::noiseModel::Isotropic::shared_ptr translation_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 0.1); + gtsam::noiseModel::Isotropic::shared_ptr gps_noise = gtsam::noiseModel::Isotropic::Sigma(3, 2.); + + std::vector frames; + FrameId id = 0; + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + cv::Mat img_undistorted; + cv::undistort(img, img_undistorted, K_mat, D_mat); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + + Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); + T_world_boat.translation() -= T_earth_boat0.translation(); + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); + + graph_.emplace_shared>(gtsam::Symbol('x', id), + T_world_cam_gtsam, pose_noise); + + Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); + + std::pair, cv::Mat> kpts_descs = + frontend.get_keypoints_and_descriptors(img_undistorted); + KeypointsCV kpts; + for (auto kpt : kpts_descs.first) { + kpts.push_back(kpt.pt); + } + frame.add_keypoints(kpts); + frame.assign_descriptors(kpts_descs.second); + + frames.push_back(frame); + + id++; + } + + FeatureTracks feature_tracks; + LandmarkIdMap lmk_id_map; + LandmarkId lmk_id; + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(i, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(i + 1, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id); + lmk_id++; + } + } + } + + std::vector> projection_factors; + for (const std::pair &lmk_feat : feature_tracks) { + // std::vector cam_poses; + // std::vector< + LandmarkId lmk_id = lmk_feat.first; + FeatureTrack feature_track = lmk_feat.second; + for (const std::pair &feat_track : feature_track.obs_) { + graph_.emplace_shared>( + feat_track.second, landmark_noise, gtsam::Symbol('x', feat_track.first), + gtsam::Symbol('l', lmk_id), K); + } + } + + if (feature_track.obs_.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + gtsam::Point3 p_ = gtsam::triangulatePoint3( + lmk_obs.first, K, + gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); + + // Optional: perform an explicit cheirality check + bool valid = true; + for (const auto &pose : lmk_obs.first) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate); + if (p_cam.z() <= 0) { // Check that the depth is positive + valid = false; + break; + } + } + + if (valid) { + initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); + } else { + std::cerr << "Triangulated landmark failed cheirality check; keeping " + "initial guess." + << std::endl; + } + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate. + std::cerr << "Triangulation Cheirality Exception: " << e.what() + << ". Keeping initial landmark estimate." << std::endl; + } + } +} + +// const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); +// sfm.graph_values(initial_values, "initial values"); + +// std::cout << "Solving for structure!" << std::endl; + +// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals, +// const Backend::epoch iter) { +// std::cout << "iteration " << iter << " complete!"; +// std::string window_name = "Iteration_" + std::to_string(iter); +// sfm.graph_values(vals, window_name); +// }; +// sfm.solve_structure(5, solve_iter_debug_func); + +// std::cout << "Solution complete." << std::endl; + +// const gtsam::Values result_values = sfm.get_structure_result(); +// sfm.graph_values(result_values, "optimized values"); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index b28d8f009..5db771119 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -35,10 +35,11 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose( StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, Eigen::Matrix D, gtsam::Pose3 initial_pose, - Frontend::MatcherType frontend_matcher) - : feature_manager_(std::make_shared()), initial_pose_(initial_pose) { + Frontend::MatcherType frontend_matcher) { + // : feature_manager_(std::make_shared()), initial_pose_(initial_pose) { frontend_ = Frontend(frontend_extractor, frontend_matcher); - backend_ = Backend(feature_manager_, K); + backend_ = Backend(K); + // backend_ = Backend(feature_manager_, K); K_ = (cv::Mat_(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1); D_ = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); @@ -56,11 +57,11 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img); - feature_manager_->append_img_data(keypoints_and_descriptors.first, - keypoints_and_descriptors.second); + // feature_manager_->append_img_data(keypoints_and_descriptors.first, + // keypoints_and_descriptors.second); // keypoint_to_landmarks_.push_back(std::unordered_map()); // const size_t idx_img_current = get_num_images_added(); - const size_t idx_img_current = feature_manager_->get_num_images_added() - 1; + // const size_t idx_img_current = feature_manager_->get_num_images_added() - 1; std::cout << "current index " << idx_img_current << std::endl; if (idx_img_current > 0) { std::vector matches = @@ -81,7 +82,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo size_t idx; unsigned char chr; - auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0); + // auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0); if (maybe_symbol0) { idx = (*maybe_symbol0).index(); chr = (*maybe_symbol0).chr(); @@ -91,12 +92,12 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo idx = symbol_temp.index(); chr = symbol_temp.chr(); landmark_count_++; - feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp); + // feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp); } gtsam::Symbol symbol_lmk = gtsam::Symbol(chr, idx); std::cout << "value: char: " << chr << " , idx: " << idx << ". " << symbol_lmk << std::endl; - feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk); + // feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk); const Backend::Landmark landmark_cam_0( symbol_lmk, sym_T_w_c0, @@ -136,16 +137,16 @@ void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name) { std::vector final_poses; std::vector final_lmks; - for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) { - final_poses.emplace_back( - values.at(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix()); - } - for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) { - if (!values.exists(lmk_symbol)) { - std::cout << "WTF " << lmk_symbol << std::endl; - } - final_lmks.emplace_back(values.at(lmk_symbol)); - } + // for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) { + // final_poses.emplace_back( + // values.at(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix()); + // } + // for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) { + // if (!values.exists(lmk_symbol)) { + // std::cout << "WTF " << lmk_symbol << std::endl; + // } + // final_lmks.emplace_back(values.at(lmk_symbol)); + // } geometry::viz_scene(final_poses, final_lmks, true, true, window_name); } diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index a4e3850d2..bed65c15f 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -11,7 +11,7 @@ #include "Eigen/Dense" #include "common/geometry/camera.hh" #include "experimental/learn_descriptors/backend.hh" -#include "experimental/learn_descriptors/feature_manager.hh" +// #include "experimental/learn_descriptors/feature_manager.hh" #include "experimental/learn_descriptors/frontend.hh" #include "gtsam/geometry/Pose3.h" #include "gtsam/inference/Symbol.h" @@ -52,7 +52,7 @@ class StructureFromMotion { const std::vector> get_matches() { return matches_; }; private: - std::shared_ptr feature_manager_; + // std::shared_ptr feature_manager_; gtsam::Pose3 initial_pose_; std::vector, cv::Mat>> img_keypoints_and_descriptors_; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 3da914853..752daa6a3 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -7,6 +7,7 @@ #include "Eigen/Geometry" #include "common/geometry/opencv_viz.hh" #include "common/geometry/translate_types.hh" +#include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/symphony_lake_parser.hh" #include "gtest/gtest.h" diff --git a/experimental/learn_descriptors/structure_from_motion_types.hh b/experimental/learn_descriptors/structure_from_motion_types.hh new file mode 100644 index 000000000..582a3c70a --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion_types.hh @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +#include "gtsam/geometry/Point3.h" +#include "opencv2/opencv.hpp" + +using Timestamp = std::int64_t; + +using FrameId = std::uint64_t; // Frame id is used as the index of gtsam symbol + // (not as a gtsam key). +using LandmarkId = long; // -1 for invalid landmarks +using LandmarkIds = std::vector; +using Landmark = gtsam::Point3; +using Landmarks = std::vector>; + +using KeypointCV = cv::Point2f; +using KeypointsCV = std::vector; From a8cc405503c312199cf33e7fe71967ce2a663b42 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 18 Apr 2025 21:27:00 -0400 Subject: [PATCH 18/45] Debug time --- experimental/learn_descriptors/BUILD | 3 +- experimental/learn_descriptors/sfm_mvp.cc | 401 ++++++++++++++-------- 2 files changed, 260 insertions(+), 144 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 9ebb247ec..681ebde27 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -145,7 +145,8 @@ cc_test( ":spatial_test_scene_cube", ":symphony_lake_parser", ":frame", - ":structure_from_motion_types" + ":structure_from_motion_types", + "//common/geometry:opencv_viz" ] ) diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index 3df433238..b2686f7b5 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -1,6 +1,8 @@ +#include #include #include "Eigen/Dense" +#include "common/geometry/opencv_viz.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend.hh" #include "experimental/learn_descriptors/frontend_definitions.hh" @@ -25,164 +27,278 @@ #include "opencv2/opencv.hpp" namespace robot::experimental::learn_descriptors { -TEST(SFMMvp, sfm_building_manual) { - // indices 0-199 - const std::vector indices = []() { - std::vector tmp; - for (int i = 0; i < 200; i += 10) { - tmp.push_back(i); - } - return tmp; - }(); - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - - // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; - const double fx = img_pt_first.fx, fy = img_pt_first.fy; - const double cx = img_pt_first.cx, cy = img_pt_first.cy; - gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); - Eigen::Matrix D = - (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, - SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - .finished(); - cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); - cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera - Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); - Eigen::Isometry3d T_world_boat0; - T_world_boat0.linear() = T_earth_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); - // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // gtsam::Pose3(T_world_camera0.matrix())); - Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); - - gtsam::Values initial_estimate_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::noiseModel::Diagonal::shared_ptr pose_noise = - gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); - gtsam::noiseModel::Isotropic::shared_ptr translation_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 0.1); - gtsam::noiseModel::Isotropic::shared_ptr gps_noise = gtsam::noiseModel::Isotropic::Sigma(3, 2.); - - std::vector frames; - FrameId id = 0; - for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - cv::Mat img_undistorted; - cv::undistort(img, img_undistorted, K_mat, D_mat); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - - Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); - T_world_boat.translation() -= T_earth_boat0.translation(); - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); - gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); - - graph_.emplace_shared>(gtsam::Symbol('x', id), - T_world_cam_gtsam, pose_noise); - - Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); - - std::pair, cv::Mat> kpts_descs = - frontend.get_keypoints_and_descriptors(img_undistorted); - KeypointsCV kpts; - for (auto kpt : kpts_descs.first) { - kpts.push_back(kpt.pt); +class SFMMvpHelper { + public: + static bool attempt_triangulate(const std::vector &cam_poses, + const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, + gtsam::Point3 &out_p_world_landmark) { + bool valid = true; + if (cam_poses.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + + out_p_world_landmark = p_world_lmk; + + // Optional: perform an explicit cheirality check + for (const auto &pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + valid = false; + break; + } } - frame.add_keypoints(kpts); - frame.assign_descriptors(kpts_descs.second); + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate. + std::cerr << "Triangulation Cheirality Exception: " << e.what() + << ". Keeping initial landmark estimate." << std::endl; + } + } else { + valid = false; + } + return valid; + } - frames.push_back(frame); + static void graph_values( + const gtsam::Values &values, const std::string &window_name, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::vector final_poses; + std::vector final_lmks; + for (const gtsam::Symbol &symbol_pose : symbols_pose) { + final_poses.emplace_back(values.at(symbol_pose).matrix()); + } + for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { + if (!values.exists(symbol_lmk)) { + std::cout << "WTF " << symbol_lmk << std::endl; + } + final_lmks.emplace_back(values.at(symbol_lmk)); + } + geometry::viz_scene(final_poses, final_lmks, true, true, window_name); + } +}; - id++; +TEST(SFMMvp, sfm_building_manual) { + // indices 0-199 + const std::vector indices = []() { + std::vector tmp; + for (int i = 0; i < 200; i += 10) { + tmp.push_back(i); } + return tmp; + }(); + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = + data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint img_pt_first = + survey.getImagePoint(indices.front()); - FeatureTracks feature_tracks; - LandmarkIdMap lmk_id_map; - LandmarkId lmk_id; - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - frontend.enforce_bijective_matches(matches); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - - auto key = std::make_pair(i, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(i + 1, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id); - lmk_id++; - } - } + // const size_t img_width = img_pt_first.width, img_height = + // img_pt_first.height; + const double fx = img_pt_first.fx, fy = img_pt_first.fy; + const double cx = img_pt_first.cx, cy = img_pt_first.cy; + gtsam::Cal3_S2::shared_ptr K = + boost::make_shared(fx, fy, 0, cx, cy); + Eigen::Matrix D = + (Eigen::Matrix() << SymphonyLakeCamParams::k1, + SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1, + SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) + .finished(); + cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), + K->py(), 0, 0, 1); + cv::Mat D_mat = + (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); + + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 + // * T_boat_camera + Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_boat0; + T_world_boat0.linear() = T_earth_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = + T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + // gtsam::Pose3(T_world_camera0.matrix())); + Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); + + gtsam::Values initial_estimate_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Diagonal::shared_ptr pose_noise = + gtsam::noiseModel::Diagonal::Sigmas( + gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); + gtsam::noiseModel::Isotropic::shared_ptr translation_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 0.1); + gtsam::noiseModel::Isotropic::shared_ptr gps_noise = + gtsam::noiseModel::Isotropic::Sigma(3, 2.); + + // populate frames and cam_poses + std::vector frames; + FrameId id = 0; + std::unordered_map cam_pose; + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + cv::Mat img_undistorted; + cv::undistort(img, img_undistorted, K_mat, D_mat); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + + Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); + T_world_boat.translation() -= T_earth_boat0.translation(); + Eigen::Isometry3d T_world_cam = + T_world_boat * DataParser::get_T_boat_camera(img_pt); + gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); + cam_pose.emplace(id, T_world_cam_gtsam); + + graph_.emplace_shared>( + gtsam::Symbol('x', id), T_world_cam_gtsam, pose_noise); + + Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); + + std::pair, cv::Mat> kpts_descs = + frontend.get_keypoints_and_descriptors(img_undistorted); + KeypointsCV kpts; + for (const cv::KeyPoint &kpt : kpts_descs.first) { + kpts.push_back(kpt.pt); } + frame.add_keypoints(kpts); + frame.assign_descriptors(kpts_descs.second); - std::vector> projection_factors; - for (const std::pair &lmk_feat : feature_tracks) { - // std::vector cam_poses; - // std::vector< - LandmarkId lmk_id = lmk_feat.first; - FeatureTrack feature_track = lmk_feat.second; - for (const std::pair &feat_track : feature_track.obs_) { - graph_.emplace_shared>( - feat_track.second, landmark_noise, gtsam::Symbol('x', feat_track.first), - gtsam::Symbol('l', lmk_id), K); - } + frames.push_back(frame); + + id++; + } + + // populate feature_tracks and lmk_id_map + FeatureTracks feature_tracks; + LandmarkIdMap lmk_id_map; + LandmarkId lmk_id; + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = frontend.get_matches( + frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(i, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(i + 1, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id); + lmk_id++; + } } + } - if (feature_track.obs_.size() >= 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - gtsam::Point3 p_ = gtsam::triangulatePoint3( - lmk_obs.first, K, - gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); - - // Optional: perform an explicit cheirality check - bool valid = true; - for (const auto &pose : lmk_obs.first) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate); - if (p_cam.z() <= 0) { // Check that the depth is positive - valid = false; - break; - } - } - - if (valid) { - initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); - } else { - std::cerr << "Triangulated landmark failed cheirality check; keeping " - "initial guess." - << std::endl; - } - } catch (const gtsam::TriangulationCheiralityException &e) { - // Handle the exception gracefully by logging and retaining the previous - // estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() - << ". Keeping initial landmark estimate." << std::endl; - } + // populate graph + std::vector symbols_pose; + std::vector symbols_landmarks; + for (const std::pair lmk_feat : feature_tracks) { + std::vector feat_cam_poses; + std::vector feat_kpts; + LandmarkId lmk_id = lmk_feat.first; + FeatureTrack feature_track = lmk_feat.second; + const gtsam::Symbol symbol_lmk('l', lmk_id); + symbols_landmarks.push_back(symbol_lmk); + for (const std::pair &feat_track : + feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0)); + graph_.emplace_shared< + gtsam::GenericProjectionFactor>( + gtsam::Point2(feat_track.second.x, feat_track.second.y), + landmark_noise, gtsam::Symbol('x', feat_track.first), symbol_lmk, K); + + feat_cam_poses.push_back(cam_pose[feat_track.first]); + feat_kpts.push_back( + gtsam::Point2(feat_track.second.x, feat_track.second.y)); + } + gtsam::Point3 p_wrld_kpt; + bool triangulate_success = SFMMvpHelper::attempt_triangulate( + feat_cam_poses, feat_kpts, K, p_wrld_kpt); + if (triangulate_success) { + initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); } + } + graph_.emplace_shared>( + gtsam::Symbol('x', 0), cam_pose.at(0), pose_noise); + initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); + symbols_pose.push_back(gtsam::Symbol('x', 0)); + for (const std::pair cam_id_pose : cam_pose) { + if (cam_id_pose.first == 0) { + continue; + } + const gtsam::Symbol key_cam('x', cam_id_pose.first); + symbols_pose.push_back(key_cam); + gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); + graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); + initial_estimate_.insert_or_assign(key_cam, + gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); + } + + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; // We'll manually step it + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, + params); + + double prev_error = optimizer.error(); + const int num_steps = 5; + const bool viz_itr = true; + typedef int epoch; + std::function &, + const std::vector &)> + graph_itr_debug_func = + [&](const gtsam::Values &vals, const epoch iter, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + SFMMvpHelper::graph_values(vals, window_name, symbols_pose, + symbols_landmarks); + }; + + SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose, + symbols_landmarks); + for (int i = 0; i < num_steps; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (viz_itr) { + graph_itr_debug_func(optimizer.values(), i, symbols_pose, + symbols_landmarks); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + const gtsam::Values result = optimizer.values(); } -// const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); +// const gtsam::Values initial_values = +// sfm.get_backend().get_current_initial_values(); // sfm.graph_values(initial_values, "initial values"); // std::cout << "Solving for structure!" << std::endl; -// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals, -// const Backend::epoch iter) { +// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const +// gtsam::Values &vals, +// const +// Backend::epoch +// iter) { // std::cout << "iteration " << iter << " complete!"; // std::string window_name = "Iteration_" + std::to_string(iter); // sfm.graph_values(vals, window_name); @@ -193,5 +309,4 @@ TEST(SFMMvp, sfm_building_manual) { // const gtsam::Values result_values = sfm.get_structure_result(); // sfm.graph_values(result_values, "optimized values"); -} } // namespace robot::experimental::learn_descriptors \ No newline at end of file From 981a0f2967221c08636d7656680fda208f1050a2 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 22 Apr 2025 12:45:18 -0400 Subject: [PATCH 19/45] SFM MVP working. Tried out an iterative + global optimization --- experimental/learn_descriptors/BUILD | 11 + .../learn_descriptors/frontend_definitions.hh | 3 +- .../get_colmap_groundtruth.cc | 82 ++ experimental/learn_descriptors/sfm_mvp.cc | 845 ++++++++++++------ 4 files changed, 680 insertions(+), 261 deletions(-) create mode 100644 experimental/learn_descriptors/get_colmap_groundtruth.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 681ebde27..666668ad0 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -270,3 +270,14 @@ cc_test( ":feature_manager" ] ) + +cc_binary( + name = "get_colmap_groundtruth", + srcs = ["get_colmap_groundtruth.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "symphony_lake_parser" + ] +) \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh index 9883230b3..188b91e53 100644 --- a/experimental/learn_descriptors/frontend_definitions.hh +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -37,6 +37,7 @@ class FeatureTrack { }; using FeatureTracks = std::unordered_map; -using LandmarkIdMap = std::unordered_map, LandmarkId>; +using FrameLandmarkIdMap = std::unordered_map, LandmarkId>; +using LandmarkFrameIdMap = std::unordered_map>; } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/get_colmap_groundtruth.cc b/experimental/learn_descriptors/get_colmap_groundtruth.cc new file mode 100644 index 000000000..6afc9fff1 --- /dev/null +++ b/experimental/learn_descriptors/get_colmap_groundtruth.cc @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include +#include + +#include "experimental/learn_descriptors/symphony_lake_parser.hh" + +namespace ld = robot::experimental::learn_descriptors; + +struct ImagePose { + std::string filename; + double X; + double Y; + double Z; + double heading; +}; + +void writeCsv(const std::string& path, const std::vector& rows) { + std::ofstream out(path); + if (!out) { + throw std::runtime_error("Failed to open " + path); + } + + out << "filename,X,Y,Z,heading\n"; + + out << std::fixed << std::setprecision(6); + for (const auto& r : rows) { + out << r.filename << ',' << r.X << ',' << r.Y << ',' << r.Z << ',' << r.heading << '\n'; + } +} + +void writeTxt(const std::string& path, const std::vector& rows) { + std::ofstream out(path); + if (!out) { + throw std::runtime_error("Failed to open " + path); + } + + out << std::fixed << std::setprecision(6); + for (const auto& r : rows) { + out << r.filename << ' ' << r.X << ' ' << r.Y << ' ' << r.Z << ' ' << r.heading << '\n'; + } +} + +int main() { + ld::DataParser data_parser("/home/pizzaroll04/Documents/datasets", + std::vector{"symphony_lake_building_test"}); + + const symphony_lake_dataset::SurveyVector& survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey& survey = survey_vector.get(0); + + std::vector poses; + for (int i = 0; i < 718; i++) { + symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(i); + const Eigen::Isometry3d T_wrld_cam = + data_parser.get_T_world_boat(img_pt) * data_parser.get_T_boat_camera(img_pt); + ImagePose img_pose; + + std::ostringstream oss; + oss << std::setw(4) << std::setfill('0') << i; + img_pose.filename = oss.str() + ".jpg"; + img_pose.X = T_wrld_cam.translation().x(); + img_pose.Y = T_wrld_cam.translation().y(); + img_pose.Z = T_wrld_cam.translation().z(); + Eigen::Matrix3d R = T_wrld_cam.linear(); + img_pose.heading = std::atan2(R(1, 0), R(0, 0)); + + poses.push_back(img_pose); + } + + try { + writeTxt("/home/pizzaroll04/Documents/datasets/symphony_lake_building_test/ref_images.txt", + poses); + std::cout << "Wrote " << poses.size() << " rows to ref_images.csv\n"; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } + + return 0; +} diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index b2686f7b5..f27c1ede7 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -26,287 +26,612 @@ #include "gtsam/slam/ProjectionFactor.h" #include "opencv2/opencv.hpp" +namespace std { +template <> +struct hash { + size_t operator()(const gtsam::Symbol &s) const { return hash()(s.key()); } +}; +} // namespace std + namespace robot::experimental::learn_descriptors { class SFMMvpHelper { - public: - static bool attempt_triangulate(const std::vector &cam_poses, - const std::vector &cam_obs, - gtsam::Cal3_S2::shared_ptr K, - gtsam::Point3 &out_p_world_landmark) { - bool valid = true; - if (cam_poses.size() >= 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( - cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - - out_p_world_landmark = p_world_lmk; - - // Optional: perform an explicit cheirality check - for (const auto &pose : cam_poses) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); - if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + public: + // struct SymbolHasher { + // size_t operator()(const gtsam::Symbol &s) const { return std::hash()(s.key()); + // } + // }; + + static bool attempt_triangulate(const std::vector &cam_poses, + const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, + gtsam::Point3 &out_p_world_landmark) { + bool valid = true; + if (cam_poses.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + + out_p_world_landmark = p_world_lmk; + + // Optional: perform an explicit cheirality check + for (const auto &pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + valid = false; + break; + } + } + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate. + std::cerr << "Triangulation Cheirality Exception: " << e.what() + << ". Keeping initial landmark estimate." << std::endl; + } + } else { valid = false; - break; - } } - } catch (const gtsam::TriangulationCheiralityException &e) { - // Handle the exception gracefully by logging and retaining the previous - // estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() - << ". Keeping initial landmark estimate." << std::endl; - } - } else { - valid = false; + return valid; } - return valid; - } - - static void graph_values( - const gtsam::Values &values, const std::string &window_name, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; - for (const gtsam::Symbol &symbol_pose : symbols_pose) { - final_poses.emplace_back(values.at(symbol_pose).matrix()); + + static void graph_values(const gtsam::Values &values, const std::string &window_name, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::vector final_poses; + std::vector final_lmks; + for (const gtsam::Symbol &symbol_pose : symbols_pose) { + final_poses.emplace_back(values.at(symbol_pose).matrix()); + } + for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { + if (!values.exists(symbol_lmk)) { + std::cout << "WTF " << symbol_lmk << std::endl; + } + final_lmks.emplace_back(values.at(symbol_lmk)); + } + geometry::viz_scene(final_poses, final_lmks, true, true, window_name); } - for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { - if (!values.exists(symbol_lmk)) { - std::cout << "WTF " << symbol_lmk << std::endl; - } - final_lmks.emplace_back(values.at(symbol_lmk)); + + static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, + const gtsam::Values &values, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks, + bool viz_itr = false) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; + gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); + + double prev_error = optimizer.error(); + const int num_steps = 5; + typedef int epoch; + std::function &, + const std::vector &)> + graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks); + }; + + for (int i = 0; i < num_steps; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (viz_itr) { + graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + return optimizer.values(); } - geometry::viz_scene(final_poses, final_lmks, true, true, window_name); - } -}; -TEST(SFMMvp, sfm_building_manual) { - // indices 0-199 - const std::vector indices = []() { - std::vector tmp; - for (int i = 0; i < 200; i += 10) { - tmp.push_back(i); + static gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { + if (poses.empty()) throw std::runtime_error("Empty pose vector"); + + gtsam::Pose3 mean = poses[0]; + + for (int iter = 0; iter < maxIter; ++iter) { + gtsam::Vector6 total = gtsam::Vector6::Zero(); + + for (const auto &pose : poses) { + gtsam::Pose3 delta = mean.between(pose); + total += gtsam::Pose3::Logmap(delta); + } + + total /= poses.size(); + mean = mean.compose(gtsam::Pose3::Expmap(total)); + } + + return mean; } - return tmp; - }(); - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = - data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint img_pt_first = - survey.getImagePoint(indices.front()); - - // const size_t img_width = img_pt_first.width, img_height = - // img_pt_first.height; - const double fx = img_pt_first.fx, fy = img_pt_first.fy; - const double cx = img_pt_first.cx, cy = img_pt_first.cy; - gtsam::Cal3_S2::shared_ptr K = - boost::make_shared(fx, fy, 0, cx, cy); - Eigen::Matrix D = - (Eigen::Matrix() << SymphonyLakeCamParams::k1, - SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1, - SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - .finished(); - cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), - K->py(), 0, 0, 1); - cv::Mat D_mat = - (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 - // * T_boat_camera - Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); - Eigen::Isometry3d T_world_boat0; - T_world_boat0.linear() = T_earth_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = - T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); - // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // gtsam::Pose3(T_world_camera0.matrix())); - Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); - - gtsam::Values initial_estimate_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::noiseModel::Diagonal::shared_ptr pose_noise = - gtsam::noiseModel::Diagonal::Sigmas( - gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); - gtsam::noiseModel::Isotropic::shared_ptr translation_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 0.1); - gtsam::noiseModel::Isotropic::shared_ptr gps_noise = - gtsam::noiseModel::Isotropic::Sigma(3, 2.); - - // populate frames and cam_poses - std::vector frames; - FrameId id = 0; - std::unordered_map cam_pose; - for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - cv::Mat img_undistorted; - cv::undistort(img, img_undistorted, K_mat, D_mat); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - - Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); - T_world_boat.translation() -= T_earth_boat0.translation(); - Eigen::Isometry3d T_world_cam = - T_world_boat * DataParser::get_T_boat_camera(img_pt); - gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); - cam_pose.emplace(id, T_world_cam_gtsam); - - graph_.emplace_shared>( - gtsam::Symbol('x', id), T_world_cam_gtsam, pose_noise); - - Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); - - std::pair, cv::Mat> kpts_descs = - frontend.get_keypoints_and_descriptors(img_undistorted); - KeypointsCV kpts; - for (const cv::KeyPoint &kpt : kpts_descs.first) { - kpts.push_back(kpt.pt); + + static gtsam::Point3 averagePoints(const std::vector &points) { + if (points.empty()) throw std::runtime_error("Empty point vector"); + gtsam::Point3 sum(0, 0, 0); + for (const auto &pt : points) sum += pt; + return sum / points.size(); } - frame.add_keypoints(kpts); - frame.assign_descriptors(kpts_descs.second); - - frames.push_back(frame); - - id++; - } - - // populate feature_tracks and lmk_id_map - FeatureTracks feature_tracks; - LandmarkIdMap lmk_id_map; - LandmarkId lmk_id; - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = frontend.get_matches( - frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - frontend.enforce_bijective_matches(matches); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - - auto key = std::make_pair(i, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(i + 1, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id); - lmk_id++; - } +}; + +TEST(SFMMvp, sfm_building_manual_global) { + const std::vector indices{60, 80, 100, 120, 140}; + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); + + // const size_t img_width = img_pt_first.width, img_height = + // img_pt_first.height; + const double fx = img_pt_first.fx, fy = img_pt_first.fy; + const double cx = img_pt_first.cx, cy = img_pt_first.cy; + gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); + Eigen::Matrix D = + (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, + SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) + .finished(); + cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); + cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); + + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 + // * T_boat_camera + Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_boat0; + T_world_boat0.linear() = T_earth_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + // gtsam::Pose3(T_world_camera0.matrix())); + Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); + + gtsam::Values initial_estimate_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::Vector6 prior_sigmas; + prior_sigmas << 0.04, 0.04, 0.09, 0.01, 0.01, 0.01; + gtsam::Vector3 gps_sigmas; + gps_sigmas << 2.1, 2.1, 0.1; + gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = + gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = + gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); + + // populate frames and cam_poses + std::vector frames; + FrameId id = 0; + std::unordered_map cam_pose; + std::vector cam_isometries; + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + cv::Mat img_undistorted; + cv::undistort(img, img_undistorted, K_mat, D_mat); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + + Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); + T_world_boat.translation() -= T_earth_boat0.translation(); + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); + cam_pose.emplace(id, T_world_cam_gtsam); + cam_isometries.push_back(T_world_cam); + + // graph_.emplace_shared>(gtsam::Symbol('x', id), + // T_world_cam_gtsam, pose_noise); + + Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); + + std::pair, cv::Mat> kpts_descs = + frontend.get_keypoints_and_descriptors(img_undistorted); + KeypointsCV kpts; + for (const cv::KeyPoint &kpt : kpts_descs.first) { + kpts.push_back(kpt.pt); + } + frame.add_keypoints(kpts); + frame.assign_descriptors(kpts_descs.second); + + frames.push_back(frame); + + id++; } - } - - // populate graph - std::vector symbols_pose; - std::vector symbols_landmarks; - for (const std::pair lmk_feat : feature_tracks) { - std::vector feat_cam_poses; - std::vector feat_kpts; - LandmarkId lmk_id = lmk_feat.first; - FeatureTrack feature_track = lmk_feat.second; - const gtsam::Symbol symbol_lmk('l', lmk_id); - symbols_landmarks.push_back(symbol_lmk); - for (const std::pair &feat_track : - feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0)); - graph_.emplace_shared< - gtsam::GenericProjectionFactor>( - gtsam::Point2(feat_track.second.x, feat_track.second.y), - landmark_noise, gtsam::Symbol('x', feat_track.first), symbol_lmk, K); - - feat_cam_poses.push_back(cam_pose[feat_track.first]); - feat_kpts.push_back( - gtsam::Point2(feat_track.second.x, feat_track.second.y)); + + geometry::viz_scene(cam_isometries, std::vector()); + + // populate feature_tracks and lmk_id_map + FeatureTracks feature_tracks; + FrameLandmarkIdMap lmk_id_map; + LandmarkId lmk_id = 0; + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(i, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(i + 1, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id); + lmk_id++; + } + } } - gtsam::Point3 p_wrld_kpt; - bool triangulate_success = SFMMvpHelper::attempt_triangulate( - feat_cam_poses, feat_kpts, K, p_wrld_kpt); - if (triangulate_success) { - initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + + // populate graph + std::vector symbols_pose; + std::vector symbols_landmarks; + for (const std::pair lmk_feat : feature_tracks) { + std::vector feat_cam_poses; + std::vector feat_kpts; + LandmarkId lmk_id = lmk_feat.first; + FeatureTrack feature_track = lmk_feat.second; + const gtsam::Symbol symbol_lmk('l', lmk_id); + symbols_landmarks.push_back(symbol_lmk); + for (const std::pair &feat_track : feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0)); + graph_.emplace_shared>( + gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, + gtsam::Symbol('x', feat_track.first), symbol_lmk, K); + + feat_cam_poses.push_back(cam_pose[feat_track.first]); + feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); + } + gtsam::Point3 p_wrld_kpt; + bool triangulate_success = + SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); + if (triangulate_success) { + initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + } } - } - graph_.emplace_shared>( - gtsam::Symbol('x', 0), cam_pose.at(0), pose_noise); - initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); - symbols_pose.push_back(gtsam::Symbol('x', 0)); - for (const std::pair cam_id_pose : cam_pose) { - if (cam_id_pose.first == 0) { - continue; + graph_.emplace_shared>(gtsam::Symbol('x', 0), cam_pose.at(0), + prior_pose_noise); + initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); + symbols_pose.push_back(gtsam::Symbol('x', 0)); + for (const std::pair cam_id_pose : cam_pose) { + if (cam_id_pose.first == 0) { + continue; + } + const gtsam::Symbol key_cam('x', cam_id_pose.first); + symbols_pose.push_back(key_cam); + gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); + graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); + initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); + // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); } - const gtsam::Symbol key_cam('x', cam_id_pose.first); - symbols_pose.push_back(key_cam); - gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); - graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); - initial_estimate_.insert_or_assign(key_cam, - gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); - } - - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; // We'll manually step it - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, - params); - - double prev_error = optimizer.error(); - const int num_steps = 5; - const bool viz_itr = true; - typedef int epoch; - std::function &, - const std::vector &)> - graph_itr_debug_func = - [&](const gtsam::Values &vals, const epoch iter, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { + + // initial_estimate_.print("huh"); + SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); + + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; // We'll manually step it + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); + + double prev_error = optimizer.error(); + const int num_steps = 5; + const bool viz_itr = true; + typedef int epoch; + std::function &, + const std::vector &)> + graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { std::cout << "iteration " << iter << " complete!"; std::string window_name = "Iteration_" + std::to_string(iter); - SFMMvpHelper::graph_values(vals, window_name, symbols_pose, - symbols_landmarks); - }; - - SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose, - symbols_landmarks); - for (int i = 0; i < num_steps; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (viz_itr) { - graph_itr_debug_func(optimizer.values(), i, symbols_pose, - symbols_landmarks); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; + SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks); + }; + + SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose, symbols_landmarks); + for (int i = 0; i < num_steps; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (viz_itr) { + graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } } - } - const gtsam::Values result = optimizer.values(); + const gtsam::Values result = optimizer.values(); } -// const gtsam::Values initial_values = -// sfm.get_backend().get_current_initial_values(); -// sfm.graph_values(initial_values, "initial values"); +TEST(SFMMvp, sfm_building_manual_incremental) { + const std::vector indices{60, 80, 100, 120, 140}; + DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); + const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); + const symphony_lake_dataset::Survey &survey = survey_vector.get(0); + const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); + + // const size_t img_width = img_pt_first.width, img_height = + // img_pt_first.height; + const double fx = img_pt_first.fx, fy = img_pt_first.fy; + const double cx = img_pt_first.cx, cy = img_pt_first.cy; + gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); + Eigen::Matrix D = + (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, + SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) + .finished(); + cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); + cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); + + // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 + // * T_boat_camera + Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_world_boat0; + T_world_boat0.linear() = T_earth_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + // gtsam::Pose3(T_world_camera0.matrix())); + Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); -// std::cout << "Solving for structure!" << std::endl; + gtsam::Values initial_estimate_; + gtsam::NonlinearFactorGraph graph_; -// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const -// gtsam::Values &vals, -// const -// Backend::epoch -// iter) { -// std::cout << "iteration " << iter << " complete!"; -// std::string window_name = "Iteration_" + std::to_string(iter); -// sfm.graph_values(vals, window_name); -// }; -// sfm.solve_structure(5, solve_iter_debug_func); + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::Vector6 prior_sigmas; + prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; + gtsam::Vector3 gps_sigmas; + gps_sigmas << 2.1, 2.1, 0.1; + gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = + gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = + gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); + + // populate frames and cam_poses + std::vector frames; + FrameId id = 0; + std::unordered_map cam_pose; + std::vector cam_isometries; + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + cv::Mat img_undistorted; + cv::undistort(img, img_undistorted, K_mat, D_mat); + const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); + + Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); + T_world_boat.translation() -= T_earth_boat0.translation(); + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); + cam_pose.emplace(id, T_world_cam_gtsam); + cam_isometries.push_back(T_world_cam); + + // graph_.emplace_shared>(gtsam::Symbol('x', id), + // T_world_cam_gtsam, pose_noise); + + Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); + + std::pair, cv::Mat> kpts_descs = + frontend.get_keypoints_and_descriptors(img_undistorted); + KeypointsCV kpts; + for (const cv::KeyPoint &kpt : kpts_descs.first) { + kpts.push_back(kpt.pt); + } + frame.add_keypoints(kpts); + frame.assign_descriptors(kpts_descs.second); -// std::cout << "Solution complete." << std::endl; + frames.push_back(frame); + + id++; + } + + geometry::viz_scene(cam_isometries, std::vector()); + + // populate feature_tracks and lmk_id_map + FeatureTracks feature_tracks; + FrameLandmarkIdMap lmk_id_map; + LandmarkId lmk_id = 0; + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); + lmk_id++; + } + } + } + + // populate graph + std::unordered_map> symbols_poses_values_iter; + std::unordered_map> symbols_landmarks_values_iter; + std::vector symbols_pose; + std::vector symbols_landmarks; + for (const std::pair lmk_feat : feature_tracks) { + std::vector feat_cam_poses; + std::vector feat_kpts; + LandmarkId lmk_id = lmk_feat.first; + FeatureTrack feature_track = lmk_feat.second; + const gtsam::Symbol symbol_lmk('l', lmk_id); + symbols_landmarks.push_back(symbol_lmk); + gtsam::Point3 p_wrld_kpt(0, 0, 0); + for (const std::pair &feat_track : feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0)); + symbols_landmarks_values_iter.emplace( + symbol_lmk, std::vector{gtsam::Point3(0, 0, 0)}); + graph_.emplace_shared>( + gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, + gtsam::Symbol('x', feat_track.first), symbol_lmk, K); + + feat_cam_poses.push_back(cam_pose[feat_track.first]); + feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); + } + bool triangulate_success = + SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); + if (triangulate_success) { + initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + } + if (symbols_landmarks_values_iter.find(symbol_lmk) != symbols_landmarks_values_iter.end()) { + symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); + } else { + symbols_landmarks_values_iter.emplace(symbol_lmk, + std::vector{p_wrld_kpt}); + } + } + graph_.emplace_shared>(gtsam::Symbol('x', 0), cam_pose.at(0), + prior_pose_noise); + initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); + symbols_pose.push_back(gtsam::Symbol('x', 0)); + symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0), + std::vector{cam_pose.at(0)}); + for (const std::pair cam_id_pose : cam_pose) { + if (cam_id_pose.first == 0) { + continue; + } + const gtsam::Symbol key_cam('x', cam_id_pose.first); + symbols_pose.push_back(key_cam); + gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); + graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); + initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); + symbols_poses_values_iter.emplace(key_cam, std::vector{cam_id_pose.second}); + // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); + } + + SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); + + // do local optimizations and add to iter cache + // const int window = 2; + std::cout << "length of cam_poses: " << cam_pose.size() << std::endl; + for (const auto &id_pose : cam_pose) { + std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl; + } + for (size_t i = 0; i < indices.size() - 1; i++) { + std::cout << "Local optimization " << i << std::endl; + gtsam::Values local_estimate_; + gtsam::NonlinearFactorGraph local_graph_; + + std::vector poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)}; + std::vector lmks; + + local_graph_.emplace_shared>( + gtsam::PriorFactor(poses[0], cam_pose.at(i), prior_pose_noise)); + std::cout << "fuck" << std::endl; + local_graph_.emplace_shared>( + gtsam::PriorFactor(poses[1], cam_pose.at(i + 1), prior_pose_noise)); + std::cout << "mega fuck" << std::endl; + local_estimate_.insert_or_assign(poses[0], cam_pose.at(i)); + local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1)); + + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_matches(matches); + std::vector feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)}; + + std::vector viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()), + Eigen::Isometry3d(feat_cam_poses[1].matrix())}; + std::vector viz_lmks; + for (const cv::DMatch match : matches) { + std::vector feat_kpts; + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); + feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + std::cout << "good" << std::endl; + // do nothing + // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + std::cerr << "this shouldn't happen right?" << std::endl; + FeatureTrack feature_track(frames[i].id_, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(key, lmk_id); + lmk_id++; + } + std::cout << "oog" << std::endl; + const gtsam::Symbol symbol_lmk = + gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))); + // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != + // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { + // std::cerr << "UH OH" << std::endl; + // } else { + // std::cout << "cool" << std::endl; + // } + lmks.push_back(symbol_lmk); + local_graph_ + .emplace_shared>( + feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K); + local_graph_ + .emplace_shared>( + feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K); + + gtsam::Point3 p_wrld_kpt; + bool triangulate_success = + SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); + if (triangulate_success) { + local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + viz_lmks.push_back(p_wrld_kpt); + } + if (symbols_landmarks_values_iter.find(symbol_lmk) != + symbols_landmarks_values_iter.end()) { + symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); + } else { + symbols_landmarks_values_iter.emplace(symbol_lmk, + std::vector{p_wrld_kpt}); + } + } + std::cout << "setup complete!" << std::endl; + geometry::viz_scene(viz_poses, viz_lmks, true, true, + "Local Optimization " + std::to_string(i)); + + const gtsam::Values symbols_result_local = SFMMvpHelper::optimize_graph( + local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); + + for (const gtsam::Symbol &symbol_pose : poses) { + const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); + symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); + } + for (const gtsam::Symbol &symbol_lmk : lmks) { + const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); + symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); + } + } + + std::cout << "\nLocal Optimizations Complete!" << std::endl; + + for (std::pair> sym_pose : symbols_poses_values_iter) { + initial_estimate_.insert_or_assign(sym_pose.first, + SFMMvpHelper::averagePoses(sym_pose.second)); + } + for (std::pair> sym_lmk : + symbols_landmarks_values_iter) { + initial_estimate_.insert_or_assign(sym_lmk.first, + SFMMvpHelper::averagePoints(sym_lmk.second)); + } + + const gtsam::Values result = + SFMMvpHelper::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); + std::cout << "about to visualize result" << std::endl; + SFMMvpHelper::graph_values(result, "Result", symbols_pose, symbols_landmarks); +} -// const gtsam::Values result_values = sfm.get_structure_result(); -// sfm.graph_values(result_values, "optimized values"); } // namespace robot::experimental::learn_descriptors \ No newline at end of file From e482d5216ad3c81aa9ec81854e6a00382603d8d0 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 25 Apr 2025 13:48:55 -0400 Subject: [PATCH 20/45] added matching that chooses best bijective that are twice as good as second match. working on filtering spatial points, pipeline currently broken --- experimental/learn_descriptors/BUILD | 4 +- .../learn_descriptors/backend_test.cc | 2 +- experimental/learn_descriptors/frontend.cc | 45 +++++++++ experimental/learn_descriptors/frontend.hh | 1 + .../learn_descriptors/frontend_definitions.hh | 4 +- experimental/learn_descriptors/sfm_mvp.cc | 98 ++++++++++++++----- 6 files changed, 122 insertions(+), 32 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 666668ad0..254f6c36b 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -266,8 +266,8 @@ cc_test( "@com_google_googletest//:gtest_main", "@opencv//:opencv", ":backend", - ":spatial_scene_test_cube", - ":feature_manager" + ":feature_manager", + ":spatial_test_scene_cube" ] ) diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc index 94d7b5067..dfe413b03 100644 --- a/experimental/learn_descriptors/backend_test.cc +++ b/experimental/learn_descriptors/backend_test.cc @@ -2,7 +2,7 @@ #include "experimental/learn_descriptors/feature_manager.hh" #include "gtest/gtest.h" -#include "spatial_scene_test_cube.hh" +#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" namespace robot::experimental::learn_descriptors { TEST(BackendTest, cube) { diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 46368a31e..3ef2c408e 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -108,6 +108,51 @@ void Frontend::enforce_bijective_matches(std::vector &matches) { matches.end()); } +void Frontend::enforce_bijective_buffer_matches(std::vector &matches) { + // Store best and second-best matches per query_idx + std::unordered_map> best_two_query_matches; + for (const auto &match : matches) { + int query_idx = match.queryIdx; + float dist = match.distance; + + if (!best_two_query_matches.count(query_idx)) { + best_two_query_matches[query_idx] = {match, std::numeric_limits::max()}; + } else { + auto &[best_match, second_best_dist] = best_two_query_matches[query_idx]; + if (dist < best_match.distance) { + second_best_dist = best_match.distance; + best_match = match; + } else if (dist < second_best_dist) { + second_best_dist = dist; + } + } + } + + // Keep matches where best < 0.5 * second_best + std::vector filtered; + for (const auto &[query_idx, pair] : best_two_query_matches) { + const auto &[best_match, second_best_dist] = pair; + if (best_match.distance < 0.5f * second_best_dist) { + filtered.push_back(best_match); + } + } + + // Enforce bijection: each train_idx should also be best for only one query_idx + std::unordered_map best_train_match; + for (const auto &match : filtered) { + int train_idx = match.trainIdx; + if (!best_train_match.count(train_idx) || + match.distance < best_train_match[train_idx].distance) { + best_train_match[train_idx] = match; + } + } + + matches.clear(); + for (const auto &[train_idx, match] : best_train_match) { + matches.push_back(match); + } +} + bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, std::vector &matches_out) const { if (matcher_type_ != MatcherType::BRUTE_FORCE) { diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index 9b720cc06..70baf55bd 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -24,6 +24,7 @@ class Frontend { const cv::Mat &descriptors2) const; static void threshold_matches(std::vector &matches, float dist_threshhold); static void enforce_bijective_matches(std::vector &matches); + static void enforce_bijective_buffer_matches(std::vector &matches); static void draw_keypoints(const cv::Mat &img, std::vector keypoints, cv::Mat img_keypoints_out) { cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh index 188b91e53..fac8b67b3 100644 --- a/experimental/learn_descriptors/frontend_definitions.hh +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -10,7 +10,7 @@ namespace std { template <> struct hash> { - size_t operator()(const std::pair& p) const { + size_t operator()(const std::pair &p) const { size_t h1 = std::hash()(p.first); size_t h2 = std::hash()(p.second.x) ^ (std::hash()(p.second.y) << 1); return h1 ^ (h2 << 1); // combine hashes @@ -25,7 +25,7 @@ class FeatureTrack { bool in_ba_graph_; - FeatureTrack(FrameId frame_id, const KeypointCV& px) { obs_.emplace_back(frame_id, px); } + FeatureTrack(FrameId frame_id, const KeypointCV &px) { obs_.emplace_back(frame_id, px); } void print() const { std::cout << "Feature track with cameras: "; diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index f27c1ede7..85ef7d3d1 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -157,6 +157,15 @@ class SFMMvpHelper { for (const auto &pt : points) sum += pt; return sum / points.size(); } + + static gtsam::Point3 get_variance(const std::vector &points) { + const gtsam::Point3 mean = SFMMvpHelper::averagePoints(points); + gtsam::Point3 var(0, 0, 0); + for (const gtsam::Point3 &pt : points) { + var += (mean - pt).array().square().matrix(); + } + return var / points.size(); + } }; TEST(SFMMvp, sfm_building_manual_global) { @@ -248,7 +257,8 @@ TEST(SFMMvp, sfm_building_manual_global) { for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - frontend.enforce_bijective_matches(matches); + // frontend.enforce_bijective_matches(matches); + frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; @@ -350,6 +360,13 @@ TEST(SFMMvp, sfm_building_manual_global) { TEST(SFMMvp, sfm_building_manual_incremental) { const std::vector indices{60, 80, 100, 120, 140}; + // const std::vector indices = []() { + // std::vector tmp; + // for (int i = 0; i < 200; i += 10) { + // tmp.push_back(i); + // } + // return tmp; + // }(); DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); const symphony_lake_dataset::Survey &survey = survey_vector.get(0); @@ -409,9 +426,6 @@ TEST(SFMMvp, sfm_building_manual_incremental) { cam_pose.emplace(id, T_world_cam_gtsam); cam_isometries.push_back(T_world_cam); - // graph_.emplace_shared>(gtsam::Symbol('x', id), - // T_world_cam_gtsam, pose_noise); - Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); std::pair, cv::Mat> kpts_descs = @@ -437,7 +451,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - frontend.enforce_bijective_matches(matches); + frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; @@ -457,40 +471,66 @@ TEST(SFMMvp, sfm_building_manual_incremental) { } } - // populate graph - std::unordered_map> symbols_poses_values_iter; - std::unordered_map> symbols_landmarks_values_iter; - std::vector symbols_pose; - std::vector symbols_landmarks; + // triangulate all of the points + + std::unordered_map lmk_triangulated_map; + std::vector triangulated_lmks; for (const std::pair lmk_feat : feature_tracks) { std::vector feat_cam_poses; std::vector feat_kpts; LandmarkId lmk_id = lmk_feat.first; FeatureTrack feature_track = lmk_feat.second; - const gtsam::Symbol symbol_lmk('l', lmk_id); - symbols_landmarks.push_back(symbol_lmk); gtsam::Point3 p_wrld_kpt(0, 0, 0); for (const std::pair &feat_track : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0)); - symbols_landmarks_values_iter.emplace( - symbol_lmk, std::vector{gtsam::Point3(0, 0, 0)}); - graph_.emplace_shared>( - gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, - gtsam::Symbol('x', feat_track.first), symbol_lmk, K); - feat_cam_poses.push_back(cam_pose[feat_track.first]); feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); } bool triangulate_success = SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); if (triangulate_success) { - initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - } - if (symbols_landmarks_values_iter.find(symbol_lmk) != symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); + lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt); + triangulated_lmks.push_back(p_wrld_kpt); } else { + continue; + } + } + // filter points + geometry::viz_scene(std::vector(), triangulated_lmks, true, true, + "Unfiltered points"); + const gtsam::Point3 variance_pts = SFMMvpHelper::get_variance(triangulated_lmks); + const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); + const gtsam::Point3 mean_pts = SFMMvpHelper::averagePoints(triangulated_lmks); + std::unordered_map lmk_triangulated_map_filtered; + std::vector filtered_points; + std::cout << "original var " << variance_pts << std::endl; + for (const std::pair lmk_id_pt : lmk_triangulated_map) { + const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix(); + if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) { + lmk_triangulated_map_filtered.emplace(lmk_id_pt); + filtered_points.push_back(lmk_id_pt.second); + } + } + std::cout << "filtered variance " << SFMMvpHelper::get_variance(filtered_points) << std::endl; + geometry::viz_scene(std::vector(), filtered_points, true, true, + "Unfiltered points"); + + // add filtered points to graph + std::unordered_map> symbols_poses_values_iter; + std::unordered_map> symbols_landmarks_values_iter; + std::vector symbols_pose; + std::vector symbols_landmarks; + for (const std::pair lmk_id_pt : lmk_triangulated_map_filtered) { + LandmarkId lmk_id = lmk_id_pt.first; + const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second; + FeatureTrack feature_track = feature_tracks.at(lmk_id); + const gtsam::Symbol symbol_lmk('l', lmk_id); + for (const std::pair &feat_track : feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); symbols_landmarks_values_iter.emplace(symbol_lmk, std::vector{p_wrld_kpt}); + graph_.emplace_shared>( + gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, + gtsam::Symbol('x', feat_track.first), symbol_lmk, K); } } graph_.emplace_shared>(gtsam::Symbol('x', 0), cam_pose.at(0), @@ -512,7 +552,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) { // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); } - SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); + // SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, + // symbols_landmarks); // do local optimizations and add to iter cache // const int window = 2; @@ -555,6 +596,9 @@ TEST(SFMMvp, sfm_building_manual_incremental) { auto key = std::make_pair(frames[i].id_, kpt_cam0); if (lmk_id_map.find(key) != lmk_id_map.end()) { auto id = lmk_id_map.at(key); + if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) { + continue; + } std::cout << "good" << std::endl; // do nothing // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); @@ -600,8 +644,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) { } } std::cout << "setup complete!" << std::endl; - geometry::viz_scene(viz_poses, viz_lmks, true, true, - "Local Optimization " + std::to_string(i)); + // geometry::viz_scene(viz_poses, viz_lmks, true, true, + // "Local Optimization " + std::to_string(i)); const gtsam::Values symbols_result_local = SFMMvpHelper::optimize_graph( local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); @@ -616,7 +660,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { } } - std::cout << "\nLocal Optimizations Complete!" << std::endl; + std::cout << "\nLocal Optimizations Complete!\n" << std::endl; for (std::pair> sym_pose : symbols_poses_values_iter) { initial_estimate_.insert_or_assign(sym_pose.first, From 1d03689218c923384c05667199d608ed44d3b24e Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 24 Jun 2025 17:45:16 -0400 Subject: [PATCH 21/45] make transform names more clear --- .../get_colmap_groundtruth.cc | 2 +- experimental/learn_descriptors/sfm_mvp.cc | 38 ++++++++++--------- .../structure_from_motion_test.cc | 17 +++++---- .../learn_descriptors/symphony_lake_parser.cc | 32 ++++++++-------- .../learn_descriptors/symphony_lake_parser.hh | 13 ++++--- .../symphony_lake_parser_test.cc | 12 +++--- 6 files changed, 60 insertions(+), 54 deletions(-) diff --git a/experimental/learn_descriptors/get_colmap_groundtruth.cc b/experimental/learn_descriptors/get_colmap_groundtruth.cc index 6afc9fff1..7115daf0d 100644 --- a/experimental/learn_descriptors/get_colmap_groundtruth.cc +++ b/experimental/learn_descriptors/get_colmap_groundtruth.cc @@ -54,7 +54,7 @@ int main() { for (int i = 0; i < 718; i++) { symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(i); const Eigen::Isometry3d T_wrld_cam = - data_parser.get_T_world_boat(img_pt) * data_parser.get_T_boat_camera(img_pt); + data_parser.get_world_from_boat(img_pt) * data_parser.get_boat_from_camera(img_pt); ImagePose img_pose; std::ostringstream oss; diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index 85ef7d3d1..68820b02e 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -187,12 +187,13 @@ TEST(SFMMvp, sfm_building_manual_global) { cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 - // * T_boat_camera - Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); - Eigen::Isometry3d T_world_boat0; - T_world_boat0.linear() = T_earth_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0 + // * boat_from_camera + Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first); + Eigen::Isometry3d world_from_boat0; + world_from_boat0.linear() = earth_from_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = + world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first); // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, // gtsam::Pose3(T_world_camera0.matrix())); Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); @@ -222,9 +223,9 @@ TEST(SFMMvp, sfm_building_manual_global) { cv::undistort(img, img_undistorted, K_mat, D_mat); const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); - T_world_boat.translation() -= T_earth_boat0.translation(); - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt); + world_from_boat.translation() -= earth_from_boat0.translation(); + Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt); gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); cam_pose.emplace(id, T_world_cam_gtsam); cam_isometries.push_back(T_world_cam); @@ -384,12 +385,12 @@ TEST(SFMMvp, sfm_building_manual_incremental) { cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 - // * T_boat_camera - Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); - Eigen::Isometry3d T_world_boat0; - T_world_boat0.linear() = T_earth_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0 + Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first); + Eigen::Isometry3d world_from_boat0; + world_from_boat0.linear() = earth_from_boat0.linear(); + Eigen::Isometry3d T_world_camera0 = + world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first); // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, // gtsam::Pose3(T_world_camera0.matrix())); Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); @@ -419,9 +420,9 @@ TEST(SFMMvp, sfm_building_manual_incremental) { cv::undistort(img, img_undistorted, K_mat, D_mat); const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); - T_world_boat.translation() -= T_earth_boat0.translation(); - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt); + world_from_boat.translation() -= earth_from_boat0.translation(); + Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt); gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); cam_pose.emplace(id, T_world_cam_gtsam); cam_isometries.push_back(T_world_cam); @@ -672,6 +673,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { SFMMvpHelper::averagePoints(sym_lmk.second)); } + // do global optimization const gtsam::Values result = SFMMvpHelper::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); std::cout << "about to visualize result" << std::endl; diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc index 752daa6a3..cfbad2f4a 100644 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ b/experimental/learn_descriptors/structure_from_motion_test.cc @@ -37,8 +37,8 @@ TEST(StructureFromMotiontest, sfm_snippet_small) { // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera // T_earth_boat0 = - Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first); - Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first); + Eigen::Isometry3d T_earth_world = DataParser::get_world_from_boat(img_pt_first); + Eigen::Isometry3d T_world_camera0 = DataParser::get_boat_from_camera(img_pt_first); StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); @@ -47,9 +47,9 @@ TEST(StructureFromMotiontest, sfm_snippet_small) { const cv::Mat img = survey.loadImageByImageIndex(idx); img_vector.push_back(img); const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt); + Eigen::Isometry3d T_earth_boat = DataParser::get_world_from_boat(img_pt); Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt); sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } @@ -92,10 +92,11 @@ TEST(StructureFromMotiontest, sfm_building) { .finished(); // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera - Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first); + Eigen::Isometry3d T_earth_boat0 = DataParser::get_world_from_boat(img_pt_first); Eigen::Isometry3d T_world_boat0; T_world_boat0.linear() = T_earth_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first); + Eigen::Isometry3d T_world_camera0 = + T_world_boat0 * DataParser::get_boat_from_camera(img_pt_first); StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix())); @@ -103,10 +104,10 @@ TEST(StructureFromMotiontest, sfm_building) { const cv::Mat img = survey.loadImageByImageIndex(idx); const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt); + Eigen::Isometry3d T_world_boat = DataParser::get_world_from_boat(img_pt); T_world_boat.translation() -= T_earth_boat0.translation(); - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt); + Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt); sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); } diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc index 4740deb1e..f06ebd116 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.cc +++ b/experimental/learn_descriptors/symphony_lake_parser.cc @@ -26,11 +26,11 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir, } DataParser::~DataParser() {} -const Eigen::Isometry3d DataParser::get_T_boat_camera( +const Eigen::Isometry3d DataParser::get_boat_from_camera( const symphony_lake_dataset::ImagePoint &img_pt) { - return get_T_boat_camera(img_pt.pan, img_pt.tilt); + return get_boat_from_camera(img_pt.pan, img_pt.tilt); } -const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double theta_tilt) { +const Eigen::Isometry3d DataParser::get_boat_from_camera(double theta_pan, double theta_tilt) { Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt Eigen::Matrix3d R_boat_gimbal(R_z * R_y); @@ -46,31 +46,31 @@ const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double t return T_boat_cam; } -const Eigen::Isometry3d DataParser::get_T_world_gps( +const Eigen::Isometry3d DataParser::get_world_from_gps( const symphony_lake_dataset::ImagePoint &img_pt) { - Eigen::Isometry3d T_world_gps; + Eigen::Isometry3d world_from_gps; // img_pt.theta is in the North East Down frame, not lattitude longitude coords. However, // because of the alignment of the imu and gps, this rotation math holds. - T_world_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix(); - T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); - return T_world_gps; + world_from_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix(); + world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + return world_from_gps; } -const Eigen::Isometry3d DataParser::get_T_world_boat( +const Eigen::Isometry3d DataParser::get_world_from_boat( const symphony_lake_dataset::ImagePoint &img_pt) { - // Eigen::Isometry3d T_world_gps; + // Eigen::Isometry3d world_from_gps; // Eigen::Matrix3d R_world_imu = get_R_world_imu(img_pt.theta); // Eigen::Isometry3d T_imu_gps = T_boat_imu.inverse() * T_boat_gps; // Eigen::Matrix3d R_world_gps = R_world_imu * T_imu_gps.linear(); - // T_world_gps.linear() = R_world_gps; - // T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); + // world_from_gps.linear() = R_world_gps; + // world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0); - // Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse(); + // Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse(); - Eigen::Isometry3d T_world_gps = get_T_world_gps(img_pt); - Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse(); - return T_world_boat; + Eigen::Isometry3d world_from_gps = get_world_from_gps(img_pt); + Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse(); + return world_from_boat; } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh index 5c6e91b54..536771d06 100644 --- a/experimental/learn_descriptors/symphony_lake_parser.hh +++ b/experimental/learn_descriptors/symphony_lake_parser.hh @@ -30,14 +30,15 @@ class DataParser { // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, // bool use_compass = false); - static const Eigen::Isometry3d get_T_boat_camera( + static const Eigen::Isometry3d get_boat_from_camera( const symphony_lake_dataset::ImagePoint &img_pt); - static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt); - static const Eigen::Isometry3d get_T_world_gps(const symphony_lake_dataset::ImagePoint &img_pt); - /// @brief get_T_world_boat assuming z_axis_boat dot z_axis_world ~ -1 + static const Eigen::Isometry3d get_boat_from_camera(double theta_pan, double theta_tilt); + static const Eigen::Isometry3d get_world_from_gps( + const symphony_lake_dataset::ImagePoint &img_pt); + /// @brief get_world_from_boat assuming z_axis_boat dot z_axis_world ~ -1 /// @param img_pt - /// @return T_world_boat - static const Eigen::Isometry3d get_T_world_boat( + /// @return world_from_boat + static const Eigen::Isometry3d get_world_from_boat( const symphony_lake_dataset::ImagePoint &img_pt); const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; }; diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index 2a800db9b..eb81ad37f 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -74,13 +74,14 @@ TEST(SymphonyLakeParserTest, test_cam_frames) { std::vector cam_frames; // NOTE: the world in these images is east, north, up centered at boat0 translation - Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation(); + Eigen::Vector3d t_world_boat0 = + DataParser::get_world_from_boat(image_point_first).translation(); for (size_t i = 0; i < indices.size(); i++) { const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); - Eigen::Isometry3d T_world_boatidx = DataParser::get_T_world_boat(img_pt); + Eigen::Isometry3d T_world_boatidx = DataParser::get_world_from_boat(img_pt); Eigen::Isometry3d T_boatidx_camidx = - DataParser::get_T_boat_camera(img_pt); // current boat to current camera + DataParser::get_boat_from_camera(img_pt); // current boat to current camera Eigen::Isometry3d T_world_camidx = T_world_boatidx * T_boatidx_camidx; T_world_camidx.translation() -= t_world_boat0; @@ -108,12 +109,13 @@ TEST(SymphonyLakeParserTest, test_gps_frames) { std::vector gps_frames; // NOTE: the world in these images is east, north, up centered at boat0 translation - Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation(); + Eigen::Vector3d t_world_boat0 = + DataParser::get_world_from_boat(image_point_first).translation(); // Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0); for (size_t i = 0; i < indices.size(); i++) { const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]); - Eigen::Isometry3d T_world_gpsidx = DataParser::get_T_world_gps(img_pt); + Eigen::Isometry3d T_world_gpsidx = DataParser::get_world_from_gps(img_pt); T_world_gpsidx.translation() -= t_world_boat0; gps_frames.push_back(T_world_gpsidx); } From 94d5e5e5746d2b538a42ce50b319f4a8c0117804 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Wed, 25 Jun 2025 17:18:21 -0400 Subject: [PATCH 22/45] WIP --- common/geometry/camera.cc | 6 +- common/geometry/camera.hh | 6 +- common/geometry/camera_test.cc | 51 +- experimental/learn_descriptors/BUILD | 21 +- .../learn_descriptors/incremental_sfm_mvp.cc | 676 +++++++++--------- 5 files changed, 408 insertions(+), 352 deletions(-) diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc index b13718570..4994914c9 100644 --- a/common/geometry/camera.cc +++ b/common/geometry/camera.cc @@ -24,9 +24,9 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.); } -Eigen::Isometry3d estimate_c0_c1(const std::vector &kpts0, - const std::vector &kpts1, - const std::vector &matches, const cv::Mat &K) { +Eigen::Isometry3d estimate_c0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, const cv::Mat &K) { Eigen::Isometry3d result; std::vector pts1; std::vector pts2; diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh index 82b74c818..2feea1954 100644 --- a/common/geometry/camera.hh +++ b/common/geometry/camera.hh @@ -21,7 +21,7 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, const double depth); -Eigen::Isometry3d estimate_c0_c1(const std::vector &kpts0, - const std::vector &kpts1, - const std::vector &matches, const cv::Mat &K); +Eigen::Isometry3d estimate_c0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, const cv::Mat &K); } // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc index 392f876f6..ca2594abf 100644 --- a/common/geometry/camera_test.cc +++ b/common/geometry/camera_test.cc @@ -57,7 +57,7 @@ TEST(CameraTest, test_estimate_pose) { Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - const int initial_size = p_W_cube.size(); + // const int initial_size = p_W_cube.size(); const Eigen::Vector3d p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2); for (const Eigen::Vector3d &point_W_cube : p_W_cube) { p_W_cube.emplace_back(R_new_points * (point_W_cube - p_world_cube_center) + @@ -76,38 +76,37 @@ TEST(CameraTest, test_estimate_pose) { Eigen::Matrix3d K_eig = cv_to_eigen_mat(K); std::vector poses; - Eigen::Matrix3d R_world_cam0( + Eigen::Matrix3d R_world_from_cam0( Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - Eigen::Isometry3d T_world_cam0 = Eigen::Isometry3d::Identity(); - T_world_cam0.linear() = R_world_cam0; - T_world_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2); - poses.push_back(T_world_cam0); + Eigen::Isometry3d world_from_cam0 = Eigen::Isometry3d::Identity(); + world_from_cam0.linear() = R_world_from_cam0; + world_from_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2); + poses.push_back(world_from_cam0); - Eigen::Matrix3d R_world_cam0_to_cam1( + Eigen::Matrix3d R_world_45deg( Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix()); - Eigen::Isometry3d T_world_cam1; - T_world_cam1.linear() = R_world_cam0_to_cam1 * T_world_cam0.linear(); - T_world_cam1.translation() = - R_world_cam0_to_cam1 * (T_world_cam0.translation() - p_world_cube_center) + - p_world_cube_center; - poses.push_back(T_world_cam1); + Eigen::Isometry3d world_from_cam1; + world_from_cam1.linear() = R_world_45deg * world_from_cam0.linear(); + world_from_cam1.translation() = + R_world_45deg * (world_from_cam0.translation() - p_world_cube_center) + p_world_cube_center; + poses.push_back(world_from_cam1); std::vector kpts0; std::vector kpts1; std::vector matches; for (const Eigen::Vector3d &point_W_cube : p_W_cube) { - Eigen::Vector3d p_cam0_cubep = - (T_world_cam0.inverse() * + Eigen::Vector3d p_cube_points_in_cam0 = + (world_from_cam0.inverse() * Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.)) .head<3>(); - Eigen::Vector3d p_cam1_cubep = - (T_world_cam1.inverse() * + Eigen::Vector3d p_cube_points_in_cam1 = + (world_from_cam1.inverse() * Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.)) .head<3>(); - Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cam0_cubep; - Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cam1_cubep; + Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cube_points_in_cam0; + Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cube_points_in_cam1; Eigen::Vector2d pxl_c0_pcube = pxl_c0_pcube_homog.head<2>() / pxl_c0_pcube_homog(2); Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2); if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) && @@ -118,14 +117,14 @@ TEST(CameraTest, test_estimate_pose) { } } - Eigen::Isometry3d T_cam0_cam1_estimate = estimate_c0_c1(kpts0, kpts1, matches, K); - Eigen::Isometry3d T_cam0_cam1_scale = T_world_cam0.inverse() * T_world_cam1; - T_cam0_cam1_scale.translation() /= T_cam0_cam1_scale.translation().norm(); + Eigen::Isometry3d cam0_from_cam1_estimate = estimate_c0_from_cam1(kpts0, kpts1, matches, K); + Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1; + cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm(); EXPECT_TRUE( - T_cam0_cam1_estimate.translation().isApprox(T_cam0_cam1_scale.translation(), 0.000001)); - EXPECT_TRUE(T_cam0_cam1_estimate.linear().isApprox(T_cam0_cam1_scale.linear(), 0.001)); + cam0_from_cam1_estimate.translation().isApprox(cam0_from_cam1.translation(), 0.000001)); + EXPECT_TRUE(cam0_from_cam1_estimate.linear().isApprox(cam0_from_cam1.linear(), 0.001)); - poses.emplace_back(T_world_cam0 * T_cam0_cam1_estimate); - // viz_scene(poses, p_W_cube); + poses.emplace_back(world_from_cam0 * cam0_from_cam1_estimate); + viz_scene(poses, p_W_cube); } } // namespace robot::geometry \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index a5b1a9e22..d0a748b1b 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -360,6 +360,25 @@ cc_binary( ":frame", ":structure_from_motion_types", ":frontend", - "@boost//:smart_ptr" + "@boost//:smart_ptr", + ] +) + +cc_binary( + name = "incrememtal_sfm_mvp", + srcs = ["incrememtal_sfm_mvp.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "@opencv//:opencv", + "@gtsam//:gtsam", + ":frontend_definitions", + ":frontend", + ":four_seasons_parser", + "@eigen", + "//visualization/opencv:opencv_viz", + ":frame", + ":structure_from_motion_types" ] ) \ No newline at end of file diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index d50d71208..021770c62 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -2,6 +2,7 @@ #include "Eigen/Core" #include "Eigen/Geometry" +#include "experimental/learn_descriptors/four_seasons_parser.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend.hh" #include "experimental/learn_descriptors/frontend_definitions.hh" @@ -156,327 +157,364 @@ static gtsam::Point3 get_variance(const std::vector &points) { } }; // namespace detail -int main() { +int main(int argc, const char **argv) { + using namespace robot::experimental::learn_descriptors; + // clang-format off + cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser"); + options.add_options() + ("data_dir", "Path to dataset root directory", cxxopts::value()) + ("calibration_dir", "Path to dataset calibration directory", cxxopts::value()) + ("help", "Print usage"); + // clang-format on + + auto args = options.parse(argc, argv); + + const auto check_required = [&](const std::string &opt) { + if (args.count(opt) == 0) { + std::cout << "Missing " << opt << " argument" << std::endl; + std::cout << options.help() << std::endl; + std::exit(1); + } + }; + + if (args.count("help")) { + std::cout << options.help() << std::endl; + return 0; + } + + check_required("data_dir"); + check_required("calibration_dir"); + + const std::filesystem::path path_data = args["data_dir"].as(); + const std::filesystem::path path_calibration = args["calibration_dir"].as(); + FourSeasonsParser parser(path_data, path_calibration); + std::cout << "incremental_sfm_mvp!" << std::endl; - // const std::vector indices{60, 80, 100, 120, 140}; - // // const std::vector indices = []() { - // // std::vector tmp; - // // for (int i = 0; i < 200; i += 10) { - // // tmp.push_back(i); - // // } - // // return tmp; - // // }(); - // DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - // const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - // const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - // const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - - // // const size_t img_width = img_pt_first.width, img_height = - // // img_pt_first.height; - // const double fx = img_pt_first.fx, fy = img_pt_first.fy; - // const double cx = img_pt_first.cx, cy = img_pt_first.cy; - // gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); - // Eigen::Matrix D = - // (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, - // SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - // .finished(); - // cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, - // 1); cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - - // // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0 - // Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first); - // Eigen::Isometry3d world_from_boat0; - // world_from_boat0.linear() = earth_from_boat0.linear(); - // Eigen::Isometry3d T_world_camera0 = - // world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first); - // // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // // gtsam::Pose3(T_world_camera0.matrix())); - // Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); - - // gtsam::Values initial_estimate_; - // gtsam::NonlinearFactorGraph graph_; - - // gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - // gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - // gtsam::Vector6 prior_sigmas; - // prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; - // gtsam::Vector3 gps_sigmas; - // gps_sigmas << 2.1, 2.1, 0.1; - // gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = - // gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); - // gtsam::noiseModel::Diagonal::shared_ptr gps_noise = - // gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); - - // // populate frames and cam_poses - // std::vector frames; - // FrameId id = 0; - // std::unordered_map cam_pose; - // std::vector cam_isometries; - // for (const int &idx : indices) { - // const cv::Mat img = survey.loadImageByImageIndex(idx); - // cv::Mat img_undistorted; - // cv::undistort(img, img_undistorted, K_mat, D_mat); - // const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - - // Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt); - // world_from_boat.translation() -= earth_from_boat0.translation(); - // Eigen::Isometry3d T_world_cam = world_from_boat * - // DataParser::get_boat_from_camera(img_pt); gtsam::Pose3 - // T_world_cam_gtsam(T_world_cam.matrix()); cam_pose.emplace(id, T_world_cam_gtsam); - // cam_isometries.push_back(T_world_cam); - - // Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); - - // std::pair, cv::Mat> kpts_descs = - // frontend.get_keypoints_and_descriptors(img_undistorted); - // KeypointsCV kpts; - // for (const cv::KeyPoint &kpt : kpts_descs.first) { - // kpts.push_back(kpt.pt); - // } - // frame.add_keypoints(kpts); - // frame.assign_descriptors(kpts_descs.second); - - // frames.push_back(frame); - - // id++; - // } - - // geometry::viz_scene(cam_isometries, std::vector()); - - // // populate feature_tracks and lmk_id_map - // FeatureTracks feature_tracks; - // FrameLandmarkIdMap lmk_id_map; - // LandmarkId lmk_id = 0; - // for (size_t i = 0; i < indices.size() - 1; i++) { - // std::vector matches = - // frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - // frontend.enforce_bijective_buffer_matches(matches); - // for (const cv::DMatch match : matches) { - // const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.`queryIdx]; - // const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - - // auto key = std::make_pair(frames[i].id_, kpt_cam0); - // if (lmk_id_map.find(key) != lmk_id_map.end()) { - // auto id = lmk_id_map.at(key); - // feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - // feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - // } else { - // FeatureTrack feature_track(i, kpt_cam0); - // feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - // feature_tracks.emplace(lmk_id, feature_track); - // lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - // lmk_id++; - // } - // } - // } - - // // triangulate all of the points - - // std::unordered_map lmk_triangulated_map; - // std::vector triangulated_lmks; - // for (const std::pair lmk_feat : feature_tracks) { - // std::vector feat_cam_poses; - // std::vector feat_kpts; - // LandmarkId lmk_id = lmk_feat.first; - // FeatureTrack feature_track = lmk_feat.second; - // gtsam::Point3 p_wrld_kpt(0, 0, 0); - // for (const std::pair &feat_track : feature_track.obs_) { - // feat_cam_poses.push_back(cam_pose[feat_track.first]); - // feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); - // } - // bool triangulate_success = - // detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); - // if (triangulate_success) { - // lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt); - // triangulated_lmks.push_back(p_wrld_kpt); - // } else { - // continue; - // } - // } - // // filter points - // geometry::viz_scene(std::vector(), triangulated_lmks, true, true, - // "Unfiltered points"); - // const gtsam::Point3 variance_pts = detail::get_variance(triangulated_lmks); - // const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); - // const gtsam::Point3 mean_pts = detail::averagePoints(triangulated_lmks); - // std::unordered_map lmk_triangulated_map_filtered; - // std::vector filtered_points; - // std::cout << "original var " << variance_pts << std::endl; - // for (const std::pair lmk_id_pt : lmk_triangulated_map) { - // const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix(); - // if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) { - // lmk_triangulated_map_filtered.emplace(lmk_id_pt); - // filtered_points.push_back(lmk_id_pt.second); - // } - // } - // std::cout << "filtered variance " << detail::get_variance(filtered_points) << std::endl; - // geometry::viz_scene(std::vector(), filtered_points, true, true, - // "Unfiltered points"); - - // // add filtered points to graph - // std::unordered_map> symbols_poses_values_iter; - // std::unordered_map> symbols_landmarks_values_iter; - // std::vector symbols_pose; - // std::vector symbols_landmarks; - // for (const std::pair lmk_id_pt : lmk_triangulated_map_filtered) { - // LandmarkId lmk_id = lmk_id_pt.first; - // const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second; - // FeatureTrack feature_track = feature_tracks.at(lmk_id); - // const gtsam::Symbol symbol_lmk('l', lmk_id); - // for (const std::pair &feat_track : feature_track.obs_) { - // initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - // symbols_landmarks_values_iter.emplace(symbol_lmk, - // std::vector{p_wrld_kpt}); - // graph_.emplace_shared>( - // gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, - // gtsam::Symbol('x', feat_track.first), symbol_lmk, K); - // } - // } - // graph_.emplace_shared>(gtsam::Symbol('x', 0), - // cam_pose.at(0), - // prior_pose_noise); - // initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); - // symbols_pose.push_back(gtsam::Symbol('x', 0)); - // symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0), - // std::vector{cam_pose.at(0)}); - // for (const std::pair cam_id_pose : cam_pose) { - // if (cam_id_pose.first == 0) { - // continue; - // } - // const gtsam::Symbol key_cam('x', cam_id_pose.first); - // symbols_pose.push_back(key_cam); - // gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); - // graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); - // initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); - // symbols_poses_values_iter.emplace(key_cam, - // std::vector{cam_id_pose.second}); - // // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); - // } - - // // detail::graph_values(initial_estimate_, "Confirmation", symbols_pose, - // // symbols_landmarks); - - // // do local optimizations and add to iter cache - // // const int window = 2; - // std::cout << "length of cam_poses: " << cam_pose.size() << std::endl; - // for (const auto &id_pose : cam_pose) { - // std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl; - // } - // for (size_t i = 0; i < indices.size() - 1; i++) { - // std::cout << "Local optimization " << i << std::endl; - // gtsam::Values local_estimate_; - // gtsam::NonlinearFactorGraph local_graph_; - - // std::vector poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)}; - // std::vector lmks; - - // local_graph_.emplace_shared>( - // gtsam::PriorFactor(poses[0], cam_pose.at(i), prior_pose_noise)); - // std::cout << "fuck" << std::endl; - // local_graph_.emplace_shared>( - // gtsam::PriorFactor(poses[1], cam_pose.at(i + 1), prior_pose_noise)); - // std::cout << "mega fuck" << std::endl; - // local_estimate_.insert_or_assign(poses[0], cam_pose.at(i)); - // local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1)); - - // std::vector matches = - // frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - // frontend.enforce_bijective_matches(matches); - // std::vector feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)}; - - // std::vector viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()), - // Eigen::Isometry3d(feat_cam_poses[1].matrix())}; - // std::vector viz_lmks; - // for (const cv::DMatch match : matches) { - // std::vector feat_kpts; - // const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - // const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - // feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); - // feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); - - // auto key = std::make_pair(frames[i].id_, kpt_cam0); - // if (lmk_id_map.find(key) != lmk_id_map.end()) { - // auto id = lmk_id_map.at(key); - // if (lmk_triangulated_map_filtered.find(id) == - // lmk_triangulated_map_filtered.end()) { - // continue; - // } - // std::cout << "good" << std::endl; - // // do nothing - // // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - // // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - // } else { - // std::cerr << "this shouldn't happen right?" << std::endl; - // FeatureTrack feature_track(frames[i].id_, kpt_cam0); - // feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - // feature_tracks.emplace(lmk_id, feature_track); - // lmk_id_map.emplace(key, lmk_id); - // lmk_id++; - // } - // std::cout << "oog" << std::endl; - // const gtsam::Symbol symbol_lmk = - // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))); - // // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != - // // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { - // // std::cerr << "UH OH" << std::endl; - // // } else { - // // std::cout << "cool" << std::endl; - // // } - // lmks.push_back(symbol_lmk); - // local_graph_ - // .emplace_shared>( - // feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K); - // local_graph_ - // .emplace_shared>( - // feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K); - - // gtsam::Point3 p_wrld_kpt; - // bool triangulate_success = - // detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); - // if (triangulate_success) { - // local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - // viz_lmks.push_back(p_wrld_kpt); - // } - // if (symbols_landmarks_values_iter.find(symbol_lmk) != - // symbols_landmarks_values_iter.end()) { - // symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); - // } else { - // symbols_landmarks_values_iter.emplace(symbol_lmk, - // std::vector{p_wrld_kpt}); - // } + const std::vector indices{100, 120, 140, 160, 180}; + // const std::vector indices = []() { + // std::vector tmp; + // for (int i = 0; i < 200; i += 10) { + // tmp.push_back(i); // } - // std::cout << "setup complete!" << std::endl; - // // geometry::viz_scene(viz_poses, viz_lmks, true, true, - // // "Local Optimization " + std::to_string(i)); + // return tmp; + // }(); + // FourSeasonsParser parser() + // const size_t img_width = img_pt_first.width, img_height = + // img_pt_first.height; + const FourSeasonsParser::CameraCalibrationFisheye cal_parser_left_cam = + parser.camera_calibration(); + gtsam::Cal3_S2::shared_ptr K = + boost::make_shared(cal_parser_left_cam.fx, cal_parser_left_cam.fy, 0, + cal_parser_left_cam.cx, cal_parser_left_cam.cy); + cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); + cv::Mat D_mat = (cv::Mat_(4, 1) << cal_parser_left_cam.k1, cal_parser_left_cam.k2, + cal_parser_left_cam.k3, cal_parser_left_cam.k4); + + // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + // gtsam::Pose3(T_world_camera0.matrix())); + Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); + + gtsam::Values initial_estimate_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::Vector6 prior_sigmas; + prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; + gtsam::Vector3 gps_sigmas; + gps_sigmas << 2.1, 2.1, 0.1; + gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = + gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = + gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); + + // populate frames and cam_poses + std::vector frames; + FrameId id = 0; + std::unordered_map poses_cam_init; + std::vector cam_references; + Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); + scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = + parser.gnss_scale(); + for (const int &idx : indices) { + const cv::Mat img = survey.loadImageByImageIndex(idx); + cv::Mat img_undistorted; + cv::fisheye::undistort(img, img_undistorted, K_mat, D_mat); + Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); + + std::pair, cv::Mat> kpts_descs = + frontend.get_keypoints_and_descriptors(img_undistorted); + KeypointsCV kpts; + for (const cv::KeyPoint &kpt : kpts_descs.first) { + kpts.push_back(kpt.pt); + } + frame.add_keypoints(kpts); + frame.assign_descriptors(kpts_descs.second); + + frames.push_back(frame); + + const ImagePoint img_pt = parser.get_image_point(idx); + + Eigen::Isometry3d world_from_cam; + if (id == 0) { + // for now we will use an informative prior based on reference + world_from_cam = Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference * + img_pt.AS_w_from_gnss_cam->matrix())); + } else if (img_pt.gps_gcs) { + const Eigen::Vector3d gps_gcs( + img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, + img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); + const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs); + world_from_cam.translation() = + (parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * p_gps_in_ECEF; + + Eigen:: + } + poses_cam_init.emplace(id, world_from_cam.matrix()); + + if (img_pt.AS_w_from_gnss_cam) { + cam_references.push_back( + Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference * + img_pt.AS_w_from_gnss_cam->matrix()))); + } else { + std::clog << "Warning: no reference data at img_pt " << idx << std::endl; + } - // const gtsam::Values symbols_result_local = detail::optimize_graph( - // local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); + id++; + } - // for (const gtsam::Symbol &symbol_pose : poses) { - // const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); - // symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); - // } - // for (const gtsam::Symbol &symbol_lmk : lmks) { - // const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); - // symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); - // } - // } - - // std::cout << "\nLocal Optimizations Complete!\n" << std::endl; - - // for (std::pair> sym_pose : - // symbols_poses_values_iter) { - // initial_estimate_.insert_or_assign(sym_pose.first, - // detail::averagePoses(sym_pose.second)); - // } - // for (std::pair> sym_lmk : - // symbols_landmarks_values_iter) { - // initial_estimate_.insert_or_assign(sym_lmk.first, detail::averagePoints(sym_lmk.second)); - // } - - // // do global optimization - // const gtsam::Values result = - // detail::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); - // std::cout << "about to visualize result" << std::endl; - // detail::graph_values(result, "Result", symbols_pose, symbols_landmarks); + geometry::viz_scene(cam_references, std::vector(), true, true, "References"); + + // populate feature_tracks and lmk_id_map + // TODO: exhaustively get matches + FeatureTracks feature_tracks; + FrameLandmarkIdMap lmk_id_map; + LandmarkId lmk_id = 0; + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_buffer_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.`queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); + lmk_id++; + } + } + } + + // triangulate all of the points + + std::unordered_map lmk_triangulated_map; + std::vector triangulated_lmks; + for (const std::pair lmk_feat : feature_tracks) { + std::vector feat_cam_poses; + std::vector feat_kpts; + LandmarkId lmk_id = lmk_feat.first; + FeatureTrack feature_track = lmk_feat.second; + gtsam::Point3 p_wrld_kpt(0, 0, 0); + for (const std::pair &feat_track : feature_track.obs_) { + feat_cam_poses.push_back(poses_cam_init[feat_track.first]); + feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); + } + bool triangulate_success = + detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); + if (triangulate_success) { + lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt); + triangulated_lmks.push_back(p_wrld_kpt); + } else { + continue; + } + } + // filter points + geometry::viz_scene(std::vector(), triangulated_lmks, true, true, + "Unfiltered points"); + const gtsam::Point3 variance_pts = detail::get_variance(triangulated_lmks); + const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); + const gtsam::Point3 mean_pts = detail::averagePoints(triangulated_lmks); + std::unordered_map lmk_triangulated_map_filtered; + std::vector filtered_points; + std::cout << "original var " << variance_pts << std::endl; + for (const std::pair lmk_id_pt : lmk_triangulated_map) { + const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix(); + if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) { + lmk_triangulated_map_filtered.emplace(lmk_id_pt); + filtered_points.push_back(lmk_id_pt.second); + } + } + std::cout << "filtered variance " << detail::get_variance(filtered_points) << std::endl; + geometry::viz_scene(std::vector(), filtered_points, true, true, + "Unfiltered points"); + + // add filtered points to graph + std::unordered_map> symbols_poses_values_iter; + std::unordered_map> symbols_landmarks_values_iter; + std::vector symbols_pose; + std::vector symbols_landmarks; + for (const std::pair lmk_id_pt : lmk_triangulated_map_filtered) { + LandmarkId lmk_id = lmk_id_pt.first; + const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second; + FeatureTrack feature_track = feature_tracks.at(lmk_id); + const gtsam::Symbol symbol_lmk('l', lmk_id); + for (const std::pair &feat_track : feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + symbols_landmarks_values_iter.emplace(symbol_lmk, + std::vector{p_wrld_kpt}); + graph_.emplace_shared>( + gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, + gtsam::Symbol('x', feat_track.first), symbol_lmk, K); + } + } + graph_.emplace_shared>(gtsam::Symbol('x', 0), + poses_cam_init.at(0), prior_pose_noise); + initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), poses_cam_init.at(0)); + symbols_pose.push_back(gtsam::Symbol('x', 0)); + symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0), + std::vector{poses_cam_init.at(0)}); + for (const std::pair cam_id_pose : poses_cam_init) { + if (cam_id_pose.first == 0) { + continue; + } + const gtsam::Symbol key_cam('x', cam_id_pose.first); + symbols_pose.push_back(key_cam); + gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); + graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); + initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); + symbols_poses_values_iter.emplace(key_cam, std::vector{cam_id_pose.second}); + // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); + } + + // detail::graph_values(initial_estimate_, "Confirmation", symbols_pose, + // symbols_landmarks); + + // do local optimizations and add to iter cache + // const int window = 2; + std::cout << "length of cam_poses: " << poses_cam_init.size() << std::endl; + for (const auto &id_pose : poses_cam_init) { + std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl; + } + for (size_t i = 0; i < indices.size() - 1; i++) { + std::cout << "Local optimization " << i << std::endl; + gtsam::Values local_estimate_; + gtsam::NonlinearFactorGraph local_graph_; + + std::vector poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)}; + std::vector lmks; + + local_graph_.emplace_shared>( + gtsam::PriorFactor(poses[0], poses_cam_init.at(i), prior_pose_noise)); + std::cout << "fuck" << std::endl; + local_graph_.emplace_shared>( + gtsam::PriorFactor(poses[1], poses_cam_init.at(i + 1), prior_pose_noise)); + std::cout << "mega fuck" << std::endl; + local_estimate_.insert_or_assign(poses[0], poses_cam_init.at(i)); + local_estimate_.insert_or_assign(poses[1], poses_cam_init.at(i + 1)); + + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_matches(matches); + std::vector feat_cam_poses{poses_cam_init.at(i), poses_cam_init.at(i + 1)}; + + std::vector viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()), + Eigen::Isometry3d(feat_cam_poses[1].matrix())}; + std::vector viz_lmks; + for (const cv::DMatch match : matches) { + std::vector feat_kpts; + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); + feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) { + continue; + } + std::cout << "good" << std::endl; + // do nothing + // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + std::cerr << "this shouldn't happen right?" << std::endl; + FeatureTrack feature_track(frames[i].id_, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(key, lmk_id); + lmk_id++; + } + std::cout << "oog" << std::endl; + const gtsam::Symbol symbol_lmk = + gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))); + // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != + // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { + // std::cerr << "UH OH" << std::endl; + // } else { + // std::cout << "cool" << std::endl; + // } + lmks.push_back(symbol_lmk); + local_graph_ + .emplace_shared>( + feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K); + local_graph_ + .emplace_shared>( + feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K); + + gtsam::Point3 p_wrld_kpt; + bool triangulate_success = + detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); + if (triangulate_success) { + local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + viz_lmks.push_back(p_wrld_kpt); + } + if (symbols_landmarks_values_iter.find(symbol_lmk) != + symbols_landmarks_values_iter.end()) { + symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); + } else { + symbols_landmarks_values_iter.emplace(symbol_lmk, + std::vector{p_wrld_kpt}); + } + } + std::cout << "setup complete!" << std::endl; + // geometry::viz_scene(viz_poses, viz_lmks, true, true, + // "Local Optimization " + std::to_string(i)); + + const gtsam::Values symbols_result_local = detail::optimize_graph( + local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); + + for (const gtsam::Symbol &symbol_pose : poses) { + const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); + symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); + } + for (const gtsam::Symbol &symbol_lmk : lmks) { + const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); + symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); + } + } + + std::cout << "\nLocal Optimizations Complete!\n" << std::endl; + + for (std::pair> sym_pose : symbols_poses_values_iter) { + initial_estimate_.insert_or_assign(sym_pose.first, detail::averagePoses(sym_pose.second)); + } + for (std::pair> sym_lmk : + symbols_landmarks_values_iter) { + initial_estimate_.insert_or_assign(sym_lmk.first, detail::averagePoints(sym_lmk.second)); + } + + // do global optimization + const gtsam::Values result = + detail::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); + std::cout << "about to visualize result" << std::endl; + detail::graph_values(result, "Result", symbols_pose, symbols_landmarks); } \ No newline at end of file From 32052d2d5accc03c23389242e1335a1d17ccab3a Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 26 Jun 2025 17:20:44 -0400 Subject: [PATCH 23/45] WIP. pipeline code first pass, optimization is failing --- experimental/learn_descriptors/BUILD | 26 +- .../learn_descriptors/four_seasons_parser.cc | 28 +- .../four_seasons_parser_example.cc | 2 +- .../four_seasons_parser_example_viz.cc | 2 +- experimental/learn_descriptors/frame.hh | 6 +- .../learn_descriptors/frontend_test.cc | 41 +- .../learn_descriptors/incremental_sfm_mvp.cc | 508 ++++++++++-------- 7 files changed, 350 insertions(+), 263 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index d0a748b1b..0c22053f8 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -353,32 +353,14 @@ cc_binary( deps = [ ":four_seasons_parser", "@eigen", - "@opencv//:opencv", - "@gtsam//:gtsam", + "@opencv", + "@gtsam", "//visualization/opencv:opencv_viz", ":spatial_test_scene_cube", ":frame", ":structure_from_motion_types", ":frontend", - "@boost//:smart_ptr", + "@boost//:smart_ptr", + "@cxxopts", ] ) - -cc_binary( - name = "incrememtal_sfm_mvp", - srcs = ["incrememtal_sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@opencv//:opencv", - "@gtsam//:gtsam", - ":frontend_definitions", - ":frontend", - ":four_seasons_parser", - "@eigen", - "//visualization/opencv:opencv_viz", - ":frame", - ":structure_from_motion_types" - ] -) \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index 4d7d91b21..75886a552 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,8 @@ #include "nmea/message/rmc.hpp" #include "nmea/sentence.hpp" +#define IMU_HZ 30.0 + namespace robot::experimental::learn_descriptors { using namespace detail; FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, @@ -44,7 +47,8 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, gps_parser_help::TimeGPSList gps_time_list = gps_parser_help::create_gps_time_data_map(path_gps); - const double imu_hz = 30.0; + // std::cout << "imu dt in nanoseconds: " << std::to_string((1.0 / IMU_HZ) * 1e9) << std::endl; + size_t id = 0; size_t gps_idx = 0; for (const std::pair>& pair_time_data : img_time_list) { @@ -97,12 +101,25 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, << std::endl; } while (gps_idx < gps_time_list.size() && time_key > gps_time_list[gps_idx].first && - time_key - gps_time_list[gps_idx].first > (1.0 / imu_hz) * 1e9) { + time_key - gps_time_list[gps_idx].first > (1.0 / IMU_HZ) * 1e9) { gps_idx++; + // if (gps_idx < 200) { + // std::stringstream ss; + // ss << "uh oh: " << gps_idx; + // if (time_key > gps_time_list[gps_idx].first) { + // ss << "\ttime_key - gps_time: " << (time_key - gps_time_list[gps_idx].first); + // } else { + // ss << "\tgps_time - time_key: " << (gps_time_list[gps_idx].first - time_key); + // } + // std::cout << ss.str() << std::endl; + // } } // find the closest gps point whose time is before the current image capture time if (gps_idx < gps_time_list.size() && - time_key - gps_time_list[gps_idx].first <= (1.0 / imu_hz) * 1e9) { + time_key - gps_time_list[gps_idx].first <= (1.0 / IMU_HZ) * 1e9) { img_pt.gps_gcs = gps_time_list[gps_idx].second; + // std::cout << "adding gps data at img_pt: " << img_pt.id << " with gps_idx: " << + // gps_idx + // << std::endl; gps_idx++; } img_pt_vector_.push_back(img_pt); @@ -313,7 +330,10 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) { gps_data.longitude = gga.longitude.get(); if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); if (gga.utc.get() == time_of_day_last) { // GGA messages for this dataset come - // after RMC messages + // after RMC messages + // std::cout << "adding gps data with time of day: " << gga.utc.get() + // << " and unix time: " + // << gps_utc_to_unix_time(date_last, gga.utc.get()) << std::endl; time_map_gps.push_back( std::make_pair(gps_utc_to_unix_time(date_last, gga.utc.get()), gps_data)); } diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc index 0b5cadf7f..48c496579 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example.cc @@ -59,7 +59,7 @@ int main(int argc, const char** argv) { img_first_and_last); cv::imshow("First + Last Images", img_first_and_last); - for (size_t i = 0; i < parser.num_images(); i += 99) { + for (size_t i = 0; i < parser.num_images(); i += 1) { cv::Mat img = parser.load_image(i); const std::string img_str = "img " + std::to_string(i); cv::putText(img, img_str, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index 6b6954170..f0e5c5e5c 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -10,6 +9,7 @@ #include "common/check.hh" #include "cxxopts.hpp" #include "experimental/learn_descriptors/four_seasons_parser.hh" +#include "opencv2/opencv.hpp" #include "visualization/opencv/opencv_viz.hh" namespace lrn_desc = robot::experimental::learn_descriptors; diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index 720aa1f59..5668da713 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -1,4 +1,6 @@ #pragma once +#include + #include "experimental/learn_descriptors/frontend_definitions.hh" #include "gtsam/geometry/Cal3_S2.h" #include "gtsam/geometry/Pose3.h" @@ -10,6 +12,8 @@ class Frame { Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K, const gtsam::Pose3& T_wrld_grnd_truth) : id_(id), img_(img_undistorted), K_(K), groundtruth_(T_wrld_grnd_truth) {} + Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K) + : id_(id), img_(img_undistorted), K_(K) {} void add_keypoints(const KeypointsCV& kpts); void assign_descriptors(const cv::Mat& descriptors); @@ -22,6 +26,6 @@ class Frame { gtsam::Cal3_S2::shared_ptr K_; KeypointsCV kpts_; cv::Mat descriptors_; - gtsam::Pose3 groundtruth_; + std::optional groundtruth_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc index 8ab092d3e..d90142e3c 100644 --- a/experimental/learn_descriptors/frontend_test.cc +++ b/experimental/learn_descriptors/frontend_test.cc @@ -47,10 +47,10 @@ TEST(FrontendTest, pipeline_sweep) { cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size()); cv::warpAffine(image_1, image_2, translation_mat, image_1.size()); - // cv::Mat img_test_disp; - // cv::hconcat(image_1, image_2, img_test_disp); - // cv::imshow("Test", img_test_disp); - // cv::waitKey(1000); + cv::Mat img_test_disp; + cv::hconcat(image_1, image_2, img_test_disp); + cv::imshow("Test", img_test_disp); + cv::waitKey(1000); Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT, Frontend::ExtractorType::ORB}; @@ -64,11 +64,11 @@ TEST(FrontendTest, pipeline_sweep) { std::vector matches; cv::Mat img_keypoints_out_1(height, width, CV_8UC3), img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); - // cv::Mat img_display_test; + cv::Mat img_display_test; for (Frontend::ExtractorType extractor_type : extractor_types) { for (Frontend::MatcherType matcher_type : matcher_types) { - // printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), - // static_cast(matcher_type)); + printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), + static_cast(matcher_type)); try { frontend = Frontend(extractor_type, matcher_type); } catch (const std::invalid_argument &e) { @@ -85,20 +85,19 @@ TEST(FrontendTest, pipeline_sweep) { img_keypoints_out_2); frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2, keypoints_descriptors_pair_2.first, matches, img_matches_out); - // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - // cv::vconcat(img_display_test, img_matches_out, img_display_test); - // std::stringstream text; - // text << "Extractor " << static_cast(extractor_type) << ", matcher " - // << static_cast(matcher_type); - // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), - // cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); - // cv::imshow("Keypoints and Matches Output.", img_display_test); - // std::cout << "Hold spacebar to pause." << std::endl; - // while (cv::waitKey(1000) == 32) { - // } - // printf("completed frontend combination: (%d, %d)\n", - // static_cast(extractor_type), - // static_cast(matcher_type)); + cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + cv::vconcat(img_display_test, img_matches_out, img_display_test); + std::stringstream text; + text << "Extractor " << static_cast(extractor_type) << ", matcher " + << static_cast(matcher_type); + cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), + cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); + cv::imshow("Keypoints and Matches Output.", img_display_test); + std::cout << "Hold spacebar to pause." << std::endl; + while (cv::waitKey(1000) == 32) { + } + printf("completed frontend combination: (%d, %d)\n", static_cast(extractor_type), + static_cast(matcher_type)); if (extractor_type != Frontend::ExtractorType::ORB) { // don't check ORB for now for (const cv::DMatch match : matches) { EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index 021770c62..649d30479 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -1,7 +1,11 @@ +#include +#include +#include #include #include "Eigen/Core" #include "Eigen/Geometry" +#include "cxxopts.hpp" #include "experimental/learn_descriptors/four_seasons_parser.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend.hh" @@ -22,14 +26,16 @@ #include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/PriorFactor.h" #include "gtsam/slam/ProjectionFactor.h" -#include "opencv2/opencv.hpp" #include "visualization/opencv/opencv_viz.hh" -namespace detail { -// struct SymbolHasher { -// size_t operator()(const gtsam::Symbol &s) const { return std::hash()(s.key()); -// } -// }; +namespace std { +template <> +struct hash { + size_t operator()(const gtsam::Symbol &s) const { return hash()(s.key()); } +}; +} // namespace std + +namespace detail_sfm { bool attempt_triangulate(const std::vector &cam_poses, const std::vector &cam_obs, gtsam::Cal3_S2::shared_ptr K, @@ -56,7 +62,7 @@ bool attempt_triangulate(const std::vector &cam_poses, } catch (const gtsam::TriangulationCheiralityException &e) { // Handle the exception gracefully by logging and retaining the previous // estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() + std::cerr << "attamp_triangulation failed. Likely cheirality exception: " << e.what() << ". Keeping initial landmark estimate." << std::endl; } } else { @@ -102,7 +108,7 @@ static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const std::vector &symbols_landmarks) { std::cout << "iteration " << iter << " complete!"; std::string window_name = "Iteration_" + std::to_string(iter); - detail::graph_values(vals, window_name, symbols_pose, symbols_landmarks); + graph_values(vals, window_name, symbols_pose, symbols_landmarks); }; for (int i = 0; i < num_steps; i++) { @@ -148,14 +154,14 @@ static gtsam::Point3 averagePoints(const std::vector &points) { } static gtsam::Point3 get_variance(const std::vector &points) { - const gtsam::Point3 mean = detail::averagePoints(points); + const gtsam::Point3 mean = averagePoints(points); gtsam::Point3 var(0, 0, 0); for (const gtsam::Point3 &pt : points) { var += (mean - pt).array().square().matrix(); } return var / points.size(); } -}; // namespace detail +}; // namespace detail_sfm int main(int argc, const char **argv) { using namespace robot::experimental::learn_descriptors; @@ -190,14 +196,14 @@ int main(int argc, const char **argv) { FourSeasonsParser parser(path_data, path_calibration); std::cout << "incremental_sfm_mvp!" << std::endl; - const std::vector indices{100, 120, 140, 160, 180}; - // const std::vector indices = []() { - // std::vector tmp; - // for (int i = 0; i < 200; i += 10) { - // tmp.push_back(i); - // } - // return tmp; - // }(); + // const std::vector indices{650, 675, 140, 160, 180}; + const std::vector indices = []() { + std::vector tmp; + for (int i = 660; i < 700; i += 3) { + tmp.push_back(i); + } + return tmp; + }(); // FourSeasonsParser parser() // const size_t img_width = img_pt_first.width, img_height = // img_pt_first.height; @@ -231,117 +237,171 @@ int main(int argc, const char **argv) { // populate frames and cam_poses std::vector frames; FrameId id = 0; - std::unordered_map poses_cam_init; - std::vector cam_references; + std::unordered_map + id_to_initial_world_from_cam; // these are for initial guesses + std::vector references_world_from_cam; Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = parser.gnss_scale(); for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - cv::Mat img_undistorted; - cv::fisheye::undistort(img, img_undistorted, K_mat, D_mat); - Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); - - std::pair, cv::Mat> kpts_descs = - frontend.get_keypoints_and_descriptors(img_undistorted); - KeypointsCV kpts; - for (const cv::KeyPoint &kpt : kpts_descs.first) { - kpts.push_back(kpt.pt); - } - frame.add_keypoints(kpts); - frame.assign_descriptors(kpts_descs.second); - - frames.push_back(frame); - const ImagePoint img_pt = parser.get_image_point(idx); - + // Eigen::Isometry3d world_from_cam; if (id == 0) { // for now we will use an informative prior based on reference - world_from_cam = Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference * - img_pt.AS_w_from_gnss_cam->matrix())); + world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * + img_pt.AS_w_from_gnss_cam->matrix()) + .matrix()); } else if (img_pt.gps_gcs) { const Eigen::Vector3d gps_gcs( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs); + const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(), + p_gps_in_ECEF.z(), 1.0); world_from_cam.translation() = - (parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * p_gps_in_ECEF; + ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * + p_gps_in_ECEF_hom) + .head<3>(); - Eigen:: + // Eigen:: } - poses_cam_init.emplace(id, world_from_cam.matrix()); + id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix()); + + // populate frame + cv::Mat img_undistorted = parser.load_image(idx); + // cv::Mat img = parser.load_image(idx); + // cv::Mat img_undistorted; + // cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat); + + // std::stringstream ss; + // ss << "image " << idx; + // cv::imshow(ss.str(), img); + // cv::waitKey(0); + // cv::destroyWindow(ss.str()); + std::optional frame; if (img_pt.AS_w_from_gnss_cam) { - cam_references.push_back( - Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference * - img_pt.AS_w_from_gnss_cam->matrix()))); + const Eigen::Isometry3d world_from_cam_reference = + Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * + img_pt.AS_w_from_gnss_cam->matrix())); + references_world_from_cam.push_back(world_from_cam_reference); + frame.emplace(id, img_undistorted, K, gtsam::Pose3(world_from_cam_reference.matrix())); } else { + frame.emplace(id, img_undistorted, K); std::clog << "Warning: no reference data at img_pt " << idx << std::endl; } + std::pair, cv::Mat> kpts_descs = + frontend.get_keypoints_and_descriptors(img_undistorted); + KeypointsCV kpts; + for (const cv::KeyPoint &kpt : kpts_descs.first) { + kpts.push_back(kpt.pt); + } + std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl; + frame->add_keypoints(kpts); + frame->assign_descriptors(kpts_descs.second); + + frames.push_back(*frame); + id++; } - geometry::viz_scene(cam_references, std::vector(), true, true, "References"); + robot::geometry::viz_scene(references_world_from_cam, std::vector(), true, + true, "References"); // populate feature_tracks and lmk_id_map - // TODO: exhaustively get matches + // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute + // matches FeatureTracks feature_tracks; FrameLandmarkIdMap lmk_id_map; LandmarkId lmk_id = 0; - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - frontend.enforce_bijective_buffer_matches(matches); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.`queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - lmk_id++; + bool exhaustive = false; + if (exhaustive) { + for (size_t i = 0; i < indices.size() - 1; i++) { + for (size_t j = i + 1; i < indices.size(); j++) { + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors()); + // DIAL TO MESS WITH + frontend.enforce_bijective_buffer_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); + lmk_id++; + } + } + } + } + } else { // successive only + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_buffer_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); + lmk_id++; + } } } } // triangulate all of the points - - std::unordered_map lmk_triangulated_map; + std::unordered_map + lmk_triangulated_map; // points are p_kpt_in_world std::vector triangulated_lmks; for (const std::pair lmk_feat : feature_tracks) { - std::vector feat_cam_poses; + std::vector world_from_cams; std::vector feat_kpts; - LandmarkId lmk_id = lmk_feat.first; - FeatureTrack feature_track = lmk_feat.second; - gtsam::Point3 p_wrld_kpt(0, 0, 0); + const LandmarkId lmk_id = lmk_feat.first; + const FeatureTrack feature_track = lmk_feat.second; + gtsam::Point3 p_kpt_in_world(0, 0, 0); for (const std::pair &feat_track : feature_track.obs_) { - feat_cam_poses.push_back(poses_cam_init[feat_track.first]); + world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]); feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); } bool triangulate_success = - detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world); if (triangulate_success) { - lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt); - triangulated_lmks.push_back(p_wrld_kpt); + lmk_triangulated_map.emplace(lmk_id, p_kpt_in_world); + triangulated_lmks.push_back(p_kpt_in_world); } else { continue; } } - // filter points - geometry::viz_scene(std::vector(), triangulated_lmks, true, true, - "Unfiltered points"); - const gtsam::Point3 variance_pts = detail::get_variance(triangulated_lmks); + // robot::geometry::viz_scene(std::vector(), triangulated_lmks, true, true, + // "Unfiltered, triangulated points"); + std::cout << "heart beat 1" << std::endl; + + // filter points via variance + // TODO: filter based on quality and/or quantity of matches + std::cout << "triangulated_lmks.size(): " << triangulated_lmks.size() << std::endl; + const gtsam::Point3 variance_pts = detail_sfm::get_variance(triangulated_lmks); + std::cout << "heart beat 2" << std::endl; const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); - const gtsam::Point3 mean_pts = detail::averagePoints(triangulated_lmks); + const gtsam::Point3 mean_pts = detail_sfm::averagePoints(triangulated_lmks); std::unordered_map lmk_triangulated_map_filtered; std::vector filtered_points; std::cout << "original var " << variance_pts << std::endl; @@ -352,169 +412,191 @@ int main(int argc, const char **argv) { filtered_points.push_back(lmk_id_pt.second); } } - std::cout << "filtered variance " << detail::get_variance(filtered_points) << std::endl; - geometry::viz_scene(std::vector(), filtered_points, true, true, - "Unfiltered points"); + std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl; + // robot::geometry::viz_scene(std::vector(), filtered_points, true, true, + // "Unfiltered points"); + + // ############# BACKEND ############### // add filtered points to graph std::unordered_map> symbols_poses_values_iter; std::unordered_map> symbols_landmarks_values_iter; std::vector symbols_pose; std::vector symbols_landmarks; - for (const std::pair lmk_id_pt : lmk_triangulated_map_filtered) { - LandmarkId lmk_id = lmk_id_pt.first; - const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second; + for (const auto &[lmk_id, p_kpt_in_world] : lmk_triangulated_map_filtered) { + // LandmarkId lmk_id = lmk_id_pt.first; + // const gtsam::Point3 p_kpt_in_world = lmk_id_pt.second; FeatureTrack feature_track = feature_tracks.at(lmk_id); const gtsam::Symbol symbol_lmk('l', lmk_id); - for (const std::pair &feat_track : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); + for (const auto &[frame_id, obs] : feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world); symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{p_wrld_kpt}); + std::vector{p_kpt_in_world}); graph_.emplace_shared>( - gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, - gtsam::Symbol('x', feat_track.first), symbol_lmk, K); + gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id), + symbol_lmk, K); } } - graph_.emplace_shared>(gtsam::Symbol('x', 0), - poses_cam_init.at(0), prior_pose_noise); - initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), poses_cam_init.at(0)); + + // add gps factors + graph_.emplace_shared>( + gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise); + initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0)); symbols_pose.push_back(gtsam::Symbol('x', 0)); - symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0), - std::vector{poses_cam_init.at(0)}); - for (const std::pair cam_id_pose : poses_cam_init) { - if (cam_id_pose.first == 0) { + symbols_poses_values_iter.emplace( + gtsam::Symbol('x', 0), std::vector{id_to_initial_world_from_cam.at(0)}); + for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) { + if (frame_id == 0) { continue; } - const gtsam::Symbol key_cam('x', cam_id_pose.first); + const gtsam::Symbol key_cam('x', frame_id); symbols_pose.push_back(key_cam); - gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); - graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); - initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); - symbols_poses_values_iter.emplace(key_cam, std::vector{cam_id_pose.second}); - // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); + gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation(); + graph_.emplace_shared(key_cam, p_cam_in_world, gps_noise); + initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam); + symbols_poses_values_iter.emplace(key_cam, + std::vector{estimate_world_from_cam}); + // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world)); } - // detail::graph_values(initial_estimate_, "Confirmation", symbols_pose, + // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, // symbols_landmarks); - - // do local optimizations and add to iter cache - // const int window = 2; - std::cout << "length of cam_poses: " << poses_cam_init.size() << std::endl; - for (const auto &id_pose : poses_cam_init) { - std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl; - } - for (size_t i = 0; i < indices.size() - 1; i++) { - std::cout << "Local optimization " << i << std::endl; - gtsam::Values local_estimate_; - gtsam::NonlinearFactorGraph local_graph_; - - std::vector poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)}; - std::vector lmks; - - local_graph_.emplace_shared>( - gtsam::PriorFactor(poses[0], poses_cam_init.at(i), prior_pose_noise)); - std::cout << "fuck" << std::endl; - local_graph_.emplace_shared>( - gtsam::PriorFactor(poses[1], poses_cam_init.at(i + 1), prior_pose_noise)); - std::cout << "mega fuck" << std::endl; - local_estimate_.insert_or_assign(poses[0], poses_cam_init.at(i)); - local_estimate_.insert_or_assign(poses[1], poses_cam_init.at(i + 1)); - - std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - frontend.enforce_bijective_matches(matches); - std::vector feat_cam_poses{poses_cam_init.at(i), poses_cam_init.at(i + 1)}; - - std::vector viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()), - Eigen::Isometry3d(feat_cam_poses[1].matrix())}; - std::vector viz_lmks; - for (const cv::DMatch match : matches) { - std::vector feat_kpts; - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); - feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) { - continue; + std::cout << "heart beat 3" << std::endl; + + bool local_optimizations = true; + if (local_optimizations) { + // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number + // of matches + // do local optimizations and add to iter cache + // const int window = 2; + std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl; + for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) { + std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl; + } + for (size_t i = 0; i < indices.size() - 1; i++) { + std::cout << "Local optimization " << i << std::endl; + gtsam::Values local_estimate_; + gtsam::NonlinearFactorGraph local_graph_; + + std::vector symbols_poses{gtsam::Symbol('x', i), + gtsam::Symbol('x', i + 1)}; + std::vector symbols_lmks; + + local_graph_.emplace_shared>( + gtsam::PriorFactor( + symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise)); + std::cout << "fuck" << std::endl; + local_graph_.emplace_shared>( + gtsam::PriorFactor( + symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise)); + std::cout << "mega fuck" << std::endl; + local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i)); + local_estimate_.insert_or_assign(symbols_poses[1], + id_to_initial_world_from_cam.at(i + 1)); + + std::vector matches = + frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + // DIAL TO MESS WITH + frontend.enforce_bijective_matches(matches); + std::vector world_from_cams{id_to_initial_world_from_cam.at(i), + id_to_initial_world_from_cam.at(i + 1)}; + + std::vector viz_world_from_cams{ + Eigen::Isometry3d(world_from_cams[0].matrix()), + Eigen::Isometry3d(world_from_cams[1].matrix())}; + std::vector viz_lmks; + for (const cv::DMatch match : matches) { + std::vector feat_kpts; + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); + feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); + + const std::pair key_lmk_id = + std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) { + LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id); + if (lmk_triangulated_map_filtered.find(match_lmk_id) == + lmk_triangulated_map_filtered.end()) { + continue; + } + std::cout << "good" << std::endl; + // do nothing + // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + std::cerr << "ERROR: this shouldn't happen right?" << std::endl; + FeatureTrack feature_track(frames[i].id_, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(key_lmk_id, lmk_id); + lmk_id++; + } + std::cout << "oog" << std::endl; + const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id)); + // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != + // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { + // std::cerr << "UH OH" << std::endl; + // } else { + // std::cout << "cool" << std::endl; + // } + symbols_lmks.push_back(symbol_lmk); + local_graph_ + .emplace_shared>( + feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K); + local_graph_ + .emplace_shared>( + feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K); + + gtsam::Point3 p_kpt_in_world; + bool triangulate_success = + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world); + if (triangulate_success) { + local_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world); + viz_lmks.push_back(p_kpt_in_world); + } + if (symbols_landmarks_values_iter.find(symbol_lmk) != + symbols_landmarks_values_iter.end()) { + symbols_landmarks_values_iter[symbol_lmk].push_back(p_kpt_in_world); + } else { + symbols_landmarks_values_iter.emplace( + symbol_lmk, std::vector{p_kpt_in_world}); } - std::cout << "good" << std::endl; - // do nothing - // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - std::cerr << "this shouldn't happen right?" << std::endl; - FeatureTrack feature_track(frames[i].id_, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(key, lmk_id); - lmk_id++; } - std::cout << "oog" << std::endl; - const gtsam::Symbol symbol_lmk = - gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))); - // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != - // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { - // std::cerr << "UH OH" << std::endl; - // } else { - // std::cout << "cool" << std::endl; - // } - lmks.push_back(symbol_lmk); - local_graph_ - .emplace_shared>( - feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K); - local_graph_ - .emplace_shared>( - feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K); - - gtsam::Point3 p_wrld_kpt; - bool triangulate_success = - detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); - if (triangulate_success) { - local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - viz_lmks.push_back(p_wrld_kpt); + std::cout << "setup complete!" << std::endl; + // robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, true, true, + // "Local Optimization " + std::to_string(i)); + + const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( + local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); + + for (const gtsam::Symbol &symbol_pose : symbols_poses) { + const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); + symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); } - if (symbols_landmarks_values_iter.find(symbol_lmk) != - symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); - } else { - symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{p_wrld_kpt}); + for (const gtsam::Symbol &symbol_lmk : symbols_lmks) { + const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); + symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); } } - std::cout << "setup complete!" << std::endl; - // geometry::viz_scene(viz_poses, viz_lmks, true, true, - // "Local Optimization " + std::to_string(i)); - const gtsam::Values symbols_result_local = detail::optimize_graph( - local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); + std::cout << "\nLocal Optimizations Complete!\n" << std::endl; - for (const gtsam::Symbol &symbol_pose : poses) { - const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); - symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); + for (std::pair> sym_pose : + symbols_poses_values_iter) { + initial_estimate_.insert_or_assign(sym_pose.first, + detail_sfm::averagePoses(sym_pose.second)); } - for (const gtsam::Symbol &symbol_lmk : lmks) { - const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); - symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); + for (std::pair> sym_lmk : + symbols_landmarks_values_iter) { + initial_estimate_.insert_or_assign(sym_lmk.first, + detail_sfm::averagePoints(sym_lmk.second)); } } - std::cout << "\nLocal Optimizations Complete!\n" << std::endl; - - for (std::pair> sym_pose : symbols_poses_values_iter) { - initial_estimate_.insert_or_assign(sym_pose.first, detail::averagePoses(sym_pose.second)); - } - for (std::pair> sym_lmk : - symbols_landmarks_values_iter) { - initial_estimate_.insert_or_assign(sym_lmk.first, detail::averagePoints(sym_lmk.second)); - } - // do global optimization const gtsam::Values result = - detail::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); + detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); std::cout << "about to visualize result" << std::endl; - detail::graph_values(result, "Result", symbols_pose, symbols_landmarks); + detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); } \ No newline at end of file From ecbba4131619e36d1f2f775f6cda3e15b24ff88d Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 27 Jun 2025 16:21:56 -0400 Subject: [PATCH 24/45] pipeline working. unsure about quality of results. unoptimized point cloud seemed decent --- experimental/learn_descriptors/frontend.hh | 4 +- .../learn_descriptors/incremental_sfm_mvp.cc | 129 +++++++++++------- experimental/learn_descriptors/sfm_mvp.cc | 11 +- .../structure_from_motion.cc | 2 +- .../symphony_lake_parser_test.cc | 8 +- visualization/opencv/opencv_viz.cc | 63 ++++++++- visualization/opencv/opencv_viz.hh | 26 +++- visualization/opencv/opencv_viz_test.cc | 76 ++++++++--- 8 files changed, 226 insertions(+), 93 deletions(-) diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index 70baf55bd..2b8c7b9f7 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -26,13 +26,13 @@ class Frontend { static void enforce_bijective_matches(std::vector &matches); static void enforce_bijective_buffer_matches(std::vector &matches); static void draw_keypoints(const cv::Mat &img, std::vector keypoints, - cv::Mat img_keypoints_out) { + cv::Mat &img_keypoints_out) { cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); } static void draw_matches(const cv::Mat &img1, std::vector keypoints1, const cv::Mat &img2, std::vector keypoints2, - std::vector matches, cv::Mat img_matches_out) { + std::vector matches, cv::Mat &img_matches_out) { cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out); } diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index 649d30479..6c5ceb2a3 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -39,7 +39,7 @@ namespace detail_sfm { bool attempt_triangulate(const std::vector &cam_poses, const std::vector &cam_obs, gtsam::Cal3_S2::shared_ptr K, - gtsam::Point3 &out_p_world_landmark) { + gtsam::Point3 &out_p_world_landmark, const bool verbose = true) { bool valid = true; if (cam_poses.size() >= 2) { try { @@ -62,8 +62,9 @@ bool attempt_triangulate(const std::vector &cam_poses, } catch (const gtsam::TriangulationCheiralityException &e) { // Handle the exception gracefully by logging and retaining the previous // estimate. - std::cerr << "attamp_triangulation failed. Likely cheirality exception: " << e.what() - << ". Keeping initial landmark estimate." << std::endl; + if (verbose) + std::cerr << "attempt_triangulation failed. Likely cheirality exception: " + << e.what() << ". Keeping initial landmark estimate." << std::endl; } } else { valid = false; @@ -85,7 +86,8 @@ static void graph_values(const gtsam::Values &values, const std::string &window_ } final_lmks.emplace_back(values.at(symbol_lmk)); } - robot::geometry::viz_scene(final_poses, final_lmks, true, true, window_name); + robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, + window_name); } static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, @@ -196,14 +198,14 @@ int main(int argc, const char **argv) { FourSeasonsParser parser(path_data, path_calibration); std::cout << "incremental_sfm_mvp!" << std::endl; - // const std::vector indices{650, 675, 140, 160, 180}; - const std::vector indices = []() { - std::vector tmp; - for (int i = 660; i < 700; i += 3) { - tmp.push_back(i); - } - return tmp; - }(); + const std::vector indices{380, 387, 393}; + // const std::vector indices = []() { + // std::vector tmp; + // for (int i = 660; i < 681; i += 10) { + // tmp.push_back(i); + // } + // return tmp; + // }(); // FourSeasonsParser parser() // const size_t img_width = img_pt_first.width, img_height = // img_pt_first.height; @@ -238,13 +240,17 @@ int main(int argc, const char **argv) { std::vector frames; FrameId id = 0; std::unordered_map - id_to_initial_world_from_cam; // these are for initial guesses + id_to_initial_world_from_cam; // these are for initial guesses + Eigen::Vector3d t_first_cam_in_world; // to create our own world std::vector references_world_from_cam; + std::vector viz_references_world_from_cam; Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = parser.gnss_scale(); + std::vector> kpt_list; for (const int &idx : indices) { const ImagePoint img_pt = parser.get_image_point(idx); + std::cout << img_pt.to_string() << std::endl; // Eigen::Isometry3d world_from_cam; if (id == 0) { @@ -252,6 +258,7 @@ int main(int argc, const char **argv) { world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * img_pt.AS_w_from_gnss_cam->matrix()) .matrix()); + t_first_cam_in_world = world_from_cam.translation(); } else if (img_pt.gps_gcs) { const Eigen::Vector3d gps_gcs( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, @@ -266,6 +273,7 @@ int main(int argc, const char **argv) { // Eigen:: } + world_from_cam.translation() = world_from_cam.translation() - t_first_cam_in_world; id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix()); // populate frame @@ -276,15 +284,19 @@ int main(int argc, const char **argv) { // std::stringstream ss; // ss << "image " << idx; - // cv::imshow(ss.str(), img); + // cv::imshow(ss.str(), img_undistorted); // cv::waitKey(0); // cv::destroyWindow(ss.str()); std::optional frame; if (img_pt.AS_w_from_gnss_cam) { - const Eigen::Isometry3d world_from_cam_reference = + Eigen::Isometry3d world_from_cam_reference = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * img_pt.AS_w_from_gnss_cam->matrix())); + world_from_cam_reference.translation() = + world_from_cam_reference.translation() - t_first_cam_in_world; + viz_references_world_from_cam.emplace_back(world_from_cam_reference, + "img " + std::to_string(idx)); references_world_from_cam.push_back(world_from_cam_reference); frame.emplace(id, img_undistorted, K, gtsam::Pose3(world_from_cam_reference.matrix())); } else { @@ -294,11 +306,12 @@ int main(int argc, const char **argv) { std::pair, cv::Mat> kpts_descs = frontend.get_keypoints_and_descriptors(img_undistorted); + kpt_list.push_back(kpts_descs.first); KeypointsCV kpts; for (const cv::KeyPoint &kpt : kpts_descs.first) { kpts.push_back(kpt.pt); } - std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl; + // std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl; frame->add_keypoints(kpts); frame->assign_descriptors(kpts_descs.second); @@ -307,8 +320,9 @@ int main(int argc, const char **argv) { id++; } - robot::geometry::viz_scene(references_world_from_cam, std::vector(), true, - true, "References"); + // robot::geometry::viz_scene(viz_references_world_from_cam, + // std::vector(), true, true, + // "References"); // populate feature_tracks and lmk_id_map // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute @@ -316,14 +330,26 @@ int main(int argc, const char **argv) { FeatureTracks feature_tracks; FrameLandmarkIdMap lmk_id_map; LandmarkId lmk_id = 0; - bool exhaustive = false; + bool exhaustive = true; if (exhaustive) { for (size_t i = 0; i < indices.size() - 1; i++) { - for (size_t j = i + 1; i < indices.size(); j++) { + std::cout << "i: " << i << std::endl; + for (size_t j = i + 1; j < indices.size(); j++) { + std::cout << "j: " << j << std::endl; std::vector matches = frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors()); // DIAL TO MESS WITH frontend.enforce_bijective_buffer_matches(matches); + // std::cout << "matching heartbeat" << std::endl; + // cv::Mat viz_matches; + // Frontend::draw_matches(parser.load_image(i), kpt_list[i], parser.load_image(j), + // kpt_list[j], matches, viz_matches); + // std::cout << "image height: " << viz_matches.rows << " width: " << + // viz_matches.cols + // << std::endl; + // cv::imshow("viz_matches", viz_matches); + // cv::waitKey(0); + std::cout << "still alive with: " << matches.size() << " matches" << std::endl; for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx]; @@ -343,6 +369,7 @@ int main(int argc, const char **argv) { } } } + std::cout << "done processing matches" << std::endl; } else { // successive only for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = @@ -367,33 +394,36 @@ int main(int argc, const char **argv) { } } } + std::cout << "heartbeat" << std::endl; // triangulate all of the points - std::unordered_map - lmk_triangulated_map; // points are p_kpt_in_world + std::unordered_map lmk_triangulated_map; // points are kpt_in_world + std::vector triangulated_lmks_viz; std::vector triangulated_lmks; for (const std::pair lmk_feat : feature_tracks) { std::vector world_from_cams; std::vector feat_kpts; const LandmarkId lmk_id = lmk_feat.first; const FeatureTrack feature_track = lmk_feat.second; - gtsam::Point3 p_kpt_in_world(0, 0, 0); + gtsam::Point3 kpt_in_world(0, 0, 0); for (const std::pair &feat_track : feature_track.obs_) { world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]); feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); } bool triangulate_success = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world); + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false); if (triangulate_success) { - lmk_triangulated_map.emplace(lmk_id, p_kpt_in_world); - triangulated_lmks.push_back(p_kpt_in_world); + lmk_triangulated_map.emplace(lmk_id, kpt_in_world); + triangulated_lmks.push_back(kpt_in_world); + triangulated_lmks_viz.emplace_back(kpt_in_world, "lmk " + std::to_string(lmk_id)); } else { continue; } } - // robot::geometry::viz_scene(std::vector(), triangulated_lmks, true, true, - // "Unfiltered, triangulated points"); - std::cout << "heart beat 1" << std::endl; + std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl; + robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, + cv::viz::Color::black(), true, true, + "Unfiltered, triangulated points"); // filter points via variance // TODO: filter based on quality and/or quantity of matches @@ -413,8 +443,9 @@ int main(int argc, const char **argv) { } } std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl; - // robot::geometry::viz_scene(std::vector(), filtered_points, true, true, - // "Unfiltered points"); + robot::geometry::viz_scene(std::vector(), filtered_points, + cv::viz::Color::black(), true, true, + "Filtered, triangulated points"); // ############# BACKEND ############### @@ -423,15 +454,15 @@ int main(int argc, const char **argv) { std::unordered_map> symbols_landmarks_values_iter; std::vector symbols_pose; std::vector symbols_landmarks; - for (const auto &[lmk_id, p_kpt_in_world] : lmk_triangulated_map_filtered) { + for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) { // LandmarkId lmk_id = lmk_id_pt.first; - // const gtsam::Point3 p_kpt_in_world = lmk_id_pt.second; + // const gtsam::Point3 kpt_in_world = lmk_id_pt.second; FeatureTrack feature_track = feature_tracks.at(lmk_id); const gtsam::Symbol symbol_lmk('l', lmk_id); for (const auto &[frame_id, obs] : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world); + initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{p_kpt_in_world}); + std::vector{kpt_in_world}); graph_.emplace_shared>( gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id), symbol_lmk, K); @@ -463,7 +494,7 @@ int main(int argc, const char **argv) { // symbols_landmarks); std::cout << "heart beat 3" << std::endl; - bool local_optimizations = true; + bool local_optimizations = false; if (local_optimizations) { // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number // of matches @@ -501,10 +532,10 @@ int main(int argc, const char **argv) { std::vector world_from_cams{id_to_initial_world_from_cam.at(i), id_to_initial_world_from_cam.at(i + 1)}; - std::vector viz_world_from_cams{ - Eigen::Isometry3d(world_from_cams[0].matrix()), - Eigen::Isometry3d(world_from_cams[1].matrix())}; - std::vector viz_lmks; + std::vector viz_world_from_cams{ + {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"}, + {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}}; + std::vector viz_lmks; for (const cv::DMatch match : matches) { std::vector feat_kpts; const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; @@ -548,24 +579,24 @@ int main(int argc, const char **argv) { .emplace_shared>( feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K); - gtsam::Point3 p_kpt_in_world; + gtsam::Point3 kpt_in_world; bool triangulate_success = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world); + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world); if (triangulate_success) { - local_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world); - viz_lmks.push_back(p_kpt_in_world); + local_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); + viz_lmks.emplace_back(kpt_in_world); } if (symbols_landmarks_values_iter.find(symbol_lmk) != symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(p_kpt_in_world); + symbols_landmarks_values_iter[symbol_lmk].push_back(kpt_in_world); } else { - symbols_landmarks_values_iter.emplace( - symbol_lmk, std::vector{p_kpt_in_world}); + symbols_landmarks_values_iter.emplace(symbol_lmk, + std::vector{kpt_in_world}); } } std::cout << "setup complete!" << std::endl; - // robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, true, true, - // "Local Optimization " + std::to_string(i)); + robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::black(), true, + true, "Local Optimization " + std::to_string(i)); const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index 597941c7e..21966cf4a 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -91,7 +91,8 @@ class SFMMvpHelper { } final_lmks.emplace_back(values.at(symbol_lmk)); } - geometry::viz_scene(final_poses, final_lmks, true, true, window_name); + geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, + window_name); } static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, @@ -497,8 +498,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) { } } // filter points - geometry::viz_scene(std::vector(), triangulated_lmks, true, true, - "Unfiltered points"); + geometry::viz_scene(std::vector(), triangulated_lmks, + cv::viz::Color::black(), true, true, "Unfiltered points"); const gtsam::Point3 variance_pts = SFMMvpHelper::get_variance(triangulated_lmks); const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); const gtsam::Point3 mean_pts = SFMMvpHelper::averagePoints(triangulated_lmks); @@ -513,8 +514,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) { } } std::cout << "filtered variance " << SFMMvpHelper::get_variance(filtered_points) << std::endl; - geometry::viz_scene(std::vector(), filtered_points, true, true, - "Unfiltered points"); + geometry::viz_scene(std::vector(), filtered_points, cv::viz::Color::black(), + true, true, "Unfiltered points"); // add filtered points to graph std::unordered_map> symbols_poses_values_iter; diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index db5cedd86..02dc28a2d 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -150,7 +150,7 @@ void StructureFromMotion::graph_values(const gtsam::Values &values, // } // final_lmks.emplace_back(values.at(lmk_symbol)); // } - geometry::viz_scene(final_poses, final_lmks, true, true, window_name); + geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, window_name); } void StructureFromMotion::solve_structure( diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc index 91a275b6f..a8c309c4e 100644 --- a/experimental/learn_descriptors/symphony_lake_parser_test.cc +++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc @@ -89,8 +89,8 @@ TEST(SymphonyLakeParserTest, test_cam_frames) { cam_frames.push_back(T_world_camidx); } - // geometry::viz_scene(cam_frames, std::vector(), true, true, - // "test_cam_frames"); + // geometry::viz_scene(cam_frames, std::vector(), cv::viz::Color::black(), + // true, true, "test_cam_frames"); } TEST(SymphonyLakeParserTest, test_gps_frames) { @@ -121,7 +121,7 @@ TEST(SymphonyLakeParserTest, test_gps_frames) { gps_frames.push_back(T_world_gpsidx); } - // geometry::viz_scene(gps_frames, std::vector(), true, true, - // "test_gps_frames"); + // geometry::viz_scene(gps_frames, std::vector(), cv::viz::Color::black(), + // true, true, "test_gps_frames"); } } // namespace robot::experimental::learn_descriptors diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc index 1e1bc914b..9af1f14b9 100644 --- a/visualization/opencv/opencv_viz.cc +++ b/visualization/opencv/opencv_viz.cc @@ -24,25 +24,68 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { return axis * theta; } -void viz_scene(const std::vector &poses_world, - const std::vector &points_world, const bool show_grid, - const bool show_origin, const std::string &window_name) { +void viz_scene(const std::vector &world_from_poses, + const std::vector &points_in_world, + const cv::viz ::Color color_background, const bool show_grid, const bool show_origin, + const std::string &window_name, const double text_scale) { + std::vector viz_poses; + std::vector viz_points; + for (const Eigen::Isometry3d &world_from_pose : world_from_poses) { + viz_poses.emplace_back(world_from_pose); + } + for (const Eigen::Vector3d &t_point_in_world : points_in_world) { + viz_points.emplace_back(t_point_in_world); + } + viz_scene(viz_poses, viz_points, color_background, show_grid, show_origin, window_name, + text_scale); +} + +void viz_scene(const std::vector &world_from_poses, + const std::vector &points_in_world, const cv::viz ::Color color_background, + const bool show_grid, const bool show_origin, const std::string &window_name, + const double text_scale) { cv::viz::Viz3d window(window_name); + window.setBackgroundColor(color_background); + + constexpr bool ALWAYS_FACE_CAMERA = true; constexpr double pose_size = .5; - for (unsigned int i = 0; i < poses_world.size(); i++) { - cv::Affine3d cv_pose(eigen_mat_to_cv(Eigen::Matrix4d(poses_world[i].matrix()))); + for (unsigned int i = 0; i < world_from_poses.size(); i++) { + cv::Affine3d cv_pose( + eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[i].world_from_pose.matrix()))); window.showWidget("rigid_transform_" + std::to_string(i), cv::viz::WCoordinateSystem(pose_size), cv_pose); + if (world_from_poses[i].label) { + window.showWidget( + "text_pose_" + std::to_string(i), + cv::viz::WText3D( + *(world_from_poses[i].label), + cv::Point3d(eigen_vec_to_cv( + world_from_poses[i].world_from_pose.translation() + + Eigen::Vector3d(0, 0, + 0.001))), // small offset is for occasional rendering bug + text_scale, ALWAYS_FACE_CAMERA)); + } } constexpr double point_radius = 0.08; constexpr int sphere_res = 10; const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); - for (unsigned int i = 0; i < points_world.size(); i++) { - const Eigen::Vector3d &point = points_world[i]; + for (unsigned int i = 0; i < points_in_world.size(); i++) { + const Eigen::Vector3d &point = points_in_world[i].t_point_in_world; window.showWidget("point_" + std::to_string(i), cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius, sphere_res, point_color)); + if (points_in_world[i].label) { + window.showWidget( + "text_point_" + std::to_string(i), + cv::viz::WText3D( + *(points_in_world[i].label), + cv::Point3d(eigen_vec_to_cv( + points_in_world[i].t_point_in_world + + Eigen::Vector3d(0, 0, + 0.001))), // small offset is for occasional rendering bug + text_scale, ALWAYS_FACE_CAMERA)); + } } if (show_origin) { @@ -64,6 +107,12 @@ void viz_scene(const std::vector &poses_world, cv::Point3d(0, 0, 1), cells, cell_sizes, color)); } + std::cout << "viz heartbeat" << std::endl; + // window.setViewerPose(cv::Affine3d( + // eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[0].world_from_pose.matrix())))); + window.setViewerPose(cv::Affine3d::Identity()); + std::cout << "success" << std::endl; + window.spin(); } } // namespace robot::geometry diff --git a/visualization/opencv/opencv_viz.hh b/visualization/opencv/opencv_viz.hh index d6474f58f..28dbc197b 100644 --- a/visualization/opencv/opencv_viz.hh +++ b/visualization/opencv/opencv_viz.hh @@ -1,12 +1,30 @@ #pragma once +#include #include #include #include "Eigen/Core" #include "Eigen/Geometry" +#include "opencv2/viz.hpp" namespace robot::geometry { -void viz_scene(const std::vector &poses_world, - const std::vector &points_world, const bool show_grid = true, - const bool show_origin = true, const std::string &window_name = "Viz Window"); -} \ No newline at end of file +struct VizPose { + const Eigen::Isometry3d world_from_pose; + const std::optional label; +}; + +struct VizPoint { + const Eigen::Vector3d t_point_in_world; + const std::optional label; +}; +void viz_scene(const std::vector &world_from_poses, + const std::vector &points_in_world, + const cv::viz::Color color_background = cv::viz::Color::black(), + const bool show_grid = true, const bool show_origin = true, + const std::string &window_name = "Viz Window", const double text_scale = 0.1); +void viz_scene(const std::vector &world_from_poses, + const std::vector &points_in_world, + const cv::viz::Color color_background = cv::viz::Color::black(), + const bool show_grid = true, const bool show_origin = true, + const std::string &window_name = "Viz Window", const double text_scale = 0.1); +} // namespace robot::geometry \ No newline at end of file diff --git a/visualization/opencv/opencv_viz_test.cc b/visualization/opencv/opencv_viz_test.cc index 8d10decb3..502f8e4f4 100644 --- a/visualization/opencv/opencv_viz_test.cc +++ b/visualization/opencv/opencv_viz_test.cc @@ -26,23 +26,23 @@ TEST(OpencvVizTest, demo) { TEXT_SCALE, ALWAYS_FACE_CAMERA)); constexpr bool FIXED_TEXT = false; - cv::Affine3d world_to_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation - {0.2, 0.4, 0.6} // translation + cv::Affine3d world_from_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation + {0.2, 0.4, 0.6} // translation ); window.showWidget( "text_3d_fixed", cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT), - world_to_fixed_text); + world_from_fixed_text); constexpr double COORD_SCALE = 0.2; window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), - world_to_fixed_text); + world_from_fixed_text); constexpr double CIRCLE_RADIUS_M = 0.5; - cv::Affine3d world_to_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation - {0.0, 0.0, 1.5} // translation + cv::Affine3d world_from_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation + {0.0, 0.0, 1.5} // translation ); - window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_to_circle); + window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_from_circle); window.spin(); } @@ -59,22 +59,56 @@ TEST(OpencvVizTest, cube_test) { cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size)); cube_W.push_back(Eigen::Vector3d(0, cube_size, cube_size)); - std::vector poses; + std::vector world_from_cams; - Eigen::Matrix3d R_W_cam0( + Eigen::Matrix3d R_world_from_cam0( Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - Eigen::Isometry3d T_W_cam0; - T_W_cam0.translation() = Eigen::Vector3d(4, 0, 0); - T_W_cam0.linear() = R_W_cam0; - poses.push_back(T_W_cam0); - - Eigen::Isometry3d T_W_cam1; - T_W_cam1.linear() = - Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * R_W_cam0; - T_W_cam1.translation() = Eigen::Vector3d(0, 4, 0); - poses.push_back(T_W_cam1); - - viz_scene(poses, cube_W); + Eigen::Isometry3d world_from_cam0; + world_from_cam0.translation() = Eigen::Vector3d(4, 0, 0); + world_from_cam0.linear() = R_world_from_cam0; + world_from_cams.push_back(world_from_cam0); + + Eigen::Isometry3d world_from_cam1; + world_from_cam1.linear() = + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + R_world_from_cam0; + world_from_cam1.translation() = Eigen::Vector3d(0, 4, 0); + world_from_cams.push_back(world_from_cam1); + + viz_scene(world_from_cams, cube_W); +} + +TEST(OpencvVizTest, cube_test_labeled) { + std::vector cube_points_in_world; + float cube_size = 1.0f; + cube_points_in_world.emplace_back(Eigen::Vector3d(0, 0, 0), "cube_point_1"); + cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, 0, 0), "cube_point_2"); + cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, cube_size, 0), "cube_point_3"); + cube_points_in_world.emplace_back(Eigen::Vector3d(0, cube_size, 0), "cube_point_4"); + cube_points_in_world.emplace_back(Eigen::Vector3d(0, 0, cube_size), "cube_point_5"); + cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, 0, cube_size), "cube_point_6"); + cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, cube_size, cube_size), + "cube_point_7"); + cube_points_in_world.emplace_back(Eigen::Vector3d(0, cube_size, cube_size), "cube_point_8"); + + std::vector world_from_cams; + + Eigen::Matrix3d R_world_from_cam0( + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); + Eigen::Isometry3d world_from_cam0; + world_from_cam0.translation() = Eigen::Vector3d(4, 0, 0); + world_from_cam0.linear() = R_world_from_cam0; + world_from_cams.emplace_back(world_from_cam0, "world_from_cam0"); + + Eigen::Isometry3d world_from_cam1; + world_from_cam1.linear() = + Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * + R_world_from_cam0; + world_from_cam1.translation() = Eigen::Vector3d(0, 4, 0); + world_from_cams.emplace_back(world_from_cam1, "world_from_cam1"); + + viz_scene(world_from_cams, cube_points_in_world); } } // namespace robot::geometry \ No newline at end of file From bc8da627ab471871361a96def01da1f4991c547c Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Mon, 30 Jun 2025 17:02:59 -0400 Subject: [PATCH 25/45] WIP, sfm works with a few imgaes --- common/geometry/BUILD | 10 + common/geometry/camera.cc | 7 +- common/geometry/camera.hh | 6 +- common/geometry/camera_test.cc | 2 +- common/geometry/trajectory_evaluation.hh | 17 ++ experimental/learn_descriptors/BUILD | 7 +- .../learn_descriptors/incremental_sfm_mvp.cc | 204 ++++++++++++------ experimental/learn_descriptors/map.hh | 4 + experimental/learn_descriptors/map.proto | 15 ++ ....cc => spatial_test_scene_cube_example.cc} | 2 +- visualization/opencv/opencv_viz.cc | 7 +- 11 files changed, 204 insertions(+), 77 deletions(-) create mode 100644 common/geometry/trajectory_evaluation.hh create mode 100644 experimental/learn_descriptors/map.hh create mode 100644 experimental/learn_descriptors/map.proto rename experimental/learn_descriptors/{spatial_test_scene_cube_test.cc => spatial_test_scene_cube_example.cc} (94%) diff --git a/common/geometry/BUILD b/common/geometry/BUILD index 606304189..2ef17fb83 100644 --- a/common/geometry/BUILD +++ b/common/geometry/BUILD @@ -64,4 +64,14 @@ cc_test( "//visualization/opencv:opencv_viz", ":translate_types" ] +) + +cc_library( + name = "trajectory_evaluation", + hdrs = ["trajectory_evaluation.hh"], + srcs = ["trajectory_evaluation.cc"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + ] ) \ No newline at end of file diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc index 4994914c9..7e5a5fb3a 100644 --- a/common/geometry/camera.cc +++ b/common/geometry/camera.cc @@ -24,9 +24,10 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.); } -Eigen::Isometry3d estimate_c0_from_cam1(const std::vector &kpts0, - const std::vector &kpts1, - const std::vector &matches, const cv::Mat &K) { +Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, + const cv::Mat &K) { Eigen::Isometry3d result; std::vector pts1; std::vector pts2; diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh index 2feea1954..9164e4129 100644 --- a/common/geometry/camera.hh +++ b/common/geometry/camera.hh @@ -21,7 +21,7 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, const double depth); -Eigen::Isometry3d estimate_c0_from_cam1(const std::vector &kpts0, - const std::vector &kpts1, - const std::vector &matches, const cv::Mat &K); +Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, const cv::Mat &K); } // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc index ca2594abf..19f5389cd 100644 --- a/common/geometry/camera_test.cc +++ b/common/geometry/camera_test.cc @@ -117,7 +117,7 @@ TEST(CameraTest, test_estimate_pose) { } } - Eigen::Isometry3d cam0_from_cam1_estimate = estimate_c0_from_cam1(kpts0, kpts1, matches, K); + Eigen::Isometry3d cam0_from_cam1_estimate = estimate_cam0_from_cam1(kpts0, kpts1, matches, K); Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1; cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm(); EXPECT_TRUE( diff --git a/common/geometry/trajectory_evaluation.hh b/common/geometry/trajectory_evaluation.hh new file mode 100644 index 000000000..c3832359a --- /dev/null +++ b/common/geometry/trajectory_evaluation.hh @@ -0,0 +1,17 @@ +#pragma once +#include +#include + +#include "Eigen/Core" + +namespace robot::geometry { +const std::vector get_absolute_trajectory_error(const std::vector &traj1, + const std::vector &traj2) { + assert(traj1.size() == traj2.size()); + std::vector result; + for (size_t i = 0; i < traj2.size(); i++) { + result.push_back((traj2[i] - traj1[i]).norm()); + } + return result; +} +} // namespace robot::geometry \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 0c22053f8..bc7e84bd5 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -264,9 +264,9 @@ cc_library( ] ) -cc_test( - name = "spatial_test_scene_cube_test", - srcs = ["spatial_test_scene_cube_test.cc"], +cc_binary( + name = "spatial_test_scene_cube_example", + srcs = ["spatial_test_scene_cube_example.cc"], deps = [ "@com_google_googletest//:gtest_main", ":spatial_test_scene_cube", @@ -362,5 +362,6 @@ cc_binary( ":frontend", "@boost//:smart_ptr", "@cxxopts", + "//common/geometry:camera" ] ) diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index 6c5ceb2a3..0d8da7623 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -5,6 +5,7 @@ #include "Eigen/Core" #include "Eigen/Geometry" +#include "common/geometry/camera.hh" #include "cxxopts.hpp" #include "experimental/learn_descriptors/four_seasons_parser.hh" #include "experimental/learn_descriptors/frame.hh" @@ -37,17 +38,18 @@ struct hash { namespace detail_sfm { -bool attempt_triangulate(const std::vector &cam_poses, - const std::vector &cam_obs, gtsam::Cal3_S2::shared_ptr K, - gtsam::Point3 &out_p_world_landmark, const bool verbose = true) { - bool valid = true; +std::optional attempt_triangulate(const std::vector &cam_poses, + const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, + const bool verbose = true) { + std::optional result; if (cam_poses.size() >= 2) { try { // Attempt triangulation using DLT (or the GTSAM provided method) gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - out_p_world_landmark = p_world_lmk; + result = p_world_lmk; // Optional: perform an explicit cheirality check for (const auto &pose : cam_poses) { @@ -55,7 +57,6 @@ bool attempt_triangulate(const std::vector &cam_poses, // transformTo() converts a world point to the camera frame. gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); if (p_cam_lmk.z() <= 0) { // Check that the depth is positive - valid = false; break; } } @@ -66,35 +67,69 @@ bool attempt_triangulate(const std::vector &cam_poses, std::cerr << "attempt_triangulation failed. Likely cheirality exception: " << e.what() << ". Keeping initial landmark estimate." << std::endl; } - } else { - valid = false; } - return valid; + return result; } -static void graph_values(const gtsam::Values &values, const std::string &window_name, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; +// bool attempt_triangulate(const std::vector &cam_poses, +// const std::vector &cam_obs, gtsam::Cal3_S2::shared_ptr K, +// gtsam::Point3 &out_p_world_landmark, const bool verbose = true) { +// bool valid = true; +// if (cam_poses.size() >= 2) { +// try { +// // Attempt triangulation using DLT (or the GTSAM provided method) +// gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( +// cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + +// out_p_world_landmark = p_world_lmk; + +// // Optional: perform an explicit cheirality check +// for (const auto &pose : cam_poses) { +// // Transform point to the camera coordinate system. +// // transformTo() converts a world point to the camera frame. +// gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); +// if (p_cam_lmk.z() <= 0) { // Check that the depth is positive +// valid = false; +// break; +// } +// } +// } catch (const gtsam::TriangulationCheiralityException &e) { +// // Handle the exception gracefully by logging and retaining the previous +// // estimate. +// if (verbose) +// std::cerr << "attempt_triangulation failed. Likely cheirality exception: " +// << e.what() << ". Keeping initial landmark estimate." << std::endl; +// } +// } else { +// valid = false; +// } +// return valid; +// } + +void graph_values(const gtsam::Values &values, const std::string &window_name, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::vector final_poses; + std::vector final_lmks; for (const gtsam::Symbol &symbol_pose : symbols_pose) { - final_poses.emplace_back(values.at(symbol_pose).matrix()); + final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), + symbol_pose.string()); } for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { if (!values.exists(symbol_lmk)) { std::cout << "WTF " << symbol_lmk << std::endl; } - final_lmks.emplace_back(values.at(symbol_lmk)); + final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); } - robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, + std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; + robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, window_name); } -static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, - const gtsam::Values &values, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks, - bool viz_itr = false) { +gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks, + bool viz_itr = false) { gtsam::LevenbergMarquardtParams params; params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. params.maxIterations = 1; @@ -128,7 +163,7 @@ static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, return optimizer.values(); } -static gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { +gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { if (poses.empty()) throw std::runtime_error("Empty pose vector"); gtsam::Pose3 mean = poses[0]; @@ -148,14 +183,14 @@ static gtsam::Pose3 averagePoses(const std::vector &poses, int max return mean; } -static gtsam::Point3 averagePoints(const std::vector &points) { +gtsam::Point3 averagePoints(const std::vector &points) { if (points.empty()) throw std::runtime_error("Empty point vector"); gtsam::Point3 sum(0, 0, 0); for (const auto &pt : points) sum += pt; return sum / points.size(); } -static gtsam::Point3 get_variance(const std::vector &points) { +gtsam::Point3 get_variance(const std::vector &points) { const gtsam::Point3 mean = averagePoints(points); gtsam::Point3 var(0, 0, 0); for (const gtsam::Point3 &pt : points) { @@ -163,6 +198,8 @@ static gtsam::Point3 get_variance(const std::vector &points) { } return var / points.size(); } + +// const std::vector get_absolute_trajectory_error(const std::vector &) {} }; // namespace detail_sfm int main(int argc, const char **argv) { @@ -198,8 +235,10 @@ int main(int argc, const char **argv) { FourSeasonsParser parser(path_data, path_calibration); std::cout << "incremental_sfm_mvp!" << std::endl; - const std::vector indices{380, 387, 393}; - // const std::vector indices = []() { + const std::vector indices{581, 609, + 633}; // indices with all data fields on neighborhood_5_train + // const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with + // all data fields on neighborhood_5_train const std::vector indices = []() { // std::vector tmp; // for (int i = 660; i < 681; i += 10) { // tmp.push_back(i); @@ -240,7 +279,7 @@ int main(int argc, const char **argv) { std::vector frames; FrameId id = 0; std::unordered_map - id_to_initial_world_from_cam; // these are for initial guesses + id_to_initial_world_from_cam; // these are for initial guesses!! Eigen::Vector3d t_first_cam_in_world; // to create our own world std::vector references_world_from_cam; std::vector viz_references_world_from_cam; @@ -266,10 +305,14 @@ int main(int argc, const char **argv) { const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs); const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(), p_gps_in_ECEF.z(), 1.0); - world_from_cam.translation() = - ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * - p_gps_in_ECEF_hom) - .head<3>(); + world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * + img_pt.AS_w_from_gnss_cam->matrix()) + .matrix()); + std::cout << "ok" << std::endl; + // world_from_cam.translation() = + // ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * + // p_gps_in_ECEF_hom) + // .head<3>(); // Eigen:: } @@ -320,9 +363,9 @@ int main(int argc, const char **argv) { id++; } - // robot::geometry::viz_scene(viz_references_world_from_cam, - // std::vector(), true, true, - // "References"); + robot::geometry::viz_scene(viz_references_world_from_cam, + std::vector(), cv::viz::Color::brown(), + true, true, "References"); // populate feature_tracks and lmk_id_map // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute @@ -330,7 +373,7 @@ int main(int argc, const char **argv) { FeatureTracks feature_tracks; FrameLandmarkIdMap lmk_id_map; LandmarkId lmk_id = 0; - bool exhaustive = true; + constexpr bool exhaustive = true; if (exhaustive) { for (size_t i = 0; i < indices.size() - 1; i++) { std::cout << "i: " << i << std::endl; @@ -340,6 +383,7 @@ int main(int argc, const char **argv) { frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors()); // DIAL TO MESS WITH frontend.enforce_bijective_buffer_matches(matches); + // std::cout << "matching heartbeat" << std::endl; // cv::Mat viz_matches; // Frontend::draw_matches(parser.load_image(i), kpt_list[i], parser.load_image(j), @@ -349,6 +393,24 @@ int main(int argc, const char **argv) { // << std::endl; // cv::imshow("viz_matches", viz_matches); // cv::waitKey(0); + std::vector cv_kpts_1; + std::vector cv_kpts_2; + for (const KeypointCV &kpt : frames[i].get_keypoints()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_1.push_back(cv_kpt); + } + for (const KeypointCV &kpt : frames[j].get_keypoints()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_2.push_back(cv_kpt); + } + Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix()); + world_from_camj.linear() = + (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) * + robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat)) + .linear(); + id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); std::cout << "still alive with: " << matches.size() << " matches" << std::endl; for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; @@ -394,9 +456,17 @@ int main(int argc, const char **argv) { } } } + std::vector viz_world_from_cam_init; + for (const auto &[id, world_from_cam] : id_to_initial_world_from_cam) { + viz_world_from_cam_init.emplace_back(Eigen::Isometry3d(world_from_cam.matrix()), + std::to_string(id)); + } + + robot::geometry::viz_scene(viz_world_from_cam_init, std::vector(), + cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); std::cout << "heartbeat" << std::endl; - // triangulate all of the points + // TRIANGULATE all of the points (for initial guess) std::unordered_map lmk_triangulated_map; // points are kpt_in_world std::vector triangulated_lmks_viz; std::vector triangulated_lmks; @@ -405,24 +475,28 @@ int main(int argc, const char **argv) { std::vector feat_kpts; const LandmarkId lmk_id = lmk_feat.first; const FeatureTrack feature_track = lmk_feat.second; - gtsam::Point3 kpt_in_world(0, 0, 0); + // gtsam::Point3 kpt_in_world(0, 0, 0); for (const std::pair &feat_track : feature_track.obs_) { world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]); feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); } - bool triangulate_success = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false); - if (triangulate_success) { - lmk_triangulated_map.emplace(lmk_id, kpt_in_world); - triangulated_lmks.push_back(kpt_in_world); - triangulated_lmks_viz.emplace_back(kpt_in_world, "lmk " + std::to_string(lmk_id)); + std::optional kpt_in_world = + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, false); + // bool triangulate_success = + // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false); + if (kpt_in_world) { + std::cout << "triangulation success: " << *kpt_in_world << std::endl; + lmk_triangulated_map.emplace(lmk_id, *kpt_in_world); + triangulated_lmks.push_back(*kpt_in_world); + triangulated_lmks_viz.emplace_back(*kpt_in_world, "lmk " + std::to_string(lmk_id)); } else { + std::cout << "triangulation not success, leaving it out." << std::endl; continue; } } std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl; robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, - cv::viz::Color::black(), true, true, + cv::viz::Color::brown(), true, true, "Unfiltered, triangulated points"); // filter points via variance @@ -443,8 +517,9 @@ int main(int argc, const char **argv) { } } std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl; + std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl; robot::geometry::viz_scene(std::vector(), filtered_points, - cv::viz::Color::black(), true, true, + cv::viz::Color::brown(), true, true, "Filtered, triangulated points"); // ############# BACKEND ############### @@ -461,6 +536,7 @@ int main(int argc, const char **argv) { const gtsam::Symbol symbol_lmk('l', lmk_id); for (const auto &[frame_id, obs] : feature_track.obs_) { initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); + symbols_landmarks.push_back(symbol_lmk); symbols_landmarks_values_iter.emplace(symbol_lmk, std::vector{kpt_in_world}); graph_.emplace_shared>( @@ -490,11 +566,10 @@ int main(int argc, const char **argv) { // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world)); } - // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, - // symbols_landmarks); + detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); std::cout << "heart beat 3" << std::endl; - bool local_optimizations = false; + constexpr bool local_optimizations = false; if (local_optimizations) { // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number // of matches @@ -579,23 +654,25 @@ int main(int argc, const char **argv) { .emplace_shared>( feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K); - gtsam::Point3 kpt_in_world; - bool triangulate_success = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world); - if (triangulate_success) { - local_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); - viz_lmks.emplace_back(kpt_in_world); - } - if (symbols_landmarks_values_iter.find(symbol_lmk) != - symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(kpt_in_world); - } else { - symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{kpt_in_world}); + std::optional kpt_in_world = + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K); + // gtsam::Point3 kpt_in_world; + // bool triangulate_success = + // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world); + if (kpt_in_world) { + local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world); + viz_lmks.emplace_back(*kpt_in_world); + if (symbols_landmarks_values_iter.find(symbol_lmk) != + symbols_landmarks_values_iter.end()) { + symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world); + } else { + symbols_landmarks_values_iter.emplace( + symbol_lmk, std::vector{*kpt_in_world}); + } } } std::cout << "setup complete!" << std::endl; - robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::black(), true, + robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), true, true, "Local Optimization " + std::to_string(i)); const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( @@ -629,5 +706,6 @@ int main(int argc, const char **argv) { const gtsam::Values result = detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); std::cout << "about to visualize result" << std::endl; + result.print(); detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); } \ No newline at end of file diff --git a/experimental/learn_descriptors/map.hh b/experimental/learn_descriptors/map.hh new file mode 100644 index 000000000..a22c1e5c7 --- /dev/null +++ b/experimental/learn_descriptors/map.hh @@ -0,0 +1,4 @@ +#pragma once +// #include "Eigen/" + +namespace robot::experimental::learn_descriptors {} \ No newline at end of file diff --git a/experimental/learn_descriptors/map.proto b/experimental/learn_descriptors/map.proto new file mode 100644 index 000000000..67d0c5db0 --- /dev/null +++ b/experimental/learn_descriptors/map.proto @@ -0,0 +1,15 @@ + +syntax = "proto3"; + +package robot.experimental.learn_descriptors; + +message Map { + message Quaternion { + // Represents a quaternion as a + bi + cj + dk + double a = 1; + double b = 2; + double c = 3; + double d = 4; + } + Quaternion quaternion = 1; +} diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_test.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc similarity index 94% rename from experimental/learn_descriptors/spatial_test_scene_cube_test.cc rename to experimental/learn_descriptors/spatial_test_scene_cube_example.cc index ab7b06208..ad1130338 100644 --- a/experimental/learn_descriptors/spatial_test_scene_cube_test.cc +++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc @@ -32,6 +32,6 @@ TEST(SpatialTestSceneCubeTest, viz_cube_with_cameras) { camera_poses.emplace_back(T_world_cam.pose().matrix()); } - // geometry::viz_scene(camera_poses, test_scene.get_points()); + geometry::viz_scene(camera_poses, test_scene.get_points()); } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc index 9af1f14b9..d8f693ea6 100644 --- a/visualization/opencv/opencv_viz.cc +++ b/visualization/opencv/opencv_viz.cc @@ -72,6 +72,7 @@ void viz_scene(const std::vector &world_from_poses, const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); for (unsigned int i = 0; i < points_in_world.size(); i++) { const Eigen::Vector3d &point = points_in_world[i].t_point_in_world; + std::cout << "pt to viz: " << point << std::endl; window.showWidget("point_" + std::to_string(i), cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius, sphere_res, point_color)); @@ -107,11 +108,11 @@ void viz_scene(const std::vector &world_from_poses, cv::Point3d(0, 0, 1), cells, cell_sizes, color)); } - std::cout << "viz heartbeat" << std::endl; + // std::cout << "viz heartbeat" << std::endl; // window.setViewerPose(cv::Affine3d( // eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[0].world_from_pose.matrix())))); - window.setViewerPose(cv::Affine3d::Identity()); - std::cout << "success" << std::endl; + // window.setViewerPose(cv::Affine3d::Identity()); + // std::cout << "success" << std::endl; window.spin(); } From 28bd689d38316985253836307763ec4198d345cb Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 1 Jul 2025 15:48:34 -0400 Subject: [PATCH 26/45] WIP --- common/geometry/camera.cc | 1 + .../four_seasons_parser_example_viz.cc | 48 ++++- experimental/learn_descriptors/frontend.cc | 3 +- experimental/learn_descriptors/frontend.hh | 3 +- .../learn_descriptors/frontend_test.cc | 4 +- .../learn_descriptors/incremental_sfm_mvp.cc | 181 ++++++++++++------ experimental/learn_descriptors/sfm_mvp.cc | 4 +- .../structure_from_motion.cc | 2 +- visualization/opencv/opencv_viz.cc | 1 - 9 files changed, 165 insertions(+), 82 deletions(-) diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc index 7e5a5fb3a..c7908c4f9 100644 --- a/common/geometry/camera.cc +++ b/common/geometry/camera.cc @@ -28,6 +28,7 @@ Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector &kpts0 const std::vector &kpts1, const std::vector &matches, const cv::Mat &K) { + assert(kpts1.size() != 0 && kpts0.size() != 0); Eigen::Isometry3d result; std::vector pts1; std::vector pts2; diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index f0e5c5e5c..639192cd9 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -48,23 +48,53 @@ int main(int argc, const char** argv) { ROBOT_CHECK(parser.num_images() != 0); - std::vector w_from_gnss_cams; // camera frames from visual world frame + // std::vector + // w_from_gnss_cams; // camera frames from visual world + // frame + // std::vector w_from_gcs_cams; // camera frames from visual world + // frame + std::vector viz_frames; // camera frames from visual world frame + Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity(); std::cout << "gnss scale: " << parser.gnss_scale() << std::endl; scale_mat.linear() *= parser.gnss_scale(); std::cout << "scale mat: " << scale_mat.matrix() << std::endl; - for (size_t i = 100; i < std::min(static_cast(800), parser.num_images()); i += 49) { + constexpr size_t START = 100; + const size_t END = std::min(static_cast(800), parser.num_images()); + for (size_t i = START; i < END; i += 49) { const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); Eigen::Isometry3d AS_w_from_gnss_cam = scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()); Eigen::Isometry3d w_from_gnss_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * AS_w_from_gnss_cam; - Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * - Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix()); - w_from_gnss_cams.push_back(w_from_gnss_cam); - std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl; - std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl; + Eigen::Vector3d + // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * + // Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix()); + viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); + if (i == START) { + viz_frames.emplace_back(w_from_gnss_cam, "x_gps_" + std::to_string(i)); + } else if (img_pt.gps_gcs) { + const Eigen::Vector3d gcs_coordinate( + img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, + img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); + const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate); + const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(), + ECEF_from_gps.z(), 1); + // Eigen::Isometry3d w_from_gcs_gps = + Eigen::Vector4d gps_in_w = + Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) * + ECEF_from_gps_hom; + Eigen::Isometry3d w_from_gcs_gps; + w_from_gcs_gps.translation() = gps_in_w.head<3>(); + viz_frames.emplace_back(w_from_gcs_gps, "x_gps_" + std::to_string(i)); + const Eigen::Vector3d ref_to_gps_in_world = + w_from_gcs_gps.translation() - w_from_gnss_cam.translation(); + std::cout << "ref_" << i << "_to_gps_in_world: " << ref_to_gps_in_world << std::endl; + } + // std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl; + // std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl; } - std::cout << "got " << w_from_gnss_cams.size() << " poses" << std::endl; - robot::geometry::viz_scene(w_from_gnss_cams, std::vector()); + std::cout << "got " << viz_frames.size() << " poses" << std::endl; + robot::geometry::viz_scene(viz_frames, std::vector(), + cv::viz::Color::brown()); } \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 3ef2c408e..134c4914a 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -38,8 +38,7 @@ Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matche } } -std::pair, cv::Mat> Frontend::get_keypoints_and_descriptors( - const cv::Mat &img) const { +std::pair, cv::Mat> Frontend::extract_features(const cv::Mat &img) const { std::vector keypoints; cv::Mat descriptors; switch (extractor_type_) { diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index 2b8c7b9f7..12332e4fb 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -18,8 +18,7 @@ class Frontend { const ExtractorType &get_extractor_type() const { return extractor_type_; }; const MatcherType &get_matcher_type() const { return matcher_type_; }; - std::pair, cv::Mat> get_keypoints_and_descriptors( - const cv::Mat &img) const; + std::pair, cv::Mat> extract_features(const cv::Mat &img) const; std::vector get_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2) const; static void threshold_matches(std::vector &matches, float dist_threshhold); diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc index d90142e3c..25e60a690 100644 --- a/experimental/learn_descriptors/frontend_test.cc +++ b/experimental/learn_descriptors/frontend_test.cc @@ -75,8 +75,8 @@ TEST(FrontendTest, pipeline_sweep) { assert(std::string(e.what()) == "FLANN can not be used with ORB."); // very jank... continue; } - keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1); - keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2); + keypoints_descriptors_pair_1 = frontend.extract_features(image_1); + keypoints_descriptors_pair_2 = frontend.extract_features(image_2); matches = frontend.get_matches(keypoints_descriptors_pair_1.second, keypoints_descriptors_pair_2.second); frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first, diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index 0d8da7623..214cd3105 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -1,3 +1,4 @@ +#include #include #include #include @@ -41,53 +42,81 @@ namespace detail_sfm { std::optional attempt_triangulate(const std::vector &cam_poses, const std::vector &cam_obs, gtsam::Cal3_S2::shared_ptr K, + const double max_reproj_error = 2.0, const bool verbose = true) { - std::optional result; + gtsam::Point3 p_lmk_in_world; if (cam_poses.size() >= 2) { try { // Attempt triangulation using DLT (or the GTSAM provided method) - gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( + p_lmk_in_world = gtsam::triangulatePoint3( cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - result = p_world_lmk; - - // Optional: perform an explicit cheirality check - for (const auto &pose : cam_poses) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); - if (p_cam_lmk.z() <= 0) { // Check that the depth is positive - break; - } - } } catch (const gtsam::TriangulationCheiralityException &e) { // Handle the exception gracefully by logging and retaining the previous // estimate. if (verbose) std::cerr << "attempt_triangulation failed. Likely cheirality exception: " - << e.what() << ". Keeping initial landmark estimate." << std::endl; + << e.what() << ". Discarding involved keypoints." << std::endl; + } + } + // Optional: perform an explicit cheirality check + for (const auto &pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + return std::nullopt; + } + } + + // Cheirality & reprojection checks + for (size_t i = 0; i < cam_poses.size(); ++i) { + const auto &pose = cam_poses[i]; + // Cheirality + gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); + if (p_cam.z() <= 0) { + if (verbose) { + std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z() + << ")\n"; + } + return std::nullopt; + } + // Reprojection error + if (max_reproj_error > 0) { + gtsam::PinholeCamera cam(pose, *K); + const auto reproj = cam.project(p_lmk_in_world); + const double err = (reproj - cam_obs[i]).norm(); + if (err > max_reproj_error) { + if (verbose) { + std::cerr << "[triangulate] reprojection error too large on view " << i << " (" + << err << " px)\n"; + } + return std::nullopt; + } } } - return result; + + return p_lmk_in_world; } // bool attempt_triangulate(const std::vector &cam_poses, -// const std::vector &cam_obs, gtsam::Cal3_S2::shared_ptr K, -// gtsam::Point3 &out_p_world_landmark, const bool verbose = true) { +// const std::vector &cam_obs, +// gtsam::Cal3_S2::shared_ptr K, gtsam::Point3 &out_p_world_landmark, +// const bool verbose = true) { // bool valid = true; // if (cam_poses.size() >= 2) { // try { // // Attempt triangulation using DLT (or the GTSAM provided method) -// gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( +// gtsam::Point3 p_lmk_in_world = gtsam::triangulatePoint3( // cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); -// out_p_world_landmark = p_world_lmk; +// out_p_world_landmark = p_lmk_in_world; // // Optional: perform an explicit cheirality check // for (const auto &pose : cam_poses) { // // Transform point to the camera coordinate system. // // transformTo() converts a world point to the camera frame. -// gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); +// gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); // if (p_cam_lmk.z() <= 0) { // Check that the depth is positive // valid = false; // break; @@ -129,14 +158,13 @@ void graph_values(const gtsam::Values &values, const std::string &window_name, gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, const std::vector &symbols_pose, const std::vector &symbols_landmarks, - bool viz_itr = false) { + const int num_epochs = 5, bool viz_itr = false) { gtsam::LevenbergMarquardtParams params; params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. params.maxIterations = 1; gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); double prev_error = optimizer.error(); - const int num_steps = 5; typedef int epoch; std::function &, const std::vector &)> @@ -148,7 +176,7 @@ gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gts graph_values(vals, window_name, symbols_pose, symbols_landmarks); }; - for (int i = 0; i < num_steps; i++) { + for (int i = 0; i < num_epochs; i++) { optimizer.iterate(); double curr_error = optimizer.error(); @@ -199,7 +227,19 @@ gtsam::Point3 get_variance(const std::vector &points) { return var / points.size(); } -// const std::vector get_absolute_trajectory_error(const std::vector &) {} +double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) { + Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation(); + + // 2. Compute trace and clamp to [-1,1] for numerical safety + double tr = R_err.trace(); + double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0)); + + // 3. Recover angle (in radians) + return std::acos(cos_theta); +} + +// const std::vector get_absolute_trajectory_error(const std::vector &) +// {} }; // namespace detail_sfm int main(int argc, const char **argv) { @@ -235,9 +275,10 @@ int main(int argc, const char **argv) { FourSeasonsParser parser(path_data, path_calibration); std::cout << "incremental_sfm_mvp!" << std::endl; - const std::vector indices{581, 609, - 633}; // indices with all data fields on neighborhood_5_train - // const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with + // const std::vector indices{581, 609, + // 633}; // indices with all data fields on neighborhood_5_train + const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with + constexpr bool visualize = false; // all data fields on neighborhood_5_train const std::vector indices = []() { // std::vector tmp; // for (int i = 660; i < 681; i += 10) { @@ -320,13 +361,13 @@ int main(int argc, const char **argv) { id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix()); // populate frame - cv::Mat img_undistorted = parser.load_image(idx); - // cv::Mat img = parser.load_image(idx); - // cv::Mat img_undistorted; - // cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat); + cv::Mat img = parser.load_image(idx); + cv::Mat img_undistorted; + cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat, K_mat, img.size()); // std::stringstream ss; - // ss << "image " << idx; + // ss << "image " << idx << " of size " + // << "(" << img_undistorted.cols << ", " << img_undistorted.rows << ")"; // cv::imshow(ss.str(), img_undistorted); // cv::waitKey(0); // cv::destroyWindow(ss.str()); @@ -348,7 +389,8 @@ int main(int argc, const char **argv) { } std::pair, cv::Mat> kpts_descs = - frontend.get_keypoints_and_descriptors(img_undistorted); + frontend.extract_features(img_undistorted); + std::cout << "kpts_descs.size(): " << kpts_descs.first.size() << std::endl; kpt_list.push_back(kpts_descs.first); KeypointsCV kpts; for (const cv::KeyPoint &kpt : kpts_descs.first) { @@ -362,10 +404,10 @@ int main(int argc, const char **argv) { id++; } - - robot::geometry::viz_scene(viz_references_world_from_cam, - std::vector(), cv::viz::Color::brown(), - true, true, "References"); + if (visualize) + robot::geometry::viz_scene(viz_references_world_from_cam, + std::vector(), + cv::viz::Color::brown(), true, true, "References"); // populate feature_tracks and lmk_id_map // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute @@ -384,15 +426,6 @@ int main(int argc, const char **argv) { // DIAL TO MESS WITH frontend.enforce_bijective_buffer_matches(matches); - // std::cout << "matching heartbeat" << std::endl; - // cv::Mat viz_matches; - // Frontend::draw_matches(parser.load_image(i), kpt_list[i], parser.load_image(j), - // kpt_list[j], matches, viz_matches); - // std::cout << "image height: " << viz_matches.rows << " width: " << - // viz_matches.cols - // << std::endl; - // cv::imshow("viz_matches", viz_matches); - // cv::waitKey(0); std::vector cv_kpts_1; std::vector cv_kpts_2; for (const KeypointCV &kpt : frames[i].get_keypoints()) { @@ -411,7 +444,6 @@ int main(int argc, const char **argv) { robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat)) .linear(); id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); - std::cout << "still alive with: " << matches.size() << " matches" << std::endl; for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx]; @@ -462,8 +494,11 @@ int main(int argc, const char **argv) { std::to_string(id)); } - robot::geometry::viz_scene(viz_world_from_cam_init, std::vector(), - cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); + std::cout << "pre heartbeat" << std::endl; + if (visualize) + robot::geometry::viz_scene(viz_world_from_cam_init, + std::vector(), + cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); std::cout << "heartbeat" << std::endl; // TRIANGULATE all of the points (for initial guess) @@ -485,7 +520,6 @@ int main(int argc, const char **argv) { // bool triangulate_success = // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false); if (kpt_in_world) { - std::cout << "triangulation success: " << *kpt_in_world << std::endl; lmk_triangulated_map.emplace(lmk_id, *kpt_in_world); triangulated_lmks.push_back(*kpt_in_world); triangulated_lmks_viz.emplace_back(*kpt_in_world, "lmk " + std::to_string(lmk_id)); @@ -495,9 +529,10 @@ int main(int argc, const char **argv) { } } std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl; - robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, - cv::viz::Color::brown(), true, true, - "Unfiltered, triangulated points"); + if (visualize) + robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, + cv::viz::Color::brown(), true, true, + "Unfiltered, triangulated points"); // filter points via variance // TODO: filter based on quality and/or quantity of matches @@ -518,9 +553,10 @@ int main(int argc, const char **argv) { } std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl; std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl; - robot::geometry::viz_scene(std::vector(), filtered_points, - cv::viz::Color::brown(), true, true, - "Filtered, triangulated points"); + if (visualize) + robot::geometry::viz_scene(std::vector(), filtered_points, + cv::viz::Color::brown(), true, true, + "Filtered, triangulated points"); // ############# BACKEND ############### @@ -566,7 +602,7 @@ int main(int argc, const char **argv) { // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world)); } - detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); + // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); std::cout << "heart beat 3" << std::endl; constexpr bool local_optimizations = false; @@ -672,8 +708,9 @@ int main(int argc, const char **argv) { } } std::cout << "setup complete!" << std::endl; - robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), true, - true, "Local Optimization " + std::to_string(i)); + if (visualize) + robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), + true, true, "Local Optimization " + std::to_string(i)); const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); @@ -704,8 +741,26 @@ int main(int argc, const char **argv) { // do global optimization const gtsam::Values result = - detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); - std::cout << "about to visualize result" << std::endl; - result.print(); - detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); + detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0); + + if (visualize) { + std::cout << "about to visualize result" << std::endl; + result.print(); + detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); + } + + // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference + double sum_traj_error = 0; + double sum_rot_error = 0; + for (size_t i = 0; i < symbols_pose.size(); i++) { + const gtsam::Pose3 traj_pose = result.at(symbols_pose[i]); + sum_traj_error += std::pow( + (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2); + sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i], + Eigen::Isometry3d(traj_pose.matrix())); + } + std::cout << "sum_rot_error: " << sum_rot_error << std::endl; + double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); + double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); + std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; } \ No newline at end of file diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index 21966cf4a..9d41270d7 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -238,7 +238,7 @@ TEST(SFMMvp, sfm_building_manual_global) { Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); std::pair, cv::Mat> kpts_descs = - frontend.get_keypoints_and_descriptors(img_undistorted); + frontend.extract_features(img_undistorted); KeypointsCV kpts; for (const cv::KeyPoint &kpt : kpts_descs.first) { kpts.push_back(kpt.pt); @@ -432,7 +432,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); std::pair, cv::Mat> kpts_descs = - frontend.get_keypoints_and_descriptors(img_undistorted); + frontend.extract_features(img_undistorted); KeypointsCV kpts; for (const cv::KeyPoint &kpt : kpts_descs.first) { kpts.push_back(kpt.pt); diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 02dc28a2d..df84bb7cf 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -59,7 +59,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo cv::Mat img_undistorted; cv::undistort(img, img_undistorted, K_, D_); std::pair, cv::Mat> keypoints_and_descriptors = - frontend_.get_keypoints_and_descriptors(img); + frontend_.extract_features(img); feature_manager_->append_img_data(keypoints_and_descriptors.first, keypoints_and_descriptors.second); keypoint_to_landmarks_.push_back(std::unordered_map()); diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc index d8f693ea6..d750555ed 100644 --- a/visualization/opencv/opencv_viz.cc +++ b/visualization/opencv/opencv_viz.cc @@ -72,7 +72,6 @@ void viz_scene(const std::vector &world_from_poses, const cv::viz::Color point_color = cv::viz::Color::celestial_blue(); for (unsigned int i = 0; i < points_in_world.size(); i++) { const Eigen::Vector3d &point = points_in_world[i].t_point_in_world; - std::cout << "pt to viz: " << point << std::endl; window.showWidget("point_" + std::to_string(i), cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius, sphere_res, point_color)); From dc8dd641033394bdae52c0ae2d5b8deb697fc657 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Wed, 2 Jul 2025 14:43:03 -0400 Subject: [PATCH 27/45] Debugging discrepancy between gps data and reference data --- .../learn_descriptors/four_seasons_parser.cc | 22 ++---- .../four_seasons_parser_example_viz.cc | 67 ++++++++++++++----- experimental/learn_descriptors/image_point.hh | 1 + 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index 75886a552..8c850aa2e 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -103,23 +103,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, while (gps_idx < gps_time_list.size() && time_key > gps_time_list[gps_idx].first && time_key - gps_time_list[gps_idx].first > (1.0 / IMU_HZ) * 1e9) { gps_idx++; - // if (gps_idx < 200) { - // std::stringstream ss; - // ss << "uh oh: " << gps_idx; - // if (time_key > gps_time_list[gps_idx].first) { - // ss << "\ttime_key - gps_time: " << (time_key - gps_time_list[gps_idx].first); - // } else { - // ss << "\tgps_time - time_key: " << (gps_time_list[gps_idx].first - time_key); - // } - // std::cout << ss.str() << std::endl; - // } } // find the closest gps point whose time is before the current image capture time if (gps_idx < gps_time_list.size() && time_key - gps_time_list[gps_idx].first <= (1.0 / IMU_HZ) * 1e9) { img_pt.gps_gcs = gps_time_list[gps_idx].second; - // std::cout << "adding gps data at img_pt: " << img_pt.id << " with gps_idx: " << - // gps_idx - // << std::endl; gps_idx++; } img_pt_vector_.push_back(img_pt); @@ -325,11 +312,12 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) { if (nmea_sentence->type() == "GGA") { nmea::gga gga(*nmea_sentence); if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) { - ImagePoint::GPSData gps_data; - gps_data.latitude = gga.latitude.get(); - gps_data.longitude = gga.longitude.get(); - if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); if (gga.utc.get() == time_of_day_last) { // GGA messages for this dataset come + ImagePoint::GPSData gps_data; + gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get()); + gps_data.latitude = gga.latitude.get(); + gps_data.longitude = gga.longitude.get(); + if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); // after RMC messages // std::cout << "adding gps data with time of day: " << gga.utc.get() // << " and unix time: " diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index 639192cd9..bfa8bdf3d 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -53,7 +53,8 @@ int main(int argc, const char** argv) { // frame // std::vector w_from_gcs_cams; // camera frames from visual world // frame - std::vector viz_frames; // camera frames from visual world frame + std::vector viz_frames; // camera frames from visual world frame + std::vector viz_points; // camera frames from visual world frame Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity(); std::cout << "gnss scale: " << parser.gnss_scale() << std::endl; @@ -61,22 +62,58 @@ int main(int argc, const char** argv) { std::cout << "scale mat: " << scale_mat.matrix() << std::endl; constexpr size_t START = 100; const size_t END = std::min(static_cast(800), parser.num_images()); + std::optional altitude_gps_from_gnss_cam; for (size_t i = START; i < END; i += 49) { + std::cout << "\nImage point " << i << std::endl; const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); + std::cout << "Image point seq: " << img_pt.seq << std::endl; + if (!img_pt.AS_w_from_gnss_cam) { + continue; + } Eigen::Isometry3d AS_w_from_gnss_cam = scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()); + // Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()); + std::cout << "img_pt.AS_w_from_gnss_cam->matrix()" << img_pt.AS_w_from_gnss_cam->matrix() + << std::endl; + std::cout << "AS_w_from_gnss_cam: " << AS_w_from_gnss_cam.matrix() << std::endl; Eigen::Isometry3d w_from_gnss_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * AS_w_from_gnss_cam; - Eigen::Vector3d - // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * - // Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix()); - viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); + w_from_gnss_cam.translation().x() += 1.2; + w_from_gnss_cam.translation().y() += 1.2; + // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * + // Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix()); + std::cout << "w_from_gnss_cam: " << w_from_gnss_cam.matrix() << std::endl; + viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); + viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); if (i == START) { - viz_frames.emplace_back(w_from_gnss_cam, "x_gps_" + std::to_string(i)); + break; + viz_points.emplace_back(w_from_gnss_cam.translation(), "x_gps_" + std::to_string(i)); } else if (img_pt.gps_gcs) { - const Eigen::Vector3d gcs_coordinate( + std::cout << "gps time: " << img_pt.gps_gcs->seq << std::endl; + Eigen::Vector3d gcs_coordinate( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); + + std::cout << "gcs_coordinate altitude: " + << (img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0) << std::endl; + + // compare the reference in gcs to the gps in gcs + Eigen::Isometry3d ECEF_from_gnss_cam = + Eigen::Isometry3d( + (parser.e_from_gpsw() * parser.w_from_gpsw().inverse()).matrix()) * + w_from_gnss_cam; + const Eigen::Vector3d gnss_cam_in_gcs = + parser.gcs_from_ECEF(ECEF_from_gnss_cam.translation()); + + if (!altitude_gps_from_gnss_cam) { + altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude); + } + gcs_coordinate.z() += *altitude_gps_from_gnss_cam; + + std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs + << "\ngps_gcs: " << gcs_coordinate + << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl; + const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate); const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(), ECEF_from_gps.z(), 1); @@ -84,17 +121,15 @@ int main(int argc, const char** argv) { Eigen::Vector4d gps_in_w = Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) * ECEF_from_gps_hom; - Eigen::Isometry3d w_from_gcs_gps; - w_from_gcs_gps.translation() = gps_in_w.head<3>(); - viz_frames.emplace_back(w_from_gcs_gps, "x_gps_" + std::to_string(i)); - const Eigen::Vector3d ref_to_gps_in_world = - w_from_gcs_gps.translation() - w_from_gnss_cam.translation(); - std::cout << "ref_" << i << "_to_gps_in_world: " << ref_to_gps_in_world << std::endl; + viz_points.emplace_back(gps_in_w.head<3>(), "x_gps_" + std::to_string(i)); + const Eigen::Vector3d gps_from_ref_in_world = + gps_in_w.head<3>() - w_from_gnss_cam.translation(); + std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl; } // std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl; // std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl; } - std::cout << "got " << viz_frames.size() << " poses" << std::endl; - robot::geometry::viz_scene(viz_frames, std::vector(), - cv::viz::Color::brown()); + std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl; + std::cout << "got " << viz_points.size() << " points" << std::endl; + robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown()); } \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index 109f49296..33d48e9f3 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -15,6 +15,7 @@ namespace robot::experimental::learn_descriptors { struct ImagePoint { struct GPSData { + size_t seq; // time in nanoseconds, may differ from shutter time double latitude; double longitude; std::optional altitude; // meters above sea level From e90e90956a53be8c551634c505e829b1a95338b7 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 3 Jul 2025 16:42:48 -0400 Subject: [PATCH 28/45] incorporated new four seasons parser changes --- experimental/learn_descriptors/BUILD | 218 +----------------- .../learn_descriptors/four_seasons_parser.cc | 128 +++++++--- .../learn_descriptors/four_seasons_parser.hh | 19 +- .../four_seasons_parser_example.cc | 3 +- .../four_seasons_parser_example_viz.cc | 80 ++++--- .../four_seasons_parser_test.cc | 18 +- experimental/learn_descriptors/image_point.hh | 63 ++++- 7 files changed, 240 insertions(+), 289 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index bc7e84bd5..01d7e9a83 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -75,7 +75,6 @@ cc_test( deps = [ "@com_google_googletest//:gtest_main", ":symphony_lake_parser", - "//common:check", "//visualization/opencv:opencv_viz" ] ) @@ -111,7 +110,8 @@ cc_library( "@com_google_absl//absl/strings:str_format", ":image_point", "@nmea", - "@geographiclib" + "@geographiclib", + "//common:check" ] ) @@ -150,218 +150,6 @@ cc_test( ":four_seasons_parser", "@nmea", "@geographiclib", - ] -) - -cc_library( - name = "frontend_definitions", - hdrs = ["frontend_definitions.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - ":structure_from_motion_types" - ] -) - -cc_library( - name = "structure_from_motion_types", - hdrs = ["structure_from_motion_types.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv" - ] -) - -cc_library( - name = "frame", - hdrs = ["frame.hh"], - srcs = ["frame.cc"], - visibility = ["//visibility:public"], - deps = [ - "@opencv//:opencv", - ":frontend_definitions", - "@gtsam//:gtsam" - ] -) - -cc_test( - name = "sfm_mvp", - srcs = ["sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-variable", - ], - deps = [ - ":frontend", - "@com_google_googletest//:gtest_main", - "@gtsam//:gtsam", - "@opencv//:opencv", - "@eigen", - ":spatial_test_scene_cube", - ":symphony_lake_parser", - ":frame", - ":structure_from_motion_types", - "//visualization/opencv:opencv_viz" - ] -) - - -cc_library( - name = "feature_set", - hdrs = ["feature_set.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv" - ] -) - -cc_library( - name = "feature_manager", - hdrs = ["feature_manager.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv", - ":feature_set", - ] -) - -cc_test( - name = "feature_manager_test", - srcs = ["feature_manager_test.cc"], - deps = [ - ":feature_manager", - "@com_google_googletest//:gtest_main", - "@gtsam//:gtsam", - "@opencv//:opencv", - "@eigen", - ":spatial_test_scene_cube" - ] -) - - -cc_library( - name = "spatial_test_scene", - hdrs = ["spatial_test_scene.hh"], - visibility = ["//visibility:public"], - srcs = ["spatial_test_scene.cc"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv", - "@eigen" - ] -) - -cc_library( - name = "spatial_test_scene_cube", - hdrs = ["spatial_test_scene_cube.hh"], - visibility = ["//visibility:public"], - deps = [ - "@eigen", - ":spatial_test_scene" - ] -) - -cc_binary( - name = "spatial_test_scene_cube_example", - srcs = ["spatial_test_scene_cube_example.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":spatial_test_scene_cube", - "//visualization/opencv:opencv_viz", - "@eigen" - ] -) - -cc_library( - name = "frontend", - hdrs = ["frontend.hh"], - visibility = ["//visibility:public"], - srcs = ["frontend.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@opencv//:opencv", - ] -) - - -cc_test( - name = "frontend_test", - srcs = ["frontend_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":frontend" - ] -) - -cc_library( - name = "backend", - hdrs = ["backend.hh"], - visibility = ["//visibility:public"], - srcs = ["backend.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@opencv//:opencv", - "@gtsam//:gtsam", - "//common/geometry:camera", - "@boost//:smart_ptr", - ":feature_manager" - ] -) - - -cc_test( - name = "backend_test", - srcs = ["backend_test.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@com_google_googletest//:gtest_main", - "@opencv//:opencv", - ":backend", - ":feature_manager", - ":spatial_test_scene_cube", - "//visualization/opencv:opencv_viz" - ] -) - -cc_binary( - name = "get_colmap_groundtruth", - srcs = ["get_colmap_groundtruth.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - ":symphony_lake_parser" - ] -) - -cc_binary( - name = "incremental_sfm_mvp", - srcs = ["incremental_sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-function", - ], - deps = [ - ":four_seasons_parser", - "@eigen", - "@opencv", - "@gtsam", - "//visualization/opencv:opencv_viz", - ":spatial_test_scene_cube", - ":frame", - ":structure_from_motion_types", - ":frontend", - "@boost//:smart_ptr", - "@cxxopts", - "//common/geometry:camera" + "//common:check" ] ) diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index 8c850aa2e..fdf76afde 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -1,13 +1,15 @@ #include "experimental/learn_descriptors/four_seasons_parser.hh" -#include - #include +#include +#include #include #include #include +#include #include #include +#include #include #include @@ -17,14 +19,13 @@ #include "GeographicLib/LocalCartesian.hpp" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" +#include "common/check.hh" #include "common/liegroups/se3.hh" #include "common/time/robot_time.hh" #include "nmea/message/gga.hpp" #include "nmea/message/rmc.hpp" #include "nmea/sentence.hpp" -#define IMU_HZ 30.0 - namespace robot::experimental::learn_descriptors { using namespace detail; FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, @@ -45,12 +46,9 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, txt_parser_help::TimeDataMap vio_poses_time_map = txt_parser_help::create_vio_time_data_map(path_vio, min_time_sig_figs); gps_parser_help::TimeGPSList gps_time_list = - gps_parser_help::create_gps_time_data_map(path_gps); - - // std::cout << "imu dt in nanoseconds: " << std::to_string((1.0 / IMU_HZ) * 1e9) << std::endl; + gps_parser_help::create_gps_time_data_list(path_gps); size_t id = 0; - size_t gps_idx = 0; for (const std::pair>& pair_time_data : img_time_list) { const size_t time_key = pair_time_data.first; ImagePoint img_pt; @@ -100,22 +98,33 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id << std::endl; } - while (gps_idx < gps_time_list.size() && time_key > gps_time_list[gps_idx].first && - time_key - gps_time_list[gps_idx].first > (1.0 / IMU_HZ) * 1e9) { - gps_idx++; - } // find the closest gps point whose time is before the current image capture time - if (gps_idx < gps_time_list.size() && - time_key - gps_time_list[gps_idx].first <= (1.0 / IMU_HZ) * 1e9) { - img_pt.gps_gcs = gps_time_list[gps_idx].second; - gps_idx++; - } img_pt_vector_.push_back(img_pt); id++; } + // popoulate gps to nearest img time + // TODO: could be linear time... but good enough + for (const auto& [time_unix_ns, gps_data] : gps_time_list) { + auto it = std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns, + [](const ImagePoint& img_pt, const size_t query_unix_time) { + return img_pt.seq < query_unix_time; + }); + size_t insert_idx = std::distance(img_pt_vector_.begin(), it); + if (it != img_pt_vector_.begin() && + detail::abs_diff(it->seq, time_unix_ns) > + detail::abs_diff(std::prev(it)->seq, time_unix_ns)) { + insert_idx--; + } + // NOTE: in future, could perhaps use gps data that isn't associated with an img_pt in some + // way. maybe to help with interpolation, estimate velocity + if (detail::abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) < + FourSeasonsParser::CAM_CAP_DELTA_NS) { + img_pt_vector_[insert_idx].gps_gcs = gps_data; + } + } } -cv::Mat FourSeasonsParser::load_image(const size_t idx) const { - return get_image_point(idx).load_image(img_dir_); +cv::Mat FourSeasonsParser::load_image(const size_t m) const { + return get_image_point(m).load_image(img_dir_); } FourSeasonsParser::FourSeasonsTransforms::FourSeasonsTransforms( @@ -291,8 +300,8 @@ size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_da std::chrono::sys_time timestamp = date + utc_time_day_ns; return timestamp.time_since_epoch().count(); } -TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) { - TimeGPSList time_map_gps; +TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) { + TimeGPSList time_list_gps; std::ifstream file_gps(path_gps); std::string line; std::optional nmea_sentence; @@ -300,7 +309,7 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) { date_last.day = 255; date_last.month = 255; date_last.year = 255; - double time_of_day_last = 0; + double time_of_day_last = -1.0; while (std::getline(file_gps, line) && !line.empty()) { try { nmea_sentence = nmea::sentence(line); @@ -312,18 +321,14 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) { if (nmea_sentence->type() == "GGA") { nmea::gga gga(*nmea_sentence); if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) { - if (gga.utc.get() == time_of_day_last) { // GGA messages for this dataset come + if (std::abs(gga.utc.get() - time_of_day_last) < + 1e-3) { // GGA messages for this dataset come second ImagePoint::GPSData gps_data; gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get()); gps_data.latitude = gga.latitude.get(); gps_data.longitude = gga.longitude.get(); if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); - // after RMC messages - // std::cout << "adding gps data with time of day: " << gga.utc.get() - // << " and unix time: " - // << gps_utc_to_unix_time(date_last, gga.utc.get()) << std::endl; - time_map_gps.push_back( - std::make_pair(gps_utc_to_unix_time(date_last, gga.utc.get()), gps_data)); + time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data)); } } } else if (nmea_sentence->type() == "RMC") { @@ -332,10 +337,73 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) { date_last = rmc.date.get(); time_of_day_last = rmc.utc.get(); } + } else if (nmea_sentence->type() == "GST") { + std::optional gst_data = parse_gpgst(nmea_sentence->nmea_string()); + if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) < + 1e-3) { // GST message for this dataset come third + size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns); + if (time_list_gps.back().first == unix_time_ns) { + time_list_gps.back().second.uncertainty.emplace( + gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m, + gst_data->error_orientation_deg, gst_data->rms_range_error_m); + } + } } } - return time_map_gps; + return time_list_gps; } + +std::vector split_nmea_sentence(const std::string& sentence) { + std::vector fields; + std::string field; + std::stringstream ss(sentence); + + while (std::getline(ss, field, ',')) { + // remove checksum from the last field if present + auto asterisk = field.find('*'); + if (asterisk != std::string::npos) field = field.substr(0, asterisk); + fields.push_back(field); + } + + return fields; +} +double time_of_day_seconds(const double utc_time_hhmmss) { + int hours = static_cast(utc_time_hhmmss / 10000); + int minutes = static_cast((utc_time_hhmmss - hours * 10000) / 100); + double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100; + return hours * 3600 + minutes * 60 + seconds; +} +std::optional parse_gpgst(const std::string& sentence) { + if (sentence.substr(0, 6) != "$GPGST") { + return std::nullopt; + } + + std::vector fields = split_nmea_sentence(sentence); + if (fields.size() != 9) { + return std::nullopt; + } + for (size_t i = 0; i < fields.size(); i++) { + if (fields[i].empty()) return std::nullopt; + } + + GSTData gst; + gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1])); + gst.rms_range_error_m = std::stod(fields[2]); + gst.error_semi_major_m = std::stod(fields[3]); + gst.error_semi_minor_m = std::stod(fields[4]); + gst.error_orientation_deg = std::stod(fields[5]); + gst.sigma_lat_m = std::stod(fields[6]); + gst.sigma_lon_m = std::stod(fields[7]); + gst.sigma_alt_m = std::stod(fields[8]); + + return gst; +} + } // namespace gps_parser_help + +template +size_t abs_diff(const T& a, const T& b) { + return a > b ? a - b : b - a; +} } // namespace detail } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh index 0f3d2c7fb..9709bc396 100644 --- a/experimental/learn_descriptors/four_seasons_parser.hh +++ b/experimental/learn_descriptors/four_seasons_parser.hh @@ -17,6 +17,8 @@ namespace robot::experimental::learn_descriptors { class FourSeasonsParser { public: + static constexpr double CAM_HZ = 30.0; + static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ; struct CameraCalibrationFisheye { double fx, fy, cx, cy, k1, k2, k3, k4; }; @@ -70,6 +72,8 @@ class FourSeasonsParser { }; namespace detail { +template +std::size_t abs_diff(const T& a, const T& b); namespace txt_parser_help { using TimeDataMap = std::unordered_map>; using TimeDataList = std::vector>>; @@ -120,8 +124,21 @@ const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio } // namespace txt_parser_help namespace gps_parser_help { using TimeGPSList = std::vector>; +struct GSTData { + double utc_time_ns; + double rms_range_error_m; + double error_semi_major_m; + double error_semi_minor_m; + double error_orientation_deg; + double sigma_lat_m; + double sigma_lon_m; + double sigma_alt_m; +}; +std::vector split_nmea_sentence(const std::string& nmea_sentence); +double time_of_day_seconds(const double utc_time_hhmmss); +std::optional parse_gpgst(const std::string& nmea_sentence); size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds); -TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps); +TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps); } // namespace gps_parser_help } // namespace detail } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc index 48c496579..064f404ef 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example.cc @@ -98,8 +98,7 @@ int main(int argc, const char** argv) { 0.5, cv::Scalar(0, 255, 0), 2); cv::putText(img, ss_gps.str(), cv::Point(10, 80), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2); - std::cout << "img_pt " << i << "\n\t" << ss_AS_w_from_gnss_cam.str() << "\n\t" - << ss_AS_w_from_vio_cam.str() << "\n\t" << ss_gps.str() << std::endl; + std::cout << img_pt.to_string() << std::endl; cv::imshow("FourSeasonsParserExample", img); int key = cv::waitKey(0); diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index bfa8bdf3d..bca7ec2c4 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -48,11 +49,6 @@ int main(int argc, const char** argv) { ROBOT_CHECK(parser.num_images() != 0); - // std::vector - // w_from_gnss_cams; // camera frames from visual world - // frame - // std::vector w_from_gcs_cams; // camera frames from visual world - // frame std::vector viz_frames; // camera frames from visual world frame std::vector viz_points; // camera frames from visual world frame @@ -60,43 +56,32 @@ int main(int argc, const char** argv) { std::cout << "gnss scale: " << parser.gnss_scale() << std::endl; scale_mat.linear() *= parser.gnss_scale(); std::cout << "scale mat: " << scale_mat.matrix() << std::endl; - constexpr size_t START = 100; - const size_t END = std::min(static_cast(800), parser.num_images()); std::optional altitude_gps_from_gnss_cam; - for (size_t i = START; i < END; i += 49) { - std::cout << "\nImage point " << i << std::endl; + std::vector gps_ns_delta_from_shutter; + for (size_t i = 0; i < parser.num_images(); i += 1) { const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); - std::cout << "Image point seq: " << img_pt.seq << std::endl; + std::cout << img_pt.to_string() << std::endl; if (!img_pt.AS_w_from_gnss_cam) { continue; } - Eigen::Isometry3d AS_w_from_gnss_cam = - scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()); - // Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()); - std::cout << "img_pt.AS_w_from_gnss_cam->matrix()" << img_pt.AS_w_from_gnss_cam->matrix() - << std::endl; - std::cout << "AS_w_from_gnss_cam: " << AS_w_from_gnss_cam.matrix() << std::endl; - Eigen::Isometry3d w_from_gnss_cam = - Eigen::Isometry3d(parser.S_from_AS().matrix()) * AS_w_from_gnss_cam; - w_from_gnss_cam.translation().x() += 1.2; - w_from_gnss_cam.translation().y() += 1.2; - // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) * - // Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix()); - std::cout << "w_from_gnss_cam: " << w_from_gnss_cam.matrix() << std::endl; + Eigen::Isometry3d w_from_gnss_cam(Eigen::Isometry3d(parser.S_from_AS().matrix()) * + scale_mat * + Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix())); + viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); - if (i == START) { - break; - viz_points.emplace_back(w_from_gnss_cam.translation(), "x_gps_" + std::to_string(i)); - } else if (img_pt.gps_gcs) { - std::cout << "gps time: " << img_pt.gps_gcs->seq << std::endl; + if (img_pt.gps_gcs) { + if (img_pt.gps_gcs->seq > img_pt.seq) { + gps_ns_delta_from_shutter.push_back( + static_cast(img_pt.gps_gcs->seq - img_pt.seq)); + } else { + gps_ns_delta_from_shutter.push_back( + -static_cast(img_pt.seq - img_pt.gps_gcs->seq)); + } Eigen::Vector3d gcs_coordinate( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); - std::cout << "gcs_coordinate altitude: " - << (img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0) << std::endl; - // compare the reference in gcs to the gps in gcs Eigen::Isometry3d ECEF_from_gnss_cam = Eigen::Isometry3d( @@ -109,7 +94,6 @@ int main(int argc, const char** argv) { altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude); } gcs_coordinate.z() += *altitude_gps_from_gnss_cam; - std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs << "\ngps_gcs: " << gcs_coordinate << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl; @@ -117,7 +101,6 @@ int main(int argc, const char** argv) { const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate); const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(), ECEF_from_gps.z(), 1); - // Eigen::Isometry3d w_from_gcs_gps = Eigen::Vector4d gps_in_w = Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) * ECEF_from_gps_hom; @@ -126,10 +109,33 @@ int main(int argc, const char** argv) { gps_in_w.head<3>() - w_from_gnss_cam.translation(); std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl; } - // std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl; - // std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl; } + const double sum = + std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0); + double avg_ns_gps_delta = sum / gps_ns_delta_from_shutter.size(); + double var_ns_gps_delta; + double var_sum = 0.0; + size_t num_greater_cam_hz = 0; + + for (const double delta : gps_ns_delta_from_shutter) { + var_sum += std::pow(delta - avg_ns_gps_delta, 2); + if (delta > lrn_desc::FourSeasonsParser::CAM_CAP_DELTA_NS) { + num_greater_cam_hz++; + } + } + var_ns_gps_delta = var_sum / gps_ns_delta_from_shutter.size(); + double max_delta = + *std::max_element(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end()); + ROBOT_CHECK(max_delta < lrn_desc::FourSeasonsParser::CAM_CAP_DELTA_NS); + std::cout << "\nGPS Analysis: " << std::endl; + std::cout << "\tavg delta time ns img_pt_seq from gps_gcs_seq: " << avg_ns_gps_delta + << std::endl; + std::cout << "\tstd_var delta time ns img_pt_seq from gps_gcs_seq: " + << std::pow(var_ns_gps_delta, 0.5) << std::endl; + std::cout << "\tnum deltas greater than cam_hz: " << num_greater_cam_hz << std::endl; + std::cout << "max delta: " << max_delta << std::endl; std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl; std::cout << "got " << viz_points.size() << " points" << std::endl; - robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown()); -} \ No newline at end of file + robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true, + "Viz Trajectory + GPS Points"); +} diff --git a/experimental/learn_descriptors/four_seasons_parser_test.cc b/experimental/learn_descriptors/four_seasons_parser_test.cc index b5662039e..ff49d935d 100644 --- a/experimental/learn_descriptors/four_seasons_parser_test.cc +++ b/experimental/learn_descriptors/four_seasons_parser_test.cc @@ -16,6 +16,7 @@ #include "GeographicLib/LocalCartesian.hpp" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" +#include "common/check.hh" #include "common/liegroups/se3.hh" #include "gtest/gtest.h" #include "nmea/message/gga.hpp" @@ -104,6 +105,8 @@ TEST(FourSeasonsParserTest, parser_test) { (parser.e_from_gpsw() * parser.w_from_gpsw().inverse() * parser.S_from_AS()).matrix() * scale_mat); + std::vector gps_ns_delta_from_shutter; + for (size_t i = 0; i < parser.num_images(); i++) { const ImagePoint& img_pt = parser.get_image_point(i); const cv::Mat img = parser.load_image(i); @@ -181,10 +184,23 @@ TEST(FourSeasonsParserTest, parser_test) { img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); - EXPECT_NEAR(raw_gps_gcs.x(), gnss_gps_gcs.x(), 1e-4); // lattitude + EXPECT_NEAR(raw_gps_gcs.x(), gnss_gps_gcs.x(), 1e-4); // latitude EXPECT_NEAR(raw_gps_gcs.y(), gnss_gps_gcs.y(), 1e-4); // longitude EXPECT_NEAR(raw_gps_gcs.z(), gnss_gps_gcs.z(), 60); // height above sea level } + if (img_pt.gps_gcs) { + if (img_pt.gps_gcs->seq > img_pt.seq) { + gps_ns_delta_from_shutter.push_back( + static_cast(img_pt.gps_gcs->seq - img_pt.seq)); + } else { + gps_ns_delta_from_shutter.push_back( + -static_cast(img_pt.seq - img_pt.gps_gcs->seq)); + } + } } + + double max_delta = + *std::max_element(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end()); + ROBOT_CHECK(max_delta < FourSeasonsParser::CAM_CAP_DELTA_NS); } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index 33d48e9f3..c46708d17 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -15,13 +15,62 @@ namespace robot::experimental::learn_descriptors { struct ImagePoint { struct GPSData { - size_t seq; // time in nanoseconds, may differ from shutter time + struct Uncertainty { + double sigma_lat_mitude; + double sigma_longitude; + double sigma_altitude; + double orientation_deg; + double rms_range_error_m; + + // diagonal latitude, longitude, altitude covariance in meters squared + Eigen::Matrix3d to_LLA_covariance() const { + Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); + cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude; + cov(1, 1) = sigma_longitude * sigma_longitude; + cov(2, 2) = sigma_altitude * sigma_altitude; + return cov; + } + + // Convert lat/lon covariance to ENU at given latitude + Eigen::Matrix3d to_ENU_covariance(double lat_deg) const { + double lat_rad = lat_deg * M_PI / 180.0; + + // meters per degree scaling + double kLat = + 111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad); + double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad); + + Eigen::Matrix3d lla_cov = to_LLA_covariance(); + + // rotate lat/lon covariance (upper-left 2x2) using orientation + double theta = orientation_deg * M_PI / 180.0; + Eigen::Matrix2d R; + R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); + + Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); + Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose(); + + // apply linear scaling to convert degrees to meters in EN + Eigen::Matrix2d J; + J << kLat, 0, 0, kLon; + + Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose(); + + Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero(); + enu_cov.topLeftCorner<2, 2>() = en_cov; + enu_cov(2, 2) = lla_cov(2, 2); // alt variance unchanged + + return enu_cov; + } + }; + size_t seq; // time in nanoseconds, may differ from image capturetime double latitude; double longitude; std::optional altitude; // meters above sea level + std::optional uncertainty; }; size_t id; // idx for DB - size_t seq; // time in nanoseconds (also the name of image) + size_t seq; // time in nanoseconds of image capture (also the name of image) std::optional AS_w_from_gnss_cam; // globally opimized arbitrary scale visual world (gnss + VIO + loop // closure + etc.) from cam @@ -54,13 +103,21 @@ struct ImagePoint { } ss << "\n\tgps_gcs: "; if (gps_gcs) { + ss << "\n\t\tseq: " << gps_gcs->seq; ss << "\n\t\t" << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t"; if (gps_gcs->altitude) { ss << *(gps_gcs->altitude); } else { ss << "alt N/A"; } - + ss << "\n\t\tsigma: "; + if (gps_gcs->uncertainty) { + ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t" + << gps_gcs->uncertainty->sigma_longitude << "\t" + << gps_gcs->uncertainty->sigma_altitude; + } else { + ss << "N/A"; + } } else { ss << "N/A"; } From fa8ba85d0ea4b3970dd7b67446a190d63f6d17ec Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 8 Jul 2025 17:30:48 -0400 Subject: [PATCH 29/45] WIP make SFM object-oriented --- common/geometry/BUILD | 5 +- common/geometry/camera.cc | 44 +++- common/geometry/camera.hh | 9 +- common/geometry/camera_test.cc | 29 ++- experimental/learn_descriptors/BUILD | 216 ++++++++++++++++++ experimental/learn_descriptors/frontend.cc | 150 +++++++++--- experimental/learn_descriptors/frontend.hh | 57 +++-- .../learn_descriptors/frontend_definitions.hh | 2 - .../learn_descriptors/frontend_test.cc | 4 +- .../learn_descriptors/incremental_sfm_mvp.cc | 66 ++++-- experimental/learn_descriptors/sfm_mvp.cc | 6 +- .../structure_from_motion.cc | 8 +- .../structure_from_motion.hh | 4 +- 13 files changed, 489 insertions(+), 111 deletions(-) diff --git a/common/geometry/BUILD b/common/geometry/BUILD index 2ef17fb83..6b11a2b02 100644 --- a/common/geometry/BUILD +++ b/common/geometry/BUILD @@ -48,9 +48,10 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@eigen", - "@opencv//:opencv", + "@opencv", ":translate_types", - "@gtsam" + "@gtsam", + "//common:check" ] ) diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc index c7908c4f9..38733d6f4 100644 --- a/common/geometry/camera.cc +++ b/common/geometry/camera.cc @@ -1,5 +1,9 @@ #include "common/geometry/camera.hh" +#include +#include + +#include "common/check.hh" #include "common/geometry/translate_types.hh" namespace robot::geometry { @@ -24,11 +28,11 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.); } -Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector &kpts0, - const std::vector &kpts1, - const std::vector &matches, - const cv::Mat &K) { - assert(kpts1.size() != 0 && kpts0.size() != 0); +std::optional estimate_cam0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, + const cv::Mat &K) { + ROBOT_CHECK(matches.size() >= 5 && kpts1.size() >= 5 && kpts0.size() >= 5); Eigen::Isometry3d result; std::vector pts1; std::vector pts2; @@ -36,12 +40,28 @@ Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector &kpts0 pts1.push_back(kpts0[match.queryIdx].pt); pts2.push_back(kpts1[match.trainIdx].pt); } - cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0); - cv::Mat R_c1_c0, t_c1_c0; - cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0); - result.linear() = cv_to_eigen_mat(R_c1_c0); - result.translation() = cv_to_eigen_mat(t_c1_c0); - result = result.inverse(); - return result; + ROBOT_CHECK(pts1.size() == pts2.size() && pts1.size() >= 5); + // std::cout << "heartbeat 3" << std::endl; + try { + cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0); + cv::Mat R_c1_c0, t_c1_c0; + // std::cout << "heartbest 4" << std::endl; + ROBOT_CHECK(!E.empty(), "Essential matrix is empty."); + // TOOD: handle multiple returned candidate E matrices better (they are stacked on top of + // each other in E) + if (E.rows > 3) { + E = E.rowRange(0, 3); + } + // std::cout << E << std::endl; + cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0); + result.linear() = cv_to_eigen_mat(R_c1_c0); + result.translation() = cv_to_eigen_mat(t_c1_c0); + result = result.inverse(); + return result; + } catch (const std::exception e) { + std::cerr << "Failed to estimate pose up to scale cam0_from_cam1.\n" + << e.what() << std::endl; + return std::nullopt; + } } } // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh index 9164e4129..8048bac87 100644 --- a/common/geometry/camera.hh +++ b/common/geometry/camera.hh @@ -1,4 +1,6 @@ #pragma once +#include + #include "Eigen/Core" #include "Eigen/Geometry" #include "gtsam/geometry/Cal3_S2.h" @@ -21,7 +23,8 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog, const double depth); -Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector &kpts0, - const std::vector &kpts1, - const std::vector &matches, const cv::Mat &K); +std::optional estimate_cam0_from_cam1(const std::vector &kpts0, + const std::vector &kpts1, + const std::vector &matches, + const cv::Mat &K); } // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc index 19f5389cd..4d8b810fa 100644 --- a/common/geometry/camera_test.cc +++ b/common/geometry/camera_test.cc @@ -1,5 +1,7 @@ #include "common/geometry/camera.hh" +#include + #include "common/geometry/translate_types.hh" #include "gtest/gtest.h" #include "visualization/opencv/opencv_viz.hh" @@ -111,20 +113,23 @@ TEST(CameraTest, test_estimate_pose) { Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2); if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) && CameraTestHelper::pixel_in_range(pxl_c1_pcube, img_width, img_height)) { - kpts0.push_back(cv::KeyPoint(pxl_c0_pcube[0], pxl_c0_pcube[1], 3)); - kpts1.push_back(cv::KeyPoint(pxl_c1_pcube[0], pxl_c1_pcube[1], 3)); - matches.emplace_back(cv::DMatch(kpts0.size() - 1, kpts1.size() - 1, 0)); + kpts0.emplace_back(pxl_c0_pcube[0], pxl_c0_pcube[1], 3); + kpts1.emplace_back(pxl_c1_pcube[0], pxl_c1_pcube[1], 3); + matches.emplace_back(kpts0.size() - 1, kpts1.size() - 1, 0); } } - Eigen::Isometry3d cam0_from_cam1_estimate = estimate_cam0_from_cam1(kpts0, kpts1, matches, K); - Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1; - cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm(); - EXPECT_TRUE( - cam0_from_cam1_estimate.translation().isApprox(cam0_from_cam1.translation(), 0.000001)); - EXPECT_TRUE(cam0_from_cam1_estimate.linear().isApprox(cam0_from_cam1.linear(), 0.001)); - - poses.emplace_back(world_from_cam0 * cam0_from_cam1_estimate); - viz_scene(poses, p_W_cube); + std::optional cam0_from_cam1_estimate = + estimate_cam0_from_cam1(kpts0, kpts1, matches, K); + if (cam0_from_cam1_estimate) { + Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1; + cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm(); + EXPECT_TRUE(cam0_from_cam1_estimate->translation().isApprox(cam0_from_cam1.translation(), + 0.000001)); + EXPECT_TRUE(cam0_from_cam1_estimate->linear().isApprox(cam0_from_cam1.linear(), 0.001)); + + poses.emplace_back(world_from_cam0 * *cam0_from_cam1_estimate); + viz_scene(poses, p_W_cube); + } } } // namespace robot::geometry \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 01d7e9a83..5d3fbf77c 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -153,3 +153,219 @@ cc_test( "//common:check" ] ) + +cc_library( + name = "frontend_definitions", + hdrs = ["frontend_definitions.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + ":structure_from_motion_types" + ] +) + +cc_library( + name = "structure_from_motion_types", + hdrs = ["structure_from_motion_types.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv" + ] +) + +cc_library( + name = "frame", + hdrs = ["frame.hh"], + srcs = ["frame.cc"], + visibility = ["//visibility:public"], + deps = [ + "@opencv//:opencv", + ":frontend_definitions", + "@gtsam//:gtsam" + ] +) + +cc_test( + name = "sfm_mvp", + srcs = ["sfm_mvp.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-variable", + ], + deps = [ + ":frontend", + "@com_google_googletest//:gtest_main", + "@gtsam//:gtsam", + "@opencv//:opencv", + "@eigen", + ":spatial_test_scene_cube", + ":symphony_lake_parser", + ":frame", + ":structure_from_motion_types", + "//visualization/opencv:opencv_viz" + ] +) + + +cc_library( + name = "feature_set", + hdrs = ["feature_set.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv" + ] +) + +cc_library( + name = "feature_manager", + hdrs = ["feature_manager.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv", + ":feature_set", + ] +) + +cc_test( + name = "feature_manager_test", + srcs = ["feature_manager_test.cc"], + deps = [ + ":feature_manager", + "@com_google_googletest//:gtest_main", + "@gtsam//:gtsam", + "@opencv//:opencv", + "@eigen", + ":spatial_test_scene_cube" + ] +) + + +cc_library( + name = "spatial_test_scene", + hdrs = ["spatial_test_scene.hh"], + visibility = ["//visibility:public"], + srcs = ["spatial_test_scene.cc"], + deps = [ + "@gtsam//:gtsam", + "@opencv//:opencv", + "@eigen" + ] +) + +cc_library( + name = "spatial_test_scene_cube", + hdrs = ["spatial_test_scene_cube.hh"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + ":spatial_test_scene" + ] +) + +cc_binary( + name = "spatial_test_scene_cube_example", + srcs = ["spatial_test_scene_cube_example.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + ":spatial_test_scene_cube", + "//visualization/opencv:opencv_viz", + "@eigen" + ] +) + +cc_library( + name = "frontend", + hdrs = ["frontend.hh"], + visibility = ["//visibility:public"], + srcs = ["frontend.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "@opencv//:opencv", + ":frame", + ":frontend_definitions", + ":structure_from_motion_types" + ] +) + + +cc_test( + name = "frontend_test", + srcs = ["frontend_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + ":frontend" + ] +) + +cc_library( + name = "backend", + hdrs = ["backend.hh"], + visibility = ["//visibility:public"], + srcs = ["backend.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "@opencv//:opencv", + "@gtsam//:gtsam", + "//common/geometry:camera", + "@boost//:smart_ptr", + ":feature_manager" + ] +) + + +cc_test( + name = "backend_test", + srcs = ["backend_test.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + "@com_google_googletest//:gtest_main", + "@opencv//:opencv", + ":backend", + ":feature_manager", + ":spatial_test_scene_cube", + "//visualization/opencv:opencv_viz" + ] +) + +cc_binary( + name = "get_colmap_groundtruth", + srcs = ["get_colmap_groundtruth.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + ":symphony_lake_parser" + ] +) + +cc_binary( + name = "incremental_sfm_mvp", + srcs = ["incremental_sfm_mvp.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-function", + ], + deps = [ + ":four_seasons_parser", + "@eigen", + "@opencv", + "@gtsam", + "//visualization/opencv:opencv_viz", + ":spatial_test_scene_cube", + ":frame", + ":structure_from_motion_types", + ":frontend", + "@boost//:smart_ptr", + "@cxxopts", + "//common/geometry:camera" + ] +) diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 134c4914a..194122420 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -4,30 +4,27 @@ namespace robot::experimental::learn_descriptors { -Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) { - extractor_type_ = frontend_algorithm; - matcher_type_ = frontend_matcher; - - switch (extractor_type_) { - case ExtractorType::SIFT: +Frontend::Frontend(FrontendParams params_) : params_(params_) { + switch (params_.extractor_type) { + case FrontendParams::ExtractorType::SIFT: feature_extractor_ = cv::SIFT::create(); break; - case ExtractorType::ORB: + case FrontendParams::ExtractorType::ORB: feature_extractor_ = cv::ORB::create(); break; default: // Error handling needed? break; } - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: + switch (params_.matcher_type) { + case FrontendParams::MatcherType::BRUTE_FORCE: descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); break; - case MatcherType::KNN: + case FrontendParams::MatcherType::KNN: descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2); break; - case MatcherType::FLANN: - if (frontend_algorithm == ExtractorType::ORB) { + case FrontendParams::MatcherType::FLANN: + if (params_.extractor_type == FrontendParams::ExtractorType::ORB) { throw std::invalid_argument("FLANN can not be used with ORB."); } descriptor_matcher_ = cv::FlannBasedMatcher::create(); @@ -38,10 +35,101 @@ Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matche } } +void Frontend::populate_frames() { + for (const cv::Mat &img : images_) { + } +} + +void Frontend::match_frames_and_build_tracks() { + if (params_.exhaustive) { + for (size_t i = 0; i < indices.size() - 1; i += 4) { + // std::cout << "i: " << i << std::endl; + for (size_t j = i + 1; j < indices.size(); j++) { + // std::cout << "j: " << j << std::endl; + std::vector matches = frontend.compute_matches( + frames[i].get_descriptors(), frames[j].get_descriptors()); + // DIAL TO MESS WITH + frontend.enforce_bijective_buffer_matches(matches); + + if (matches.size() < 5) { + continue; + } + + std::vector cv_kpts_1; + std::vector cv_kpts_2; + for (const KeypointCV &kpt : frames[i].get_keypoints()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_1.push_back(cv_kpt); + } + for (const KeypointCV &kpt : frames[j].get_keypoints()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_2.push_back(cv_kpt); + } + Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix()); + // std::cout << "heartbeat" << std::endl; + std::optional scale_cam0_from_cam1 = + robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); + if (!scale_cam0_from_cam1) { + continue; + } + world_from_camj.linear() = + (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) * + *scale_cam0_from_cam1) + .linear(); + // std::cout << "heartbeat 2" << std::endl; + id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); + lmk_id++; + } + } + } + } + std::cout << "done processing matches" << std::endl; + } else { // successive only + for (size_t i = 0; i < indices.size() - 1; i++) { + std::vector matches = frontend.compute_matches( + frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.enforce_bijective_buffer_matches(matches); + for (const cv::DMatch match : matches) { + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key) != lmk_id_map.end()) { + auto id = lmk_id_map.at(key); + feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); + feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + } else { + FeatureTrack feature_track(i, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); + lmk_id++; + } + } + } + } +} + std::pair, cv::Mat> Frontend::extract_features(const cv::Mat &img) const { std::vector keypoints; cv::Mat descriptors; - switch (extractor_type_) { + switch (params_.extractor_type) { default: // the opencv extractors have the same function signature feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors); break; @@ -49,18 +137,18 @@ std::pair, cv::Mat> Frontend::extract_features(const c return std::pair, cv::Mat>(keypoints, descriptors); } -std::vector Frontend::get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const { +std::vector Frontend::compute_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const { std::vector matches; - switch (matcher_type_) { - case MatcherType::BRUTE_FORCE: - get_brute_matches(descriptors1, descriptors2, matches); + switch (params_.matcher_type) { + case FrontendParams::MatcherType::BRUTE_FORCE: + compute_brute_matches(descriptors1, descriptors2, matches); break; - case MatcherType::KNN: - get_KNN_matches(descriptors1, descriptors2, matches); + case FrontendParams::MatcherType::KNN: + compute_KNN_matches(descriptors1, descriptors2, matches); break; - case MatcherType::FLANN: - get_FLANN_matches(descriptors1, descriptors2, matches); + case FrontendParams::MatcherType::FLANN: + compute_FLANN_matches(descriptors1, descriptors2, matches); break; default: break; @@ -152,9 +240,9 @@ void Frontend::enforce_bijective_buffer_matches(std::vector &matches } } -bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::BRUTE_FORCE) { +bool Frontend::compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (params_.matcher_type != FrontendParams::MatcherType::BRUTE_FORCE) { return false; } matches_out.clear(); @@ -162,9 +250,9 @@ bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &des return true; } -bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::KNN) { +bool Frontend::compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (params_.matcher_type != FrontendParams::MatcherType::KNN) { return false; } std::vector> knn_matches; @@ -180,9 +268,9 @@ bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descr return true; } -bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const { - if (matcher_type_ != MatcherType::FLANN) { +bool Frontend::compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const { + if (params_.matcher_type != FrontendParams::MatcherType::FLANN) { return false; } std::vector> knn_matches; diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index 12332e4fb..85b112c66 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -3,24 +3,43 @@ #include #include +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" #include "opencv2/opencv.hpp" namespace robot::experimental::learn_descriptors { -class Frontend { - public: +struct FrontendParams { enum class ExtractorType { SIFT, ORB }; enum class MatcherType { BRUTE_FORCE, KNN, FLANN }; - - Frontend(){}; - Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher); + ExtractorType extractor_type = ExtractorType::SIFT; + MatcherType matcher_type = MatcherType::KNN; + bool exhaustive = true; + bool incremental = false; // a.k.a. images are successive +}; +class Frontend { + public: + static constexpr char symbol_pose_char = 'x'; + static constexpr char symbol_lmk_char = 'l'; + explicit Frontend(FrontendParams params); ~Frontend(){}; - const ExtractorType &get_extractor_type() const { return extractor_type_; }; - const MatcherType &get_matcher_type() const { return matcher_type_; }; + void populate_frames(); + void match_frames_and_build_tracks(); + + void add_image(const cv::Mat &img) { images_.push_back(img); }; + void add_images(const std::vector &imgs) { + for (const cv::Mat &img : imgs) { + images_.push_back(img); + } + }; + + const FrontendParams::ExtractorType &extractor_type() const { return params_.extractor_type; }; + const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; }; std::pair, cv::Mat> extract_features(const cv::Mat &img) const; - std::vector get_matches(const cv::Mat &descriptors1, - const cv::Mat &descriptors2) const; + std::vector compute_matches(const cv::Mat &descriptors1, + const cv::Mat &descriptors2) const; static void threshold_matches(std::vector &matches, float dist_threshhold); static void enforce_bijective_matches(std::vector &matches); static void enforce_bijective_buffer_matches(std::vector &matches); @@ -36,16 +55,20 @@ class Frontend { } private: - bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, - std::vector &matches_out) const; - ExtractorType extractor_type_; - MatcherType matcher_type_; + bool compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + bool compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2, + std::vector &matches_out) const; + FrontendParams params_; cv::Ptr feature_extractor_; cv::Ptr descriptor_matcher_; + + std::vector images_; + FeatureTracks feature_tracks_; + FrameLandmarkIdMap lmk_id_map_; + std::vector frames_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh index fac8b67b3..2965d6c97 100644 --- a/experimental/learn_descriptors/frontend_definitions.hh +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -38,6 +38,4 @@ class FeatureTrack { using FeatureTracks = std::unordered_map; using FrameLandmarkIdMap = std::unordered_map, LandmarkId>; -using LandmarkFrameIdMap = std::unordered_map>; - } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc index 25e60a690..28e7dc0c8 100644 --- a/experimental/learn_descriptors/frontend_test.cc +++ b/experimental/learn_descriptors/frontend_test.cc @@ -77,8 +77,8 @@ TEST(FrontendTest, pipeline_sweep) { } keypoints_descriptors_pair_1 = frontend.extract_features(image_1); keypoints_descriptors_pair_2 = frontend.extract_features(image_2); - matches = frontend.get_matches(keypoints_descriptors_pair_1.second, - keypoints_descriptors_pair_2.second); + matches = frontend.compute_matches(keypoints_descriptors_pair_1.second, + keypoints_descriptors_pair_2.second); frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first, img_keypoints_out_1); frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first, diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index 214cd3105..ff7df085c 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -277,7 +278,18 @@ int main(int argc, const char **argv) { std::cout << "incremental_sfm_mvp!" << std::endl; // const std::vector indices{581, 609, // 633}; // indices with all data fields on neighborhood_5_train - const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with + // const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with + // get indices with fully populated img_pt containing full gps data and reference + const std::vector indices = [&parser]() -> std::vector { + std::vector tmp; + for (size_t i = 0; i < parser.num_images(); i++) { + const ImagePoint img_pt = parser.get_image_point(i); + if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs && + img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude) + tmp.push_back(i); + } + return tmp; + }(); constexpr bool visualize = false; // all data fields on neighborhood_5_train const std::vector indices = []() { // std::vector tmp; @@ -300,7 +312,9 @@ int main(int argc, const char **argv) { // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, // gtsam::Pose3(T_world_camera0.matrix())); - Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, + true, false}; + Frontend frontend(params); gtsam::Values initial_estimate_; gtsam::NonlinearFactorGraph graph_; @@ -349,7 +363,7 @@ int main(int argc, const char **argv) { world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * img_pt.AS_w_from_gnss_cam->matrix()) .matrix()); - std::cout << "ok" << std::endl; + // std::cout << "ok" << std::endl; // world_from_cam.translation() = // ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * // p_gps_in_ECEF_hom) @@ -417,15 +431,19 @@ int main(int argc, const char **argv) { LandmarkId lmk_id = 0; constexpr bool exhaustive = true; if (exhaustive) { - for (size_t i = 0; i < indices.size() - 1; i++) { - std::cout << "i: " << i << std::endl; + for (size_t i = 0; i < indices.size() - 1; i += 4) { + // std::cout << "i: " << i << std::endl; for (size_t j = i + 1; j < indices.size(); j++) { - std::cout << "j: " << j << std::endl; - std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors()); + // std::cout << "j: " << j << std::endl; + std::vector matches = frontend.compute_matches( + frames[i].get_descriptors(), frames[j].get_descriptors()); // DIAL TO MESS WITH frontend.enforce_bijective_buffer_matches(matches); + if (matches.size() < 5) { + continue; + } + std::vector cv_kpts_1; std::vector cv_kpts_2; for (const KeypointCV &kpt : frames[i].get_keypoints()) { @@ -439,10 +457,17 @@ int main(int argc, const char **argv) { cv_kpts_2.push_back(cv_kpt); } Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix()); + // std::cout << "heartbeat" << std::endl; + std::optional scale_cam0_from_cam1 = + robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); + if (!scale_cam0_from_cam1) { + continue; + } world_from_camj.linear() = (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) * - robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat)) + *scale_cam0_from_cam1) .linear(); + // std::cout << "heartbeat 2" << std::endl; id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; @@ -466,8 +491,8 @@ int main(int argc, const char **argv) { std::cout << "done processing matches" << std::endl; } else { // successive only for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + std::vector matches = frontend.compute_matches( + frames[i].get_descriptors(), frames[i + 1].get_descriptors()); frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; @@ -494,12 +519,12 @@ int main(int argc, const char **argv) { std::to_string(id)); } - std::cout << "pre heartbeat" << std::endl; + // std::cout << "pre heartbeat" << std::endl; if (visualize) robot::geometry::viz_scene(viz_world_from_cam_init, std::vector(), cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); - std::cout << "heartbeat" << std::endl; + // std::cout << "heartbeat" << std::endl; // TRIANGULATE all of the points (for initial guess) std::unordered_map lmk_triangulated_map; // points are kpt_in_world @@ -636,8 +661,8 @@ int main(int argc, const char **argv) { local_estimate_.insert_or_assign(symbols_poses[1], id_to_initial_world_from_cam.at(i + 1)); - std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + std::vector matches = frontend.compute_matches( + frames[i].get_descriptors(), frames[i + 1].get_descriptors()); // DIAL TO MESS WITH frontend.enforce_bijective_matches(matches); std::vector world_from_cams{id_to_initial_world_from_cam.at(i), @@ -743,12 +768,6 @@ int main(int argc, const char **argv) { const gtsam::Values result = detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0); - if (visualize) { - std::cout << "about to visualize result" << std::endl; - result.print(); - detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); - } - // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference double sum_traj_error = 0; double sum_rot_error = 0; @@ -763,4 +782,9 @@ int main(int argc, const char **argv) { double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; + + std::cout << "about to visualize result" << std::endl; + result.print(); + detail_sfm::graph_values(result, "Result", symbols_pose, std::vector()); + // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); } \ No newline at end of file diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index 9d41270d7..b4f98a01c 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -259,7 +259,7 @@ TEST(SFMMvp, sfm_building_manual_global) { LandmarkId lmk_id = 0; for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); // frontend.enforce_bijective_matches(matches); frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { @@ -453,7 +453,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { LandmarkId lmk_id = 0; for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; @@ -582,7 +582,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1)); std::vector matches = - frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); frontend.enforce_bijective_matches(matches); std::vector feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)}; diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index df84bb7cf..9ed6f6e5f 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -68,8 +68,8 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo std::cout << "current index " << idx_img_current << std::endl; if (idx_img_current > 0) { std::vector matches = - get_matches(img_keypoints_and_descriptors_.back().second, - keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); + compute_matches(img_keypoints_and_descriptors_.back().second, + keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1); const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current); @@ -125,10 +125,10 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); } -std::vector StructureFromMotion::get_matches( +std::vector StructureFromMotion::compute_matches( const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, std::optional post_process_func) { - std::vector matches = frontend_.get_matches(descriptors_1, descriptors_2); + std::vector matches = frontend_.compute_matches(descriptors_1, descriptors_2); if (post_process_func) { (*post_process_func)(matches); } diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index a4e3850d2..90b6bcfbd 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -40,7 +40,7 @@ class StructureFromMotion { std::optional inter_debug_func = std::nullopt); const gtsam::Values &get_structure_result() { return backend_.get_result(); }; using match_function = std::function &)>; - std::vector get_matches( + std::vector compute_matches( const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, std::optional post_process_func = std::nullopt); void graph_values(const gtsam::Values &values, const std::string &window_name = "graph values"); @@ -49,7 +49,7 @@ class StructureFromMotion { Backend get_backend() { return backend_; } size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); }; size_t get_landmark_count() { return landmark_count_; }; - const std::vector> get_matches() { return matches_; }; + const std::vector> compute_matches() { return matches_; }; private: std::shared_ptr feature_manager_; From 99c99f869ec5c331c0c84f41924867510b75a888 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 10 Jul 2025 17:31:18 -0400 Subject: [PATCH 30/45] WIP --- common/gps/BUILD | 23 + common/gps/frame_translation.cc | 32 + common/gps/frame_translation.hh | 8 + common/gps/frame_translation_test.cc | 29 + common/gps/geographiclib_test.cc | 4 +- common/gps/nmea_test.cc | 4 +- experimental/learn_descriptors/BUILD | 627 ++++++++++-------- .../learn_descriptors/camera_calibration.hh | 19 + .../learn_descriptors/four_seasons_parser.cc | 323 +-------- .../learn_descriptors/four_seasons_parser.hh | 128 +--- .../four_seasons_parser_detail.cc | 223 +++++++ .../four_seasons_parser_detail.hh | 86 +++ .../four_seasons_parser_example.cc | 4 +- .../four_seasons_parser_example_viz.cc | 7 +- .../four_seasons_parser_test.cc | 104 ++- .../four_seasons_transforms.cc | 67 ++ .../four_seasons_transforms.hh | 29 + experimental/learn_descriptors/frame.hh | 15 +- experimental/learn_descriptors/frontend.cc | 328 +++++++-- experimental/learn_descriptors/frontend.hh | 18 +- .../learn_descriptors/frontend_definitions.hh | 2 +- experimental/learn_descriptors/gps_data.cc | 0 experimental/learn_descriptors/gps_data.hh | 65 ++ experimental/learn_descriptors/image_point.hh | 129 +--- .../image_point_four_seasons.cc | 49 ++ .../image_point_four_seasons.hh | 90 +++ .../learn_descriptors/incremental_sfm_mvp.cc | 10 +- .../learn_descriptors/refactor_sfm_mvp.cc | 333 ++++++++++ experimental/learn_descriptors/sfm_mvp.cc | 2 +- .../spatial_test_scene_cube_example.cc | 3 +- .../structure_from_motion_types.hh | 10 +- 31 files changed, 1851 insertions(+), 920 deletions(-) create mode 100644 common/gps/frame_translation.cc create mode 100644 common/gps/frame_translation.hh create mode 100644 common/gps/frame_translation_test.cc create mode 100644 experimental/learn_descriptors/camera_calibration.hh create mode 100644 experimental/learn_descriptors/four_seasons_parser_detail.cc create mode 100644 experimental/learn_descriptors/four_seasons_parser_detail.hh create mode 100644 experimental/learn_descriptors/four_seasons_transforms.cc create mode 100644 experimental/learn_descriptors/four_seasons_transforms.hh create mode 100644 experimental/learn_descriptors/gps_data.cc create mode 100644 experimental/learn_descriptors/gps_data.hh create mode 100644 experimental/learn_descriptors/image_point_four_seasons.cc create mode 100644 experimental/learn_descriptors/image_point_four_seasons.hh create mode 100644 experimental/learn_descriptors/refactor_sfm_mvp.cc diff --git a/common/gps/BUILD b/common/gps/BUILD index ab02194e9..934bc7bec 100644 --- a/common/gps/BUILD +++ b/common/gps/BUILD @@ -16,4 +16,27 @@ cc_test( "@com_google_googletest//:gtest_main", "@geographiclib//:geographiclib", ] +) + +cc_library( + name = "frame_translation", + srcs = ["frame_translation.cc"], + hdrs = ["frame_translation.hh"], + visibility = ["//visibility:public"], + deps = [ + "@com_google_googletest//:gtest_main", + "@geographiclib//:geographiclib", + "@eigen" + ] +) + +cc_test( + name = "frame_translation_test", + srcs = ["frame_translation_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + ":frame_translation", + "@eigen", + "//common:check" + ] ) \ No newline at end of file diff --git a/common/gps/frame_translation.cc b/common/gps/frame_translation.cc new file mode 100644 index 000000000..28c0875ea --- /dev/null +++ b/common/gps/frame_translation.cc @@ -0,0 +1,32 @@ +#include "common/gps/frame_translation.hh" + +#include "GeographicLib/Constants.hpp" +#include "GeographicLib/Geocentric.hpp" +#include "GeographicLib/Geodesic.hpp" +#include "GeographicLib/LocalCartesian.hpp" + +namespace robot::gps { +Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF) { + static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); + + double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z(); + + double lat_deg, lon_deg, alt_m; + earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m); + + return Eigen::Vector3d(lat_deg, lon_deg, alt_m); +} + +Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate) { + double lat_deg = gcs_coordinate.x(); + double lon_deg = gcs_coordinate.y(); + double alt_m = gcs_coordinate.z(); + + double x, y, z; + + static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); + earth.Forward(lat_deg, lon_deg, alt_m, x, y, z); + + return Eigen::Vector3d(x, y, z); +} +} // namespace robot::gps \ No newline at end of file diff --git a/common/gps/frame_translation.hh b/common/gps/frame_translation.hh new file mode 100644 index 000000000..60d03c8e9 --- /dev/null +++ b/common/gps/frame_translation.hh @@ -0,0 +1,8 @@ +#pragma once + +#include "Eigen/Core" + +namespace robot::gps { +Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF); +Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate); +} // namespace robot::gps diff --git a/common/gps/frame_translation_test.cc b/common/gps/frame_translation_test.cc new file mode 100644 index 000000000..7e6afa47c --- /dev/null +++ b/common/gps/frame_translation_test.cc @@ -0,0 +1,29 @@ +#include "common/gps/frame_translation.hh" + +#include "Eigen/Core" +#include "common/check.hh" +#include "gtest/gtest.h" + +namespace robot::gps { +TEST(frame_translation_test, lla_from_ecef) { + const Eigen::Vector3d lla_easthampton_ma(42.2608, -72.6634, 51.82); // + const Eigen::Vector3d ecef_easthampton_ma = ecef_from_lla(lla_easthampton_ma); + + const Eigen::Vector3d ecef_expected_easthampton_ma(1408753.87, -4512832.60, 4267122.34); + + constexpr double tol = 1e-9; + ROBOT_CHECK(ecef_easthampton_ma.isApprox(ecef_expected_easthampton_ma, tol), + ecef_easthampton_ma, ecef_expected_easthampton_ma); +} + +TEST(frame_translation_test, ecef_from_lla) { + const Eigen::Vector3d ecef_palace_versailles(4205987.87, 155694.68, 4776399.15); + const Eigen::Vector3d lla_palace_versailles = lla_from_ecef(ecef_palace_versailles); + + const Eigen::Vector3d expected_lla_palace_versailles(48.8049440, 2.1199720, 131.985); + + constexpr double tol = 1e-4; + ROBOT_CHECK(lla_palace_versailles.isApprox(expected_lla_palace_versailles, tol), + lla_palace_versailles, expected_lla_palace_versailles); +} +} // namespace robot::gps \ No newline at end of file diff --git a/common/gps/geographiclib_test.cc b/common/gps/geographiclib_test.cc index 5ef8a1fb3..19bc98a2b 100644 --- a/common/gps/geographiclib_test.cc +++ b/common/gps/geographiclib_test.cc @@ -8,7 +8,7 @@ #include "GeographicLib/LocalCartesian.hpp" #include "gtest/gtest.h" -namespace robot::experimental::learn_descriptors { +namespace robot::gps { TEST(GeographiclibTest, airport_dist) { const GeographicLib::Geodesic& geod = GeographicLib::Geodesic::WGS84(); // Distance from JFK to LHR @@ -58,4 +58,4 @@ TEST(GeographiclibTest, local_cartestian) { EXPECT_NEAR(calais_height_meters, 264.915, 1e-3); } } -} // namespace robot::experimental::learn_descriptors \ No newline at end of file +} // namespace robot::gps \ No newline at end of file diff --git a/common/gps/nmea_test.cc b/common/gps/nmea_test.cc index c1be0aca4..25b38b09f 100644 --- a/common/gps/nmea_test.cc +++ b/common/gps/nmea_test.cc @@ -2,7 +2,7 @@ #include "nmea/message/gga.hpp" #include "nmea/sentence.hpp" -namespace robot::experimental::learn_descriptors { +namespace robot::gps { TEST(NmeaTest, parse) { // Read an NMEA string from your serial port std::string nmea_string = @@ -44,4 +44,4 @@ TEST(NmeaTest, generate) { EXPECT_EQ(nmea_sentence.get_field(1), "abc"); EXPECT_EQ(nmea_sentence.get_field(2), ""); } -} // namespace robot::experimental::learn_descriptors \ No newline at end of file +} // namespace robot::gps \ No newline at end of file diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 5d3fbf77c..facd64bdf 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -1,371 +1,440 @@ -package(features=["warning_compile_flags"]) +package(features = ["warning_compile_flags"]) cc_library( - name = "learn_descriptors", - hdrs = ["learn_descriptors.hh"], - visibility = ["//visibility:public"], - srcs = ["learn_descriptors.cc"], + name = "learn_descriptors", + srcs = ["learn_descriptors.cc"], + hdrs = ["learn_descriptors.hh"], + visibility = ["//visibility:public"], ) cc_test( - name = "learn_descriptors_test", - srcs = ["learn_descriptors_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":learn_descriptors", - ] + name = "learn_descriptors_test", + srcs = ["learn_descriptors_test.cc"], + deps = [ + ":learn_descriptors", + "@com_google_googletest//:gtest_main", + ], ) cc_library( - name = "structure_from_motion", - hdrs = ["structure_from_motion.hh"], - visibility = ["//visibility:public"], - srcs = ["structure_from_motion.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@opencv//:opencv", - "@gtsam//:gtsam", - "@eigen", - "//visualization/opencv:opencv_viz", - ":feature_manager", - ":frontend", - ":backend" - ] + name = "structure_from_motion", + srcs = ["structure_from_motion.cc"], + hdrs = ["structure_from_motion.hh"], + copts = [ + "-Wno-error=unused-parameter", + ], + visibility = ["//visibility:public"], + deps = [ + ":backend", + ":feature_manager", + ":frontend", + "//visualization/opencv:opencv_viz", + "@eigen", + "@gtsam", + "@opencv", + ], ) cc_test( - name = "structure_from_motion_test", - srcs = ["structure_from_motion_test.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@com_google_googletest//:gtest_main", - ":symphony_lake_parser", - ":structure_from_motion", - "//visualization/opencv:opencv_viz", - "//common/geometry:translate_types", - ":frame" - ] + name = "structure_from_motion_test", + srcs = ["structure_from_motion_test.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + ":frame", + ":structure_from_motion", + ":symphony_lake_parser", + "//common/geometry:translate_types", + "//visualization/opencv:opencv_viz", + "@com_google_googletest//:gtest_main", + ], ) cc_library( - name = "symphony_lake_parser", - hdrs = ["symphony_lake_parser.hh"], - copts = [ - "-Wno-unused-parameter", - ], - data = ["@symphony_lake_snippet//:files"], - visibility = ["//visibility:public"], - srcs = ["symphony_lake_parser.cc"], - deps = [ - "@symphony_lake_parser", - "//common:check", - "@eigen" - ] + name = "symphony_lake_parser", + srcs = ["symphony_lake_parser.cc"], + hdrs = ["symphony_lake_parser.hh"], + copts = [ + "-Wno-unused-parameter", + ], + data = ["@symphony_lake_snippet//:files"], + visibility = ["//visibility:public"], + deps = [ + "//common:check", + "@eigen", + "@symphony_lake_parser", + ], ) cc_test( - name = "symphony_lake_parser_test", - srcs = ["symphony_lake_parser_test.cc"], - copts = ["-Wno-unused-parameter"], - data = ["@symphony_lake_snippet//:files"], - deps = [ - "@com_google_googletest//:gtest_main", - ":symphony_lake_parser", - "//visualization/opencv:opencv_viz" - ] + name = "symphony_lake_parser_test", + srcs = ["symphony_lake_parser_test.cc"], + copts = ["-Wno-unused-parameter"], + data = ["@symphony_lake_snippet//:files"], + deps = [ + ":symphony_lake_parser", + "//visualization/opencv:opencv_viz", + "@com_google_googletest//:gtest_main", + ], ) cc_test( - name = "gtsam_test", - srcs = ["gtsam_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - "@gtsam//:gtsam", - ] + name = "gtsam_test", + srcs = ["gtsam_test.cc"], + deps = [ + "@com_google_googletest//:gtest_main", + "@gtsam", + ], ) cc_library( - name = "image_point", + name = "image_point", hdrs = ["image_point.hh"], + visibility = ["//visibility:public"], deps = [ - "//common/liegroups:se3", - "@opencv//:opencv", - "@eigen" + "//common/liegroups:se3", + "@eigen", + ], +) + +cc_library( + name = "four_seasons_transforms", + hdrs = ["four_seasons_transforms.hh"], + srcs = ["four_seasons_transforms.cc"], + visibility = ["//visibility:public"], + deps = [ + ":four_seasons_parser_detail", + "//common/liegroups:se3", + "//common:check", + "@eigen", + ], +) + +cc_library( + name = "image_point_four_seasons", + hdrs = ["image_point_four_seasons.hh"], + srcs = ["image_point_four_seasons.cc"], + visibility = ["//visibility:public"], + deps = [ + ":image_point", + ":gps_data", + ":four_seasons_transforms", + "//common/liegroups:se3", + "//common/gps:frame_translation", + "@eigen", + "@opencv", + "@geographiclib", + ] +) + +cc_library( + name = "gps_data", + hdrs = ["gps_data.hh"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", ] ) cc_library( - name = "four_seasons_parser", + name = "four_seasons_parser", hdrs = ["four_seasons_parser.hh"], srcs = ["four_seasons_parser.cc"], + visibility = ["//visibility:public"], deps = [ - "@eigen", - "@opencv//:opencv", - "//common/liegroups:se3", - "//common/time:robot_time", - "@com_google_absl//absl/strings:str_format", - ":image_point", - "@nmea", - "@geographiclib", - "//common:check" - ] + ":image_point_four_seasons", + ":four_seasons_transforms", + ":four_seasons_parser_detail", + "//common:check", + "//common/liegroups:se3", + "@eigen", + "@opencv", + ":camera_calibration" + ], +) + +cc_library( + name = "four_seasons_parser_detail", + hdrs = ["four_seasons_parser_detail.hh"], + srcs = ["four_seasons_parser_detail.cc"], + visibility = ["//visibility:public"], + deps = [ + ":camera_calibration", + ":gps_data", + "//common/liegroups:se3", + "@com_google_absl//absl/strings:str_format", + "@geographiclib", + "@nmea", + ], ) cc_binary( - name = "four_seasons_parser_example", - srcs = ["four_seasons_parser_example.cc"], - deps = [ - ":four_seasons_parser", - "@cxxopts//:cxxopts", - "//common:check", - "@geographiclib" - ] + name = "four_seasons_parser_example", + srcs = ["four_seasons_parser_example.cc"], + deps = [ + ":four_seasons_parser", + ":gps_data", + "//common:check", + "@cxxopts", + ], ) cc_binary( - name = "four_seasons_parser_example_viz", - srcs = ["four_seasons_parser_example_viz.cc"], - deps = [ - ":four_seasons_parser", - "@cxxopts//:cxxopts", - "//common:check", - "//visualization/opencv:opencv_viz", - "@eigen", - ] + name = "four_seasons_parser_example_viz", + srcs = ["four_seasons_parser_example_viz.cc"], + deps = [ + ":four_seasons_parser", + "//common:check", + "//common/gps:frame_translation", + "//visualization/opencv:opencv_viz", + "@cxxopts", + "@eigen", + ], ) cc_test( - name = "four_seasons_parser_test", - srcs = ["four_seasons_parser_test.cc"], - data = ["@four_seasons_snippet//:files"], - deps = [ - "@com_google_googletest//:gtest_main", - "//common/liegroups:se3", - "@com_google_absl//absl/strings:str_format", - "@eigen", - ":four_seasons_parser", - "@nmea", - "@geographiclib", - "//common:check" - ] + name = "four_seasons_parser_test", + srcs = ["four_seasons_parser_test.cc"], + data = ["@four_seasons_snippet//:files"], + deps = [ + ":four_seasons_parser", + ":four_seasons_parser_detail", + ":image_point_four_seasons", + "//common:check", + "//common/gps:frame_translation", + "//common/liegroups:se3", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + "@eigen", + "@geographiclib", + "@nmea", + ":camera_calibration" + ], ) cc_library( - name = "frontend_definitions", - hdrs = ["frontend_definitions.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - ":structure_from_motion_types" - ] + name = "frontend_definitions", + hdrs = ["frontend_definitions.hh"], + visibility = ["//visibility:public"], + deps = [ + ":structure_from_motion_types", + "@gtsam", + ], ) cc_library( - name = "structure_from_motion_types", - hdrs = ["structure_from_motion_types.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv" - ] + name = "structure_from_motion_types", + hdrs = ["structure_from_motion_types.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam", + "@opencv", + ], ) cc_library( - name = "frame", - hdrs = ["frame.hh"], - srcs = ["frame.cc"], - visibility = ["//visibility:public"], - deps = [ - "@opencv//:opencv", - ":frontend_definitions", - "@gtsam//:gtsam" - ] + name = "frame", + srcs = ["frame.cc"], + hdrs = ["frame.hh"], + visibility = ["//visibility:public"], + deps = [ + ":frontend_definitions", + "@gtsam", + "@opencv", + ], ) cc_test( - name = "sfm_mvp", - srcs = ["sfm_mvp.cc"], + name = "sfm_mvp", + srcs = ["sfm_mvp.cc"], copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-variable", - ], - deps = [ - ":frontend", - "@com_google_googletest//:gtest_main", - "@gtsam//:gtsam", - "@opencv//:opencv", - "@eigen", - ":spatial_test_scene_cube", - ":symphony_lake_parser", - ":frame", - ":structure_from_motion_types", - "//visualization/opencv:opencv_viz" - ] + "-Wno-error=unused-parameter", + "-Wno-error=unused-variable", + ], + deps = [ + ":frame", + ":frontend", + ":spatial_test_scene_cube", + ":structure_from_motion_types", + ":symphony_lake_parser", + "//visualization/opencv:opencv_viz", + "@com_google_googletest//:gtest_main", + "@eigen", + "@gtsam", + "@opencv", + ], ) - cc_library( - name = "feature_set", - hdrs = ["feature_set.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv" - ] + name = "feature_set", + hdrs = ["feature_set.hh"], + visibility = ["//visibility:public"], + deps = [ + "@gtsam", + "@opencv", + ], ) cc_library( - name = "feature_manager", - hdrs = ["feature_manager.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv", - ":feature_set", - ] + name = "feature_manager", + hdrs = ["feature_manager.hh"], + visibility = ["//visibility:public"], + deps = [ + ":feature_set", + "@gtsam", + "@opencv", + ], ) cc_test( - name = "feature_manager_test", - srcs = ["feature_manager_test.cc"], - deps = [ - ":feature_manager", - "@com_google_googletest//:gtest_main", - "@gtsam//:gtsam", - "@opencv//:opencv", - "@eigen", - ":spatial_test_scene_cube" - ] + name = "feature_manager_test", + srcs = ["feature_manager_test.cc"], + deps = [ + ":feature_manager", + ":spatial_test_scene_cube", + "@com_google_googletest//:gtest_main", + "@eigen", + "@gtsam", + "@opencv", + ], ) - cc_library( - name = "spatial_test_scene", - hdrs = ["spatial_test_scene.hh"], - visibility = ["//visibility:public"], - srcs = ["spatial_test_scene.cc"], - deps = [ - "@gtsam//:gtsam", - "@opencv//:opencv", - "@eigen" - ] + name = "spatial_test_scene", + srcs = ["spatial_test_scene.cc"], + hdrs = ["spatial_test_scene.hh"], + visibility = ["//visibility:public"], + deps = [ + "@eigen", + "@gtsam", + "@opencv", + ], ) cc_library( - name = "spatial_test_scene_cube", - hdrs = ["spatial_test_scene_cube.hh"], - visibility = ["//visibility:public"], - deps = [ - "@eigen", - ":spatial_test_scene" - ] + name = "spatial_test_scene_cube", + hdrs = ["spatial_test_scene_cube.hh"], + visibility = ["//visibility:public"], + deps = [ + ":spatial_test_scene", + "@eigen", + ], ) cc_binary( - name = "spatial_test_scene_cube_example", - srcs = ["spatial_test_scene_cube_example.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":spatial_test_scene_cube", - "//visualization/opencv:opencv_viz", - "@eigen" - ] + name = "spatial_test_scene_cube_example", + srcs = ["spatial_test_scene_cube_example.cc"], + deps = [ + ":spatial_test_scene_cube", + "//visualization/opencv:opencv_viz", + "@com_google_googletest//:gtest_main", + "@eigen", + ], ) cc_library( - name = "frontend", - hdrs = ["frontend.hh"], - visibility = ["//visibility:public"], - srcs = ["frontend.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@opencv//:opencv", - ":frame", - ":frontend_definitions", - ":structure_from_motion_types" - ] + name = "frontend", + srcs = ["frontend.cc"], + hdrs = ["frontend.hh"], + copts = [ + "-Wno-error=unused-parameter", + ], + visibility = ["//visibility:public"], + deps = [ + ":frame", + ":frontend_definitions", + ":image_point_four_seasons", + ":structure_from_motion_types", + "//common:check", + "@opencv", + "@gtsam", + "@eigen", + ], ) - cc_test( - name = "frontend_test", - srcs = ["frontend_test.cc"], - deps = [ - "@com_google_googletest//:gtest_main", - ":frontend" - ] + name = "frontend_test", + srcs = ["frontend_test.cc"], + deps = [ + ":frontend", + "@com_google_googletest//:gtest_main", + ], ) cc_library( - name = "backend", - hdrs = ["backend.hh"], - visibility = ["//visibility:public"], - srcs = ["backend.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@opencv//:opencv", - "@gtsam//:gtsam", - "//common/geometry:camera", - "@boost//:smart_ptr", - ":feature_manager" - ] + name = "backend", + srcs = ["backend.cc"], + hdrs = ["backend.hh"], + copts = [ + "-Wno-error=unused-parameter", + ], + visibility = ["//visibility:public"], + deps = [ + ":feature_manager", + "//common/geometry:camera", + "@boost//:smart_ptr", + "@gtsam", + "@opencv", + ], ) - cc_test( - name = "backend_test", - srcs = ["backend_test.cc"], + name = "backend_test", + srcs = ["backend_test.cc"], copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - "@com_google_googletest//:gtest_main", - "@opencv//:opencv", - ":backend", - ":feature_manager", - ":spatial_test_scene_cube", - "//visualization/opencv:opencv_viz" - ] + "-Wno-error=unused-parameter", + ], + deps = [ + ":backend", + ":feature_manager", + ":spatial_test_scene_cube", + "//visualization/opencv:opencv_viz", + "@com_google_googletest//:gtest_main", + "@opencv", + ], ) cc_binary( - name = "get_colmap_groundtruth", - srcs = ["get_colmap_groundtruth.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - ":symphony_lake_parser" - ] + name = "get_colmap_groundtruth", + srcs = ["get_colmap_groundtruth.cc"], + copts = [ + "-Wno-error=unused-parameter", + ], + deps = [ + ":symphony_lake_parser", + ], ) cc_binary( - name = "incremental_sfm_mvp", - srcs = ["incremental_sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-function", - ], - deps = [ - ":four_seasons_parser", - "@eigen", - "@opencv", - "@gtsam", - "//visualization/opencv:opencv_viz", - ":spatial_test_scene_cube", - ":frame", - ":structure_from_motion_types", - ":frontend", - "@boost//:smart_ptr", - "@cxxopts", - "//common/geometry:camera" - ] + name = "incremental_sfm_mvp", + srcs = ["incremental_sfm_mvp.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-function", + ], + deps = [ + ":four_seasons_parser", + ":frame", + ":frontend", + ":spatial_test_scene_cube", + ":structure_from_motion_types", + "//common/geometry:camera", + "//visualization/opencv:opencv_viz", + "@boost//:smart_ptr", + "@cxxopts", + "@eigen", + "@gtsam", + "@opencv", + ":camera_calibration" + ], ) + +cc_library( + name = "camera_calibration", + hdrs = ["camera_calibration.hh"], + visibility = ["//visibility:public"], + deps = [ + "@opencv" + ] +) \ No newline at end of file diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh new file mode 100644 index 000000000..cfbd6fb42 --- /dev/null +++ b/experimental/learn_descriptors/camera_calibration.hh @@ -0,0 +1,19 @@ +#pragma once + +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +struct CameraCalibrationFisheye { + double fx, fy, cx, cy, k1, k2, k3, k4; + + cv::Mat k_mat() { + cv::Mat K_mat = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, py, 0, 0, 1); + return K_mat; + } + + cv::Mat d_mat() { + cv::Mat D_mat = (cv::Mat_(4, 1) << k1, k2, k3, k4); + return D_mat; + } +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index fdf76afde..a295851f0 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -1,58 +1,59 @@ #include "experimental/learn_descriptors/four_seasons_parser.hh" -#include #include #include #include #include #include #include +#include #include #include -#include #include #include #include "Eigen/Core" #include "Eigen/Geometry" -#include "GeographicLib/Geocentric.hpp" -#include "GeographicLib/LocalCartesian.hpp" -#include "absl/strings/str_split.h" -#include "absl/strings/strip.h" -#include "common/check.hh" #include "common/liegroups/se3.hh" -#include "common/time/robot_time.hh" -#include "nmea/message/gga.hpp" -#include "nmea/message/rmc.hpp" -#include "nmea/sentence.hpp" +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" namespace robot::experimental::learn_descriptors { -using namespace detail; +using namespace detail::four_seasons_parser; FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, const std::filesystem::path& calibration_dir) : root_dir_(root_dir), img_dir_(root_dir / "distorted_images" / "cam0"), cal_(txt_parser_help::load_camera_calibration(calibration_dir)), - transforms_(root_dir / "Transformations.txt") { + shared_transforms_(std::make_shared( + root_dir / "Transformations.txt")) { const std::filesystem::path path_img = root_dir_ / "times.txt"; const std::filesystem::path path_gnss = root_dir_ / "GNSSPoses.txt"; const std::filesystem::path path_vio = root_dir_ / "result.txt"; const std::filesystem::path path_gps = root_dir_ / "septentrio.nmea"; const size_t min_time_sig_figs = txt_parser_help::min_sig_figs_result_time(path_vio); + std::cout << "heartbeat 0" << std::endl; txt_parser_help::TimeDataList img_time_list = txt_parser_help::create_img_time_data_list(path_img, min_time_sig_figs); + std::cout << "heartbeat 1" << std::endl; txt_parser_help::TimeDataMap gnss_poses_time_map = txt_parser_help::create_gnss_poses_time_data_map(path_gnss, min_time_sig_figs); + std::cout << "heartbeat 2" << std::endl; txt_parser_help::TimeDataMap vio_poses_time_map = txt_parser_help::create_vio_time_data_map(path_vio, min_time_sig_figs); + std::cout << "heartbeat 3" << std::endl; gps_parser_help::TimeGPSList gps_time_list = gps_parser_help::create_gps_time_data_list(path_gps); + std::cout << "heartbeat 4" << std::endl; size_t id = 0; for (const std::pair>& pair_time_data : img_time_list) { const size_t time_key = pair_time_data.first; - ImagePoint img_pt; + ImagePointFourSeasons img_pt; img_pt.id = id; + img_pt.K = cal_; + std::cout << "heartbeat 5" << std::endl; + img_pt.shared_static_transforms = shared_transforms_; img_pt.seq = std::stoull( pair_time_data.second[static_cast(txt_parser_help::ImgIdx::TIME_NS)]); if (gnss_poses_time_map.find(time_key) != gnss_poses_time_map.end()) { @@ -104,19 +105,19 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, // popoulate gps to nearest img time // TODO: could be linear time... but good enough for (const auto& [time_unix_ns, gps_data] : gps_time_list) { - auto it = std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns, - [](const ImagePoint& img_pt, const size_t query_unix_time) { - return img_pt.seq < query_unix_time; - }); + auto it = + std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns, + [](const ImagePointFourSeasons& img_pt, const size_t query_unix_time) { + return img_pt.seq < query_unix_time; + }); size_t insert_idx = std::distance(img_pt_vector_.begin(), it); if (it != img_pt_vector_.begin() && - detail::abs_diff(it->seq, time_unix_ns) > - detail::abs_diff(std::prev(it)->seq, time_unix_ns)) { + abs_diff(it->seq, time_unix_ns) > abs_diff(std::prev(it)->seq, time_unix_ns)) { insert_idx--; } // NOTE: in future, could perhaps use gps data that isn't associated with an img_pt in some // way. maybe to help with interpolation, estimate velocity - if (detail::abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) < + if (abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) < FourSeasonsParser::CAM_CAP_DELTA_NS) { img_pt_vector_[insert_idx].gps_gcs = gps_data; } @@ -126,284 +127,4 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, cv::Mat FourSeasonsParser::load_image(const size_t m) const { return get_image_point(m).load_image(img_dir_); } - -FourSeasonsParser::FourSeasonsTransforms::FourSeasonsTransforms( - const std::filesystem::path& path_transforms) { - std::ifstream file_transforms(path_transforms); - std::string line; - while (std::getline(file_transforms, line)) { - if (line.find("transform_S_AS") != std::string::npos) { - std::getline(file_transforms, line); - S_from_AS = get_transform_from_line(line); - } else if (line.find("TS_cam_imu") != std::string::npos) { - std::getline(file_transforms, line); - cam_from_imu = get_transform_from_line(line); - } else if (line.find("transform_w_gpsw") != std::string::npos) { - std::getline(file_transforms, line); - w_from_gpsw = get_transform_from_line(line); - } else if (line.find("transform_gps_imu") != std::string::npos) { - std::getline(file_transforms, line); - gps_from_imu = get_transform_from_line(line); - } else if (line.find("transform_e_gpsw") != std::string::npos) { - std::getline(file_transforms, line); - e_from_gpsw = get_transform_from_line(line); - } else if (line.find("GNSS scale") != std::string::npos) { - std::getline(file_transforms, line); - gnss_scale = std::stod(line); - } - } -} - -liegroups::SE3 FourSeasonsParser::FourSeasonsTransforms::get_transform_from_line( - const std::string& line) { - enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W }; - std::vector parsed_transform_line = txt_parser_help::parse_line_adv(line, ","); - if (parsed_transform_line.size() < 7) { - std::stringstream error_stream; - error_stream << "parsed_transform_line doesn't have sufficient entries for " - "transform! parsed_transform_line.size(): " - << parsed_transform_line.size() << std::endl; - throw std::runtime_error(error_stream.str()); - } - std::vector transform_nums; - for (const std::string& num : parsed_transform_line) { - transform_nums.push_back(static_cast(std::stod(num))); - } - Eigen::Vector3d translation(transform_nums[TransformEntry::T_X], - transform_nums[TransformEntry::T_Y], - transform_nums[TransformEntry::T_Z]); - Eigen::Quaterniond rotation( - transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X], - transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]); - return liegroups::SE3(rotation, translation); -} - -Eigen::Vector3d FourSeasonsParser::gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF) { - static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); - - double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z(); - - double lat_deg, lon_deg, alt_m; - earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m); - - return Eigen::Vector3d(lat_deg, lon_deg, alt_m); -} - -Eigen::Vector3d FourSeasonsParser::ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate) { - double lat_deg = gcs_coordinate.x(); - double lon_deg = gcs_coordinate.y(); - double alt_m = gcs_coordinate.z(); - - double x, y, z; - - static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84(); - earth.Forward(lat_deg, lon_deg, alt_m, x, y, z); - - return Eigen::Vector3d(x, y, z); -} - -namespace detail { -namespace txt_parser_help { - -std::vector parse_line_adv(const std::string& line, const std::string& delim) { - if (delim == " ") { - return std::vector( - absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace())); - } - return std::vector(absl::StrSplit(line, delim, absl::SkipWhitespace())); -} - -FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration( - const std::filesystem::path& calibration_dir) { - std::ifstream file_calibration(calibration_dir / "calib_0.txt"); - std::string line_calibration; - std::getline(file_calibration, line_calibration); - std::vector parsed_calib_line = parse_line_adv(line_calibration, " "); - return FourSeasonsParser::CameraCalibrationFisheye{ - std::stod(parsed_calib_line[static_cast(CalibIdx::FX)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::FY)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::CX)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::CY)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K1)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K2)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K3)]), - std::stod(parsed_calib_line[static_cast(CalibIdx::K4)])}; -} - -// minimum sig figs of time in seconds (of type double) for all entries of results.txt -size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) { - std::ifstream file_vio(path_vio); - std::string line; - int min_figs = INT_MAX; - while (std::getline(file_vio, line)) { - const std::vector parsed_line = parse_line_adv(line, " "); - const std::string str_time_seconds = parsed_line[static_cast(ResultIdx::TIME_SEC)]; - const int figs = - str_time_seconds.size() + - (str_time_seconds.find('.') != std::string::npos ? -1 : 0); // '.' is not a fig - min_figs = std::min(min_figs, figs); - } - return min_figs; -} - -const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, - const size_t time_sig_figs) { - TimeDataList time_map_img; - std::ifstream file_img(path_img); - std::string line; - while (std::getline(file_img, line)) { - std::vector parsed_line = parse_line_adv(line, " "); - size_t time_ns = std::stoull(parsed_line[static_cast(ImgIdx::TIME_NS)]); - time_map_img.push_back( - std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line)); - } - return time_map_img; -} - -const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, - const size_t time_sig_figs) { - TimeDataMap time_map_gnss_poses; - std::ifstream file_gnss_poses(path_gnss); - std::string line; - std::getline(file_gnss_poses, - line); // advance a line to get past the top comment in GNSSPoses.txt - while (std::getline(file_gnss_poses, line)) { - std::vector parsed_line = parse_line_adv(line, ","); - size_t time_ns = std::stoull(parsed_line[static_cast(GPSIdx::TIME_NS)]); - time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); - } - return time_map_gnss_poses; -} - -const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, - const size_t time_sig_figs) { - TimeDataMap time_map_vio; - std::ifstream file_vio(path_vio); - std::string line; - while (std::getline(file_vio, line)) { - std::vector parsed_line = parse_line_adv(line, " "); - double time_seconds = std::stod(parsed_line[static_cast(ResultIdx::TIME_SEC)]); - size_t time_ns = time_seconds * 1e9; - time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); - } - return time_map_vio; -} -} // namespace txt_parser_help - -namespace gps_parser_help { -size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) { - std::chrono::sys_days date = std::chrono::year_month_day{ - std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month}, - std::chrono::day{utc_date.day}}; - std::chrono::nanoseconds utc_time_day_ns = - std::chrono::nanoseconds{static_cast(utc_time_day_seconds * 1e9)}; - std::chrono::sys_time timestamp = date + utc_time_day_ns; - return timestamp.time_since_epoch().count(); -} -TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) { - TimeGPSList time_list_gps; - std::ifstream file_gps(path_gps); - std::string line; - std::optional nmea_sentence; - nmea::date date_last; - date_last.day = 255; - date_last.month = 255; - date_last.year = 255; - double time_of_day_last = -1.0; - while (std::getline(file_gps, line) && !line.empty()) { - try { - nmea_sentence = nmea::sentence(line); - } catch (const std::exception& e) { - std::cerr << "failed to parse line as nmea sentence: " << e.what() - << "\ncontinuing to next line\n"; - continue; - } - if (nmea_sentence->type() == "GGA") { - nmea::gga gga(*nmea_sentence); - if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) { - if (std::abs(gga.utc.get() - time_of_day_last) < - 1e-3) { // GGA messages for this dataset come second - ImagePoint::GPSData gps_data; - gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get()); - gps_data.latitude = gga.latitude.get(); - gps_data.longitude = gga.longitude.get(); - if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); - time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data)); - } - } - } else if (nmea_sentence->type() == "RMC") { - nmea::rmc rmc(*nmea_sentence); - if (rmc.utc.exists() && rmc.date.exists()) { - date_last = rmc.date.get(); - time_of_day_last = rmc.utc.get(); - } - } else if (nmea_sentence->type() == "GST") { - std::optional gst_data = parse_gpgst(nmea_sentence->nmea_string()); - if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) < - 1e-3) { // GST message for this dataset come third - size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns); - if (time_list_gps.back().first == unix_time_ns) { - time_list_gps.back().second.uncertainty.emplace( - gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m, - gst_data->error_orientation_deg, gst_data->rms_range_error_m); - } - } - } - } - return time_list_gps; -} - -std::vector split_nmea_sentence(const std::string& sentence) { - std::vector fields; - std::string field; - std::stringstream ss(sentence); - - while (std::getline(ss, field, ',')) { - // remove checksum from the last field if present - auto asterisk = field.find('*'); - if (asterisk != std::string::npos) field = field.substr(0, asterisk); - fields.push_back(field); - } - - return fields; -} -double time_of_day_seconds(const double utc_time_hhmmss) { - int hours = static_cast(utc_time_hhmmss / 10000); - int minutes = static_cast((utc_time_hhmmss - hours * 10000) / 100); - double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100; - return hours * 3600 + minutes * 60 + seconds; -} -std::optional parse_gpgst(const std::string& sentence) { - if (sentence.substr(0, 6) != "$GPGST") { - return std::nullopt; - } - - std::vector fields = split_nmea_sentence(sentence); - if (fields.size() != 9) { - return std::nullopt; - } - for (size_t i = 0; i < fields.size(); i++) { - if (fields[i].empty()) return std::nullopt; - } - - GSTData gst; - gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1])); - gst.rms_range_error_m = std::stod(fields[2]); - gst.error_semi_major_m = std::stod(fields[3]); - gst.error_semi_minor_m = std::stod(fields[4]); - gst.error_orientation_deg = std::stod(fields[5]); - gst.sigma_lat_m = std::stod(fields[6]); - gst.sigma_lon_m = std::stod(fields[7]); - gst.sigma_alt_m = std::stod(fields[8]); - - return gst; -} - -} // namespace gps_parser_help - -template -size_t abs_diff(const T& a, const T& b) { - return a > b ? a - b : b - a; -} -} // namespace detail } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh index 9709bc396..4458215c8 100644 --- a/experimental/learn_descriptors/four_seasons_parser.hh +++ b/experimental/learn_descriptors/four_seasons_parser.hh @@ -1,144 +1,56 @@ #pragma once -#include - #include #include +#include #include #include #include #include "common/liegroups/se3.hh" -#include "experimental/learn_descriptors/image_point.hh" -#include "nmea/message/gga.hpp" -#include "nmea/message/rmc.hpp" -#include "nmea/sentence.hpp" +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/four_seasons_transforms.hh" +#include "experimental/learn_descriptors/image_point_four_seasons.hh" namespace robot::experimental::learn_descriptors { -class FourSeasonsParser { +class FourSeasonsParser : std::enable_shared_from_this { public: static constexpr double CAM_HZ = 30.0; static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ; - struct CameraCalibrationFisheye { - double fx, fy, cx, cy, k1, k2, k3, k4; - }; + FourSeasonsParser(const std::filesystem::path& root_dir, const std::filesystem::path& calibration_dir); + std::shared_ptr get_shared() { return shared_from_this(); } cv::Mat load_image(const size_t idx) const; - const ImagePoint& get_image_point(const size_t idx) const { return img_pt_vector_[idx]; }; + const ImagePointFourSeasons& get_image_point(const size_t idx) const { + return img_pt_vector_[idx]; + }; size_t num_images() const { return img_pt_vector_.size(); }; - const CameraCalibrationFisheye& camera_calibration() const { return cal_; }; + const std::shared_ptr& camera_calibration() const { return cal_; }; const liegroups::SE3& S_from_AS() const { - return transforms_.S_from_AS; + return shared_transforms_->S_from_AS; }; // metric scale from arbitrary (internal slam) scale - const liegroups::SE3& cam_from_imu() const { return transforms_.cam_from_imu; }; + const liegroups::SE3& cam_from_imu() const { return shared_transforms_->cam_from_imu; }; const liegroups::SE3& w_from_gpsw() const { - return transforms_.w_from_gpsw; + return shared_transforms_->w_from_gpsw; }; // visual world from local gps (ENU) const liegroups::SE3& gps_from_imu() const { - return transforms_.gps_from_imu; + return shared_transforms_->gps_from_imu; }; // phsyical onboard gps from physical onboard imu const liegroups::SE3& e_from_gpsw() const { - return transforms_.e_from_gpsw; + return shared_transforms_->e_from_gpsw; }; // ECEF from local gps (ENU) double gnss_scale() const { - return transforms_.gnss_scale; + return shared_transforms_->gnss_scale; }; // scale from vio frame to gnss frame. WARNING: will require retooling if the scales per // keyframe (pose) are not all one value. See more here: // https://github.com/pmwenzel/4seasons-dataset - static Eigen::Vector3d gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF); - static Eigen::Vector3d ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate); - protected: - struct FourSeasonsTransforms { - liegroups::SE3 S_from_AS; - liegroups::SE3 cam_from_imu; - liegroups::SE3 w_from_gpsw; - liegroups::SE3 gps_from_imu; - liegroups::SE3 e_from_gpsw; - double gnss_scale; - - FourSeasonsTransforms(const std::filesystem::path& path_transforms); - - private: - static liegroups::SE3 get_transform_from_line(const std::string& line); - }; const std::filesystem::path root_dir_; const std::filesystem::path img_dir_; - const CameraCalibrationFisheye cal_; - const FourSeasonsTransforms transforms_; - ImagePointVector img_pt_vector_; -}; - -namespace detail { -template -std::size_t abs_diff(const T& a, const T& b); -namespace txt_parser_help { -using TimeDataMap = std::unordered_map>; -using TimeDataList = std::vector>>; -enum class GPSIdx { - TIME_NS = 0, - TRAN_X = 1, - TRAN_Y = 2, - TRAN_Z = 3, - QUAT_X = 4, - QUAT_Y = 5, - QUAT_Z = 6, - QUAT_W = 7, -}; -enum class ImgIdx { - TIME_NS = 0, - TIME_SEC = 1, - EXPOSURE_TIME = 2 // I'm not completely sure if this is correct - Nico -}; -enum class ResultIdx { - TIME_SEC = 0, - TRAN_X = 1, - TRAN_Y = 2, - TRAN_Z = 3, - QUAT_X = 4, - QUAT_Y = 5, - QUAT_Z = 6, - QUAT_W = 7, -}; -enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 }; -std::vector parse_line_adv(const std::string& line, const std::string& delim = " "); -FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration( - const std::filesystem::path& calibration_dir); -template -T round_to_sig_figs(T val, int n) { - if (val == 0) return 0; - double d = static_cast(val); - int exponent = static_cast(std::floor(std::log10(std::abs(d)))); - double multiplier = std::pow(10.0, n - exponent - 1); - return static_cast(std::round(d * multiplier) / multiplier); -} -size_t min_sig_figs_result_time(const std::filesystem::path& path_vio); -const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, - const size_t time_sig_figs); -const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, - const size_t time_sig_figs); -const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, - const size_t time_sig_figs); -} // namespace txt_parser_help -namespace gps_parser_help { -using TimeGPSList = std::vector>; -struct GSTData { - double utc_time_ns; - double rms_range_error_m; - double error_semi_major_m; - double error_semi_minor_m; - double error_orientation_deg; - double sigma_lat_m; - double sigma_lon_m; - double sigma_alt_m; + const std::shared_ptr cal_; + const std::shared_ptr shared_transforms_; + ImagePointFourSeasonsVector img_pt_vector_; }; -std::vector split_nmea_sentence(const std::string& nmea_sentence); -double time_of_day_seconds(const double utc_time_hhmmss); -std::optional parse_gpgst(const std::string& nmea_sentence); -size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds); -TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps); -} // namespace gps_parser_help -} // namespace detail } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc new file mode 100644 index 000000000..d9dc824a8 --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc @@ -0,0 +1,223 @@ +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "absl/strings/strip.h" +#include "nmea/message/gga.hpp" +#include "nmea/message/rmc.hpp" +#include "nmea/sentence.hpp" + +namespace robot::experimental::learn_descriptors::detail::four_seasons_parser { + +// template +// size_t abs_diff(const T& a, const T& b) { +// return a > b ? a - b : b - a; +// } + +namespace txt_parser_help { + +std::vector parse_line_adv(const std::string& line, const std::string& delim) { + if (delim == " ") { + return std::vector( + absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace())); + } + return std::vector(absl::StrSplit(line, delim, absl::SkipWhitespace())); +} + +std::shared_ptr load_camera_calibration( + const std::filesystem::path& calibration_dir) { + std::ifstream file_calibration(calibration_dir / "calib_0.txt"); + std::string line_calibration; + std::getline(file_calibration, line_calibration); + std::vector parsed_calib_line = parse_line_adv(line_calibration, " "); + return std::make_shared( + std::stod(parsed_calib_line[static_cast(CalibIdx::FX)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::FY)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::CX)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::CY)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K1)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K2)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K3)]), + std::stod(parsed_calib_line[static_cast(CalibIdx::K4)])); +} + +// minimum sig figs of time in seconds (of type double) for all entries of results.txt +size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) { + std::ifstream file_vio(path_vio); + std::string line; + int min_figs = INT_MAX; + while (std::getline(file_vio, line)) { + const std::vector parsed_line = parse_line_adv(line, " "); + const std::string str_time_seconds = parsed_line[static_cast(ResultIdx::TIME_SEC)]; + const int figs = + str_time_seconds.size() + + (str_time_seconds.find('.') != std::string::npos ? -1 : 0); // '.' is not a fig + min_figs = std::min(min_figs, figs); + } + return min_figs; +} + +const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, + const size_t time_sig_figs) { + TimeDataList time_map_img; + std::ifstream file_img(path_img); + std::string line; + while (std::getline(file_img, line)) { + std::vector parsed_line = parse_line_adv(line, " "); + size_t time_ns = std::stoull(parsed_line[static_cast(ImgIdx::TIME_NS)]); + time_map_img.push_back( + std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line)); + } + return time_map_img; +} + +const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, + const size_t time_sig_figs) { + TimeDataMap time_map_gnss_poses; + std::ifstream file_gnss_poses(path_gnss); + std::string line; + std::getline(file_gnss_poses, + line); // advance a line to get past the top comment in GNSSPoses.txt + while (std::getline(file_gnss_poses, line)) { + std::vector parsed_line = parse_line_adv(line, ","); + size_t time_ns = std::stoull(parsed_line[static_cast(GPSIdx::TIME_NS)]); + time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); + } + return time_map_gnss_poses; +} + +const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, + const size_t time_sig_figs) { + TimeDataMap time_map_vio; + std::ifstream file_vio(path_vio); + std::string line; + while (std::getline(file_vio, line)) { + std::vector parsed_line = parse_line_adv(line, " "); + double time_seconds = std::stod(parsed_line[static_cast(ResultIdx::TIME_SEC)]); + size_t time_ns = time_seconds * 1e9; + time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line}); + } + return time_map_vio; +} +} // namespace txt_parser_help + +namespace gps_parser_help { +size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) { + std::chrono::sys_days date = std::chrono::year_month_day{ + std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month}, + std::chrono::day{utc_date.day}}; + std::chrono::nanoseconds utc_time_day_ns = + std::chrono::nanoseconds{static_cast(utc_time_day_seconds * 1e9)}; + std::chrono::sys_time timestamp = date + utc_time_day_ns; + return timestamp.time_since_epoch().count(); +} +TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) { + TimeGPSList time_list_gps; + std::ifstream file_gps(path_gps); + std::string line; + std::optional nmea_sentence; + nmea::date date_last; + date_last.day = 255; + date_last.month = 255; + date_last.year = 255; + double time_of_day_last = -1.0; + while (std::getline(file_gps, line) && !line.empty()) { + try { + nmea_sentence = nmea::sentence(line); + } catch (const std::exception& e) { + std::cerr << "failed to parse line as nmea sentence: " << e.what() + << "\ncontinuing to next line\n"; + continue; + } + if (nmea_sentence->type() == "GGA") { + nmea::gga gga(*nmea_sentence); + if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) { + if (std::abs(gga.utc.get() - time_of_day_last) < + 1e-3) { // GGA messages for this dataset come second + GPSData gps_data; + gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get()); + gps_data.latitude = gga.latitude.get(); + gps_data.longitude = gga.longitude.get(); + if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get(); + time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data)); + } + } + } else if (nmea_sentence->type() == "RMC") { + nmea::rmc rmc(*nmea_sentence); + if (rmc.utc.exists() && rmc.date.exists()) { + date_last = rmc.date.get(); + time_of_day_last = rmc.utc.get(); + } + } else if (nmea_sentence->type() == "GST") { + std::optional gst_data = parse_gpgst(nmea_sentence->nmea_string()); + if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) < + 1e-3) { // GST message for this dataset come third + size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns); + if (time_list_gps.back().first == unix_time_ns) { + time_list_gps.back().second.uncertainty.emplace( + gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m, + gst_data->error_orientation_deg, gst_data->rms_range_error_m); + } + } + } + } + return time_list_gps; +} + +std::vector split_nmea_sentence(const std::string& sentence) { + std::vector fields; + std::string field; + std::stringstream ss(sentence); + + while (std::getline(ss, field, ',')) { + // remove checksum from the last field if present + auto asterisk = field.find('*'); + if (asterisk != std::string::npos) field = field.substr(0, asterisk); + fields.push_back(field); + } + + return fields; +} +double time_of_day_seconds(const double utc_time_hhmmss) { + int hours = static_cast(utc_time_hhmmss / 10000); + int minutes = static_cast((utc_time_hhmmss - hours * 10000) / 100); + double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100; + return hours * 3600 + minutes * 60 + seconds; +} +std::optional parse_gpgst(const std::string& sentence) { + if (sentence.substr(0, 6) != "$GPGST") { + return std::nullopt; + } + + std::vector fields = split_nmea_sentence(sentence); + if (fields.size() != 9) { + return std::nullopt; + } + for (size_t i = 0; i < fields.size(); i++) { + if (fields[i].empty()) return std::nullopt; + } + + GSTData gst; + gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1])); + gst.rms_range_error_m = std::stod(fields[2]); + gst.error_semi_major_m = std::stod(fields[3]); + gst.error_semi_minor_m = std::stod(fields[4]); + gst.error_orientation_deg = std::stod(fields[5]); + gst.sigma_lat_m = std::stod(fields[6]); + gst.sigma_lon_m = std::stod(fields[7]); + gst.sigma_alt_m = std::stod(fields[8]); + + return gst; +} + +} // namespace gps_parser_help +} // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.hh b/experimental/learn_descriptors/four_seasons_parser_detail.hh new file mode 100644 index 000000000..0f0c30ad5 --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_parser_detail.hh @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/gps_data.hh" +#include "nmea/object/date.hpp" + +namespace robot::experimental::learn_descriptors::detail::four_seasons_parser { +template +std::size_t abs_diff(const T& a, const T& b) { + return a > b ? a - b : b - a; +}; +namespace txt_parser_help { +using TimeDataMap = std::unordered_map>; +using TimeDataList = std::vector>>; +enum class GPSIdx { + TIME_NS = 0, + TRAN_X = 1, + TRAN_Y = 2, + TRAN_Z = 3, + QUAT_X = 4, + QUAT_Y = 5, + QUAT_Z = 6, + QUAT_W = 7, +}; +enum class ImgIdx { + TIME_NS = 0, + TIME_SEC = 1, + EXPOSURE_TIME = 2 // I'm not completely sure if this is correct - Nico +}; +enum class ResultIdx { + TIME_SEC = 0, + TRAN_X = 1, + TRAN_Y = 2, + TRAN_Z = 3, + QUAT_X = 4, + QUAT_Y = 5, + QUAT_Z = 6, + QUAT_W = 7, +}; +enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 }; +std::vector parse_line_adv(const std::string& line, const std::string& delim = " "); +std::shared_ptr load_camera_calibration( + const std::filesystem::path& calibration_dir); +template +T round_to_sig_figs(T val, int n) { + if (val == 0) return 0; + double d = static_cast(val); + int exponent = static_cast(std::floor(std::log10(std::abs(d)))); + double multiplier = std::pow(10.0, n - exponent - 1); + return static_cast(std::round(d * multiplier) / multiplier); +} +size_t min_sig_figs_result_time(const std::filesystem::path& path_vio); +const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img, + const size_t time_sig_figs); +const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss, + const size_t time_sig_figs); +const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio, + const size_t time_sig_figs); +} // namespace txt_parser_help +namespace gps_parser_help { +using TimeGPSList = std::vector>; +struct GSTData { + double utc_time_ns; + double rms_range_error_m; + double error_semi_major_m; + double error_semi_minor_m; + double error_orientation_deg; + double sigma_lat_m; + double sigma_lon_m; + double sigma_alt_m; +}; +std::vector split_nmea_sentence(const std::string& nmea_sentence); +double time_of_day_seconds(const double utc_time_hhmmss); +std::optional parse_gpgst(const std::string& nmea_sentence); +size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds); +TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps); +} // namespace gps_parser_help +} // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc index 064f404ef..e49467bfd 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example.cc @@ -64,7 +64,7 @@ int main(int argc, const char** argv) { const std::string img_str = "img " + std::to_string(i); cv::putText(img, img_str, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2); - const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); + const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); std::stringstream ss_AS_w_from_gnss_cam; ss_AS_w_from_gnss_cam << "AS_w_from_gnss_cam: "; if (img_pt.AS_w_from_gnss_cam) { @@ -84,7 +84,7 @@ int main(int argc, const char** argv) { std::stringstream ss_gps; ss_gps << "gps_gcs: "; if (img_pt.gps_gcs) { - const lrn_desc::ImagePoint::GPSData& gps_data = *img_pt.gps_gcs; + const lrn_desc::GPSData& gps_data = *img_pt.gps_gcs; ss_gps << "long: " << gps_data.longitude << ", lat: " << gps_data.latitude; if (gps_data.altitude) { ss_gps << ", alt: " << *gps_data.altitude; diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index bca7ec2c4..9befd4f2a 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -8,6 +8,7 @@ #include #include "common/check.hh" +#include "common/gps/frame_translation.hh" #include "cxxopts.hpp" #include "experimental/learn_descriptors/four_seasons_parser.hh" #include "opencv2/opencv.hpp" @@ -59,7 +60,7 @@ int main(int argc, const char** argv) { std::optional altitude_gps_from_gnss_cam; std::vector gps_ns_delta_from_shutter; for (size_t i = 0; i < parser.num_images(); i += 1) { - const lrn_desc::ImagePoint img_pt = parser.get_image_point(i); + const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); std::cout << img_pt.to_string() << std::endl; if (!img_pt.AS_w_from_gnss_cam) { continue; @@ -88,7 +89,7 @@ int main(int argc, const char** argv) { (parser.e_from_gpsw() * parser.w_from_gpsw().inverse()).matrix()) * w_from_gnss_cam; const Eigen::Vector3d gnss_cam_in_gcs = - parser.gcs_from_ECEF(ECEF_from_gnss_cam.translation()); + robot::gps::lla_from_ecef(ECEF_from_gnss_cam.translation()); if (!altitude_gps_from_gnss_cam) { altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude); @@ -98,7 +99,7 @@ int main(int argc, const char** argv) { << "\ngps_gcs: " << gcs_coordinate << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl; - const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate); + const Eigen::Vector3d ECEF_from_gps = robot::gps::ecef_from_lla(gcs_coordinate); const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(), ECEF_from_gps.z(), 1); Eigen::Vector4d gps_in_w = diff --git a/experimental/learn_descriptors/four_seasons_parser_test.cc b/experimental/learn_descriptors/four_seasons_parser_test.cc index ff49d935d..1b1a940b9 100644 --- a/experimental/learn_descriptors/four_seasons_parser_test.cc +++ b/experimental/learn_descriptors/four_seasons_parser_test.cc @@ -10,14 +10,12 @@ #include "Eigen/Core" #include "Eigen/Geometry" -#include "GeographicLib/Constants.hpp" -#include "GeographicLib/Geocentric.hpp" -#include "GeographicLib/Geodesic.hpp" -#include "GeographicLib/LocalCartesian.hpp" #include "absl/strings/str_split.h" #include "absl/strings/strip.h" #include "common/check.hh" +#include "common/gps/frame_translation.hh" #include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" #include "gtest/gtest.h" #include "nmea/message/gga.hpp" #include "nmea/message/rmc.hpp" @@ -35,6 +33,7 @@ static bool images_equal(cv::Mat img1, cv::Mat img2) { } namespace robot::experimental::learn_descriptors { +using namespace detail::four_seasons_parser; TEST(FourSeasonsParserTest, parser_test) { const std::filesystem::path dir_snippet = "external/four_seasons_snippet/recording_2020-04-07_11-33-45"; @@ -43,6 +42,7 @@ TEST(FourSeasonsParserTest, parser_test) { EXPECT_TRUE(std::filesystem::exists(dir_snippet)); EXPECT_TRUE(std::filesystem::exists(dir_calibration)); + std::cout << "heartbeat -1" << std::endl; FourSeasonsParser parser(dir_snippet, dir_calibration); // transformations test @@ -65,17 +65,17 @@ TEST(FourSeasonsParserTest, parser_test) { EXPECT_DOUBLE_EQ(gnss_scale, parser.gnss_scale()); // calibration test - const FourSeasonsParser::CameraCalibrationFisheye calibration_target = - detail::txt_parser_help::load_camera_calibration(dir_calibration); - const FourSeasonsParser::CameraCalibrationFisheye calibration = parser.camera_calibration(); - EXPECT_DOUBLE_EQ(calibration_target.cx, calibration.cx); - EXPECT_DOUBLE_EQ(calibration_target.cy, calibration.cy); - EXPECT_DOUBLE_EQ(calibration_target.fx, calibration.fx); - EXPECT_DOUBLE_EQ(calibration_target.fy, calibration.fy); - EXPECT_DOUBLE_EQ(calibration_target.k1, calibration.k1); - EXPECT_DOUBLE_EQ(calibration_target.k2, calibration.k2); - EXPECT_DOUBLE_EQ(calibration_target.k3, calibration.k3); - EXPECT_DOUBLE_EQ(calibration_target.k4, calibration.k4); + const std::shared_ptr calibration_target = + txt_parser_help::load_camera_calibration(dir_calibration); + const std::shared_ptr calibration = parser.camera_calibration(); + EXPECT_DOUBLE_EQ(calibration_target->cx, calibration->cx); + EXPECT_DOUBLE_EQ(calibration_target->cy, calibration->cy); + EXPECT_DOUBLE_EQ(calibration_target->fx, calibration->fx); + EXPECT_DOUBLE_EQ(calibration_target->fy, calibration->fy); + EXPECT_DOUBLE_EQ(calibration_target->k1, calibration->k1); + EXPECT_DOUBLE_EQ(calibration_target->k2, calibration->k2); + EXPECT_DOUBLE_EQ(calibration_target->k3, calibration->k3); + EXPECT_DOUBLE_EQ(calibration_target->k4, calibration->k4); EXPECT_NE(parser.num_images(), 0); @@ -87,17 +87,16 @@ TEST(FourSeasonsParserTest, parser_test) { const std::filesystem::path path_img_time = dir_snippet / "times.txt"; const std::filesystem::path path_gnss_poses = dir_snippet / "GNSSPoses.txt"; const std::filesystem::path path_vio = dir_snippet / "result.txt"; - const size_t min_sig_figs_time = detail::txt_parser_help::min_sig_figs_result_time(path_vio); + const size_t min_sig_figs_time = txt_parser_help::min_sig_figs_result_time(path_vio); // fetch data for all the fields (AS_w_from_gnss_cam, img times, result) - const detail::txt_parser_help::TimeDataList img_time_list = - detail::txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time); + const txt_parser_help::TimeDataList img_time_list = + txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time); EXPECT_EQ(img_time_list.size(), parser.num_images()); - const detail::txt_parser_help::TimeDataMap gnss_poses_time_map = - detail::txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses, - min_sig_figs_time); - const detail::txt_parser_help::TimeDataMap vio_time_map = - detail::txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time); + const txt_parser_help::TimeDataMap gnss_poses_time_map = + txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses, min_sig_figs_time); + const txt_parser_help::TimeDataMap vio_time_map = + txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time); Eigen::Matrix4d scale_mat = Eigen::Matrix4d::Identity(); scale_mat(0, 0) = scale_mat(1, 1) = scale_mat(2, 2) = parser.gnss_scale(); @@ -108,7 +107,7 @@ TEST(FourSeasonsParserTest, parser_test) { std::vector gps_ns_delta_from_shutter; for (size_t i = 0; i < parser.num_images(); i++) { - const ImagePoint& img_pt = parser.get_image_point(i); + const ImagePointFourSeasons& img_pt = parser.get_image_point(i); const cv::Mat img = parser.load_image(i); // seq (time in nanoseconds) test @@ -124,20 +123,22 @@ TEST(FourSeasonsParserTest, parser_test) { const std::vector& parsed_line_gnss_poses = gnss_poses_time_map.at(img_time_list[i].first); - Eigen::Vector3d gps_translation(std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::TRAN_X)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::TRAN_Y)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::TRAN_Z)])); - Eigen::Quaterniond gps_quat(std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_W)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_X)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_Y)]), - std::stod(parsed_line_gnss_poses[static_cast( - detail::txt_parser_help::GPSIdx::QUAT_Z)])); + Eigen::Vector3d gps_translation( + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::TRAN_X)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::TRAN_Y)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::TRAN_Z)])); + Eigen::Quaterniond gps_quat( + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_W)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_X)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Y)]), + std::stod( + parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Z)])); EXPECT_TRUE(liegroups::SE3(gps_quat, gps_translation) .matrix() .isApprox(img_pt.AS_w_from_gnss_cam->matrix())); @@ -148,20 +149,16 @@ TEST(FourSeasonsParserTest, parser_test) { const std::vector& parsed_line_gps = vio_time_map.at(img_time_list[i].first); Eigen::Vector3d ground_truth_translation( - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::TRAN_X)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::TRAN_Y)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::TRAN_Z)])); - Eigen::Quaterniond ground_truth_quat(std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_W)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_X)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_Y)]), - std::stod(parsed_line_gps[static_cast( - detail::txt_parser_help::ResultIdx::QUAT_Z)])); + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::TRAN_X)]), + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::TRAN_Y)]), + std::stod( + parsed_line_gps[static_cast(txt_parser_help::ResultIdx::TRAN_Z)])); + Eigen::Quaterniond ground_truth_quat( + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_W)]), + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_X)]), + std::stod(parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_Y)]), + std::stod( + parsed_line_gps[static_cast(txt_parser_help::ResultIdx::QUAT_Z)])); EXPECT_TRUE(liegroups::SE3(ground_truth_quat, ground_truth_translation) .matrix() .isApprox(img_pt.AS_w_from_vio_cam->matrix())); @@ -177,8 +174,7 @@ TEST(FourSeasonsParserTest, parser_test) { ECEF_from_gnss_cam * gps_from_cam.inverse(); const Eigen::Vector3d& t_ECEF_from_gnss_gps = ECEF_from_gnss_gps.translation(); - const Eigen::Vector3d gnss_gps_gcs = - FourSeasonsParser::gcs_from_ECEF(t_ECEF_from_gnss_gps); + const Eigen::Vector3d gnss_gps_gcs = gps::lla_from_ecef(t_ECEF_from_gnss_gps); const Eigen::Vector3d raw_gps_gcs( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, diff --git a/experimental/learn_descriptors/four_seasons_transforms.cc b/experimental/learn_descriptors/four_seasons_transforms.cc new file mode 100644 index 000000000..c6dc25ac4 --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_transforms.cc @@ -0,0 +1,67 @@ +#include "experimental/learn_descriptors/four_seasons_transforms.hh" + +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_parser_detail.hh" + +namespace robot::experimental::learn_descriptors { +FourSeasonsTransforms::StaticTransforms::StaticTransforms( + const std::filesystem::path& path_transforms) { + std::ifstream file_transforms(path_transforms); + ROBOT_CHECK(file_transforms, path_transforms); + std::string line; + while (std::getline(file_transforms, line)) { + if (line.find("transform_S_AS") != std::string::npos) { + std::getline(file_transforms, line); + S_from_AS = get_transform_from_line(line); + } else if (line.find("TS_cam_imu") != std::string::npos) { + std::getline(file_transforms, line); + cam_from_imu = get_transform_from_line(line); + } else if (line.find("transform_w_gpsw") != std::string::npos) { + std::getline(file_transforms, line); + w_from_gpsw = get_transform_from_line(line); + } else if (line.find("transform_gps_imu") != std::string::npos) { + std::getline(file_transforms, line); + gps_from_imu = get_transform_from_line(line); + } else if (line.find("transform_e_gpsw") != std::string::npos) { + std::getline(file_transforms, line); + e_from_gpsw = get_transform_from_line(line); + } else if (line.find("GNSS scale") != std::string::npos) { + std::getline(file_transforms, line); + gnss_scale = std::stod(line); + } + } +} + +liegroups::SE3 FourSeasonsTransforms::StaticTransforms::get_transform_from_line( + const std::string& line) { + enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W }; + std::vector parsed_transform_line = + detail::four_seasons_parser::txt_parser_help::parse_line_adv(line, ","); + if (parsed_transform_line.size() < 7) { + std::stringstream error_stream; + error_stream << "parsed_transform_line doesn't have sufficient entries for " + "transform! parsed_transform_line.size(): " + << parsed_transform_line.size() << std::endl; + throw std::runtime_error(error_stream.str()); + } + std::vector transform_nums; + for (const std::string& num : parsed_transform_line) { + transform_nums.push_back(static_cast(std::stod(num))); + } + Eigen::Vector3d translation(transform_nums[TransformEntry::T_X], + transform_nums[TransformEntry::T_Y], + transform_nums[TransformEntry::T_Z]); + Eigen::Quaterniond rotation( + transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X], + transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]); + return liegroups::SE3(rotation, translation); +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_transforms.hh b/experimental/learn_descriptors/four_seasons_transforms.hh new file mode 100644 index 000000000..9e8ab1bde --- /dev/null +++ b/experimental/learn_descriptors/four_seasons_transforms.hh @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include "common/liegroups/se3.hh" + +namespace robot::experimental::learn_descriptors { +struct FourSeasonsTransforms { + struct StaticTransforms { + liegroups::SE3 S_from_AS; + liegroups::SE3 cam_from_imu; + liegroups::SE3 w_from_gpsw; + liegroups::SE3 gps_from_imu; + liegroups::SE3 e_from_gpsw; + double gnss_scale; // scale from vio frame to gnss frame. WARNING: will require retooling + // if the scales per + // keyframe (pose) are not all one value. See more here: + // https://github.com/pmwenzel/4seasons-dataset + + StaticTransforms(const std::filesystem::path& path_transforms); + + private: + static liegroups::SE3 get_transform_from_line(const std::string& line); + }; + // might consider defining a nested "DynamicTransforms" or "PerImageTransforms" at some point - + // Nico +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index 5668da713..322a823ac 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -2,19 +2,15 @@ #include #include "experimental/learn_descriptors/frontend_definitions.hh" +#include "gtsam/base/Matrix.h" #include "gtsam/geometry/Cal3_S2.h" #include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" #include "opencv2/opencv.hpp" namespace robot::experimental::learn_descriptors { class Frame { public: - Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K, - const gtsam::Pose3& T_wrld_grnd_truth) - : id_(id), img_(img_undistorted), K_(K), groundtruth_(T_wrld_grnd_truth) {} - Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K) - : id_(id), img_(img_undistorted), K_(K) {} - void add_keypoints(const KeypointsCV& kpts); void assign_descriptors(const cv::Mat& descriptors); @@ -22,10 +18,13 @@ class Frame { const cv::Mat get_descriptors() { return descriptors_; }; const FrameId id_; - const cv::Mat img_; + const cv::Mat undistorted_img_; gtsam::Cal3_S2::shared_ptr K_; KeypointsCV kpts_; cv::Mat descriptors_; - std::optional groundtruth_; + std::optional world_from_cam_groundtruth_; + std::optional cam_in_world_initial_guess_; + std::optional translation_covariance_in_cam_; + std::optional world_from_cam_initial_guess_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 194122420..dfe24fa1d 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -1,6 +1,188 @@ #include "experimental/learn_descriptors/frontend.hh" +#include +#include #include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "opencv2/opencv.hpp" + +namespace detail_sfm { + +std::optional attempt_triangulate(const std::vector &cam_poses, + const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, + const double max_reproj_error = 2.0, + const bool verbose = true) { + gtsam::Point3 p_lmk_in_world; + if (cam_poses.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_lmk_in_world = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate. + if (verbose) + std::cerr << "attempt_triangulation failed. Likely cheirality exception: " + << e.what() << ". Discarding involved keypoints." << std::endl; + } + } + // Optional: perform an explicit cheirality check + for (const auto &pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + return std::nullopt; + } + } + + // Cheirality & reprojection checks + for (size_t i = 0; i < cam_poses.size(); ++i) { + const auto &pose = cam_poses[i]; + // Cheirality + gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); + if (p_cam.z() <= 0) { + if (verbose) { + std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z() + << ")\n"; + } + return std::nullopt; + } + // Reprojection error + if (max_reproj_error > 0) { + gtsam::PinholeCamera cam(pose, *K); + const auto reproj = cam.project(p_lmk_in_world); + const double err = (reproj - cam_obs[i]).norm(); + if (err > max_reproj_error) { + if (verbose) { + std::cerr << "[triangulate] reprojection error too large on view " << i << " (" + << err << " px)\n"; + } + return std::nullopt; + } + } + } + + return p_lmk_in_world; +} + +void graph_values(const gtsam::Values &values, const std::string &window_name, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::vector final_poses; + std::vector final_lmks; + for (const gtsam::Symbol &symbol_pose : symbols_pose) { + final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), + symbol_pose.string()); + } + for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { + if (!values.exists(symbol_lmk)) { + std::cout << "WTF " << symbol_lmk << std::endl; + } + final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); + } + std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; + robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, + window_name); +} + +gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks, + const int num_epochs = 5, bool viz_itr = false) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; + gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); + + double prev_error = optimizer.error(); + typedef int epoch; + std::function &, + const std::vector &)> + graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + graph_values(vals, window_name, symbols_pose, symbols_landmarks); + }; + + for (int i = 0; i < num_epochs; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (viz_itr) { + graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + return optimizer.values(); +} + +gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { + if (poses.empty()) throw std::runtime_error("Empty pose vector"); + + gtsam::Pose3 mean = poses[0]; + + for (int iter = 0; iter < maxIter; ++iter) { + gtsam::Vector6 total = gtsam::Vector6::Zero(); + + for (const auto &pose : poses) { + gtsam::Pose3 delta = mean.between(pose); + total += gtsam::Pose3::Logmap(delta); + } + + total /= poses.size(); + mean = mean.compose(gtsam::Pose3::Expmap(total)); + } + + return mean; +} + +gtsam::Point3 averagePoints(const std::vector &points) { + if (points.empty()) throw std::runtime_error("Empty point vector"); + gtsam::Point3 sum(0, 0, 0); + for (const auto &pt : points) sum += pt; + return sum / points.size(); +} + +gtsam::Point3 get_variance(const std::vector &points) { + const gtsam::Point3 mean = averagePoints(points); + gtsam::Point3 var(0, 0, 0); + for (const gtsam::Point3 &pt : points) { + var += (mean - pt).array().square().matrix(); + } + return var / points.size(); +} + +double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) { + Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation(); + + // 2. Compute trace and clamp to [-1,1] for numerical safety + double tr = R_err.trace(); + double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0)); + + // 3. Recover angle (in radians) + return std::acos(cos_theta); +} + +// const std::vector get_absolute_trajectory_error(const std::vector &) +// {} +} // namespace detail_sfm namespace robot::experimental::learn_descriptors { @@ -36,94 +218,152 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) { } void Frontend::populate_frames() { - for (const cv::Mat &img : images_) { + FrameId id = frames_.size(); + for (const ImageAndPoint &img_and_pt : image_and_points_) { + const ImagePoint &img_pt = img_and_pt.img_pt; + cv::Mat &img_undistorted; + cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->K_mat(), + img_pt.K->D_mat(), img_pt.K->K_mat(), img.size()); + auto [kpts, features] = extract_feaures(img_undistorted); + + gtsam::Cal3_S2::shared_ptr K = boost::make_shared( + img_pt.K->fx, img_pt.K->fy, 0, img_pt.K->cx, img_pt.K->cy); + + Frame frame{id, img_undistorted, K, kpts, features}; + const std::optional maybe_world_from_cam_grnd_trth = + img_pt.world_from_cam_ground_truth(); + if (maybe_world_from_cam_grnd_trth) { + frame.world_from_cam_ground_truth = + gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix()); + } + const std::optional maybe_cam_in_world = img_pt.cam_in_world(); + if (maybe_cam_in_world) { + frame.cam_in_world_initial_guess = *maybe_cam_in_world; + } + const std::optional maybe_translation_covariance_in_cam = + img_pt.translation_covariance_in_cam(); + if (maybe_translation_covariance_in_cam) { + frame.translation_covariance_in_cam = *maybe_translation_covariance_in_cam; + } + frames_.push_back(frame); + id++; } } void Frontend::match_frames_and_build_tracks() { if (params_.exhaustive) { - for (size_t i = 0; i < indices.size() - 1; i += 4) { - // std::cout << "i: " << i << std::endl; - for (size_t j = i + 1; j < indices.size(); j++) { - // std::cout << "j: " << j << std::endl; + for (size_t i = 0; i < frames_.size() - 1; i++) { + for (size_t j = i + 1; j < frames_.size(); j++) { std::vector matches = frontend.compute_matches( - frames[i].get_descriptors(), frames[j].get_descriptors()); - // DIAL TO MESS WITH + frames_[i].get_descriptors(), frames_[j].get_descriptors()); frontend.enforce_bijective_buffer_matches(matches); - if (matches.size() < 5) { continue; } std::vector cv_kpts_1; std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames[i].get_keypoints()) { + for (const KeypointCV &kpt : frames_[i].get_keypoints()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_1.push_back(cv_kpt); } - for (const KeypointCV &kpt : frames[j].get_keypoints()) { + for (const KeypointCV &kpt : frames_[j].get_keypoints()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_2.push_back(cv_kpt); } - Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix()); - // std::cout << "heartbeat" << std::endl; std::optional scale_cam0_from_cam1 = robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); if (!scale_cam0_from_cam1) { continue; } - world_from_camj.linear() = - (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) * - *scale_cam0_from_cam1) - .linear(); - // std::cout << "heartbeat 2" << std::endl; - id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); + ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_, + "This rotation should be populated."); + // this could use some work to verify the quality of the output, particularly inside + // of estiamte_cam0_from_cam1 + // also, at the moment I am not accounting for any covariance between the gps + // measurement and the unit translation vector from estimate_cam0_from_cam1 + + // can try averaging poses here as well + frames[j].world_from_cam_initial_guess_.emplace( + frames_[i].world_from_cam_initial_guess_->matrix() * + scale_cam0_from_cam1->matrix()); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx]; - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1); + const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames_[j].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames_[i].id_, kpt_cam0); + if (lmk_id_map_.find(key) != lmk_id_map_.end()) { + const size_t id = lmk_id_map_.at(key); + ROBOT_CHECK(id < feature_tracks_.size(), + "lmk_id_map_ id's shuold not exceed feature_tracks_ size!"); + feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0); + feature_tracks_[id].obs_.emplace_back(frames_[j].id_, kpt_cam1); } else { FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - lmk_id++; + feature_track.obs_.emplace_back(frames_[j].id_, kpt_cam1); + feature_tracks_.push_back(feature_track); + lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0), + feature_tracks_.size() - 1); } } } } - std::cout << "done processing matches" << std::endl; } else { // successive only for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = frontend.compute_matches( - frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frames_[i].get_descriptors(), frames_[i + 1].get_descriptors()); frontend.enforce_bijective_buffer_matches(matches); + if (matches.size() < 5) { + continue; + } + + std::vector cv_kpts_1; + std::vector cv_kpts_2; + for (const KeypointCV &kpt : frames_[i].get_keypoints()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_1.push_back(cv_kpt); + } + for (const KeypointCV &kpt : frames_[i + 1].get_keypoints()) { + cv::KeyPoint cv_kpt; + cv_kpt.pt = kpt; + cv_kpts_2.push_back(cv_kpt); + } + std::optional scale_cam0_from_cam1 = + robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); + if (!scale_cam0_from_cam1) { + continue; + } + ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_, + "This rotation should be populated."); + frames_[i + 1].world_from_cam_initial_guess_.emplace( + frames_[i].world_from_cam_initial_guess_->matrix() * + scale_cam0_from_cam1->matrix()); + for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames_[i + 1].get_keypoints()[match.trainIdx]; + + auto key = std::make_pair(frames_[i].id_, kpt_cam0); + if (lmk_id_map_.find(key) != lmk_id_map_.end()) { + const size_t id = lmk_id_map_.at(key); + ROBOT_CHECK(id < feature_tracks_.size(), + "lmk_id_map_ id's shuold not exceed feature_tracks_ size!"); + feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0); + feature_tracks_[id].obs_.emplace_back(frames_[i + 1].id_, kpt_cam1); } else { FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - lmk_id++; + feature_track.obs_.emplace_back(frames_[i + 1].id_, kpt_cam1); + feature_tracks_.emplace(lmk_id, feature_track); + lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0), + feature_tracks_.size() - 1); } } } } + std::cout << "done processing matches" << std::endl; } std::pair, cv::Mat> Frontend::extract_features(const cv::Mat &img) const { diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index 85b112c66..d71338936 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -5,6 +5,7 @@ #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/image_point.hh" #include "experimental/learn_descriptors/structure_from_motion_types.hh" #include "opencv2/opencv.hpp" @@ -17,6 +18,10 @@ struct FrontendParams { bool exhaustive = true; bool incremental = false; // a.k.a. images are successive }; +struct ImageAndPoint { + cv::Mat image_distorted; + ImagePoint img_pt; +}; class Frontend { public: static constexpr char symbol_pose_char = 'x'; @@ -27,15 +32,18 @@ class Frontend { void populate_frames(); void match_frames_and_build_tracks(); - void add_image(const cv::Mat &img) { images_.push_back(img); }; - void add_images(const std::vector &imgs) { - for (const cv::Mat &img : imgs) { - images_.push_back(img); + void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); }; + void add_images(const std::vector &img_and_pts) { + for (const ImageAndPoint &img_and_pt : img_and_pts) { + images_and_points_.push_back(img_and_pt); } }; const FrontendParams::ExtractorType &extractor_type() const { return params_.extractor_type; }; const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; }; + const FeatureTracks &feature_tracks() const { return feature_tracks_; }; + const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; }; + const std::vector &frames() const { return frames_; }; std::pair, cv::Mat> extract_features(const cv::Mat &img) const; std::vector compute_matches(const cv::Mat &descriptors1, @@ -66,7 +74,7 @@ class Frontend { cv::Ptr feature_extractor_; cv::Ptr descriptor_matcher_; - std::vector images_; + std::vector images_and_points_; FeatureTracks feature_tracks_; FrameLandmarkIdMap lmk_id_map_; std::vector frames_; diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh index 2965d6c97..e2822ef42 100644 --- a/experimental/learn_descriptors/frontend_definitions.hh +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -36,6 +36,6 @@ class FeatureTrack { } }; -using FeatureTracks = std::unordered_map; +using FeatureTracks = std::vector; // each idx is the FeatureTracks's LandmarkId using FrameLandmarkIdMap = std::unordered_map, LandmarkId>; } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/gps_data.cc b/experimental/learn_descriptors/gps_data.cc new file mode 100644 index 000000000..e69de29bb diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh new file mode 100644 index 000000000..13b032c25 --- /dev/null +++ b/experimental/learn_descriptors/gps_data.hh @@ -0,0 +1,65 @@ +#pragma once + +#include +#include +#include + +#include "Eigen/Core" + +namespace robot::experimental::learn_descriptors { +struct GPSData { + struct Uncertainty { + double sigma_lat_mitude; + double sigma_longitude; + double sigma_altitude; + double orientation_deg; + double rms_range_error_m; + + // diagonal latitude, longitude, altitude covariance in meters squared + Eigen::Matrix3d to_LLA_covariance() const { + Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); + cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude; + cov(1, 1) = sigma_longitude * sigma_longitude; + cov(2, 2) = sigma_altitude * sigma_altitude; + return cov; + } + + // Convert lat/lon covariance to ENU at given latitude + Eigen::Matrix3d to_ENU_covariance(double lat_deg) const { + double lat_rad = lat_deg * M_PI / 180.0; + + // meters per degree scaling + double kLat = + 111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad); + double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad); + + Eigen::Matrix3d lla_cov = to_LLA_covariance(); + + // rotate lat/lon covariance (upper-left 2x2) using orientation + double theta = orientation_deg * M_PI / 180.0; + Eigen::Matrix2d R; + R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); + + Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); + Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose(); + + // apply linear scaling to convert degrees to meters in EN + Eigen::Matrix2d J; + J << kLat, 0, 0, kLon; + + Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose(); + + Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero(); + enu_cov.topLeftCorner<2, 2>() = en_cov; + enu_cov(2, 2) = lla_cov(2, 2); // alt variance unchanged + + return enu_cov; + } + }; + size_t seq; // time in nanoseconds, may differ from image capturetime + double latitude; + double longitude; + std::optional altitude; // meters above sea level + std::optional uncertainty; +}; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index c46708d17..2fbaf52cb 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -2,132 +2,65 @@ #include #include -#include +#include #include #include #include #include "Eigen/Core" #include "Eigen/Geometry" +#include "camera_calibration.hh" #include "common/liegroups/se3.hh" -#include "common/time/robot_time.hh" +#include "experimental/learn_descriptors/camera_calibration.hh" namespace robot::experimental::learn_descriptors { struct ImagePoint { - struct GPSData { - struct Uncertainty { - double sigma_lat_mitude; - double sigma_longitude; - double sigma_altitude; - double orientation_deg; - double rms_range_error_m; + virtual ~ImagePoint() = default; - // diagonal latitude, longitude, altitude covariance in meters squared - Eigen::Matrix3d to_LLA_covariance() const { - Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); - cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude; - cov(1, 1) = sigma_longitude * sigma_longitude; - cov(2, 2) = sigma_altitude * sigma_altitude; - return cov; - } - - // Convert lat/lon covariance to ENU at given latitude - Eigen::Matrix3d to_ENU_covariance(double lat_deg) const { - double lat_rad = lat_deg * M_PI / 180.0; - - // meters per degree scaling - double kLat = - 111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad); - double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad); - - Eigen::Matrix3d lla_cov = to_LLA_covariance(); - - // rotate lat/lon covariance (upper-left 2x2) using orientation - double theta = orientation_deg * M_PI / 180.0; - Eigen::Matrix2d R; - R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); - - Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); - Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose(); - - // apply linear scaling to convert degrees to meters in EN - Eigen::Matrix2d J; - J << kLat, 0, 0, kLon; - - Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose(); - - Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero(); - enu_cov.topLeftCorner<2, 2>() = en_cov; - enu_cov(2, 2) = lla_cov(2, 2); // alt variance unchanged - - return enu_cov; - } - }; - size_t seq; // time in nanoseconds, may differ from image capturetime - double latitude; - double longitude; - std::optional altitude; // meters above sea level - std::optional uncertainty; + size_t id; + std::shared_ptr K; + virtual std::optional world_from_cam_ground_truth() const { + return std::nullopt; }; - size_t id; // idx for DB - size_t seq; // time in nanoseconds of image capture (also the name of image) - std::optional - AS_w_from_gnss_cam; // globally opimized arbitrary scale visual world (gnss + VIO + loop - // closure + etc.) from cam - std::optional AS_w_from_vio_cam; // arbitrary scale world from vio result cam - std::optional gps_gcs; // raw gps measurement in gcs (global cordinate system) - - const std::string to_string() const { - auto se3_to_str = [](const liegroups::SE3& se3) { - const Eigen::Vector3d& t = se3.translation(); - const Eigen::Quaterniond& r = se3.so3().unit_quaternion(); + virtual std::optional cam_in_world() const { return std::nullopt; }; + virtual std::optional translation_covariance_in_cam() const { + return std::nullopt; + }; + std::string to_string() const { + auto vec3d_to_str = [](const Eigen::Vector3d& vec3d) { + std::stringstream ss; + ss << "[" << vec3d.x() << ", " << vec3d.y() << ", " << vec3d.z() << "]"; + return ss.str(); + }; + auto isom3d_to_str = [&vec3d_to_str](const Eigen::Isometry3d& isom3d) { + const Eigen::Vector3d& t = isom3d.translation(); + const Eigen::Quaterniond r(isom3d.rotation()); std::stringstream ss; - ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n"; + ss << "\t\tt: " << vec3d_to_str(t) << "\n"; ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]"; return ss.str(); }; std::stringstream ss; ss << "Image Point " << id << ":\n"; - ss << "\tseq: " << seq << "\n"; - ss << "\tAS_w_from_gnss_cam: "; - if (AS_w_from_gnss_cam) { - ss << "\n" << se3_to_str(*AS_w_from_gnss_cam); + ss << "\tworld_from_cam_ground_truth: "; + if (world_from_cam_ground_truth()) { + ss << "\n" << isom3d_to_str(*world_from_cam_ground_truth()); } else { ss << "N/A"; } - ss << "\n\tAS_w_from_vio_cam: "; - if (AS_w_from_vio_cam) { - ss << "\n" << se3_to_str(*AS_w_from_vio_cam); + ss << "\n\tcam_in_world: "; + if (cam_in_world()) { + ss << vec3d_to_str(*cam_in_world()); } else { ss << "N/A"; } - ss << "\n\tgps_gcs: "; - if (gps_gcs) { - ss << "\n\t\tseq: " << gps_gcs->seq; - ss << "\n\t\t" << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t"; - if (gps_gcs->altitude) { - ss << *(gps_gcs->altitude); - } else { - ss << "alt N/A"; - } - ss << "\n\t\tsigma: "; - if (gps_gcs->uncertainty) { - ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t" - << gps_gcs->uncertainty->sigma_longitude << "\t" - << gps_gcs->uncertainty->sigma_altitude; - } else { - ss << "N/A"; - } + ss << "\n\ttranslation_covariance_in_cam: "; + if (translation_covariance_in_cam()) { + ss << *translation_covariance_in_cam(); } else { ss << "N/A"; } return ss.str(); } - - cv::Mat load_image(const std::filesystem::path& img_dir) const { - const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png")); - return cv::imread(path); - } }; -typedef std::vector ImagePointVector; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc new file mode 100644 index 000000000..73f531d93 --- /dev/null +++ b/experimental/learn_descriptors/image_point_four_seasons.cc @@ -0,0 +1,49 @@ +#include "experimental/learn_descriptors/image_point_four_seasons.hh" + +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/gps/frame_translation.hh" +#include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_transforms.hh" + +namespace robot::experimental::learn_descriptors { +ImagePointFourSeasons::~ImagePointFourSeasons() = default; + +std::optional ImagePointFourSeasons::world_from_cam_ground_truth() const { + if (!AS_w_from_gnss_cam) return std::nullopt; + Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); + scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = + shared_static_transforms->gnss_scale; + Eigen::Isometry3d world_from_cam_ground_truth( + (shared_static_transforms->S_from_AS.matrix() * scale_mat_reference * + AS_w_from_gnss_cam->matrix()) + .matrix()); + return world_from_cam_ground_truth; +} + +std::optional ImagePointFourSeasons::cam_in_world() const { + if (!gps_gcs) return std::nullopt; + const Eigen::Vector3d gps_in_ECEF(gps::ecef_from_lla( + Eigen::Vector3d(gps_gcs->latitude, gps_gcs->longitude, *gps_gcs->altitude))); + Eigen::Vector3d cam_in_world(shared_static_transforms->cam_from_imu * + shared_static_transforms->gps_from_imu.inverse() * + shared_static_transforms->w_from_gpsw * + shared_static_transforms->e_from_gpsw.inverse() * gps_in_ECEF); + return cam_in_world; +} + +std::optional ImagePointFourSeasons::translation_covariance_in_cam() const { + return gps_covariance_in_world(); +} + +std::optional ImagePointFourSeasons::gps_covariance_in_world() const { + if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt; + const Eigen::Matrix3d ENU_covariance( + gps_gcs->uncertainty->to_ENU_covariance(gps_gcs->latitude)); + const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix()); + const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose()); + return w_covariance; +} +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh new file mode 100644 index 000000000..b636cd120 --- /dev/null +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/liegroups/se3.hh" +#include "experimental/learn_descriptors/four_seasons_transforms.hh" +#include "experimental/learn_descriptors/gps_data.hh" +#include "experimental/learn_descriptors/image_point.hh" +#include "opencv2/opencv.hpp" + +namespace robot::experimental::learn_descriptors { +class FourSeasonsParser; +struct ImagePointFourSeasons : ImagePoint { + virtual ~ImagePointFourSeasons(); + + size_t id; // idx for DB + size_t seq; // time in nanoseconds of image capture (also the name of image) + std::optional + AS_w_from_gnss_cam; // globally opimized arbitrary scale visual world (gnss + VIO + loop + // closure + etc.) from cam + std::optional AS_w_from_vio_cam; // arbitrary scale world from vio result cam + std::optional gps_gcs; // raw gps measurement in gcs (global cordinate system) + std::shared_ptr shared_static_transforms; + + std::optional world_from_cam_ground_truth() const override; + std::optional cam_in_world() const override; + std::optional translation_covariance_in_cam() const override; + std::optional gps_covariance_in_world() const; + + std::string to_string() const { + auto se3_to_str = [](const liegroups::SE3& se3) { + const Eigen::Vector3d& t = se3.translation(); + const Eigen::Quaterniond& r = se3.so3().unit_quaternion(); + std::stringstream ss; + ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n"; + ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]"; + return ss.str(); + }; + std::stringstream ss; + ss << "Image Point " << id << ":\n"; + ss << "\tseq: " << seq << "\n"; + ss << "\tAS_w_from_gnss_cam: "; + if (AS_w_from_gnss_cam) { + ss << "\n" << se3_to_str(*AS_w_from_gnss_cam); + } else { + ss << "N/A"; + } + ss << "\n\tAS_w_from_vio_cam: "; + if (AS_w_from_vio_cam) { + ss << "\n" << se3_to_str(*AS_w_from_vio_cam); + } else { + ss << "N/A"; + } + ss << "\n\tgps_gcs: "; + if (gps_gcs) { + ss << "\n\t\tseq: " << gps_gcs->seq; + ss << "\n\t\tval: " << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t"; + if (gps_gcs->altitude) { + ss << *(gps_gcs->altitude); + } else { + ss << "alt N/A"; + } + ss << "\n\t\tsigma: "; + if (gps_gcs->uncertainty) { + ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t" + << gps_gcs->uncertainty->sigma_longitude << "\t" + << gps_gcs->uncertainty->sigma_altitude; + } else { + ss << "N/A"; + } + } else { + ss << "N/A"; + } + return ss.str(); + } + + cv::Mat load_image(const std::filesystem::path& img_dir) const { + const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png")); + return cv::imread(path); + } +}; +typedef std::vector ImagePointFourSeasonsVector; +} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index ff7df085c..2e056b71a 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -9,6 +9,7 @@ #include "Eigen/Geometry" #include "common/geometry/camera.hh" #include "cxxopts.hpp" +#include "experimental/learn_descriptors/camera_calibration.hh" #include "experimental/learn_descriptors/four_seasons_parser.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend.hh" @@ -283,7 +284,7 @@ int main(int argc, const char **argv) { const std::vector indices = [&parser]() -> std::vector { std::vector tmp; for (size_t i = 0; i < parser.num_images(); i++) { - const ImagePoint img_pt = parser.get_image_point(i); + const ImagePointFourSeasons img_pt = parser.get_image_point(i); if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs && img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude) tmp.push_back(i); @@ -301,8 +302,7 @@ int main(int argc, const char **argv) { // FourSeasonsParser parser() // const size_t img_width = img_pt_first.width, img_height = // img_pt_first.height; - const FourSeasonsParser::CameraCalibrationFisheye cal_parser_left_cam = - parser.camera_calibration(); + const CameraCalibrationFisheye cal_parser_left_cam = parser.camera_calibration(); gtsam::Cal3_S2::shared_ptr K = boost::make_shared(cal_parser_left_cam.fx, cal_parser_left_cam.fy, 0, cal_parser_left_cam.cx, cal_parser_left_cam.cy); @@ -343,7 +343,7 @@ int main(int argc, const char **argv) { parser.gnss_scale(); std::vector> kpt_list; for (const int &idx : indices) { - const ImagePoint img_pt = parser.get_image_point(idx); + const ImagePointFourSeasons img_pt = parser.get_image_point(idx); std::cout << img_pt.to_string() << std::endl; // Eigen::Isometry3d world_from_cam; @@ -357,7 +357,7 @@ int main(int argc, const char **argv) { const Eigen::Vector3d gps_gcs( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); - const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs); + const Eigen::Vector3d p_gps_in_ECEF = parser.ecef_from_lla(gps_gcs); const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(), p_gps_in_ECEF.z(), 1.0); world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc new file mode 100644 index 000000000..1abcba3b9 --- /dev/null +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -0,0 +1,333 @@ +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/geometry/camera.hh" +#include "cxxopts.hpp" +#include "experimental/learn_descriptors/camera_calibration.hh" +#include "experimental/learn_descriptors/four_seasons_parser.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Point2.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" +#include "gtsam/geometry/triangulation.h" +#include "gtsam/inference/Symbol.h" +#include "gtsam/linear/NoiseModel.h" +#include "gtsam/navigation/GPSFactor.h" +#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" +#include "gtsam/nonlinear/Values.h" +#include "gtsam/slam/BetweenFactor.h" +#include "gtsam/slam/PriorFactor.h" +#include "gtsam/slam/ProjectionFactor.h" +#include "visualization/opencv/opencv_viz.hh" + +int main(int argc, const char **argv) { + using namespace robot::experimental::learn_descriptors; + // clang-format off + cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser"); + options.add_options() + ("data_dir", "Path to dataset root directory", cxxopts::value()) + ("calibration_dir", "Path to dataset calibration directory", cxxopts::value()) + ("help", "Print usage"); + // clang-format on + + auto args = options.parse(argc, argv); + + const auto check_required = [&](const std::string &opt) { + if (args.count(opt) == 0) { + std::cout << "Missing " << opt << " argument" << std::endl; + std::cout << options.help() << std::endl; + std::exit(1); + } + }; + + if (args.count("help")) { + std::cout << options.help() << std::endl; + return 0; + } + + check_required("data_dir"); + check_required("calibration_dir"); + + const std::filesystem::path path_data = args["data_dir"].as(); + const std::filesystem::path path_calibration = args["calibration_dir"].as(); + FourSeasonsParser parser(path_data, path_calibration); + + std::cout << "refactor_sfm_mvp!" << std::endl; + // const std::vector indices{581, 609, + // 633}; // indices with all data fields on neighborhood_5_train + // const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with + // get indices with fully populated img_pt containing full gps data and reference + // const std::vector indices = [&parser]() -> std::vector { + // std::vector tmp; + // for (size_t i = 0; i < parser.num_images(); i++) { + // const ImagePointFourSeasons img_pt = parser.get_image_point(i); + // if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs && + // img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude) + // tmp.push_back(i); + // } + // return tmp; + // }(); + constexpr bool visualize = false; + // all data fields on neighborhood_5_train const std::vector indices = []() { + // std::vector tmp; + // for (int i = 660; i < 681; i += 10) { + // tmp.push_back(i); + // } + // return tmp; + // }(); + // FourSeasonsParser parser() + // const size_t img_width = img_pt_first.width, img_height = + // img_pt_first.height; + + // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, + // gtsam::Pose3(T_world_camera0.matrix())); + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, + true, false}; + Frontend frontend(params); + + for (size_t i = 660, i < 1000; i += 5) { + frontend.add_images(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)}); + } + frontend.populate_frames(); + frontend.match_frames_and_build_tracks(); + + // ############# BACKEND ############### + gtsam::Values initial_estimate_; + gtsam::NonlinearFactorGraph graph_; + + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::Vector6 prior_sigmas; + prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; + gtsam::Vector3 gps_sigmas; + gps_sigmas << 2.1, 2.1, 0.1; + gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = + gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = + gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); + + // add gps factors + const std::vector &frames = frontned.frames(); + graph.add(PriorFactor(Symbol('x', 0), gtsam::Pose3::Identity(), + noiseModel::Constrained::All(6) // effectively fixes the pose + )); + initial_estimate_.insert(Symbol('x', 0), gtsam::Pose3::Identity()); + for (size_t i = 1; i < frames.size(); i++) { + if (frames[i].) graph.add(gtsam::GPSFactor(Symbol('x', i), )); + } + + // add filtered points to graph + std::unordered_map> symbols_poses_values_iter; + std::unordered_map> symbols_landmarks_values_iter; + std::vector symbols_pose; + std::vector symbols_landmarks; + for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) { + // LandmarkId lmk_id = lmk_id_pt.first; + // const gtsam::Point3 kpt_in_world = lmk_id_pt.second; + FeatureTrack feature_track = feature_tracks.at(lmk_id); + const gtsam::Symbol symbol_lmk('l', lmk_id); + for (const auto &[frame_id, obs] : feature_track.obs_) { + initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); + symbols_landmarks.push_back(symbol_lmk); + symbols_landmarks_values_iter.emplace(symbol_lmk, + std::vector{kpt_in_world}); + graph_.emplace_shared>( + gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id), + symbol_lmk, K); + } + } + + // add gps factors + graph_.emplace_shared>( + gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise); + initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0)); + symbols_pose.push_back(gtsam::Symbol('x', 0)); + symbols_poses_values_iter.emplace( + gtsam::Symbol('x', 0), std::vector{id_to_initial_world_from_cam.at(0)}); + for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) { + if (frame_id == 0) { + continue; + } + const gtsam::Symbol key_cam('x', frame_id); + symbols_pose.push_back(key_cam); + gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation(); + graph_.emplace_shared(key_cam, p_cam_in_world, gps_noise); + initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam); + symbols_poses_values_iter.emplace(key_cam, + std::vector{estimate_world_from_cam}); + // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world)); + } + + // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); + std::cout << "heart beat 3" << std::endl; + + constexpr bool local_optimizations = false; + if (local_optimizations) { + // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number + // of matches + // do local optimizations and add to iter cache + // const int window = 2; + std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl; + for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) { + std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl; + } + for (size_t i = 0; i < indices.size() - 1; i++) { + std::cout << "Local optimization " << i << std::endl; + gtsam::Values local_estimate_; + gtsam::NonlinearFactorGraph local_graph_; + + std::vector symbols_poses{gtsam::Symbol('x', i), + gtsam::Symbol('x', i + 1)}; + std::vector symbols_lmks; + + local_graph_.emplace_shared>( + gtsam::PriorFactor( + symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise)); + std::cout << "fuck" << std::endl; + local_graph_.emplace_shared>( + gtsam::PriorFactor( + symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise)); + std::cout << "mega fuck" << std::endl; + local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i)); + local_estimate_.insert_or_assign(symbols_poses[1], + id_to_initial_world_from_cam.at(i + 1)); + + std::vector matches = frontend.compute_matches( + frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + // DIAL TO MESS WITH + frontend.enforce_bijective_matches(matches); + std::vector world_from_cams{id_to_initial_world_from_cam.at(i), + id_to_initial_world_from_cam.at(i + 1)}; + + std::vector viz_world_from_cams{ + {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"}, + {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}}; + std::vector viz_lmks; + for (const cv::DMatch match : matches) { + std::vector feat_kpts; + const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); + feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); + + const std::pair key_lmk_id = + std::make_pair(frames[i].id_, kpt_cam0); + if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) { + LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id); + if (lmk_triangulated_map_filtered.find(match_lmk_id) == + lmk_triangulated_map_filtered.end()) { + continue; + } + std::cout << "good" << std::endl; + // do nothing + // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); + // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); + } else { + std::cerr << "ERROR: this shouldn't happen right?" << std::endl; + FeatureTrack feature_track(frames[i].id_, kpt_cam0); + feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); + feature_tracks.emplace(lmk_id, feature_track); + lmk_id_map.emplace(key_lmk_id, lmk_id); + lmk_id++; + } + std::cout << "oog" << std::endl; + const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id)); + // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != + // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { + // std::cerr << "UH OH" << std::endl; + // } else { + // std::cout << "cool" << std::endl; + // } + symbols_lmks.push_back(symbol_lmk); + local_graph_ + .emplace_shared>( + feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K); + local_graph_ + .emplace_shared>( + feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K); + + std::optional kpt_in_world = + detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K); + // gtsam::Point3 kpt_in_world; + // bool triangulate_success = + // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world); + if (kpt_in_world) { + local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world); + viz_lmks.emplace_back(*kpt_in_world); + if (symbols_landmarks_values_iter.find(symbol_lmk) != + symbols_landmarks_values_iter.end()) { + symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world); + } else { + symbols_landmarks_values_iter.emplace( + symbol_lmk, std::vector{*kpt_in_world}); + } + } + } + std::cout << "setup complete!" << std::endl; + if (visualize) + robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), + true, true, "Local Optimization " + std::to_string(i)); + + const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( + local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); + + for (const gtsam::Symbol &symbol_pose : symbols_poses) { + const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); + symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); + } + for (const gtsam::Symbol &symbol_lmk : symbols_lmks) { + const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); + symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); + } + } + + std::cout << "\nLocal Optimizations Complete!\n" << std::endl; + + for (std::pair> sym_pose : + symbols_poses_values_iter) { + initial_estimate_.insert_or_assign(sym_pose.first, + detail_sfm::averagePoses(sym_pose.second)); + } + for (std::pair> sym_lmk : + symbols_landmarks_values_iter) { + initial_estimate_.insert_or_assign(sym_lmk.first, + detail_sfm::averagePoints(sym_lmk.second)); + } + } + + // do global optimization + const gtsam::Values result = + detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0); + + // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference + double sum_traj_error = 0; + double sum_rot_error = 0; + for (size_t i = 0; i < symbols_pose.size(); i++) { + const gtsam::Pose3 traj_pose = result.at(symbols_pose[i]); + sum_traj_error += std::pow( + (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2); + sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i], + Eigen::Isometry3d(traj_pose.matrix())); + } + std::cout << "sum_rot_error: " << sum_rot_error << std::endl; + double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); + double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); + std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; + + std::cout << "about to visualize result" << std::endl; + result.print(); + detail_sfm::graph_values(result, "Result", symbols_pose, std::vector()); + // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); +} \ No newline at end of file diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index b4f98a01c..a63015b66 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -235,7 +235,7 @@ TEST(SFMMvp, sfm_building_manual_global) { // graph_.emplace_shared>(gtsam::Symbol('x', id), // T_world_cam_gtsam, pose_noise); - Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); + Frame frame{id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())}; std::pair, cv::Mat> kpts_descs = frontend.extract_features(img_undistorted); diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc index ad1130338..c4f56d736 100644 --- a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc +++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc @@ -1,8 +1,7 @@ -#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" - #include #include "Eigen/Dense" +#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" #include "gtest/gtest.h" #include "visualization/opencv/opencv_viz.hh" diff --git a/experimental/learn_descriptors/structure_from_motion_types.hh b/experimental/learn_descriptors/structure_from_motion_types.hh index 582a3c70a..e8d0595bc 100644 --- a/experimental/learn_descriptors/structure_from_motion_types.hh +++ b/experimental/learn_descriptors/structure_from_motion_types.hh @@ -1,16 +1,16 @@ #pragma once -#include +#include #include #include "gtsam/geometry/Point3.h" #include "opencv2/opencv.hpp" -using Timestamp = std::int64_t; +using Timestamp = size_t; -using FrameId = std::uint64_t; // Frame id is used as the index of gtsam symbol - // (not as a gtsam key). -using LandmarkId = long; // -1 for invalid landmarks +using FrameId = size_t; // Frame id is used as the index of gtsam symbol + // (not as a gtsam key). +using LandmarkId = size_t; using LandmarkIds = std::vector; using Landmark = gtsam::Point3; using Landmarks = std::vector>; From 241aefe9d880eaeedd6989f294f0bccd9ef44305 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 11 Jul 2025 15:08:25 -0400 Subject: [PATCH 31/45] WIP --- experimental/learn_descriptors/BUILD | 25 + .../learn_descriptors/camera_calibration.hh | 2 +- .../learn_descriptors/four_seasons_parser.cc | 17 +- .../four_seasons_parser_detail.cc | 5 +- .../four_seasons_parser_example_viz.cc | 7 +- experimental/learn_descriptors/frame.hh | 14 +- experimental/learn_descriptors/frontend.cc | 134 ++--- .../learn_descriptors/frontend_definitions.hh | 3 +- .../image_point_four_seasons.cc | 11 +- .../learn_descriptors/incremental_sfm_mvp.cc | 41 +- .../learn_descriptors/refactor_sfm_mvp.cc | 456 +++++++++--------- experimental/learn_descriptors/sfm_mvp.cc | 18 +- 12 files changed, 366 insertions(+), 367 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index facd64bdf..2c647712b 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -347,6 +347,7 @@ cc_library( ":image_point_four_seasons", ":structure_from_motion_types", "//common:check", + "//common/geometry:camera", "@opencv", "@gtsam", "@eigen", @@ -437,4 +438,28 @@ cc_library( deps = [ "@opencv" ] +) + +cc_binary( + name = "refactor_sfm_mvp", + srcs = ["refactor_sfm_mvp.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-function", + ], + deps = [ + ":four_seasons_parser", + ":frame", + ":frontend", + ":spatial_test_scene_cube", + ":structure_from_motion_types", + "//common/geometry:camera", + "//visualization/opencv:opencv_viz", + "@boost//:smart_ptr", + "@cxxopts", + "@eigen", + "@gtsam", + "@opencv", + ":camera_calibration" + ], ) \ No newline at end of file diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh index cfbd6fb42..1e985acad 100644 --- a/experimental/learn_descriptors/camera_calibration.hh +++ b/experimental/learn_descriptors/camera_calibration.hh @@ -7,7 +7,7 @@ struct CameraCalibrationFisheye { double fx, fy, cx, cy, k1, k2, k3, k4; cv::Mat k_mat() { - cv::Mat K_mat = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, py, 0, 0, 1); + cv::Mat K_mat = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); return K_mat; } diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index a295851f0..0e2eb2be1 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -52,7 +52,6 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, ImagePointFourSeasons img_pt; img_pt.id = id; img_pt.K = cal_; - std::cout << "heartbeat 5" << std::endl; img_pt.shared_static_transforms = shared_transforms_; img_pt.seq = std::stoull( pair_time_data.second[static_cast(txt_parser_help::ImgIdx::TIME_NS)]); @@ -77,10 +76,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::stod( parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Z)])); img_pt.AS_w_from_gnss_cam = liegroups::SE3(R_gps_cam_from_AS_w, t_gps_cam_from_AS_w); - } else { - std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id - << std::endl; - } + } // else { + // std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id + // << std::endl; + // } if (vio_poses_time_map.find(time_key) != vio_poses_time_map.end()) { const std::vector& parsed_line_vio = vio_poses_time_map.at(time_key); Eigen::Vector3d t_AS_w_from_vio_cam( @@ -95,10 +94,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::stod( parsed_line_vio[static_cast(txt_parser_help::ResultIdx::QUAT_Z)])); img_pt.AS_w_from_vio_cam = liegroups::SE3(R_AS_w_from_vio_cam, t_AS_w_from_vio_cam); - } else { - std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id - << std::endl; - } + } // else { + // std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id + // << std::endl; + // } img_pt_vector_.push_back(img_pt); id++; } diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc index d9dc824a8..65dac2893 100644 --- a/experimental/learn_descriptors/four_seasons_parser_detail.cc +++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc @@ -111,6 +111,7 @@ const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio } // namespace txt_parser_help namespace gps_parser_help { + size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) { std::chrono::sys_days date = std::chrono::year_month_day{ std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month}, @@ -120,6 +121,7 @@ size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_da std::chrono::sys_time timestamp = date + utc_time_day_ns; return timestamp.time_since_epoch().count(); } + TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) { TimeGPSList time_list_gps; std::ifstream file_gps(path_gps); @@ -187,12 +189,14 @@ std::vector split_nmea_sentence(const std::string& sentence) { return fields; } + double time_of_day_seconds(const double utc_time_hhmmss) { int hours = static_cast(utc_time_hhmmss / 10000); int minutes = static_cast((utc_time_hhmmss - hours * 10000) / 100); double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100; return hours * 3600 + minutes * 60 + seconds; } + std::optional parse_gpgst(const std::string& sentence) { if (sentence.substr(0, 6) != "$GPGST") { return std::nullopt; @@ -218,6 +222,5 @@ std::optional parse_gpgst(const std::string& sentence) { return gst; } - } // namespace gps_parser_help } // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index 9befd4f2a..88511a6e3 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -69,7 +69,6 @@ int main(int argc, const char** argv) { scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix())); - viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); if (img_pt.gps_gcs) { if (img_pt.gps_gcs->seq > img_pt.seq) { @@ -105,7 +104,11 @@ int main(int argc, const char** argv) { Eigen::Vector4d gps_in_w = Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) * ECEF_from_gps_hom; - viz_points.emplace_back(gps_in_w.head<3>(), "x_gps_" + std::to_string(i)); + const Eigen::Isometry3d cam_from_gps( + (parser.cam_from_imu() * parser.gps_from_imu().inverse()).matrix()); + Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() + cam_from_gps.translation(); + viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i)); + viz_points.emplace_back(gps_in_w.head<3>(), "x_gps" + std::to_string(i)); const Eigen::Vector3d gps_from_ref_in_world = gps_in_w.head<3>() - w_from_gnss_cam.translation(); std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl; diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index 322a823ac..70be07d27 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -11,11 +11,19 @@ namespace robot::experimental::learn_descriptors { class Frame { public: + Frame(FrameId id, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K, const KeypointsCV kpts, + const cv::Mat descriptors) + : id_(id), + undistorted_img_(img), + K_(std::move(K)), + kpts_(kpts), + descriptors_(descriptors) {} + void add_keypoints(const KeypointsCV& kpts); void assign_descriptors(const cv::Mat& descriptors); - const KeypointsCV get_keypoints() { return kpts_; }; - const cv::Mat get_descriptors() { return descriptors_; }; + const KeypointsCV& keypoint() { return kpts_; }; + const cv::Mat& descriptors() { return descriptors_; }; const FrameId id_; const cv::Mat undistorted_img_; @@ -24,7 +32,7 @@ class Frame { cv::Mat descriptors_; std::optional world_from_cam_groundtruth_; std::optional cam_in_world_initial_guess_; - std::optional translation_covariance_in_cam_; std::optional world_from_cam_initial_guess_; + std::optional translation_covariance_in_cam_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index dfe24fa1d..8b7d5077a 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -1,5 +1,6 @@ #include "experimental/learn_descriptors/frontend.hh" +#include #include #include #include @@ -9,10 +10,13 @@ #include "Eigen/Core" #include "Eigen/Geometry" #include "common/check.hh" +#include "common/geometry/camera.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/image_point.hh" #include "experimental/learn_descriptors/structure_from_motion_types.hh" #include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/triangulation.h" #include "opencv2/opencv.hpp" namespace detail_sfm { @@ -77,62 +81,6 @@ std::optional attempt_triangulate(const std::vector return p_lmk_in_world; } -void graph_values(const gtsam::Values &values, const std::string &window_name, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; - for (const gtsam::Symbol &symbol_pose : symbols_pose) { - final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), - symbol_pose.string()); - } - for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { - if (!values.exists(symbol_lmk)) { - std::cout << "WTF " << symbol_lmk << std::endl; - } - final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); - } - std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; - robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, - window_name); -} - -gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks, - const int num_epochs = 5, bool viz_itr = false) { - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; - gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); - - double prev_error = optimizer.error(); - typedef int epoch; - std::function &, - const std::vector &)> - graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::cout << "iteration " << iter << " complete!"; - std::string window_name = "Iteration_" + std::to_string(iter); - graph_values(vals, window_name, symbols_pose, symbols_landmarks); - }; - - for (int i = 0; i < num_epochs; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (viz_itr) { - graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } - } - return optimizer.values(); -} - gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { if (poses.empty()) throw std::runtime_error("Empty pose vector"); @@ -219,31 +167,39 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) { void Frontend::populate_frames() { FrameId id = frames_.size(); - for (const ImageAndPoint &img_and_pt : image_and_points_) { + for (const ImageAndPoint &img_and_pt : images_and_points_) { const ImagePoint &img_pt = img_and_pt.img_pt; - cv::Mat &img_undistorted; - cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->K_mat(), - img_pt.K->D_mat(), img_pt.K->K_mat(), img.size()); - auto [kpts, features] = extract_feaures(img_undistorted); + cv::Mat img_undistorted; + cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->k_mat(), + img_pt.K->d_mat(), img_pt.K->k_mat(), + img_and_pt.image_distorted.size()); + auto [kpts, descriptors] = extract_features(img_undistorted); + KeypointsCV kpts_cv; + for (const cv::KeyPoint &kpt : kpts) { + kpts_cv.push_back(kpt.pt); + } gtsam::Cal3_S2::shared_ptr K = boost::make_shared( img_pt.K->fx, img_pt.K->fy, 0, img_pt.K->cx, img_pt.K->cy); - Frame frame{id, img_undistorted, K, kpts, features}; + if (!K) std::cout << "UH OH" << std::endl; + + Frame frame(id, img_undistorted, K, kpts_cv, descriptors); const std::optional maybe_world_from_cam_grnd_trth = img_pt.world_from_cam_ground_truth(); if (maybe_world_from_cam_grnd_trth) { - frame.world_from_cam_ground_truth = + frame.world_from_cam_groundtruth_ = gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix()); } const std::optional maybe_cam_in_world = img_pt.cam_in_world(); if (maybe_cam_in_world) { - frame.cam_in_world_initial_guess = *maybe_cam_in_world; + frame.cam_in_world_initial_guess_ = *maybe_cam_in_world; } + if (id == 0) frame.world_from_cam_initial_guess_ = gtsam::Rot3::Identity(); const std::optional maybe_translation_covariance_in_cam = img_pt.translation_covariance_in_cam(); if (maybe_translation_covariance_in_cam) { - frame.translation_covariance_in_cam = *maybe_translation_covariance_in_cam; + frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam; } frames_.push_back(frame); id++; @@ -254,27 +210,32 @@ void Frontend::match_frames_and_build_tracks() { if (params_.exhaustive) { for (size_t i = 0; i < frames_.size() - 1; i++) { for (size_t j = i + 1; j < frames_.size(); j++) { - std::vector matches = frontend.compute_matches( - frames_[i].get_descriptors(), frames_[j].get_descriptors()); - frontend.enforce_bijective_buffer_matches(matches); + std::vector matches = + compute_matches(frames_[i].descriptors(), frames_[j].descriptors()); + enforce_bijective_buffer_matches(matches); if (matches.size() < 5) { continue; } std::vector cv_kpts_1; std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames_[i].get_keypoints()) { + for (const KeypointCV &kpt : frames_[i].keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_1.push_back(cv_kpt); } - for (const KeypointCV &kpt : frames_[j].get_keypoints()) { + for (const KeypointCV &kpt : frames_[j].keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_2.push_back(cv_kpt); } std::optional scale_cam0_from_cam1 = - robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); + robot::geometry::estimate_cam0_from_cam1( + cv_kpts_1, cv_kpts_2, matches, + images_and_points_[i] + .img_pt.K->k_mat()); // fine for now since all cameras have the same K, + // how to reconcile later though...? + if (!scale_cam0_from_cam1) { continue; } @@ -286,12 +247,12 @@ void Frontend::match_frames_and_build_tracks() { // measurement and the unit translation vector from estimate_cam0_from_cam1 // can try averaging poses here as well - frames[j].world_from_cam_initial_guess_.emplace( + frames_[j].world_from_cam_initial_guess_.emplace( frames_[i].world_from_cam_initial_guess_->matrix() * - scale_cam0_from_cam1->matrix()); + scale_cam0_from_cam1->linear().matrix()); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames_[j].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames_[j].keypoint()[match.trainIdx]; auto key = std::make_pair(frames_[i].id_, kpt_cam0); if (lmk_id_map_.find(key) != lmk_id_map_.end()) { @@ -311,28 +272,29 @@ void Frontend::match_frames_and_build_tracks() { } } } else { // successive only - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = frontend.compute_matches( - frames_[i].get_descriptors(), frames_[i + 1].get_descriptors()); - frontend.enforce_bijective_buffer_matches(matches); + for (size_t i = 0; i < frames_.size() - 1; i++) { + std::vector matches = + compute_matches(frames_[i].descriptors(), frames_[i + 1].descriptors()); + enforce_bijective_buffer_matches(matches); if (matches.size() < 5) { continue; } std::vector cv_kpts_1; std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames_[i].get_keypoints()) { + for (const KeypointCV &kpt : frames_[i].keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_1.push_back(cv_kpt); } - for (const KeypointCV &kpt : frames_[i + 1].get_keypoints()) { + for (const KeypointCV &kpt : frames_[i + 1].keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_2.push_back(cv_kpt); } std::optional scale_cam0_from_cam1 = - robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); + robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, + images_and_points_[i].img_pt.K->k_mat()); if (!scale_cam0_from_cam1) { continue; } @@ -340,11 +302,11 @@ void Frontend::match_frames_and_build_tracks() { "This rotation should be populated."); frames_[i + 1].world_from_cam_initial_guess_.emplace( frames_[i].world_from_cam_initial_guess_->matrix() * - scale_cam0_from_cam1->matrix()); + scale_cam0_from_cam1->linear().matrix()); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames_[i + 1].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames_[i + 1].keypoint()[match.trainIdx]; auto key = std::make_pair(frames_[i].id_, kpt_cam0); if (lmk_id_map_.find(key) != lmk_id_map_.end()) { @@ -356,7 +318,7 @@ void Frontend::match_frames_and_build_tracks() { } else { FeatureTrack feature_track(i, kpt_cam0); feature_track.obs_.emplace_back(frames_[i + 1].id_, kpt_cam1); - feature_tracks_.emplace(lmk_id, feature_track); + feature_tracks_.push_back(feature_track); lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0), feature_tracks_.size() - 1); } diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh index e2822ef42..1a96aa6bb 100644 --- a/experimental/learn_descriptors/frontend_definitions.hh +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -5,6 +5,7 @@ #include #include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Point3.h" #include "opencv2/opencv.hpp" namespace std { @@ -29,7 +30,7 @@ class FeatureTrack { void print() const { std::cout << "Feature track with cameras: "; - for (size_t i = 0u; i < obs_.size(); i++) { + for (size_t i = 0; i < obs_.size(); i++) { std::cout << " " << obs_[i].first << " "; } std::cout << std::endl; diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc index 73f531d93..2ad0468fa 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.cc +++ b/experimental/learn_descriptors/image_point_four_seasons.cc @@ -27,10 +27,13 @@ std::optional ImagePointFourSeasons::cam_in_world() const { if (!gps_gcs) return std::nullopt; const Eigen::Vector3d gps_in_ECEF(gps::ecef_from_lla( Eigen::Vector3d(gps_gcs->latitude, gps_gcs->longitude, *gps_gcs->altitude))); - Eigen::Vector3d cam_in_world(shared_static_transforms->cam_from_imu * - shared_static_transforms->gps_from_imu.inverse() * - shared_static_transforms->w_from_gpsw * - shared_static_transforms->e_from_gpsw.inverse() * gps_in_ECEF); + const Eigen::Vector3d gps_in_world(shared_static_transforms->w_from_gpsw * + shared_static_transforms->e_from_gpsw.inverse() * + gps_in_ECEF); + const Eigen::Isometry3d cam_from_gps( + (shared_static_transforms->cam_from_imu * shared_static_transforms->gps_from_imu.inverse()) + .matrix()); + const Eigen::Vector3d cam_in_world(gps_in_world + cam_from_gps.translation()); return cam_in_world; } diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index 2e056b71a..bafd667b5 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -302,13 +302,14 @@ int main(int argc, const char **argv) { // FourSeasonsParser parser() // const size_t img_width = img_pt_first.width, img_height = // img_pt_first.height; - const CameraCalibrationFisheye cal_parser_left_cam = parser.camera_calibration(); + const std::shared_ptr cal_parser_left_cam = + parser.camera_calibration(); gtsam::Cal3_S2::shared_ptr K = - boost::make_shared(cal_parser_left_cam.fx, cal_parser_left_cam.fy, 0, - cal_parser_left_cam.cx, cal_parser_left_cam.cy); + boost::make_shared(cal_parser_left_cam->fx, cal_parser_left_cam->fy, 0, + cal_parser_left_cam->cx, cal_parser_left_cam->cy); cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); - cv::Mat D_mat = (cv::Mat_(4, 1) << cal_parser_left_cam.k1, cal_parser_left_cam.k2, - cal_parser_left_cam.k3, cal_parser_left_cam.k4); + cv::Mat D_mat = (cv::Mat_(4, 1) << cal_parser_left_cam->k1, cal_parser_left_cam->k2, + cal_parser_left_cam->k3, cal_parser_left_cam->k4); // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, // gtsam::Pose3(T_world_camera0.matrix())); @@ -435,8 +436,8 @@ int main(int argc, const char **argv) { // std::cout << "i: " << i << std::endl; for (size_t j = i + 1; j < indices.size(); j++) { // std::cout << "j: " << j << std::endl; - std::vector matches = frontend.compute_matches( - frames[i].get_descriptors(), frames[j].get_descriptors()); + std::vector matches = + frontend.compute_matches(frames[i].descriptors(), frames[j].descriptors()); // DIAL TO MESS WITH frontend.enforce_bijective_buffer_matches(matches); @@ -446,12 +447,12 @@ int main(int argc, const char **argv) { std::vector cv_kpts_1; std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames[i].get_keypoints()) { + for (const KeypointCV &kpt : frames[i].keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_1.push_back(cv_kpt); } - for (const KeypointCV &kpt : frames[j].get_keypoints()) { + for (const KeypointCV &kpt : frames[j].keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_2.push_back(cv_kpt); @@ -470,8 +471,8 @@ int main(int argc, const char **argv) { // std::cout << "heartbeat 2" << std::endl; id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[j].keypoint()[match.trainIdx]; auto key = std::make_pair(frames[i].id_, kpt_cam0); if (lmk_id_map.find(key) != lmk_id_map.end()) { @@ -491,12 +492,12 @@ int main(int argc, const char **argv) { std::cout << "done processing matches" << std::endl; } else { // successive only for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = frontend.compute_matches( - frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + std::vector matches = + frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; auto key = std::make_pair(frames[i].id_, kpt_cam0); if (lmk_id_map.find(key) != lmk_id_map.end()) { @@ -661,8 +662,8 @@ int main(int argc, const char **argv) { local_estimate_.insert_or_assign(symbols_poses[1], id_to_initial_world_from_cam.at(i + 1)); - std::vector matches = frontend.compute_matches( - frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + std::vector matches = + frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); // DIAL TO MESS WITH frontend.enforce_bijective_matches(matches); std::vector world_from_cams{id_to_initial_world_from_cam.at(i), @@ -674,8 +675,8 @@ int main(int argc, const char **argv) { std::vector viz_lmks; for (const cv::DMatch match : matches) { std::vector feat_kpts; - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); @@ -695,7 +696,7 @@ int main(int argc, const char **argv) { std::cerr << "ERROR: this shouldn't happen right?" << std::endl; FeatureTrack feature_track(frames[i].id_, kpt_cam0); feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); + feature_tracks.push_back(feature_track); lmk_id_map.emplace(key_lmk_id, lmk_id); lmk_id++; } diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index 1abcba3b9..22d63079c 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -32,6 +33,136 @@ #include "gtsam/slam/ProjectionFactor.h" #include "visualization/opencv/opencv_viz.hh" +std::optional attempt_triangulate(const std::vector &cam_poses, + const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, + const double max_reproj_error = 2.0, + const bool verbose = true) { + gtsam::Point3 p_lmk_in_world; + if (cam_poses.size() >= 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_lmk_in_world = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); + + } catch (const gtsam::TriangulationCheiralityException &e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate or discard + if (verbose) { + std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: " + << e.what() << ". Discarding involved keypoints." << std::endl; + } + return std::nullopt; + } + } else { + return std::nullopt; + } + // Optional: perform an explicit cheirality check + for (const auto &pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + return std::nullopt; + } + } + + // Cheirality & reprojection checks + for (size_t i = 0; i < cam_poses.size(); ++i) { + const auto &pose = cam_poses[i]; + // Cheirality + gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); + if (p_cam.z() <= 0) { + if (verbose) { + std::cerr << "[attempt_triangulate] point behind camera " << i + << " (z=" << p_cam.z() << ")\n"; + } + return std::nullopt; + } + // Reprojection error + if (max_reproj_error > 0) { + gtsam::PinholeCamera cam(pose, *K); + const auto reproj = cam.project(p_lmk_in_world); + const double err = (reproj - cam_obs[i]).norm(); + if (err > max_reproj_error) { + if (verbose) { + std::cerr << "[attempt_triangulate] reprojection error too large on view " << i + << " (" << err << " px)\n"; + } + return std::nullopt; + } + } + } + return p_lmk_in_world; +} + +void graph_values(const gtsam::Values &values, const std::string &window_name, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::vector final_poses; + std::vector final_lmks; + for (const gtsam::Symbol &symbol_pose : symbols_pose) { + final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), + symbol_pose.string()); + } + for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { + if (!values.exists(symbol_lmk)) { + std::cout << "WTF " << symbol_lmk << std::endl; + } + final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); + } + std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; + robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, + window_name); +} + +gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks, + const int num_epochs = 5, bool viz_itr = false) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; + gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); + + double prev_error = optimizer.error(); + typedef int epoch; + std::function &, + const std::vector &)> + graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, + const std::vector &symbols_pose, + const std::vector &symbols_landmarks) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + graph_values(vals, window_name, symbols_pose, symbols_landmarks); + }; + + for (int i = 0; i < num_epochs; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (viz_itr) { + graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + return optimizer.values(); +} + +double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) { + Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation(); + + // 2. Compute trace and clamp to [-1,1] for numerical safety + double tr = R_err.trace(); + double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0)); + + // 3. Recover angle (in radians) + return std::acos(cos_theta); +} + int main(int argc, const char **argv) { using namespace robot::experimental::learn_descriptors; // clang-format off @@ -65,269 +196,132 @@ int main(int argc, const char **argv) { FourSeasonsParser parser(path_data, path_calibration); std::cout << "refactor_sfm_mvp!" << std::endl; - // const std::vector indices{581, 609, - // 633}; // indices with all data fields on neighborhood_5_train - // const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with - // get indices with fully populated img_pt containing full gps data and reference - // const std::vector indices = [&parser]() -> std::vector { - // std::vector tmp; - // for (size_t i = 0; i < parser.num_images(); i++) { - // const ImagePointFourSeasons img_pt = parser.get_image_point(i); - // if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs && - // img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude) - // tmp.push_back(i); - // } - // return tmp; - // }(); - constexpr bool visualize = false; - // all data fields on neighborhood_5_train const std::vector indices = []() { - // std::vector tmp; - // for (int i = 660; i < 681; i += 10) { - // tmp.push_back(i); - // } - // return tmp; - // }(); - // FourSeasonsParser parser() - // const size_t img_width = img_pt_first.width, img_height = - // img_pt_first.height; - - // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // gtsam::Pose3(T_world_camera0.matrix())); + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, true, false}; Frontend frontend(params); - for (size_t i = 660, i < 1000; i += 5) { - frontend.add_images(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)}); + for (size_t i = 660; i < 1000; i += 5) { + // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl; + frontend.add_image(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)}); } frontend.populate_frames(); frontend.match_frames_and_build_tracks(); + std::cout << "first frame translation: " << *frontend.frames()[0].cam_in_world_initial_guess_ + << std::endl; + std::cout << "ground truth translation: " + << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl; + + std::vector viz_poses_init; + for (const Frame &frame : frontend.frames()) { + Eigen::Isometry3d world_from_cam_init; + world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); + world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_; + viz_poses_init.emplace_back(world_from_cam_init, + gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string()); + } + robot::geometry::viz_scene(viz_poses_init, std::vector(), + cv::viz::Color::brown(), true, true, "Frontend initial guesses"); + // ############# BACKEND ############### gtsam::Values initial_estimate_; gtsam::NonlinearFactorGraph graph_; - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = gtsam::noiseModel::Isotropic::Sigma( + 2, 1.0); // should probably mess with this, could also use essential matrix values to guide + // this potentially gtsam::Vector6 prior_sigmas; prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; - gtsam::Vector3 gps_sigmas; - gps_sigmas << 2.1, 2.1, 0.1; + gtsam::Vector3 gps_sigmas_fallback; + gps_sigmas_fallback << 0.5, 0.5, 1.5; gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); gtsam::noiseModel::Diagonal::shared_ptr gps_noise = - gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); + gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas_fallback); // add gps factors - const std::vector &frames = frontned.frames(); - graph.add(PriorFactor(Symbol('x', 0), gtsam::Pose3::Identity(), - noiseModel::Constrained::All(6) // effectively fixes the pose - )); - initial_estimate_.insert(Symbol('x', 0), gtsam::Pose3::Identity()); + const std::vector &frames = frontend.frames(); + graph_.add(gtsam::PriorFactor( + gtsam::Symbol('x', 0), gtsam::Pose3::Identity(), + gtsam::noiseModel::Constrained::All(6) // effectively fixes the pose + )); + initial_estimate_.insert(gtsam::Symbol('x', 0), gtsam::Pose3::Identity()); + std::vector symbols_pose{gtsam::Symbol('x', 0)}; + std::vector world_from_cam_initial_guess; for (size_t i = 1; i < frames.size(); i++) { - if (frames[i].) graph.add(gtsam::GPSFactor(Symbol('x', i), )); + // should do some interpolation here most likely + const gtsam::Pose3 world_from_cam_estimate( + frames[i].world_from_cam_initial_guess_ ? *frames[i].world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(), + frames[i].cam_in_world_initial_guess_ ? *frames[i].cam_in_world_initial_guess_ + : gtsam::Point3()); + world_from_cam_initial_guess.push_back(world_from_cam_estimate); + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( + frames[i].translation_covariance_in_cam_ + ? gtsam::Vector3((*frames[i].translation_covariance_in_cam_)(0, 0), + (*frames[i].translation_covariance_in_cam_)(1, 1), + (*frames[i].translation_covariance_in_cam_)(2, 2)) + : gps_sigmas_fallback); + const gtsam::Symbol cam_symbol('x', i); + if (frames[i].cam_in_world_initial_guess_) + graph_.add( + gtsam::GPSFactor(cam_symbol, *frames[i].cam_in_world_initial_guess_, gps_noise)); + initial_estimate_.insert(cam_symbol, world_from_cam_estimate); + symbols_pose.push_back(cam_symbol); } - // add filtered points to graph - std::unordered_map> symbols_poses_values_iter; - std::unordered_map> symbols_landmarks_values_iter; - std::vector symbols_pose; - std::vector symbols_landmarks; - for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) { - // LandmarkId lmk_id = lmk_id_pt.first; - // const gtsam::Point3 kpt_in_world = lmk_id_pt.second; - FeatureTrack feature_track = feature_tracks.at(lmk_id); - const gtsam::Symbol symbol_lmk('l', lmk_id); - for (const auto &[frame_id, obs] : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); - symbols_landmarks.push_back(symbol_lmk); - symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{kpt_in_world}); - graph_.emplace_shared>( - gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id), - symbol_lmk, K); - } - } - - // add gps factors - graph_.emplace_shared>( - gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise); - initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0)); - symbols_pose.push_back(gtsam::Symbol('x', 0)); - symbols_poses_values_iter.emplace( - gtsam::Symbol('x', 0), std::vector{id_to_initial_world_from_cam.at(0)}); - for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) { - if (frame_id == 0) { - continue; - } - const gtsam::Symbol key_cam('x', frame_id); - symbols_pose.push_back(key_cam); - gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation(); - graph_.emplace_shared(key_cam, p_cam_in_world, gps_noise); - initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam); - symbols_poses_values_iter.emplace(key_cam, - std::vector{estimate_world_from_cam}); - // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world)); - } + std::cout << "heartbeat pre lmks in graph" << std::endl; - // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); - std::cout << "heart beat 3" << std::endl; - - constexpr bool local_optimizations = false; - if (local_optimizations) { - // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number - // of matches - // do local optimizations and add to iter cache - // const int window = 2; - std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl; - for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) { - std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl; + // add landmarks to graph + std::vector symbols_lmks; + const FeatureTracks feature_tracks = frontend.feature_tracks(); + for (size_t i = 0; i < feature_tracks.size(); i++) { + std::vector world_from_lmk_cams; + std::vector lmk_observations; + for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { + world_from_lmk_cams.push_back(world_from_cam_initial_guess[frame_id]); + lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y); } - for (size_t i = 0; i < indices.size() - 1; i++) { - std::cout << "Local optimization " << i << std::endl; - gtsam::Values local_estimate_; - gtsam::NonlinearFactorGraph local_graph_; - - std::vector symbols_poses{gtsam::Symbol('x', i), - gtsam::Symbol('x', i + 1)}; - std::vector symbols_lmks; - - local_graph_.emplace_shared>( - gtsam::PriorFactor( - symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise)); - std::cout << "fuck" << std::endl; - local_graph_.emplace_shared>( - gtsam::PriorFactor( - symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise)); - std::cout << "mega fuck" << std::endl; - local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i)); - local_estimate_.insert_or_assign(symbols_poses[1], - id_to_initial_world_from_cam.at(i + 1)); - - std::vector matches = frontend.compute_matches( - frames[i].get_descriptors(), frames[i + 1].get_descriptors()); - // DIAL TO MESS WITH - frontend.enforce_bijective_matches(matches); - std::vector world_from_cams{id_to_initial_world_from_cam.at(i), - id_to_initial_world_from_cam.at(i + 1)}; - - std::vector viz_world_from_cams{ - {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"}, - {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}}; - std::vector viz_lmks; - for (const cv::DMatch match : matches) { - std::vector feat_kpts; - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; - feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); - feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); - - const std::pair key_lmk_id = - std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) { - LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id); - if (lmk_triangulated_map_filtered.find(match_lmk_id) == - lmk_triangulated_map_filtered.end()) { - continue; - } - std::cout << "good" << std::endl; - // do nothing - // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - std::cerr << "ERROR: this shouldn't happen right?" << std::endl; - FeatureTrack feature_track(frames[i].id_, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(key_lmk_id, lmk_id); - lmk_id++; - } - std::cout << "oog" << std::endl; - const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id)); - // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != - // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { - // std::cerr << "UH OH" << std::endl; - // } else { - // std::cout << "cool" << std::endl; - // } - symbols_lmks.push_back(symbol_lmk); - local_graph_ - .emplace_shared>( - feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K); - local_graph_ - .emplace_shared>( - feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K); - - std::optional kpt_in_world = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K); - // gtsam::Point3 kpt_in_world; - // bool triangulate_success = - // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world); - if (kpt_in_world) { - local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world); - viz_lmks.emplace_back(*kpt_in_world); - if (symbols_landmarks_values_iter.find(symbol_lmk) != - symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world); - } else { - symbols_landmarks_values_iter.emplace( - symbol_lmk, std::vector{*kpt_in_world}); - } - } - } - std::cout << "setup complete!" << std::endl; - if (visualize) - robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), - true, true, "Local Optimization " + std::to_string(i)); - - const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( - local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); - - for (const gtsam::Symbol &symbol_pose : symbols_poses) { - const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); - symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); - } - for (const gtsam::Symbol &symbol_lmk : symbols_lmks) { - const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); - symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); + std::optional landmark_estimate = attempt_triangulate( + world_from_lmk_cams, lmk_observations, frames[0].K_); // all K are the same for now... + if (landmark_estimate) { + const gtsam::Symbol lmk_symbol(Frontend::symbol_lmk_char, i); + // if (!frames[i].K_) std::cout << "UH OH" << std::endl; + for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { + graph_.add(gtsam::GenericProjectionFactor( + gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise, + gtsam::Symbol(Frontend::symbol_pose_char, frame_id), lmk_symbol, frames[0].K_)); } + initial_estimate_.insert(lmk_symbol, *landmark_estimate); + symbols_lmks.push_back(lmk_symbol); } - - std::cout << "\nLocal Optimizations Complete!\n" << std::endl; - - for (std::pair> sym_pose : - symbols_poses_values_iter) { - initial_estimate_.insert_or_assign(sym_pose.first, - detail_sfm::averagePoses(sym_pose.second)); - } - for (std::pair> sym_lmk : - symbols_landmarks_values_iter) { - initial_estimate_.insert_or_assign(sym_lmk.first, - detail_sfm::averagePoints(sym_lmk.second)); - } + std::cout << "idx " << i << " out of " << feature_tracks.size() << std::endl; } + std::cout << "heartbeat" << std::endl; + // do global optimization const gtsam::Values result = - detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0); - - // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference - double sum_traj_error = 0; - double sum_rot_error = 0; - for (size_t i = 0; i < symbols_pose.size(); i++) { - const gtsam::Pose3 traj_pose = result.at(symbols_pose[i]); - sum_traj_error += std::pow( - (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2); - sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i], - Eigen::Isometry3d(traj_pose.matrix())); - } - std::cout << "sum_rot_error: " << sum_rot_error << std::endl; - double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); - double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); - std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; + optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 0); + + // // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference + // double sum_traj_error = 0; + // double sum_rot_error = 0; + // for (size_t i = 0; i < symbols_pose.size(); i++) { + // const gtsam::Pose3 traj_pose = result.at(symbols_pose[i]); + // sum_traj_error += std::pow( + // (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2); + // sum_rot_error += + // rotation_error(references_world_from_cam[i], Eigen::Isometry3d(traj_pose.matrix())); + // } + // std::cout << "sum_rot_error: " << sum_rot_error << std::endl; + // double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); + // double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); + // std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; std::cout << "about to visualize result" << std::endl; result.print(); - detail_sfm::graph_values(result, "Result", symbols_pose, std::vector()); + graph_values(result, "Result", symbols_pose, std::vector()); // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); } \ No newline at end of file diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc index a63015b66..9771f2a29 100644 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ b/experimental/learn_descriptors/sfm_mvp.cc @@ -259,12 +259,12 @@ TEST(SFMMvp, sfm_building_manual_global) { LandmarkId lmk_id = 0; for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = - frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); // frontend.enforce_bijective_matches(matches); frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; auto key = std::make_pair(i, kpt_cam0); if (lmk_id_map.find(key) != lmk_id_map.end()) { @@ -453,11 +453,11 @@ TEST(SFMMvp, sfm_building_manual_incremental) { LandmarkId lmk_id = 0; for (size_t i = 0; i < indices.size() - 1; i++) { std::vector matches = - frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); frontend.enforce_bijective_buffer_matches(matches); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; auto key = std::make_pair(frames[i].id_, kpt_cam0); if (lmk_id_map.find(key) != lmk_id_map.end()) { @@ -582,7 +582,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) { local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1)); std::vector matches = - frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors()); + frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); frontend.enforce_bijective_matches(matches); std::vector feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)}; @@ -591,8 +591,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) { std::vector viz_lmks; for (const cv::DMatch match : matches) { std::vector feat_kpts; - const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx]; + const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); From 66ec22aff4ba3444467441ca1a3b5e5f09c8a7ea Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Mon, 14 Jul 2025 17:32:11 -0400 Subject: [PATCH 32/45] WIP. Use 5pt alg 'heading' to improve camera orientation initial guess --- experimental/learn_descriptors/BUILD | 3 + .../learn_descriptors/four_seasons_parser.cc | 5 -- .../four_seasons_parser_example_viz.cc | 60 +++++++++++++- experimental/learn_descriptors/frontend.cc | 32 ++++---- experimental/learn_descriptors/frontend.hh | 3 +- experimental/learn_descriptors/image_point.hh | 2 +- .../image_point_four_seasons.cc | 4 +- .../image_point_four_seasons.hh | 2 +- .../learn_descriptors/refactor_sfm_mvp.cc | 81 ++++++++++++------- 9 files changed, 134 insertions(+), 58 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 2c647712b..d17e1f8af 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -185,6 +185,7 @@ cc_binary( srcs = ["four_seasons_parser_example_viz.cc"], deps = [ ":four_seasons_parser", + ":frontend", "//common:check", "//common/gps:frame_translation", "//visualization/opencv:opencv_viz", @@ -453,6 +454,8 @@ cc_binary( ":frontend", ":spatial_test_scene_cube", ":structure_from_motion_types", + ":image_point", + ":image_point_four_seasons", "//common/geometry:camera", "//visualization/opencv:opencv_viz", "@boost//:smart_ptr", diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index 0e2eb2be1..d2bb76ffb 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -32,19 +32,14 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, const std::filesystem::path path_vio = root_dir_ / "result.txt"; const std::filesystem::path path_gps = root_dir_ / "septentrio.nmea"; const size_t min_time_sig_figs = txt_parser_help::min_sig_figs_result_time(path_vio); - std::cout << "heartbeat 0" << std::endl; txt_parser_help::TimeDataList img_time_list = txt_parser_help::create_img_time_data_list(path_img, min_time_sig_figs); - std::cout << "heartbeat 1" << std::endl; txt_parser_help::TimeDataMap gnss_poses_time_map = txt_parser_help::create_gnss_poses_time_data_map(path_gnss, min_time_sig_figs); - std::cout << "heartbeat 2" << std::endl; txt_parser_help::TimeDataMap vio_poses_time_map = txt_parser_help::create_vio_time_data_map(path_vio, min_time_sig_figs); - std::cout << "heartbeat 3" << std::endl; gps_parser_help::TimeGPSList gps_time_list = gps_parser_help::create_gps_time_data_list(path_gps); - std::cout << "heartbeat 4" << std::endl; size_t id = 0; for (const std::pair>& pair_time_data : img_time_list) { diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index 88511a6e3..d9a6b89f1 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -1,5 +1,7 @@ #include +#include #include +#include #include #include #include @@ -11,6 +13,7 @@ #include "common/gps/frame_translation.hh" #include "cxxopts.hpp" #include "experimental/learn_descriptors/four_seasons_parser.hh" +#include "experimental/learn_descriptors/frontend.hh" #include "opencv2/opencv.hpp" #include "visualization/opencv/opencv_viz.hh" @@ -48,6 +51,35 @@ int main(int argc, const char** argv) { lrn_desc::FourSeasonsParser parser(path_data, path_calibration); + lrn_desc::FrontendParams params{lrn_desc::FrontendParams::ExtractorType::SIFT, + lrn_desc::FrontendParams::MatcherType::KNN, true, false}; + lrn_desc::Frontend frontend(params); + constexpr size_t NUM_IMAGES = 100; + (void)NUM_IMAGES; + auto start = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < NUM_IMAGES; i += 1) { + // for (const size_t i : std::vector{74, 82, 92}) { + const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); + frontend.add_image(lrn_desc::ImageAndPoint{ + parser.load_image(i), std::make_shared(img_pt)}); + } + auto duration = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - start); + std::cout << "done adding! took " << duration.count() << " milliseconds" << std::endl; + frontend.populate_frames(); + start = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - start); + std::cout << "done populating frames! took " << duration.count() << " milliseconds" + << std::endl; + // frontend.match_frames_and_build_tracks(); + // start = std::chrono::high_resolution_clock::now(); + // duration = std::chrono::duration_cast( + // std::chrono::high_resolution_clock::now() - start); + // std::cout << "done matching and building tracks! took " << duration.count() << " + // milliseconds" + // << std::endl; + ROBOT_CHECK(parser.num_images() != 0); std::vector viz_frames; // camera frames from visual world frame @@ -57,9 +89,12 @@ int main(int argc, const char** argv) { std::cout << "gnss scale: " << parser.gnss_scale() << std::endl; scale_mat.linear() *= parser.gnss_scale(); std::cout << "scale mat: " << scale_mat.matrix() << std::endl; + std::optional first_gps_to_cam; std::optional altitude_gps_from_gnss_cam; std::vector gps_ns_delta_from_shutter; - for (size_t i = 0; i < parser.num_images(); i += 1) { + // double ate_cam = 0.0, ate_gps = 0.0; + for (size_t i = 0; i < NUM_IMAGES; i += 1) { + // for (const size_t i : std::vector{74, 82, 92}) { const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); std::cout << img_pt.to_string() << std::endl; if (!img_pt.AS_w_from_gnss_cam) { @@ -68,8 +103,18 @@ int main(int argc, const char** argv) { Eigen::Isometry3d w_from_gnss_cam(Eigen::Isometry3d(parser.S_from_AS().matrix()) * scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix())); - viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); + Eigen::Isometry3d frontend_w_from_cam_groundtruth( + frontend.frames()[i].world_from_cam_groundtruth_->matrix()); + ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()), + frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix()); + // Eigen::Isometry3d frontend_w_from_cam_groundtruth(*img_pt.world_from_cam_ground_truth()); + viz_frames.emplace_back(frontend_w_from_cam_groundtruth, + "x_frontend_ref_" + std::to_string(i)); + // std::cout << "frontend_w_from_cam_groundtruth: " << + // frontend_w_from_cam_groundtruth.matrix() + // << std::endl; + if (img_pt.gps_gcs) { if (img_pt.gps_gcs->seq > img_pt.seq) { gps_ns_delta_from_shutter.push_back( @@ -93,6 +138,10 @@ int main(int argc, const char** argv) { if (!altitude_gps_from_gnss_cam) { altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude); } + if (!first_gps_to_cam) { + first_gps_to_cam = frontend.frames()[i].world_from_cam_groundtruth_->translation() - + *frontend.frames()[i].cam_in_world_initial_guess_; + } gcs_coordinate.z() += *altitude_gps_from_gnss_cam; std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs << "\ngps_gcs: " << gcs_coordinate @@ -106,14 +155,17 @@ int main(int argc, const char** argv) { ECEF_from_gps_hom; const Eigen::Isometry3d cam_from_gps( (parser.cam_from_imu() * parser.gps_from_imu().inverse()).matrix()); - Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() + cam_from_gps.translation(); + Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() - cam_from_gps.translation(); viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i)); - viz_points.emplace_back(gps_in_w.head<3>(), "x_gps" + std::to_string(i)); + viz_points.emplace_back( + *frontend.frames()[i].cam_in_world_initial_guess_ + *first_gps_to_cam, + "x_frontend_cam_gps" + std::to_string(i)); const Eigen::Vector3d gps_from_ref_in_world = gps_in_w.head<3>() - w_from_gnss_cam.translation(); std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl; } } + // std::cout << "ate_cam: " << ate_cam << "\tate_gps: " << ate_gps << std::endl; const double sum = std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0); double avg_ns_gps_delta = sum / gps_ns_delta_from_shutter.size(); diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 8b7d5077a..a42a819bd 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -168,36 +169,36 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) { void Frontend::populate_frames() { FrameId id = frames_.size(); for (const ImageAndPoint &img_and_pt : images_and_points_) { - const ImagePoint &img_pt = img_and_pt.img_pt; + const std::shared_ptr shared_img_pt = img_and_pt.shared_img_pt; + std::cout << shared_img_pt->to_string() << std::endl; cv::Mat img_undistorted; - cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->k_mat(), - img_pt.K->d_mat(), img_pt.K->k_mat(), - img_and_pt.image_distorted.size()); + cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, + shared_img_pt->K->k_mat(), shared_img_pt->K->d_mat(), + shared_img_pt->K->k_mat(), img_and_pt.image_distorted.size()); auto [kpts, descriptors] = extract_features(img_undistorted); KeypointsCV kpts_cv; for (const cv::KeyPoint &kpt : kpts) { kpts_cv.push_back(kpt.pt); } - gtsam::Cal3_S2::shared_ptr K = boost::make_shared( - img_pt.K->fx, img_pt.K->fy, 0, img_pt.K->cx, img_pt.K->cy); - - if (!K) std::cout << "UH OH" << std::endl; + gtsam::Cal3_S2::shared_ptr K = + boost::make_shared(shared_img_pt->K->fx, shared_img_pt->K->fy, 0, + shared_img_pt->K->cx, shared_img_pt->K->cy); Frame frame(id, img_undistorted, K, kpts_cv, descriptors); const std::optional maybe_world_from_cam_grnd_trth = - img_pt.world_from_cam_ground_truth(); + shared_img_pt->world_from_cam_ground_truth(); if (maybe_world_from_cam_grnd_trth) { frame.world_from_cam_groundtruth_ = gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix()); } - const std::optional maybe_cam_in_world = img_pt.cam_in_world(); + const std::optional maybe_cam_in_world = shared_img_pt->cam_in_world(); if (maybe_cam_in_world) { frame.cam_in_world_initial_guess_ = *maybe_cam_in_world; } if (id == 0) frame.world_from_cam_initial_guess_ = gtsam::Rot3::Identity(); const std::optional maybe_translation_covariance_in_cam = - img_pt.translation_covariance_in_cam(); + shared_img_pt->translation_covariance_in_cam(); if (maybe_translation_covariance_in_cam) { frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam; } @@ -233,8 +234,9 @@ void Frontend::match_frames_and_build_tracks() { robot::geometry::estimate_cam0_from_cam1( cv_kpts_1, cv_kpts_2, matches, images_and_points_[i] - .img_pt.K->k_mat()); // fine for now since all cameras have the same K, - // how to reconcile later though...? + .shared_img_pt->K + ->k_mat()); // fine for now since all cameras have the same K, + // how to reconcile later though...? if (!scale_cam0_from_cam1) { continue; @@ -293,8 +295,8 @@ void Frontend::match_frames_and_build_tracks() { cv_kpts_2.push_back(cv_kpt); } std::optional scale_cam0_from_cam1 = - robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, - images_and_points_[i].img_pt.K->k_mat()); + robot::geometry::estimate_cam0_from_cam1( + cv_kpts_1, cv_kpts_2, matches, images_and_points_[i].shared_img_pt->K->k_mat()); if (!scale_cam0_from_cam1) { continue; } diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index d71338936..a9e1a01c2 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -20,7 +21,7 @@ struct FrontendParams { }; struct ImageAndPoint { cv::Mat image_distorted; - ImagePoint img_pt; + std::shared_ptr shared_img_pt; }; class Frontend { public: diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index 2fbaf52cb..f4a4635b9 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -26,7 +26,7 @@ struct ImagePoint { virtual std::optional translation_covariance_in_cam() const { return std::nullopt; }; - std::string to_string() const { + virtual std::string to_string() const { auto vec3d_to_str = [](const Eigen::Vector3d& vec3d) { std::stringstream ss; ss << "[" << vec3d.x() << ", " << vec3d.y() << ", " << vec3d.z() << "]"; diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc index 2ad0468fa..6cdec9d5f 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.cc +++ b/experimental/learn_descriptors/image_point_four_seasons.cc @@ -12,7 +12,9 @@ namespace robot::experimental::learn_descriptors { ImagePointFourSeasons::~ImagePointFourSeasons() = default; std::optional ImagePointFourSeasons::world_from_cam_ground_truth() const { - if (!AS_w_from_gnss_cam) return std::nullopt; + if (!AS_w_from_gnss_cam) { + return std::nullopt; + } Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = shared_static_transforms->gnss_scale; diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh index b636cd120..a0c337c56 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.hh +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -34,7 +34,7 @@ struct ImagePointFourSeasons : ImagePoint { std::optional translation_covariance_in_cam() const override; std::optional gps_covariance_in_world() const; - std::string to_string() const { + std::string to_string() const override { auto se3_to_str = [](const liegroups::SE3& se3) { const Eigen::Vector3d& t = se3.translation(); const Eigen::Quaterniond& r = se3.so3().unit_quaternion(); diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index 22d63079c..c2270e69a 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -1,9 +1,11 @@ #include #include #include +#include #include #include #include +#include #include #include "Eigen/Core" @@ -15,6 +17,7 @@ #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend.hh" #include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/image_point_four_seasons.hh" #include "experimental/learn_descriptors/structure_from_motion_types.hh" #include "gtsam/geometry/Cal3_S2.h" #include "gtsam/geometry/Point2.h" @@ -201,9 +204,11 @@ int main(int argc, const char **argv) { true, false}; Frontend frontend(params); - for (size_t i = 660; i < 1000; i += 5) { + for (size_t i = 660; i < 750; i += 5) { // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl; - frontend.add_image(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)}); + frontend.add_image( + ImageAndPoint{parser.load_image(i), + std::make_shared(parser.get_image_point(i))}); } frontend.populate_frames(); frontend.match_frames_and_build_tracks(); @@ -214,13 +219,29 @@ int main(int argc, const char **argv) { << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl; std::vector viz_poses_init; + std::optional first_point; + std::optional first_grnd_trth; + (void)first_point; for (const Frame &frame : frontend.frames()) { - Eigen::Isometry3d world_from_cam_init; - world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); - world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_; - viz_poses_init.emplace_back(world_from_cam_init, - gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string()); + if (frame.cam_in_world_initial_guess_) { + Eigen::Isometry3d world_from_cam_init; + world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); + if (!first_point) first_point = *frame.cam_in_world_initial_guess_; + world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - *first_point; + std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() << std::endl; + viz_poses_init.emplace_back( + world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string()); + } + if (frame.world_from_cam_groundtruth_) { + std::cout << "adding a ground truth frame to viz!" << std::endl; + Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix()); + if (!first_grnd_trth) first_grnd_trth = w_from_cam_grnd_trth; + w_from_cam_grnd_trth.translation() -= first_grnd_trth->translation(); + viz_poses_init.emplace_back(w_from_cam_grnd_trth, + "x_grnd_" + std::to_string(frame.id_)); + } } + std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl; robot::geometry::viz_scene(viz_poses_init, std::vector(), cv::viz::Color::brown(), true, true, "Frontend initial guesses"); @@ -242,34 +263,35 @@ int main(int argc, const char **argv) { // add gps factors const std::vector &frames = frontend.frames(); - graph_.add(gtsam::PriorFactor( - gtsam::Symbol('x', 0), gtsam::Pose3::Identity(), - gtsam::noiseModel::Constrained::All(6) // effectively fixes the pose - )); - initial_estimate_.insert(gtsam::Symbol('x', 0), gtsam::Pose3::Identity()); - std::vector symbols_pose{gtsam::Symbol('x', 0)}; - std::vector world_from_cam_initial_guess; - for (size_t i = 1; i < frames.size(); i++) { + std::vector symbols_pose; + std::unordered_map world_from_cam_initial_guess; + std::optional cam0_in_w; + for (const Frame &frame : frames) { + if (!frame.cam_in_world_initial_guess_) continue; + if (!cam0_in_w) cam0_in_w = *frame.cam_in_world_initial_guess_; // should do some interpolation here most likely - const gtsam::Pose3 world_from_cam_estimate( - frames[i].world_from_cam_initial_guess_ ? *frames[i].world_from_cam_initial_guess_ - : gtsam::Rot3::Identity(), - frames[i].cam_in_world_initial_guess_ ? *frames[i].cam_in_world_initial_guess_ - : gtsam::Point3()); - world_from_cam_initial_guess.push_back(world_from_cam_estimate); + const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_ + ? *frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(), + *frame.cam_in_world_initial_guess_ - *cam0_in_w); + world_from_cam_initial_guess[frame.id_] = world_from_cam_estimate; gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( - frames[i].translation_covariance_in_cam_ - ? gtsam::Vector3((*frames[i].translation_covariance_in_cam_)(0, 0), - (*frames[i].translation_covariance_in_cam_)(1, 1), - (*frames[i].translation_covariance_in_cam_)(2, 2)) + frame.translation_covariance_in_cam_ + ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0), + (*frame.translation_covariance_in_cam_)(1, 1), + (*frame.translation_covariance_in_cam_)(2, 2)) : gps_sigmas_fallback); - const gtsam::Symbol cam_symbol('x', i); - if (frames[i].cam_in_world_initial_guess_) - graph_.add( - gtsam::GPSFactor(cam_symbol, *frames[i].cam_in_world_initial_guess_, gps_noise)); + const gtsam::Symbol cam_symbol('x', frame.id_); + graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); initial_estimate_.insert(cam_symbol, world_from_cam_estimate); symbols_pose.push_back(cam_symbol); } + std::vector viz_pose; + for (const auto &[id, pose] : world_from_cam_initial_guess) { + viz_pose.emplace_back(Eigen::Isometry3d(pose.matrix()), "x_" + std::to_string(id)); + } + robot::geometry::viz_scene(viz_pose, std::vector(), + cv::viz::Color::brown(), true, true, "Initial guesses in backend"); std::cout << "heartbeat pre lmks in graph" << std::endl; @@ -287,7 +309,6 @@ int main(int argc, const char **argv) { world_from_lmk_cams, lmk_observations, frames[0].K_); // all K are the same for now... if (landmark_estimate) { const gtsam::Symbol lmk_symbol(Frontend::symbol_lmk_char, i); - // if (!frames[i].K_) std::cout << "UH OH" << std::endl; for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { graph_.add(gtsam::GenericProjectionFactor( gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise, From 4134a65a6feeef8ecec312c67cd3112a2087bef4 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 15 Jul 2025 17:08:17 -0400 Subject: [PATCH 33/45] WIP --- experimental/learn_descriptors/BUILD | 3 +- experimental/learn_descriptors/backend.cc | 258 +++++++----------- experimental/learn_descriptors/backend.hh | 94 +------ .../learn_descriptors/backend_test.cc | 4 +- experimental/learn_descriptors/frame.hh | 3 + experimental/learn_descriptors/frontend.cc | 6 +- .../structure_from_motion.cc | 10 +- 7 files changed, 134 insertions(+), 244 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index d17e1f8af..06e1d6b5e 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -373,7 +373,8 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":feature_manager", + ":structure_from_motion_types", + ":frame", "//common/geometry:camera", "@boost//:smart_ptr", "@gtsam", diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index b3b50e763..1b3559d0c 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -1,7 +1,11 @@ #include "experimental/learn_descriptors/backend.hh" +#include + #include "boost/make_shared.hpp" #include "experimental/learn_descriptors/feature_manager.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" #include "gtsam/geometry/triangulation.h" #include "gtsam/navigation/GPSFactor.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" @@ -11,159 +15,6 @@ namespace robot::experimental::learn_descriptors { -Backend::Backend(std::shared_ptr feature_manager) - : feature_manager_(feature_manager) { - const size_t img_width = 640; - const size_t img_height = 480; - const double fx = 500.0; - const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; - - gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - K_ = boost::make_shared(K); - // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); -} - -Backend::Backend(std::shared_ptr feature_manager, gtsam::Cal3_S2 K) - : feature_manager_(feature_manager) { - K_ = boost::make_shared(K); - // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K); -} - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, - const gtsam::SharedNoiseModel &noise) { - graph_.emplace_shared>(symbol, value, noise); - initial_estimate_.insert_or_assign(symbol, value); - // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl; - // initial_estimate_.print("values after adding prior: "); -} - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, - const gtsam::SharedNoiseModel &noise) { - graph_.emplace_shared>(symbol, value, noise); - gtsam::Rot3 R; - if (initial_estimate_.exists(symbol)) { - R = initial_estimate_.at(symbol).rotation(); - } - initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value)); -} - -template <> -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, - const gtsam::Symbol &symbol_2, - const gtsam::Pose3 &value, - const gtsam::SharedNoiseModel &model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, model); - // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << - // std::endl; initial_estimate_.print("values when adding between factor: "); - initial_estimate_.insert_or_assign(symbol_2, - initial_estimate_.at(symbol_1).compose(value)); -} - -template <> -void Backend::add_between_factor(const gtsam::Symbol &symbol_1, - const gtsam::Symbol &symbol_2, - const gtsam::Rot3 &value, - const gtsam::SharedNoiseModel &model) { - graph_.emplace_shared>(symbol_1, symbol_2, value, model); - initial_estimate_.insert_or_assign(symbol_2, - initial_estimate_.at(symbol_1).compose( - gtsam::Pose3(value, gtsam::Point3::Zero()))); -} - -void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, - const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) { - graph_.emplace_shared(symbol, t_world_cam, model); - initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam)); -} - -std::pair, std::vector> Backend::get_obs_for_lmk( - const gtsam::Symbol &lmk_symbol) { - std::vector cam_poses; - std::vector observations; - - // Iterate over all factors in the graph - for (const auto &factor : graph_) { - auto projFactor = boost::dynamic_pointer_cast< - gtsam::GenericProjectionFactor>(factor); - - if (projFactor && projFactor->keys().at(1) == lmk_symbol) { - // Get the camera pose symbol - gtsam::Symbol cameraSymbol = projFactor->keys().at(0); - - // Retrieve the camera pose from values - if (!initial_estimate_.exists(cameraSymbol)) continue; - gtsam::Pose3 cameraPose = initial_estimate_.at(cameraSymbol); - - // Get the 2D observation (keypoint measurement) - gtsam::Point2 observation = projFactor->measured(); - - // Store the pose and corresponding observation - observations.push_back(observation); - cam_poses.push_back(cameraPose); - } - } - return std::pair, std::vector>(cam_poses, - observations); -} - -void Backend::add_landmarks(const std::vector &landmarks) { - for (const Landmark &landmark : landmarks) { - add_landmark(landmark); - } -} - -void Backend::add_landmark(const Landmark &landmark) { - graph_.emplace_shared>( - landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, - K_); - if (!initial_estimate_.exists(landmark.cam_pose_symbol)) { - throw std::runtime_error( - "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before " - "add_landmark is called."); - } - gtsam::Point3 p_world_lmk_estimate = - initial_estimate_.at(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess; - initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate); - - std::pair, std::vector> lmk_obs = - get_obs_for_lmk(landmark.lmk_factor_symbol); - if (lmk_obs.first.size() >= 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - p_world_lmk_estimate = gtsam::triangulatePoint3( - lmk_obs.first, K_, - gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end())); - - // Optional: perform an explicit cheirality check - bool valid = true; - for (const auto &pose : lmk_obs.first) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate); - if (p_cam.z() <= 0) { // Check that the depth is positive - valid = false; - break; - } - } - - if (valid) { - initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate); - } else { - std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." - << std::endl; - } - } catch (const gtsam::TriangulationCheiralityException &e) { - // Handle the exception gracefully by logging and retaining the previous estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() - << ". Keeping initial landmark estimate." << std::endl; - } - } -} - void Backend::solve_graph() { gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); result_ = optimizer.optimize(); @@ -191,4 +42,105 @@ void Backend::solve_graph(const int num_steps, } result_ = optimizer.values(); } + +gtsam::Rot3 average_rotations(const std::vector& rotations, int max_iter = 10) { + if (rotations.empty()) throw std::runtime_error("No rotations to average"); + if (rotations.size() == 1) return rotations.front(); + + gtsam::Rot3 mean = rotations[0]; + + for (int iter = 0; iter < max_iter; ++iter) { + Eigen::Vector3d total = Eigen::Vector3d::Zero(); + for (const gtsam::Rot3& R : rotations) { + gtsam::Rot3 delta = mean.between(R); // R_mean^T * R + total += gtsam::Rot3::Logmap(delta); // Lie algebra element in R³ + } + total /= static_cast(rotations.size()); + mean = mean.compose(gtsam::Rot3::Expmap(total)); // update estimate + } + + return mean; +} + +void Backend::populate_rotation_estimate(std::vector& frames) { + struct FrameVertex { + Frame& frame; + std::unordered_map + neighbor_id_to_rot; // rot is frame_from_neighbor_frame + }; + std::unordered_map frame_tree; + // populate all frames and add children to neighbors + for (Frame& frame : frames) { + FrameVertex frame_vertex{frame}; + for (const auto [other_frame_id, frame_from_other_frame] : frame.frame_from_other_frames_) { + frame_vertex.neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame); + } + frame_tree.emplace(frame.id_, frame_vertex); + } + // add parents to neighbors + for (const Frame& frame : frames) { + for (const) } + gtsam::NonlinearFactorGraph graph; + gtsam::Values initial; + // TODO: somehow get more informative/more robust noise values + // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength + gtsam::noiseModel::Diagonal::shared_ptr noise = + gtsam::noiseModel::Isotropic::Sigma(3, 0.1); // radians + + std::unordered_set frames_seen; + for (size_t i = 0; i < frames.size(); i++) { + const Frame& frame = frames[i]; + if (i == 0) { + initial.insert(gtsam::Symbol(symbol_char_rotation, i), + frame.world_from_cam_initial_guess_ + ? *frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity()); + } + std::vector candidates_world_from_frame; + for (const auto& [other_frame_id, frame_from_other_frame] : + frame.frame_from_other_frames_) { + if (frames_seen.find(other_frame_id) == frames_seen.end()) { + candidates_world_from_frame.push_back(frame_from_other_frame); + } + } + initial.insert(gtsam::Symbol(symbol_char_rotation, frame.id_), + average_rotations(candidates_world_from_frame)); + frames_seen.insert(frame.id_); + } + + // Assume we have relative rotations Rij between node i and j + std::vector> edges = {{0, 1}, {1, 2}, {2, 3}, {3, 4}}; + std::vector relativeRotations = { + Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.3, 0.0, 0.0), + Rot3::RzRyRx(0.2, 0.0, 0.0)}; + + // Add BetweenFactors for relative rotations + for (size_t k = 0; k < edges.size(); ++k) { + size_t i = edges[k].first; + size_t j = edges[k].second; + graph.emplace_shared>( + gtsam::Symbol(Backend::symbol_char_rotation, i), + gtsam::Symbol(Backend::symbol_char_rotation, j), relativeRotations[k], noise); + } + + // Fix the first rotation + initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, 0), gtsam::Rot3::Identity()); + + // Initialize other rotations naively + for (size_t i = 1; i <= 4; ++i) { + initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, i), + gtsam::Rot3::RzRyRx(0.0, 0.0, 0.0)); + } + + // Optimize + gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + gtsam::Values result = optimizer.optimize(); + + // Output results + for (size_t i = 0; i <= 4; ++i) { + gtsam::Rot3 Ri = result.at(gtsam::Symbol(Backend::symbol_char_rotation, i)); + std::cout << "Rotation " << i << ": " << Ri.rpy().transpose() << " (roll pitch yaw radians)" + << std::endl; + } +} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh index 4a43ad1ec..c042de3cb 100644 --- a/experimental/learn_descriptors/backend.hh +++ b/experimental/learn_descriptors/backend.hh @@ -6,7 +6,7 @@ #include #include "common/geometry/camera.hh" -#include "experimental/learn_descriptors/feature_manager.hh" +#include "experimental/learn_descriptors/frame.hh" #include "gtsam/geometry/Cal3_S2.h" #include "gtsam/geometry/Point2.h" #include "gtsam/geometry/Point3.h" @@ -20,49 +20,12 @@ namespace robot::experimental::learn_descriptors { class Backend { public: - static constexpr char pose_symbol_char = 'x'; - static constexpr char pose_rot_symbol_char = 'r'; - static constexpr char pose_translation_symbol_char = 't'; - static constexpr char pose_bearing_symbol_char = 'b'; - static constexpr char landmark_symbol_char = 'l'; - static constexpr char camera_symbol_char = 'k'; - - struct Landmark { - Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, - const gtsam::Point2 &projection, const gtsam::Cal3_S2 K, - float initial_depth_guess = 5.0) - : lmk_factor_symbol(lmk_factor_symbol), - cam_pose_symbol(cam_pose_symbol), - projection(projection), - p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K), - projection, initial_depth_guess)){}; - const gtsam::Symbol lmk_factor_symbol; - const gtsam::Symbol cam_pose_symbol; - const gtsam::Point2 projection; - const gtsam::Point3 p_cam_lmk_guess; - }; - - Backend(std::shared_ptr feature_manager = nullptr); - Backend(std::shared_ptr feature_manager, gtsam::Cal3_S2 K); - Backend(gtsam::Cal3_S2::shared_ptr K) : K_(K){}; - ~Backend(){}; - - template - void add_prior_factor(const gtsam::Symbol &symbol, const T &value, - const gtsam::SharedNoiseModel &model); - - template - void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, - const T &value, const gtsam::SharedNoiseModel &model); - - void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, - const gtsam::SharedNoiseModel &model, - const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity()); - - std::pair, std::vector> get_obs_for_lmk( - const gtsam::Symbol &lmk_symbol); - void add_landmarks(const std::vector &landmarks); - void add_landmark(const Landmark &landmark); + static constexpr char symbol_char_pose = 'x'; + static constexpr char symbol_char_rotation = 'r'; + static constexpr char symbol_char_translation = 't'; + static constexpr char symbol_char_bearing = 'b'; + static constexpr char symbol_char_landmark = 'l'; + static constexpr char symbol_char_cam_cal = 'k'; void solve_graph(); typedef int epoch; @@ -70,46 +33,19 @@ class Backend { void solve_graph(const int num_steps, std::optional inter_debug_func = std::nullopt); - const gtsam::Values &get_current_initial_values() const { return initial_estimate_; }; - const gtsam::Values &get_result() const { return result_; }; - const gtsam::Cal3_S2 &get_K() const { return *K_; }; + static gtsam::Rot3 average_rotations(const std::vector &rotations, + int max_iter = 10); + // use rotation averaging to set the rotation initial guess for each frame in the world frame. + // the world frame will be the first frame in the vector + static void populate_rotation_estimate(std::vector &frames); - const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; }; - const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; }; - const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; }; - const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; }; + const gtsam::Values ¤t_initial_values() const { return initial_estimate_; }; + const gtsam::Values &result() const { return result_; }; private: - std::shared_ptr feature_manager_; - gtsam::Cal3_S2::shared_ptr K_; - gtsam::Values initial_estimate_; gtsam::Values result_; gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = - gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01)); - gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ = - gtsam::noiseModel::Isotropic::Sigma(2, 0.1); - gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = - gtsam::noiseModel::Isotropic::Sigma(3, 2.); }; - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Pose3 &, - const gtsam::SharedNoiseModel &); - -template <> -void Backend::add_prior_factor(const gtsam::Symbol &, const gtsam::Point3 &, - const gtsam::SharedNoiseModel &); - -template <> -void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, - const gtsam::Pose3 &, - const gtsam::SharedNoiseModel &); -template <> -void Backend::add_between_factor(const gtsam::Symbol &, const gtsam::Symbol &, - const gtsam::Rot3 &, const gtsam::SharedNoiseModel &); +using Landmark = gtsam::Point3; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc index d294fa15e..d1cbec44e 100644 --- a/experimental/learn_descriptors/backend_test.cc +++ b/experimental/learn_descriptors/backend_test.cc @@ -31,10 +31,10 @@ TEST(BackendTest, cube) { // camera_poses.emplace_back(T_world_cam.pose().matrix()); // } - // backend.add_prior_factor(gtsam::Symbol(backend.camera_symbol_char, 0), camera_poses.front(), + // backend.add_prior_factor(gtsam::Symbol(backend.symbol_char_cam_cal, 0), camera_poses.front(), // backend.get_pose_noise()); // for (size_t i = 1; i < camera_poses.size(); i++) { - // backend.add_factor_GPS(gtsam::Symbol(backend.camera_symbol_char, i), + // backend.add_factor_GPS(gtsam::Symbol(backend.symbol_char_cam_cal, i), // camera_poses[i].translation(), backend.get_gps_noise()); // std::pair, cv::Size> cam_and_img_sizes[2] = { diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index 70be07d27..b6eb43a47 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -1,5 +1,6 @@ #pragma once #include +#include #include "experimental/learn_descriptors/frontend_definitions.hh" #include "gtsam/base/Matrix.h" @@ -30,6 +31,8 @@ class Frame { gtsam::Cal3_S2::shared_ptr K_; KeypointsCV kpts_; cv::Mat descriptors_; + std::unordered_map + frame_from_other_frames_; // map of relative rotation for this_frame_from_frame_[FrameId] std::optional world_from_cam_groundtruth_; std::optional cam_in_world_initial_guess_; std::optional world_from_cam_initial_guess_; diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index a42a819bd..1c527f5f2 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -248,10 +248,8 @@ void Frontend::match_frames_and_build_tracks() { // also, at the moment I am not accounting for any covariance between the gps // measurement and the unit translation vector from estimate_cam0_from_cam1 - // can try averaging poses here as well - frames_[j].world_from_cam_initial_guess_.emplace( - frames_[i].world_from_cam_initial_guess_->matrix() * - scale_cam0_from_cam1->linear().matrix()); + frames_[i].frame_from_other_frames_.emplace( + j, gtsam::Rot3(scale_cam0_from_cam1->linear().matrix())); for (const cv::DMatch match : matches) { const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx]; const KeypointCV kpt_cam1 = frames_[j].keypoint()[match.trainIdx]; diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 9ed6f6e5f..c0c39d2a8 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -51,7 +51,7 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac } void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { - backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose, + backend_.add_prior_factor(gtsam::Symbol(Backend::symbol_char_pose, 0), initial_pose, gtsam::noiseModel::Isotropic::Sigma(6, 0)); } @@ -71,8 +71,8 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo compute_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); - const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1); - const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current); + const gtsam::Symbol sym_T_w_c0(Backend::symbol_char_pose, idx_img_current - 1); + const gtsam::Symbol sym_T_w_c1(Backend::symbol_char_pose, idx_img_current); backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation()); @@ -91,7 +91,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo chr = (*maybe_symbol0).chr(); } else { gtsam::Symbol symbol_temp = - gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_); + gtsam::Symbol(Backend::symbol_char_landmark, landmark_count_); idx = symbol_temp.index(); chr = symbol_temp.chr(); landmark_count_++; @@ -142,7 +142,7 @@ void StructureFromMotion::graph_values(const gtsam::Values &values, std::vector final_lmks; for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) { final_poses.emplace_back( - values.at(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix()); + values.at(gtsam::Symbol(get_backend().symbol_char_pose, i)).matrix()); } // for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) { // if (!values.exists(lmk_symbol)) { From 7ef8d0046793413a8c6a24e163aa9dbcf67fb5a1 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 17 Jul 2025 17:01:26 -0400 Subject: [PATCH 34/45] WIP --- experimental/learn_descriptors/BUILD | 10 +- experimental/learn_descriptors/backend.cc | 112 +++++++++--------- .../learn_descriptors/backend_test.cc | 6 +- .../four_seasons_parser_example_viz.cc | 9 +- experimental/learn_descriptors/frontend.cc | 4 +- experimental/learn_descriptors/frontend.hh | 2 +- .../image_point_four_seasons.hh | 14 +++ .../learn_descriptors/incremental_sfm_mvp.cc | 47 ++++---- .../learn_descriptors/refactor_sfm_mvp.cc | 33 +++--- .../learn_descriptors/spatial_test_scene.cc | 105 ++++++++++++---- .../learn_descriptors/spatial_test_scene.hh | 72 +++++++++-- .../spatial_test_scene_cube.hh | 16 +-- .../spatial_test_scene_cube_example.cc | 48 +++++--- visualization/opencv/opencv_viz.cc | 13 +- visualization/opencv/opencv_viz.hh | 4 +- visualization/opencv/opencv_viz_test.cc | 4 +- 16 files changed, 327 insertions(+), 172 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 06e1d6b5e..f6bd3d71d 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -122,6 +122,7 @@ cc_library( ":four_seasons_transforms", "//common/liegroups:se3", "//common/gps:frame_translation", + "//common:check", "@eigen", "@opencv", "@geographiclib", @@ -310,6 +311,7 @@ cc_library( "@eigen", "@gtsam", "@opencv", + "//common:check" ], ) @@ -329,8 +331,9 @@ cc_binary( deps = [ ":spatial_test_scene_cube", "//visualization/opencv:opencv_viz", - "@com_google_googletest//:gtest_main", + "//common:check", "@eigen", + "@gtsam" ], ) @@ -450,13 +453,14 @@ cc_binary( "-Wno-error=unused-function", ], deps = [ + ":backend", ":four_seasons_parser", ":frame", ":frontend", - ":spatial_test_scene_cube", - ":structure_from_motion_types", ":image_point", ":image_point_four_seasons", + ":spatial_test_scene_cube", + ":structure_from_motion_types", "//common/geometry:camera", "//visualization/opencv:opencv_viz", "@boost//:smart_ptr", diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index 1b3559d0c..03cfd3622 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -1,12 +1,15 @@ #include "experimental/learn_descriptors/backend.hh" +#include +#include +#include #include #include "boost/make_shared.hpp" -#include "experimental/learn_descriptors/feature_manager.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/structure_from_motion_types.hh" #include "gtsam/geometry/triangulation.h" +#include "gtsam/linear/NoiseModel.h" #include "gtsam/navigation/GPSFactor.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" #include "gtsam/slam/BetweenFactor.h" @@ -63,84 +66,81 @@ gtsam::Rot3 average_rotations(const std::vector& rotations, int max } void Backend::populate_rotation_estimate(std::vector& frames) { + struct FrameVertex; + using SharedFrameVertex = std::shared_ptr; struct FrameVertex { + // std::unique_ptr frame; Frame& frame; + std::optional w_from_this; std::unordered_map neighbor_id_to_rot; // rot is frame_from_neighbor_frame + std::unordered_set parents; + std::unordered_set children; }; - std::unordered_map frame_tree; - // populate all frames and add children to neighbors + std::unordered_map frame_tree; for (Frame& frame : frames) { - FrameVertex frame_vertex{frame}; - for (const auto [other_frame_id, frame_from_other_frame] : frame.frame_from_other_frames_) { - frame_vertex.neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame); - } - frame_tree.emplace(frame.id_, frame_vertex); + frame_tree.emplace(frame.id_, std::make_shared(frame)); } - // add parents to neighbors - for (const Frame& frame : frames) { - for (const) } + // populate all frames and add children to neighbors + std::queue q; // indices of Frame in frames + std::unordered_set seen_frame_id; + q.push(frames.front().id_); + gtsam::NonlinearFactorGraph graph; gtsam::Values initial; // TODO: somehow get more informative/more robust noise values // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength - gtsam::noiseModel::Diagonal::shared_ptr noise = + gtsam::noiseModel::Diagonal::shared_ptr noise_rotation = gtsam::noiseModel::Isotropic::Sigma(3, 0.1); // radians - std::unordered_set frames_seen; - for (size_t i = 0; i < frames.size(); i++) { - const Frame& frame = frames[i]; - if (i == 0) { - initial.insert(gtsam::Symbol(symbol_char_rotation, i), - frame.world_from_cam_initial_guess_ - ? *frame.world_from_cam_initial_guess_ - : gtsam::Rot3::Identity()); + std::vector symbols_frames; + while (!q.empty()) { + Frame frame = frames[q.front()]; + q.pop(); + seen_frame_id.insert(frame.id_); + SharedFrameVertex frame_vertex = frame_tree.at(frame.id_); + if (seen_frame_id.size() == 1) { // make the first frame rotation identity + frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_ + ? *frame_vertex->frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(); } - std::vector candidates_world_from_frame; + frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this; + const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_); + symbols_frames.push_back(symbol_frame); + initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_); for (const auto& [other_frame_id, frame_from_other_frame] : frame.frame_from_other_frames_) { - if (frames_seen.find(other_frame_id) == frames_seen.end()) { - candidates_world_from_frame.push_back(frame_from_other_frame); + if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) { + q.push(other_frame_id); + seen_frame_id.insert(other_frame_id); + } + frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame); + SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id); + frame_vertex->children.insert(other_frame_vertex); + other_frame_vertex->parents.insert(frame_vertex); + other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_, + frame_from_other_frame.inverse()); + if (!other_frame_vertex->w_from_this) { + other_frame_vertex->w_from_this = + *frame_vertex->w_from_this * frame_from_other_frame; } - } - initial.insert(gtsam::Symbol(symbol_char_rotation, frame.id_), - average_rotations(candidates_world_from_frame)); - frames_seen.insert(frame.id_); - } - - // Assume we have relative rotations Rij between node i and j - std::vector> edges = {{0, 1}, {1, 2}, {2, 3}, {3, 4}}; - std::vector relativeRotations = { - Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.3, 0.0, 0.0), - Rot3::RzRyRx(0.2, 0.0, 0.0)}; - - // Add BetweenFactors for relative rotations - for (size_t k = 0; k < edges.size(); ++k) { - size_t i = edges[k].first; - size_t j = edges[k].second; - graph.emplace_shared>( - gtsam::Symbol(Backend::symbol_char_rotation, i), - gtsam::Symbol(Backend::symbol_char_rotation, j), relativeRotations[k], noise); - } - - // Fix the first rotation - initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, 0), gtsam::Rot3::Identity()); - // Initialize other rotations naively - for (size_t i = 1; i <= 4; ++i) { - initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, i), - gtsam::Rot3::RzRyRx(0.0, 0.0, 0.0)); + graph.add(gtsam::BetweenFactor( + symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id), + frame_from_other_frame.inverse(), noise_rotation)); + } + frame_tree.emplace(frame.id_, frame_vertex); } - // Optimize gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); gtsam::Values result = optimizer.optimize(); - // Output results - for (size_t i = 0; i <= 4; ++i) { - gtsam::Rot3 Ri = result.at(gtsam::Symbol(Backend::symbol_char_rotation, i)); - std::cout << "Rotation " << i << ": " << Ri.rpy().transpose() << " (roll pitch yaw radians)" - << std::endl; + // Output results and assign results to frames + for (const gtsam::Symbol& symbol_rotation : symbols_frames) { + gtsam::Rot3 w_from_frame = result.at(symbol_rotation); + frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame; + // std::cout << "Rotation " << symbol_rotation.string() << ": " + // << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" << std::endl; } } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc index d1cbec44e..48fe2c450 100644 --- a/experimental/learn_descriptors/backend_test.cc +++ b/experimental/learn_descriptors/backend_test.cc @@ -24,7 +24,7 @@ TEST(BackendTest, cube) { // Backend backend(K); // test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K); - // std::vector> cameras = test_scene.get_cameras(); + // std::vector> cameras = test_scene.cameras()(); // std::vector camera_poses; // camera_poses.reserve(cameras.size()); // for (const gtsam::PinholeCamera &T_world_cam : cameras) { @@ -49,6 +49,8 @@ TEST(BackendTest, cube) { // } // } - // geometry::viz_scene(camera_poses, test_scene.get_points()); + // geometry::viz_scene(camera_poses, test_scene.points()); } + +TEST(BackendTest, test_rotation_estimation) {} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index d9a6b89f1..3b85aa540 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -82,8 +82,9 @@ int main(int argc, const char** argv) { ROBOT_CHECK(parser.num_images() != 0); - std::vector viz_frames; // camera frames from visual world frame - std::vector viz_points; // camera frames from visual world frame + std::vector viz_frames; // camera frames from visual world frame + std::vector + viz_points; // camera frames from visual world frame Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity(); std::cout << "gnss scale: " << parser.gnss_scale() << std::endl; @@ -192,6 +193,6 @@ int main(int argc, const char** argv) { std::cout << "max delta: " << max_delta << std::endl; std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl; std::cout << "got " << viz_points.size() << " points" << std::endl; - robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true, - "Viz Trajectory + GPS Points"); + robot::visualization::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true, + "Viz Trajectory + GPS Points"); } diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 1c527f5f2..4b1376dd5 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -241,8 +241,8 @@ void Frontend::match_frames_and_build_tracks() { if (!scale_cam0_from_cam1) { continue; } - ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_, - "This rotation should be populated."); + // ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_, + // "This rotation should be populated."); // this could use some work to verify the quality of the output, particularly inside // of estiamte_cam0_from_cam1 // also, at the moment I am not accounting for any covariance between the gps diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index a9e1a01c2..352330e3e 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -44,7 +44,7 @@ class Frontend { const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; }; const FeatureTracks &feature_tracks() const { return feature_tracks_; }; const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; }; - const std::vector &frames() const { return frames_; }; + std::vector &frames() { return frames_; }; std::pair, cv::Mat> extract_features(const cv::Mat &img) const; std::vector compute_matches(const cv::Mat &descriptors1, diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh index a0c337c56..645a2571a 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.hh +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -9,6 +9,7 @@ #include "Eigen/Core" #include "Eigen/Geometry" +#include "common/check.hh" #include "common/liegroups/se3.hh" #include "experimental/learn_descriptors/four_seasons_transforms.hh" #include "experimental/learn_descriptors/gps_data.hh" @@ -29,6 +30,19 @@ struct ImagePointFourSeasons : ImagePoint { std::optional gps_gcs; // raw gps measurement in gcs (global cordinate system) std::shared_ptr shared_static_transforms; + std::optional velocity_to(const ImagePointFourSeasons& other_img_pt) const { + ROBOT_CHECK(other_img_pt.seq >= seq); + if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt; + double dt_seconds = static_cast(other_img_pt.seq - seq) * 1e-9; + return (*other_img_pt.cam_in_world() - *cam_in_world()) / dt_seconds; + }; + std::optional velocity_from(const ImagePointFourSeasons& other_img_pt) const { + ROBOT_CHECK(seq >= other_img_pt.seq); + if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt; + double dt_seconds = static_cast(seq - other_img_pt.seq) * 1e-9; + return (*cam_in_world() - *other_img_pt.cam_in_world()) / dt_seconds; + }; + std::optional world_from_cam_ground_truth() const override; std::optional cam_in_world() const override; std::optional translation_covariance_in_cam() const override; diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index bafd667b5..b3b8a17e6 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -140,8 +140,8 @@ std::optional attempt_triangulate(const std::vector void graph_values(const gtsam::Values &values, const std::string &window_name, const std::vector &symbols_pose, const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; + std::vector final_poses; + std::vector final_lmks; for (const gtsam::Symbol &symbol_pose : symbols_pose) { final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), symbol_pose.string()); @@ -153,8 +153,8 @@ void graph_values(const gtsam::Values &values, const std::string &window_name, final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); } std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; - robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, - window_name); + robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, + window_name); } gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, @@ -338,7 +338,7 @@ int main(int argc, const char **argv) { id_to_initial_world_from_cam; // these are for initial guesses!! Eigen::Vector3d t_first_cam_in_world; // to create our own world std::vector references_world_from_cam; - std::vector viz_references_world_from_cam; + std::vector viz_references_world_from_cam; Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = parser.gnss_scale(); @@ -420,9 +420,9 @@ int main(int argc, const char **argv) { id++; } if (visualize) - robot::geometry::viz_scene(viz_references_world_from_cam, - std::vector(), - cv::viz::Color::brown(), true, true, "References"); + robot::visualization::viz_scene(viz_references_world_from_cam, + std::vector(), + cv::viz::Color::brown(), true, true, "References"); // populate feature_tracks and lmk_id_map // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute @@ -514,7 +514,7 @@ int main(int argc, const char **argv) { } } } - std::vector viz_world_from_cam_init; + std::vector viz_world_from_cam_init; for (const auto &[id, world_from_cam] : id_to_initial_world_from_cam) { viz_world_from_cam_init.emplace_back(Eigen::Isometry3d(world_from_cam.matrix()), std::to_string(id)); @@ -522,14 +522,14 @@ int main(int argc, const char **argv) { // std::cout << "pre heartbeat" << std::endl; if (visualize) - robot::geometry::viz_scene(viz_world_from_cam_init, - std::vector(), - cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); + robot::visualization::viz_scene( + viz_world_from_cam_init, std::vector(), + cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); // std::cout << "heartbeat" << std::endl; // TRIANGULATE all of the points (for initial guess) std::unordered_map lmk_triangulated_map; // points are kpt_in_world - std::vector triangulated_lmks_viz; + std::vector triangulated_lmks_viz; std::vector triangulated_lmks; for (const std::pair lmk_feat : feature_tracks) { std::vector world_from_cams; @@ -556,9 +556,9 @@ int main(int argc, const char **argv) { } std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl; if (visualize) - robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, - cv::viz::Color::brown(), true, true, - "Unfiltered, triangulated points"); + robot::visualization::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, + cv::viz::Color::brown(), true, true, + "Unfiltered, triangulated points"); // filter points via variance // TODO: filter based on quality and/or quantity of matches @@ -580,9 +580,9 @@ int main(int argc, const char **argv) { std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl; std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl; if (visualize) - robot::geometry::viz_scene(std::vector(), filtered_points, - cv::viz::Color::brown(), true, true, - "Filtered, triangulated points"); + robot::visualization::viz_scene(std::vector(), filtered_points, + cv::viz::Color::brown(), true, true, + "Filtered, triangulated points"); // ############# BACKEND ############### @@ -669,10 +669,10 @@ int main(int argc, const char **argv) { std::vector world_from_cams{id_to_initial_world_from_cam.at(i), id_to_initial_world_from_cam.at(i + 1)}; - std::vector viz_world_from_cams{ + std::vector viz_world_from_cams{ {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"}, {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}}; - std::vector viz_lmks; + std::vector viz_lmks; for (const cv::DMatch match : matches) { std::vector feat_kpts; const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; @@ -735,8 +735,9 @@ int main(int argc, const char **argv) { } std::cout << "setup complete!" << std::endl; if (visualize) - robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), - true, true, "Local Optimization " + std::to_string(i)); + robot::visualization::viz_scene(viz_world_from_cams, viz_lmks, + cv::viz::Color::brown(), true, true, + "Local Optimization " + std::to_string(i)); const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index c2270e69a..4071faac6 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -12,6 +12,7 @@ #include "Eigen/Geometry" #include "common/geometry/camera.hh" #include "cxxopts.hpp" +#include "experimental/learn_descriptors/backend.hh" #include "experimental/learn_descriptors/camera_calibration.hh" #include "experimental/learn_descriptors/four_seasons_parser.hh" #include "experimental/learn_descriptors/frame.hh" @@ -102,8 +103,8 @@ std::optional attempt_triangulate(const std::vector void graph_values(const gtsam::Values &values, const std::string &window_name, const std::vector &symbols_pose, const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; + std::vector final_poses; + std::vector final_lmks; for (const gtsam::Symbol &symbol_pose : symbols_pose) { final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), symbol_pose.string()); @@ -115,8 +116,8 @@ void graph_values(const gtsam::Values &values, const std::string &window_name, final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); } std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; - robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, - window_name); + robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, + window_name); } gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, @@ -204,7 +205,7 @@ int main(int argc, const char **argv) { true, false}; Frontend frontend(params); - for (size_t i = 660; i < 750; i += 5) { + for (size_t i = 581; i < 750; i += 5) { // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl; frontend.add_image( ImageAndPoint{parser.load_image(i), @@ -218,7 +219,7 @@ int main(int argc, const char **argv) { std::cout << "ground truth translation: " << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl; - std::vector viz_poses_init; + std::vector viz_poses_init; std::optional first_point; std::optional first_grnd_trth; (void)first_point; @@ -242,8 +243,9 @@ int main(int argc, const char **argv) { } } std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl; - robot::geometry::viz_scene(viz_poses_init, std::vector(), - cv::viz::Color::brown(), true, true, "Frontend initial guesses"); + robot::visualization::viz_scene(viz_poses_init, std::vector(), + cv::viz::Color::brown(), true, true, + "Frontend initial guesses"); // ############# BACKEND ############### gtsam::Values initial_estimate_; @@ -262,14 +264,16 @@ int main(int argc, const char **argv) { gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas_fallback); // add gps factors - const std::vector &frames = frontend.frames(); + std::vector &frames = frontend.frames(); + frames[0].world_from_cam_initial_guess_ = + frames[0].world_from_cam_groundtruth_->rotation(); // make sure first idx has grnd trth + Backend::populate_rotation_estimate(frames); std::vector symbols_pose; std::unordered_map world_from_cam_initial_guess; std::optional cam0_in_w; for (const Frame &frame : frames) { if (!frame.cam_in_world_initial_guess_) continue; if (!cam0_in_w) cam0_in_w = *frame.cam_in_world_initial_guess_; - // should do some interpolation here most likely const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_ : gtsam::Rot3::Identity(), @@ -286,12 +290,13 @@ int main(int argc, const char **argv) { initial_estimate_.insert(cam_symbol, world_from_cam_estimate); symbols_pose.push_back(cam_symbol); } - std::vector viz_pose; + std::vector viz_pose; for (const auto &[id, pose] : world_from_cam_initial_guess) { viz_pose.emplace_back(Eigen::Isometry3d(pose.matrix()), "x_" + std::to_string(id)); } - robot::geometry::viz_scene(viz_pose, std::vector(), - cv::viz::Color::brown(), true, true, "Initial guesses in backend"); + robot::visualization::viz_scene(viz_pose, std::vector(), + cv::viz::Color::brown(), true, true, + "Initial guesses in backend"); std::cout << "heartbeat pre lmks in graph" << std::endl; @@ -324,7 +329,7 @@ int main(int argc, const char **argv) { // do global optimization const gtsam::Values result = - optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 0); + optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 10); // // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference // double sum_traj_error = 0; diff --git a/experimental/learn_descriptors/spatial_test_scene.cc b/experimental/learn_descriptors/spatial_test_scene.cc index 9d17d1d20..93168e118 100644 --- a/experimental/learn_descriptors/spatial_test_scene.cc +++ b/experimental/learn_descriptors/spatial_test_scene.cc @@ -2,6 +2,24 @@ #include +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "common/check.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" +#include "opencv2/opencv.hpp" + +Eigen::Isometry3d isometry_from_vector(const Eigen::VectorXd &pose_vector) { + ROBOT_CHECK(pose_vector.size() == 6); + Eigen::Isometry3d T = Eigen::Isometry3d::Identity(); + if (pose_vector.tail<3>().norm() > 1e-6) { + T.linear() = Eigen::AngleAxisd(pose_vector.tail<3>().norm(), pose_vector.tail<3>()) + .toRotationMatrix(); + } + T.translation() = pose_vector.head<3>(); + return T; +} + namespace robot::experimental::learn_descriptors { template std::vector SpatialTestScene::get_projected_pixels( @@ -38,22 +56,58 @@ SpatialTestScene::get_corresponding_pixels( return corresponding_pixels; }; -void SpatialTestScene::add_point(Eigen::Vector3d point) { points_.push_back(point); }; +void SpatialTestScene::add_point(Eigen::Vector3d point, std::optional point_noise) { + points_groundtruth_.push_back(point); + if (point_noise) { + ROBOT_CHECK(point_noise->dim() == 3); + points_.push_back(point + point_noise->sample()); + } else { + points_.push_back(point); + } +}; -void SpatialTestScene::add_points(std::vector points) { - points_.insert(points_.end(), points.begin(), points.end()); +void SpatialTestScene::add_points(std::vector points, + std::optional point_noise) { + for (const Eigen::Vector3d &point : points) { + points_groundtruth_.push_back(point); + if (point_noise) { + ROBOT_CHECK(point_noise->dim() == 3); + points_.push_back(point + point_noise->sample()); + } else { + points_.push_back(point); + } + } } -void SpatialTestScene::add_camera(gtsam::PinholeCamera camera) { - cameras_.push_back(camera); +void SpatialTestScene::add_camera(gtsam::PinholeCamera camera, + std::optional pose_noise) { + cameras_groundtruth_.push_back(camera); + if (pose_noise) { + cameras_.push_back(gtsam::PinholeCamera( + camera.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()), + camera.calibration())); + } else { + cameras_.push_back(camera); + } } -void SpatialTestScene::add_cameras(std::vector> cameras) { - cameras_.insert(cameras_.end(), cameras.begin(), cameras.end()); +void SpatialTestScene::add_cameras(std::vector> cameras, + std::optional pose_noise) { + for (const gtsam::PinholeCamera &cam : cameras) { + cameras_groundtruth_.push_back(cam); + if (pose_noise) { + cameras_.push_back(gtsam::PinholeCamera( + cam.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()), + cam.calibration())); + } else { + cameras_.push_back(cam); + } + } } void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_radius_origin, double max_radius_origin, + std::optional pose_noise, const gtsam::Cal3_S2 &K) { std::random_device rd; std::mt19937 gen(rd()); @@ -61,22 +115,33 @@ void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_ std::uniform_real_distribution dist_radius(min_radius_origin, max_radius_origin); std::uniform_real_distribution dist_omega(0.0, 2 * M_PI); + const Eigen::Isometry3d world_from_cam_default = + Eigen::Translation3d(1.0, 0, 0) * Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) * + Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ()); for (int i = 0; i < num_cameras; i++) { - double angle_z = dist_radius(gen); - Eigen::Isometry3d R_z; - R_z.linear() = Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix(); - double angle_x = dist_radius(gen); - Eigen::Isometry3d R_y; - R_y.linear() = Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix(); + double angle_z = dist_omega(gen); + Eigen::Isometry3d longitude_from_world = Eigen::Isometry3d::Identity(); + longitude_from_world.linear() = + Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix(); + double angle_x = dist_omega(gen); + Eigen::Isometry3d latitude_from_world = Eigen::Isometry3d::Identity(); + latitude_from_world.linear() = + Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix(); double radius = dist_radius(gen); - Eigen::Isometry3d T_world_cam; - T_world_cam.translation() = Eigen::Vector3d(radius, 0, 0); - T_world_cam.linear() = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) * - Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ())) - .matrix(); - T_world_cam = R_z * R_y * T_world_cam; - cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(T_world_cam.matrix()), K)); + Eigen::Isometry3d world_from_cam(world_from_cam_default); + world_from_cam.translation() *= radius; + world_from_cam = longitude_from_world * latitude_from_world * world_from_cam; + cameras_groundtruth_.push_back( + gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K)); + if (pose_noise) { + cameras_.push_back(gtsam::PinholeCamera( + gtsam::Pose3( + (world_from_cam * isometry_from_vector(pose_noise->sample())).matrix()), + K)); + } else { + cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K)); + } } } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_test_scene.hh b/experimental/learn_descriptors/spatial_test_scene.hh index 49d033180..4f542223a 100644 --- a/experimental/learn_descriptors/spatial_test_scene.hh +++ b/experimental/learn_descriptors/spatial_test_scene.hh @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -9,15 +11,58 @@ #include "gtsam/geometry/PinholeCamera.h" #include "opencv2/opencv.hpp" +namespace robot::experimental::learn_descriptors { class ProjectionHelper { public: + static constexpr size_t img_width_default = 640; + static constexpr size_t img_height_default = 480; + static constexpr double fx_default = 500.0; + static constexpr double fy_default = fx_default; + static constexpr double cx_default = img_width_default / 2.0; + static constexpr double cy_default = img_height_default / 2.0; + static const gtsam::Cal3_S2::shared_ptr &K_default() { + static gtsam::Cal3_S2::shared_ptr K( + new gtsam::Cal3_S2(fx_default, fy_default, 0.0, cx_default, cy_default)); + return K; + } static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height; } }; -namespace robot::experimental::learn_descriptors { class SpatialTestScene { public: + struct Noise { + Noise(const std::vector &sigmas, std::optional seed = std::nullopt) + : sigmas_(sigmas) { + if (seed) { + gen_.seed(*seed); + } else { + std::random_device rd; + gen_.seed(rd()); + } + + for (double sigma : sigmas_) { + dists_.emplace_back(0.0, sigma); + } + } + + static Noise constrained(size_t dim = 6) { return Noise(std::vector(dim, 0.0)); } + + Eigen::VectorXd sample() { + Eigen::VectorXd result(sigmas_.size()); + for (size_t i = 0; i < sigmas_.size(); ++i) { + result(i) = dists_[i](gen_); + } + return result; + } + + size_t dim() const { return sigmas_.size(); } + + private: + std::vector sigmas_; + std::vector> dists_; + std::mt19937 gen_; + }; struct ProjectedPoint { ProjectedPoint(const size_t pt_idx, const Eigen::Vector2d &pixel) : pt_idx(pt_idx), pixel(pixel){}; @@ -34,18 +79,29 @@ class SpatialTestScene { const std::vector, cv::Size>> &cam_and_img_size); // make this parameter of size 2 - void add_point(Eigen::Vector3d point); - void add_points(std::vector points); - void add_camera(gtsam::PinholeCamera camera); - void add_cameras(std::vector> cameras); + void add_point(Eigen::Vector3d point, std::optional point_noise = std::nullopt); + void add_points(std::vector points, + std::optional point_noise = std::nullopt); + void add_camera(gtsam::PinholeCamera camera, + std::optional pose_noise = std::nullopt); + void add_cameras(std::vector> cameras, + std::optional pose_noise = std::nullopt); void add_rand_cameras_face_origin(int num_cameras, double min_radius_origin, - double max_radius_origin, const gtsam::Cal3_S2 &K); + double max_radius_origin, + std::optional pose_noise = std::nullopt, + const gtsam::Cal3_S2 &K = *ProjectionHelper::K_default()); - std::vector &get_points() { return points_; }; - std::vector> &get_cameras() { return cameras_; }; + const std::vector &points() { return points_; }; + const std::vector &points_groundtruth() { return points_groundtruth_; }; + const std::vector> &cameras() { return cameras_; }; + const std::vector> &cameras_groundtruth() { + return cameras_groundtruth_; + }; protected: std::vector points_; + std::vector points_groundtruth_; std::vector> cameras_; + std::vector> cameras_groundtruth_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh index 306afae4b..1dd08b190 100644 --- a/experimental/learn_descriptors/spatial_test_scene_cube.hh +++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh @@ -8,14 +8,14 @@ namespace robot::experimental::learn_descriptors { class SpatialTestSceneCube : public SpatialTestScene { public: SpatialTestSceneCube(const float cube_size) : SpatialTestScene() { - points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2)); - points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2)); - points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2)); - points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2)); - points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2)); - points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2)); - points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2)); - points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2)); + add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2)); }; }; } // namespace robot::experimental::learn_descriptors diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc index c4f56d736..5ba850db7 100644 --- a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc +++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc @@ -1,36 +1,46 @@ +#include #include #include "Eigen/Dense" +#include "common/check.hh" #include "experimental/learn_descriptors/spatial_test_scene_cube.hh" -#include "gtest/gtest.h" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/PinholeCamera.h" #include "visualization/opencv/opencv_viz.hh" -namespace robot::experimental::learn_descriptors { -TEST(SpatialTestSceneCubeTest, viz_cube) { - SpatialTestSceneCube test_scene(1.0f); - // geometry::viz_scene(std::vector(), test_scene.get_points()); -} +using namespace robot::experimental::learn_descriptors; -TEST(SpatialTestSceneCubeTest, viz_cube_with_cameras) { +int main() { SpatialTestSceneCube test_scene(1.0f); const size_t img_width = 640; const size_t img_height = 480; const double fx = 500.0; const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; + const double cx = static_cast(img_width) / 2.0; + const double cy = static_cast(img_height) / 2.0; gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); - test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K); + SpatialTestScene::Noise pose_noise({0.05, 0.05, 0.05, 0.15, 0.15, 0.15}); + test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, pose_noise, *K); - std::vector> cameras = test_scene.get_cameras(); - std::vector camera_poses; - camera_poses.reserve(cameras.size()); - for (const gtsam::PinholeCamera &T_world_cam : cameras) { - camera_poses.emplace_back(T_world_cam.pose().matrix()); + std::vector> cameras = test_scene.cameras(); + std::vector> cameras_grndtrth = + test_scene.cameras_groundtruth(); + ROBOT_CHECK(cameras.size() == cameras_grndtrth.size()); + std::vector viz_cam_poses; + viz_cam_poses.reserve(cameras.size() + cameras_grndtrth.size()); + for (size_t i = 0; i < cameras.size(); i++) { + viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras[i].pose().matrix()), + "cam_" + std::to_string(i)); + viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras_grndtrth[i].pose().matrix()), + "cam_grnd_" + std::to_string(i)); } - - geometry::viz_scene(camera_poses, test_scene.get_points()); -} -} // namespace robot::experimental::learn_descriptors \ No newline at end of file + ROBOT_CHECK(test_scene.points().size() == test_scene.points_groundtruth().size()); + std::vector viz_points; + for (size_t i = 0; i < test_scene.points().size(); i++) { + viz_points.emplace_back(test_scene.points()[i], "pt_" + std::to_string(i)); + viz_points.emplace_back(test_scene.points_groundtruth()[i], "pt_grnd_" + std::to_string(i)); + } + robot::visualization::viz_scene(viz_cam_poses, viz_points); +} \ No newline at end of file diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc index d750555ed..06c46da1f 100644 --- a/visualization/opencv/opencv_viz.cc +++ b/visualization/opencv/opencv_viz.cc @@ -2,10 +2,13 @@ #include +#include "Eigen/Core" +#include "Eigen/Geometry" #include "common/geometry/translate_types.hh" #include "opencv2/viz.hpp" -namespace robot::geometry { +using namespace robot::geometry; +namespace robot::visualization { cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) { // Ensure R is a valid rotation matrix CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001); @@ -107,12 +110,6 @@ void viz_scene(const std::vector &world_from_poses, cv::Point3d(0, 0, 1), cells, cell_sizes, color)); } - // std::cout << "viz heartbeat" << std::endl; - // window.setViewerPose(cv::Affine3d( - // eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[0].world_from_pose.matrix())))); - // window.setViewerPose(cv::Affine3d::Identity()); - // std::cout << "success" << std::endl; - window.spin(); } -} // namespace robot::geometry +} // namespace robot::visualization diff --git a/visualization/opencv/opencv_viz.hh b/visualization/opencv/opencv_viz.hh index 28dbc197b..5f5956b43 100644 --- a/visualization/opencv/opencv_viz.hh +++ b/visualization/opencv/opencv_viz.hh @@ -7,7 +7,7 @@ #include "Eigen/Geometry" #include "opencv2/viz.hpp" -namespace robot::geometry { +namespace robot::visualization { struct VizPose { const Eigen::Isometry3d world_from_pose; const std::optional label; @@ -27,4 +27,4 @@ void viz_scene(const std::vector &world_from_poses, const cv::viz::Color color_background = cv::viz::Color::black(), const bool show_grid = true, const bool show_origin = true, const std::string &window_name = "Viz Window", const double text_scale = 0.1); -} // namespace robot::geometry \ No newline at end of file +} // namespace robot::visualization \ No newline at end of file diff --git a/visualization/opencv/opencv_viz_test.cc b/visualization/opencv/opencv_viz_test.cc index 502f8e4f4..832ce2878 100644 --- a/visualization/opencv/opencv_viz_test.cc +++ b/visualization/opencv/opencv_viz_test.cc @@ -3,7 +3,7 @@ #include "gtest/gtest.h" #include "opencv2/viz.hpp" -namespace robot::geometry { +namespace robot::visualization { TEST(OpencvVizTest, demo) { cv::viz::Viz3d window("My Window"); @@ -111,4 +111,4 @@ TEST(OpencvVizTest, cube_test_labeled) { viz_scene(world_from_cams, cube_points_in_world); } -} // namespace robot::geometry \ No newline at end of file +} // namespace robot::visualization \ No newline at end of file From 3a2ba892aa81d12e75596d5192378e19b7dc9e77 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Fri, 18 Jul 2025 18:08:15 -0400 Subject: [PATCH 35/45] added tranlsation interpolation. a handful of initial rotations are erroneous. final optimization isn't quite there yet. --- experimental/learn_descriptors/BUILD | 5 + experimental/learn_descriptors/backend.cc | 27 +++-- .../learn_descriptors/camera_calibration.hh | 3 + experimental/learn_descriptors/frame.hh | 29 ++++- experimental/learn_descriptors/frontend.cc | 59 +++++++++- experimental/learn_descriptors/frontend.hh | 5 +- .../learn_descriptors/frontend_test.cc | 103 ++++++++++++++++-- experimental/learn_descriptors/image_point.hh | 7 +- .../image_point_four_seasons.hh | 4 +- .../learn_descriptors/refactor_sfm_mvp.cc | 66 ++++++----- 10 files changed, 249 insertions(+), 59 deletions(-) diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index f6bd3d71d..140d492d5 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -242,8 +242,10 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":frontend_definitions", + "//common:check", "@gtsam", "@opencv", + "@eigen" ], ) @@ -363,7 +365,10 @@ cc_test( srcs = ["frontend_test.cc"], deps = [ ":frontend", + ":image_point", "@com_google_googletest//:gtest_main", + "@opencv", + "@eigen" ], ) diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index 03cfd3622..4dbdbd8d1 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -103,6 +103,9 @@ void Backend::populate_rotation_estimate(std::vector& frames) { frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_ ? *frame_vertex->frame.world_from_cam_initial_guess_ : gtsam::Rot3::Identity(); + graph.add(gtsam::PriorFactor( + gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_), + *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3))); } frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this; const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_); @@ -123,6 +126,8 @@ void Backend::populate_rotation_estimate(std::vector& frames) { if (!other_frame_vertex->w_from_this) { other_frame_vertex->w_from_this = *frame_vertex->w_from_this * frame_from_other_frame; + other_frame_vertex->frame.world_from_cam_initial_guess_ = + *frame_vertex->w_from_this * frame_from_other_frame; } graph.add(gtsam::BetweenFactor( @@ -132,15 +137,17 @@ void Backend::populate_rotation_estimate(std::vector& frames) { frame_tree.emplace(frame.id_, frame_vertex); } - gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); - gtsam::Values result = optimizer.optimize(); - - // Output results and assign results to frames - for (const gtsam::Symbol& symbol_rotation : symbols_frames) { - gtsam::Rot3 w_from_frame = result.at(symbol_rotation); - frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame; - // std::cout << "Rotation " << symbol_rotation.string() << ": " - // << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" << std::endl; - } + // gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + // gtsam::Values result = optimizer.optimize(); + + // // Output results and assign results to frames + // for (const gtsam::Symbol& symbol_rotation : symbols_frames) { + // gtsam::Rot3 w_from_frame = result.at(symbol_rotation); + // frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = + // w_from_frame; + // // std::cout << "Rotation " << symbol_rotation.string() << ": " + // // << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" << + // std::endl; + // } } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh index 1e985acad..610dd2d8d 100644 --- a/experimental/learn_descriptors/camera_calibration.hh +++ b/experimental/learn_descriptors/camera_calibration.hh @@ -4,6 +4,9 @@ namespace robot::experimental::learn_descriptors { struct CameraCalibrationFisheye { + CameraCalibrationFisheye(double fx, double fy, double cx, double cy, double k1, double k2, + double k3, double k4) + : fx(fx), fy(fy), cx(cx), cy(cy), k1(k1), k2(k2), k3(k3), k4(k4) {} double fx, fy, cx, cy, k1, k2, k3, k4; cv::Mat k_mat() { diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index b6eb43a47..0d7808b01 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -2,6 +2,8 @@ #include #include +#include "Eigen/Core" +#include "common/check.hh" #include "experimental/learn_descriptors/frontend_definitions.hh" #include "gtsam/base/Matrix.h" #include "gtsam/geometry/Cal3_S2.h" @@ -12,22 +14,41 @@ namespace robot::experimental::learn_descriptors { class Frame { public: - Frame(FrameId id, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K, const KeypointsCV kpts, - const cv::Mat descriptors) + Frame(FrameId id, size_t seq, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K, + const KeypointsCV kpts, const cv::Mat descriptors) : id_(id), + seq_(seq), undistorted_img_(img), K_(std::move(K)), kpts_(kpts), descriptors_(descriptors) {} + std::optional velocity_to(const Frame& other_frame) const { + ROBOT_CHECK(other_frame.seq_ >= seq_); + if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_) + return std::nullopt; + double dt_seconds = static_cast(other_frame.seq_ - seq_) * 1e-9; + return (*other_frame.cam_in_world_initial_guess_ - *cam_in_world_initial_guess_) / + dt_seconds; + }; + std::optional velocity_from(const Frame& other_frame) const { + ROBOT_CHECK(seq_ >= other_frame.seq_); + if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_) + return std::nullopt; + double dt_seconds = static_cast(seq_ - other_frame.seq_) * 1e-9; + return (*cam_in_world_initial_guess_ - *other_frame.cam_in_world_initial_guess_) / + dt_seconds; + }; + void add_keypoints(const KeypointsCV& kpts); void assign_descriptors(const cv::Mat& descriptors); const KeypointsCV& keypoint() { return kpts_; }; const cv::Mat& descriptors() { return descriptors_; }; - const FrameId id_; - const cv::Mat undistorted_img_; + FrameId id_; + size_t seq_; + cv::Mat undistorted_img_; gtsam::Cal3_S2::shared_ptr K_; KeypointsCV kpts_; cv::Mat descriptors_; diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 4b1376dd5..4b10579cd 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -166,11 +167,13 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) { } } -void Frontend::populate_frames() { +void Frontend::populate_frames(bool verbose) { FrameId id = frames_.size(); for (const ImageAndPoint &img_and_pt : images_and_points_) { const std::shared_ptr shared_img_pt = img_and_pt.shared_img_pt; - std::cout << shared_img_pt->to_string() << std::endl; + if (verbose) { + std::cout << shared_img_pt->to_string() << std::endl; + } cv::Mat img_undistorted; cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, shared_img_pt->K->k_mat(), shared_img_pt->K->d_mat(), @@ -185,7 +188,7 @@ void Frontend::populate_frames() { boost::make_shared(shared_img_pt->K->fx, shared_img_pt->K->fy, 0, shared_img_pt->K->cx, shared_img_pt->K->cy); - Frame frame(id, img_undistorted, K, kpts_cv, descriptors); + Frame frame(id, shared_img_pt->seq, img_undistorted, K, kpts_cv, descriptors); const std::optional maybe_world_from_cam_grnd_trth = shared_img_pt->world_from_cam_ground_truth(); if (maybe_world_from_cam_grnd_trth) { @@ -328,6 +331,56 @@ void Frontend::match_frames_and_build_tracks() { std::cout << "done processing matches" << std::endl; } +std::optional> Frontend::interpolated_initial_translations() { + std::vector frames_with_guess; + for (Frame &frame : + frames_) { // this assumes that frames are ordered by seq (time), later on may have to + // think about this more if trying to run pipeline on unordered images or sets + // whose capture times are on different days + if (frame.cam_in_world_initial_guess_) { + frames_with_guess.push_back(&frame); + } + } + if (frames_with_guess.size() <= 2) return std::nullopt; + std::vector interpolated_init_guesses; + interpolated_init_guesses.reserve(frames_.size()); + size_t idx_guess = 0; + + for (const Frame &frame : frames_) { + Eigen::Vector3d interpolated; + + // advance to next guess window if needed + while (idx_guess + 1 < frames_with_guess.size() - 1 && + frame.seq_ > frames_with_guess[idx_guess + 1]->seq_) { + idx_guess++; + } + + if (frame.cam_in_world_initial_guess_) { + interpolated = *frame.cam_in_world_initial_guess_; + } else if (frame.seq_ < frames_with_guess[idx_guess]->seq_) { + Eigen::Vector3d v_b_to_a = + *frames_with_guess[idx_guess]->velocity_from(*frames_with_guess[idx_guess + 1]); + double dt = 1e-9 * (frames_with_guess[idx_guess]->seq_ - frame.seq_); + interpolated = + *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_b_to_a * dt; + std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl; + std::cout << "\tdt: " << dt << std::endl; + std::cout << "\tv_a_to_b: " << v_b_to_a << std::endl; + } else { + Eigen::Vector3d v_a_to_b = + *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]); + double dt = 1e-9 * (frame.seq_ - frames_with_guess[idx_guess]->seq_); + interpolated = + *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_a_to_b * dt; + std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl; + std::cout << "\tdt: " << dt << std::endl; + std::cout << "\tv_a_to_b: " << v_a_to_b << std::endl; + } + interpolated_init_guesses.push_back(interpolated); + } + return interpolated_init_guesses; +} + std::pair, cv::Mat> Frontend::extract_features(const cv::Mat &img) const { std::vector keypoints; cv::Mat descriptors; diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index 352330e3e..ce2f5c43d 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -1,9 +1,11 @@ #pragma once #include +#include #include #include +#include "Eigen/Core" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend_definitions.hh" #include "experimental/learn_descriptors/image_point.hh" @@ -30,8 +32,9 @@ class Frontend { explicit Frontend(FrontendParams params); ~Frontend(){}; - void populate_frames(); + void populate_frames(bool verbose = false); void match_frames_and_build_tracks(); + std::optional> interpolated_initial_translations(); void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); }; void add_images(const std::vector &img_and_pts) { diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc index 28e7dc0c8..a04a443ca 100644 --- a/experimental/learn_descriptors/frontend_test.cc +++ b/experimental/learn_descriptors/frontend_test.cc @@ -1,9 +1,13 @@ #include "experimental/learn_descriptors/frontend.hh" +#include +#include #include #include #include +#include "Eigen/Core" +#include "experimental/learn_descriptors/image_point.hh" #include "gtest/gtest.h" #include "opencv2/opencv.hpp" @@ -52,27 +56,31 @@ TEST(FrontendTest, pipeline_sweep) { cv::imshow("Test", img_test_disp); cv::waitKey(1000); - Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT, - Frontend::ExtractorType::ORB}; - Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE, - Frontend::MatcherType::FLANN, - Frontend::MatcherType::KNN}; + FrontendParams::ExtractorType extractor_types[2] = {FrontendParams::ExtractorType::SIFT, + FrontendParams::ExtractorType::ORB}; + FrontendParams::MatcherType matcher_types[3] = {FrontendParams::MatcherType::BRUTE_FORCE, + FrontendParams::MatcherType::FLANN, + FrontendParams::MatcherType::KNN}; - Frontend frontend; + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, + true, false}; + Frontend frontend(params); std::pair, cv::Mat> keypoints_descriptors_pair_1; std::pair, cv::Mat> keypoints_descriptors_pair_2; std::vector matches; cv::Mat img_keypoints_out_1(height, width, CV_8UC3), img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); cv::Mat img_display_test; - for (Frontend::ExtractorType extractor_type : extractor_types) { - for (Frontend::MatcherType matcher_type : matcher_types) { + for (FrontendParams::ExtractorType extractor_type : extractor_types) { + for (FrontendParams::MatcherType matcher_type : matcher_types) { printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), static_cast(matcher_type)); + FrontendParams params{extractor_type, matcher_type, true, false}; try { - frontend = Frontend(extractor_type, matcher_type); + frontend = Frontend(params); } catch (const std::invalid_argument &e) { - assert(std::string(e.what()) == "FLANN can not be used with ORB."); // very jank... + ROBOT_CHECK(std::string(e.what()) == + "FLANN can not be used with ORB."); // very jank... continue; } keypoints_descriptors_pair_1 = frontend.extract_features(image_1); @@ -98,7 +106,7 @@ TEST(FrontendTest, pipeline_sweep) { } printf("completed frontend combination: (%d, %d)\n", static_cast(extractor_type), static_cast(matcher_type)); - if (extractor_type != Frontend::ExtractorType::ORB) { // don't check ORB for now + if (extractor_type != FrontendParams::ExtractorType::ORB) { // don't check ORB for now for (const cv::DMatch match : matches) { EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x - keypoints_descriptors_pair_2.first[match.trainIdx].pt.x, @@ -111,4 +119,77 @@ TEST(FrontendTest, pipeline_sweep) { } } } + +TEST(FrontendTest, interpolate_frames) { + constexpr size_t width = 800; + constexpr size_t height = 400; + constexpr double fx = 500.0; + constexpr double fy = fx; + constexpr double cx = static_cast(width) / 2.0; + constexpr double cy = static_cast(height) / 2.0; + // gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); + std::shared_ptr shared_K = + std::make_shared(fx, fy, cx, cy, 1, 1, 1, 1); + cv::Mat white_image(height, width, CV_8UC3, cv::Scalar(255, 255, 255)); + + FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, + true, false}; + Frontend frontend(params); + + struct VelocityPoint { + double time; // time in seconds + Eigen::Vector3d velocity; // m/s + }; + std::vector velocity_points{ + // basically a piecewise constant function + VelocityPoint{1.0, Eigen::Vector3d(1, 0, 0)}, // go in x dir from seconds 0 - 1 + VelocityPoint{2.0, Eigen::Vector3d(0, 1, 0)}, // go in y dir from seconds 1 - 2 + VelocityPoint{ + 3.0, Eigen::Vector3d(1, 1, 0)}, // go diagonal in first quadrant from seconds 2 - 3 + }; + size_t idx_vel_pt = 0; + const Eigen::Vector3d pt0_in_world = Eigen::Vector3d::Zero(); + std::vector pts_in_world{pt0_in_world}; + double time = 0.0; + std::vector> img_pts; + ImagePoint img_pt_first; + img_pt_first.id = 0; + img_pt_first.seq = static_cast(time * 1e9); + img_pt_first.K = shared_K; + img_pt_first.set_cam_in_world(pt0_in_world); + img_pts.push_back(std::make_shared(img_pt_first)); + frontend.add_image(ImageAndPoint{white_image, img_pts.back()}); + constexpr double sample_hz = 6.0; + constexpr size_t num_samples_skip = 2; + ROBOT_CHECK(static_cast(sample_hz) % (num_samples_skip + 1) == + 0); // needed so that the interpolation isn't lossy + constexpr double dt = 1 / sample_hz; + time += dt; + while (time < velocity_points.back().time) { + while (idx_vel_pt < velocity_points.size() && time > velocity_points[idx_vel_pt].time) { + idx_vel_pt++; + } + pts_in_world.emplace_back(pts_in_world.back() + dt * velocity_points[idx_vel_pt].velocity); + ImagePoint img_pt; + img_pt.id = img_pts.size(); + img_pt.seq = static_cast(time * 1e9); + img_pt.K = shared_K; + if (img_pts.size() % (num_samples_skip + 1) == 0) { + img_pt.set_cam_in_world(pts_in_world.back()); + } + img_pts.emplace_back(std::make_shared(img_pt)); + frontend.add_image(ImageAndPoint{white_image, img_pts.back()}); + time += dt; + } + frontend.populate_frames(); + std::optional> interpolated_pts = + frontend.interpolated_initial_translations(); + ROBOT_CHECK(interpolated_pts); + ROBOT_CHECK(interpolated_pts->size() == img_pts.size()); + constexpr double TOL = 1e-5; + for (size_t i = 0; i < pts_in_world.size(); i++) { + ROBOT_CHECK(pts_in_world[i].isApprox((*interpolated_pts)[i], TOL), i, pts_in_world[i], + (*interpolated_pts)[i]); + } +} } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index f4a4635b9..f5b9a681b 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -18,11 +18,13 @@ struct ImagePoint { virtual ~ImagePoint() = default; size_t id; + size_t seq; // for time. TODO: make a time struct std::shared_ptr K; + void set_cam_in_world(const Eigen::Vector3d& cam_in_world) { cam_in_world_ = cam_in_world; }; virtual std::optional world_from_cam_ground_truth() const { return std::nullopt; }; - virtual std::optional cam_in_world() const { return std::nullopt; }; + virtual std::optional cam_in_world() const { return cam_in_world_; }; virtual std::optional translation_covariance_in_cam() const { return std::nullopt; }; @@ -62,5 +64,8 @@ struct ImagePoint { } return ss.str(); } + + private: + std::optional cam_in_world_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh index 645a2571a..4fce4f74a 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.hh +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -21,8 +21,8 @@ class FourSeasonsParser; struct ImagePointFourSeasons : ImagePoint { virtual ~ImagePointFourSeasons(); - size_t id; // idx for DB - size_t seq; // time in nanoseconds of image capture (also the name of image) + // size_t id; // idx for DB + // size_t seq; // time in nanoseconds of image capture (also the name of image) std::optional AS_w_from_gnss_cam; // globally opimized arbitrary scale visual world (gnss + VIO + loop // closure + etc.) from cam diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index 4071faac6..c82ccd50f 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -42,14 +43,15 @@ std::optional attempt_triangulate(const std::vector gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error = 2.0, const bool verbose = true) { + std::cout << "attmepting to triangulate!" << std::endl; gtsam::Point3 p_lmk_in_world; - if (cam_poses.size() >= 2) { + if (cam_poses.size() > 2) { try { // Attempt triangulation using DLT (or the GTSAM provided method) p_lmk_in_world = gtsam::triangulatePoint3( cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - } catch (const gtsam::TriangulationCheiralityException &e) { + } catch (const std::exception &e) { // Handle the exception gracefully by logging and retaining the previous // estimate or discard if (verbose) { @@ -84,6 +86,7 @@ std::optional attempt_triangulate(const std::vector return std::nullopt; } // Reprojection error + std::cout << "heartbeat cheirality and reprojection checks" << std::endl; if (max_reproj_error > 0) { gtsam::PinholeCamera cam(pose, *K); const auto reproj = cam.project(p_lmk_in_world); @@ -205,11 +208,12 @@ int main(int argc, const char **argv) { true, false}; Frontend frontend(params); + std::vector idx_img_pts; for (size_t i = 581; i < 750; i += 5) { - // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl; frontend.add_image( ImageAndPoint{parser.load_image(i), std::make_shared(parser.get_image_point(i))}); + idx_img_pts.push_back(i); } frontend.populate_frames(); frontend.match_frames_and_build_tracks(); @@ -224,15 +228,16 @@ int main(int argc, const char **argv) { std::optional first_grnd_trth; (void)first_point; for (const Frame &frame : frontend.frames()) { - if (frame.cam_in_world_initial_guess_) { - Eigen::Isometry3d world_from_cam_init; - world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); - if (!first_point) first_point = *frame.cam_in_world_initial_guess_; - world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - *first_point; - std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() << std::endl; - viz_poses_init.emplace_back( - world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string()); - } + // if (frame.cam_in_world_initial_guess_) { + // Eigen::Isometry3d world_from_cam_init; + // world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); + // if (!first_point) first_point = *frame.cam_in_world_initial_guess_; + // world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - + // *first_point; std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() << + // std::endl; viz_poses_init.emplace_back( + // world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, + // frame.id_).string()); + // } if (frame.world_from_cam_groundtruth_) { std::cout << "adding a ground truth frame to viz!" << std::endl; Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix()); @@ -270,24 +275,31 @@ int main(int argc, const char **argv) { Backend::populate_rotation_estimate(frames); std::vector symbols_pose; std::unordered_map world_from_cam_initial_guess; - std::optional cam0_in_w; - for (const Frame &frame : frames) { - if (!frame.cam_in_world_initial_guess_) continue; - if (!cam0_in_w) cam0_in_w = *frame.cam_in_world_initial_guess_; + std::optional> interpolated_cam_in_w = + frontend.interpolated_initial_translations(); + std::cout << "heartbeat" << std::endl; + ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w); + Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front(); + for (size_t i = 0; i < frames.size(); i++) { + // std::cout << "interpolated translation " << i << ": " << (*interpolated_cam_in_w)[i] + // << std::endl; + const Frame &frame = frames[i]; + const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_); const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_ : gtsam::Rot3::Identity(), - *frame.cam_in_world_initial_guess_ - *cam0_in_w); + (*interpolated_cam_in_w)[i] - cam0_in_w); world_from_cam_initial_guess[frame.id_] = world_from_cam_estimate; - gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( - frame.translation_covariance_in_cam_ - ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0), - (*frame.translation_covariance_in_cam_)(1, 1), - (*frame.translation_covariance_in_cam_)(2, 2)) - : gps_sigmas_fallback); - const gtsam::Symbol cam_symbol('x', frame.id_); - graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); initial_estimate_.insert(cam_symbol, world_from_cam_estimate); + if (frame.world_from_cam_initial_guess_) { + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( + frame.translation_covariance_in_cam_ + ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0), + (*frame.translation_covariance_in_cam_)(1, 1), + (*frame.translation_covariance_in_cam_)(2, 2)) + : gps_sigmas_fallback); + graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); + } symbols_pose.push_back(cam_symbol); } std::vector viz_pose; @@ -313,11 +325,11 @@ int main(int argc, const char **argv) { std::optional landmark_estimate = attempt_triangulate( world_from_lmk_cams, lmk_observations, frames[0].K_); // all K are the same for now... if (landmark_estimate) { - const gtsam::Symbol lmk_symbol(Frontend::symbol_lmk_char, i); + const gtsam::Symbol lmk_symbol(Backend::symbol_char_landmark, i); for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { graph_.add(gtsam::GenericProjectionFactor( gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise, - gtsam::Symbol(Frontend::symbol_pose_char, frame_id), lmk_symbol, frames[0].K_)); + gtsam::Symbol(Backend::symbol_char_pose, frame_id), lmk_symbol, frames[0].K_)); } initial_estimate_.insert(lmk_symbol, *landmark_estimate); symbols_lmks.push_back(lmk_symbol); From 901aac8ae2a82a486b5d936eb1fed1736cf8dc0c Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Sun, 20 Jul 2025 23:12:58 -0400 Subject: [PATCH 36/45] WIP --- experimental/learn_descriptors/refactor_sfm_mvp.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index c82ccd50f..c45dc0e65 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -280,6 +280,7 @@ int main(int argc, const char **argv) { std::cout << "heartbeat" << std::endl; ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w); Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front(); + std::vector viz_pose; for (size_t i = 0; i < frames.size(); i++) { // std::cout << "interpolated translation " << i << ": " << (*interpolated_cam_in_w)[i] // << std::endl; @@ -300,12 +301,11 @@ int main(int argc, const char **argv) { : gps_sigmas_fallback); graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); } + viz_pose.emplace_back( + Eigen::Isometry3d(world_from_cam_estimate.matrix()), + (frame.cam_in_world_initial_guess_ ? "xg_" : "x_") + std::to_string(frame.id_)); symbols_pose.push_back(cam_symbol); } - std::vector viz_pose; - for (const auto &[id, pose] : world_from_cam_initial_guess) { - viz_pose.emplace_back(Eigen::Isometry3d(pose.matrix()), "x_" + std::to_string(id)); - } robot::visualization::viz_scene(viz_pose, std::vector(), cv::viz::Color::brown(), true, true, "Initial guesses in backend"); From 73ced7875ba9c471a566544022f2329561121290 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Mon, 21 Jul 2025 17:27:21 -0400 Subject: [PATCH 37/45] optimizations look decent. need to fix and tune variance figures. need to also implement initial rotation alignment. --- experimental/learn_descriptors/BUILD | 1 + experimental/learn_descriptors/backend.cc | 7 +- experimental/learn_descriptors/backend.hh | 3 +- experimental/learn_descriptors/frame.hh | 42 +++++ experimental/learn_descriptors/frontend.cc | 20 +- experimental/learn_descriptors/gps_data.cc | 0 experimental/learn_descriptors/gps_data.hh | 59 +++--- .../image_point_four_seasons.hh | 6 +- .../learn_descriptors/incremental_sfm_mvp.cc | 3 +- .../learn_descriptors/refactor_sfm_mvp.cc | 171 ++++++++++++------ 10 files changed, 219 insertions(+), 93 deletions(-) delete mode 100644 experimental/learn_descriptors/gps_data.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 140d492d5..bb369d6d1 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -431,6 +431,7 @@ cc_binary( ":spatial_test_scene_cube", ":structure_from_motion_types", "//common/geometry:camera", + "//common/gps:frame_translation", "//visualization/opencv:opencv_viz", "@boost//:smart_ptr", "@cxxopts", diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index 4dbdbd8d1..30706673a 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -99,7 +99,8 @@ void Backend::populate_rotation_estimate(std::vector& frames) { q.pop(); seen_frame_id.insert(frame.id_); SharedFrameVertex frame_vertex = frame_tree.at(frame.id_); - if (seen_frame_id.size() == 1) { // make the first frame rotation identity + if (seen_frame_id.size() == 1) { // make the first frame rotation identity unless the + // initial guess is set otherwise frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_ ? *frame_vertex->frame.world_from_cam_initial_guess_ : gtsam::Rot3::Identity(); @@ -108,6 +109,8 @@ void Backend::populate_rotation_estimate(std::vector& frames) { *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3))); } frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this; + std::cout << frame.to_string() << std::endl; + const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_); symbols_frames.push_back(symbol_frame); initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_); @@ -127,7 +130,7 @@ void Backend::populate_rotation_estimate(std::vector& frames) { other_frame_vertex->w_from_this = *frame_vertex->w_from_this * frame_from_other_frame; other_frame_vertex->frame.world_from_cam_initial_guess_ = - *frame_vertex->w_from_this * frame_from_other_frame; + other_frame_vertex->w_from_this; } graph.add(gtsam::BetweenFactor( diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh index c042de3cb..9d7d403d9 100644 --- a/experimental/learn_descriptors/backend.hh +++ b/experimental/learn_descriptors/backend.hh @@ -36,7 +36,8 @@ class Backend { static gtsam::Rot3 average_rotations(const std::vector &rotations, int max_iter = 10); // use rotation averaging to set the rotation initial guess for each frame in the world frame. - // the world frame will be the first frame in the vector + // the world frame will be the first frame in the vector. deadreckon_incrementally won't + // optimize if true static void populate_rotation_estimate(std::vector &frames); const gtsam::Values ¤t_initial_values() const { return initial_estimate_; }; diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index 0d7808b01..0d8390295 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -1,5 +1,7 @@ #pragma once #include +#include +#include #include #include "Eigen/Core" @@ -46,6 +48,46 @@ class Frame { const KeypointsCV& keypoint() { return kpts_; }; const cv::Mat& descriptors() { return descriptors_; }; + std::string to_string() const { + std::stringstream ss; + ss << "Frame " << id_ << ":\n"; + ss << "\tseq: " << seq_ << "\n"; + ss << "\tK: "; + if (K_) { + ss << "fx: " << K_->fx() << ", fy: " << K_->fy() << ", s: " << K_->skew() + << ", px: " << K_->px() << ", py: " << K_->py(); + } else { + ss << "N/A"; + } + ss << "\n\tkpts_.size(): " << kpts_.size(); + ss << "\n\tframe_from_other_frames_.size(): " << frame_from_other_frames_.size(); + ss << "\n\tworld_from_cam_groundtruth_: "; + if (world_from_cam_groundtruth_) { + ss << "\n" << world_from_cam_groundtruth_->matrix(); + } else { + ss << "N/A"; + } + ss << "\n\tcam_in_world_initial_guess_: "; + if (cam_in_world_initial_guess_) { + ss << "\n" << cam_in_world_initial_guess_->matrix(); + } else { + ss << "N/A"; + } + ss << "\n\tworld_from_cam_initial_guess_: "; + if (world_from_cam_initial_guess_) { + ss << "\n" << world_from_cam_initial_guess_->matrix(); + } else { + ss << "N/A"; + } + ss << "\n\ttranslation_covariance_in_cam_: "; + if (translation_covariance_in_cam_) { + ss << "\n" << translation_covariance_in_cam_->matrix(); + } else { + ss << "N/A"; + } + return ss.str(); + } + FrameId id_; size_t seq_; cv::Mat undistorted_img_; diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 4b10579cd..e42fedb39 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -341,7 +341,7 @@ std::optional> Frontend::interpolated_initial_trans frames_with_guess.push_back(&frame); } } - if (frames_with_guess.size() <= 2) return std::nullopt; + if (frames_with_guess.size() < 2) return std::nullopt; std::vector interpolated_init_guesses; interpolated_init_guesses.reserve(frames_.size()); size_t idx_guess = 0; @@ -357,21 +357,15 @@ std::optional> Frontend::interpolated_initial_trans if (frame.cam_in_world_initial_guess_) { interpolated = *frame.cam_in_world_initial_guess_; - } else if (frame.seq_ < frames_with_guess[idx_guess]->seq_) { - Eigen::Vector3d v_b_to_a = - *frames_with_guess[idx_guess]->velocity_from(*frames_with_guess[idx_guess + 1]); - double dt = 1e-9 * (frames_with_guess[idx_guess]->seq_ - frame.seq_); - interpolated = - *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_b_to_a * dt; - std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl; - std::cout << "\tdt: " << dt << std::endl; - std::cout << "\tv_a_to_b: " << v_b_to_a << std::endl; } else { Eigen::Vector3d v_a_to_b = *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]); - double dt = 1e-9 * (frame.seq_ - frames_with_guess[idx_guess]->seq_); - interpolated = - *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_a_to_b * dt; + double dt = (frame.seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) * 1e-9 * + (frames_with_guess[idx_guess]->seq_ > frame.seq_ + ? frames_with_guess[idx_guess]->seq_ - frame.seq_ + : frame.seq_ - frames_with_guess[idx_guess]->seq_); + interpolated = *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + + v_a_to_b * dt; // add sign back to dt std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl; std::cout << "\tdt: " << dt << std::endl; std::cout << "\tv_a_to_b: " << v_a_to_b << std::endl; diff --git a/experimental/learn_descriptors/gps_data.cc b/experimental/learn_descriptors/gps_data.cc deleted file mode 100644 index e69de29bb..000000000 diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh index 13b032c25..34f89b9b3 100644 --- a/experimental/learn_descriptors/gps_data.hh +++ b/experimental/learn_descriptors/gps_data.hh @@ -9,49 +9,64 @@ namespace robot::experimental::learn_descriptors { struct GPSData { struct Uncertainty { - double sigma_lat_mitude; - double sigma_longitude; - double sigma_altitude; + double sigma_latitude_deg; + double sigma_longitude_deg; + double sigma_altitude_m; double orientation_deg; double rms_range_error_m; // diagonal latitude, longitude, altitude covariance in meters squared Eigen::Matrix3d to_LLA_covariance() const { Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); - cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude; - cov(1, 1) = sigma_longitude * sigma_longitude; - cov(2, 2) = sigma_altitude * sigma_altitude; + cov(0, 0) = sigma_latitude_deg * sigma_latitude_deg; + cov(1, 1) = sigma_longitude_deg * sigma_longitude_deg; + cov(2, 2) = sigma_altitude_m * sigma_altitude_m; return cov; } // Convert lat/lon covariance to ENU at given latitude Eigen::Matrix3d to_ENU_covariance(double lat_deg) const { - double lat_rad = lat_deg * M_PI / 180.0; + // –– Named conversion/constants –– + static constexpr double DEG2RAD = M_PI / 180.0; + static constexpr double ORIENTATION_OFFSET_DEG = + 90.0; // for CW‐from‐North → CCW‐from‐East - // meters per degree scaling - double kLat = - 111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad); - double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad); + // WGS‑84 approximate “meters per degree” polynomial coefficients + static constexpr double MERIDIONAL_A0 = 111132.92; + static constexpr double MERIDIONAL_A2 = -559.82; + static constexpr double MERIDIONAL_A4 = 1.175; + static constexpr double PARALLEL_B0 = 111412.84; + static constexpr double PARALLEL_B2 = -93.5; + // 1) get LLA covariance in degrees² Eigen::Matrix3d lla_cov = to_LLA_covariance(); - // rotate lat/lon covariance (upper-left 2x2) using orientation - double theta = orientation_deg * M_PI / 180.0; - Eigen::Matrix2d R; - R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); + // 2) convert latitude to radians + double phi = lat_deg * DEG2RAD; - Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); - Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose(); + // 3) compute meters/degree at this latitude + double kLat = MERIDIONAL_A0 + MERIDIONAL_A2 * std::cos(2.0 * phi) + + MERIDIONAL_A4 * std::cos(4.0 * phi); + double kLon = PARALLEL_B0 * std::cos(phi) + PARALLEL_B2 * std::cos(3.0 * phi); - // apply linear scaling to convert degrees to meters in EN + // 4) build the Jacobian for degree→meter scaling Eigen::Matrix2d J; - J << kLat, 0, 0, kLon; + J << kLat, 0.0, 0.0, kLon; - Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose(); + // 5) scale the 2×2 lat/lon covariance into meters² + Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); + Eigen::Matrix2d cov_m = J * D * J.transpose(); + + // 6) rotate that ellipse into ENU axes (θ = CCW from East) + double theta = (ORIENTATION_OFFSET_DEG - orientation_deg) * DEG2RAD; + Eigen::Matrix2d R; + R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); + Eigen::Matrix2d horz_cov = R * cov_m * R.transpose(); + // 7) assemble full 3×3 ENU covariance Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero(); - enu_cov.topLeftCorner<2, 2>() = en_cov; - enu_cov(2, 2) = lla_cov(2, 2); // alt variance unchanged + enu_cov.topLeftCorner<2, 2>() = horz_cov; + enu_cov(2, 2) = lla_cov(2, 2); // altitude variance already in m² return enu_cov; } diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh index 4fce4f74a..368fe8351 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.hh +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -83,9 +83,9 @@ struct ImagePointFourSeasons : ImagePoint { } ss << "\n\t\tsigma: "; if (gps_gcs->uncertainty) { - ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t" - << gps_gcs->uncertainty->sigma_longitude << "\t" - << gps_gcs->uncertainty->sigma_altitude; + ss << gps_gcs->uncertainty->sigma_latitude_deg << "\t" + << gps_gcs->uncertainty->sigma_longitude_deg << "\t" + << gps_gcs->uncertainty->sigma_altitude_m; } else { ss << "N/A"; } diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc index b3b8a17e6..4906f0ff1 100644 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc @@ -8,6 +8,7 @@ #include "Eigen/Core" #include "Eigen/Geometry" #include "common/geometry/camera.hh" +#include "common/gps/frame_translation.hh" #include "cxxopts.hpp" #include "experimental/learn_descriptors/camera_calibration.hh" #include "experimental/learn_descriptors/four_seasons_parser.hh" @@ -358,7 +359,7 @@ int main(int argc, const char **argv) { const Eigen::Vector3d gps_gcs( img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); - const Eigen::Vector3d p_gps_in_ECEF = parser.ecef_from_lla(gps_gcs); + const Eigen::Vector3d p_gps_in_ECEF = robot::gps::ecef_from_lla(gps_gcs); const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(), p_gps_in_ECEF.z(), 1.0); world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index c45dc0e65..ef87991f1 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -36,6 +36,7 @@ #include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/PriorFactor.h" #include "gtsam/slam/ProjectionFactor.h" +#include "opencv2/opencv.hpp" #include "visualization/opencv/opencv_viz.hh" std::optional attempt_triangulate(const std::vector &cam_poses, @@ -43,7 +44,6 @@ std::optional attempt_triangulate(const std::vector gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error = 2.0, const bool verbose = true) { - std::cout << "attmepting to triangulate!" << std::endl; gtsam::Point3 p_lmk_in_world; if (cam_poses.size() > 2) { try { @@ -86,7 +86,6 @@ std::optional attempt_triangulate(const std::vector return std::nullopt; } // Reprojection error - std::cout << "heartbeat cheirality and reprojection checks" << std::endl; if (max_reproj_error > 0) { gtsam::PinholeCamera cam(pose, *K); const auto reproj = cam.project(p_lmk_in_world); @@ -209,12 +208,18 @@ int main(int argc, const char **argv) { Frontend frontend(params); std::vector idx_img_pts; - for (size_t i = 581; i < 750; i += 5) { + for (size_t i = 633; i < 646; i += 2) { frontend.add_image( ImageAndPoint{parser.load_image(i), std::make_shared(parser.get_image_point(i))}); idx_img_pts.push_back(i); } + // for (size_t i = 581; i < 750; i += 5) { + // frontend.add_image( + // ImageAndPoint{parser.load_image(i), + // std::make_shared(parser.get_image_point(i))}); + // idx_img_pts.push_back(i); + // } frontend.populate_frames(); frontend.match_frames_and_build_tracks(); @@ -233,8 +238,8 @@ int main(int argc, const char **argv) { // world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); // if (!first_point) first_point = *frame.cam_in_world_initial_guess_; // world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - - // *first_point; std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() << - // std::endl; viz_poses_init.emplace_back( + // *first_point; std::cout << "world_from_cam_init: " << + // world_from_cam_init.matrix() << std::endl; viz_poses_init.emplace_back( // world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, // frame.id_).string()); // } @@ -245,14 +250,16 @@ int main(int argc, const char **argv) { w_from_cam_grnd_trth.translation() -= first_grnd_trth->translation(); viz_poses_init.emplace_back(w_from_cam_grnd_trth, "x_grnd_" + std::to_string(frame.id_)); - } + }; } + std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl; robot::visualization::viz_scene(viz_poses_init, std::vector(), cv::viz::Color::brown(), true, true, "Frontend initial guesses"); // ############# BACKEND ############### + constexpr bool use_grndtrth_only = false; gtsam::Values initial_estimate_; gtsam::NonlinearFactorGraph graph_; @@ -262,7 +269,7 @@ int main(int argc, const char **argv) { gtsam::Vector6 prior_sigmas; prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; gtsam::Vector3 gps_sigmas_fallback; - gps_sigmas_fallback << 0.5, 0.5, 1.5; + gps_sigmas_fallback << 1.5, 1.5, 1.5; gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); gtsam::noiseModel::Diagonal::shared_ptr gps_noise = @@ -272,48 +279,106 @@ int main(int argc, const char **argv) { std::vector &frames = frontend.frames(); frames[0].world_from_cam_initial_guess_ = frames[0].world_from_cam_groundtruth_->rotation(); // make sure first idx has grnd trth + + // // let's examine the rotations to the first thingy + // std::vector viz_5_pt_rot; + // for (const auto &[frame_id, rotation] : frames[0].frame_from_other_frames_) { + // viz_5_pt_rot.emplace_back( + // Eigen::Isometry3d(gtsam::Pose3(rotation, gtsam::Point3()).matrix()), + // "rot_to_" + std::to_string(frame_id)); + // } + // robot::visualization::viz_scene(viz_5_pt_rot, std::vector(), + // cv::viz::Color::brown(), true, true, + // "Frontend initial guesses"); + Backend::populate_rotation_estimate(frames); std::vector symbols_pose; std::unordered_map world_from_cam_initial_guess; std::optional> interpolated_cam_in_w = frontend.interpolated_initial_translations(); - std::cout << "heartbeat" << std::endl; ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w); + // std::vector viz_pts_interpolated; + // for (size_t i = 0; i < interpolated_cam_in_w->size(); i++) { + // viz_pts_interpolated.emplace_back( + // (*interpolated_cam_in_w)[i] - interpolated_cam_in_w->front(), + // "pt_" + std::to_string(i)); + // std::cout << "pt_" + std::to_string(i) << ": " << (*interpolated_cam_in_w)[i] << + // std::endl; + // } + // robot::visualization::viz_scene(std::vector(), + // viz_pts_interpolated, cv::viz::Color::brown(), true, true, + // "Initial guesses in backend"); Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front(); + std::optional grndtrth_cam0_in_w; + std::vector idx_grndtrth_frame; std::vector viz_pose; - for (size_t i = 0; i < frames.size(); i++) { - // std::cout << "interpolated translation " << i << ": " << (*interpolated_cam_in_w)[i] - // << std::endl; + auto noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas( + (gtsam::Vector(6) << 0, 0, 0, // rotation stdev in radians + 1e-6, 1e-6, 1e-6 // translation stdev in meters + ) + .finished()); + const gtsam::Pose3 w_from_cam0_init_estimate(*frames[0].world_from_cam_initial_guess_, + gtsam::Point3()); + const gtsam::Symbol symbol_cam0(Backend::symbol_char_pose, 0); + graph_.add(gtsam::PriorFactor( + symbol_cam0, w_from_cam0_init_estimate, + noise_tight_prior)); // currently assuming that we have groundtruth on the first pose + initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate); + std::string str_pose0 = + std::string("x_") + (frames[0].world_from_cam_groundtruth_ ? "g_" : "") + + (!frames[0].world_from_cam_initial_guess_ ? "norot_" : "") + std::to_string(frames[0].id_); + viz_pose.emplace_back(Eigen::Isometry3d(w_from_cam0_init_estimate.matrix()), str_pose0); + symbols_pose.push_back(symbol_cam0); + for (size_t i = 1; i < frames.size(); i++) { const Frame &frame = frames[i]; - const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_); - const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_ + std::optional world_from_cam_estimate; + if (use_grndtrth_only && !frame.world_from_cam_groundtruth_) { + continue; + } else if (use_grndtrth_only && frame.world_from_cam_groundtruth_) { + if (!grndtrth_cam0_in_w) + grndtrth_cam0_in_w = frame.world_from_cam_groundtruth_->translation(); + world_from_cam_estimate = + gtsam::Pose3(frame.world_from_cam_groundtruth_->rotation(), + gtsam::Point3(frame.world_from_cam_groundtruth_->translation() - + *grndtrth_cam0_in_w)); + idx_grndtrth_frame.push_back(i); + std::cout << "populating groundtruth pose: " << world_from_cam_estimate->translation() + << std::endl; + } else { + world_from_cam_estimate = gtsam::Pose3(frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_ : gtsam::Rot3::Identity(), (*interpolated_cam_in_w)[i] - cam0_in_w); - world_from_cam_initial_guess[frame.id_] = world_from_cam_estimate; - initial_estimate_.insert(cam_symbol, world_from_cam_estimate); - if (frame.world_from_cam_initial_guess_) { + } + std::cout << "world_from_cam_estimate_" << i << ": " << *world_from_cam_estimate + << std::endl; + world_from_cam_initial_guess[frame.id_] = *world_from_cam_estimate; + const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_); + initial_estimate_.insert(cam_symbol, *world_from_cam_estimate); + if (frame.cam_in_world_initial_guess_) { + if (frame.translation_covariance_in_cam_) { + std::cout << "translation covariance: " << *frame.translation_covariance_in_cam_ + << std::endl; + } gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( - frame.translation_covariance_in_cam_ - ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0), - (*frame.translation_covariance_in_cam_)(1, 1), - (*frame.translation_covariance_in_cam_)(2, 2)) - : gps_sigmas_fallback); + // frame.translation_covariance_in_cam_ + false ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), + std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), + std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) + : gps_sigmas_fallback); graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); + std::cout << "adding gps factor!" << std::endl; } - viz_pose.emplace_back( - Eigen::Isometry3d(world_from_cam_estimate.matrix()), - (frame.cam_in_world_initial_guess_ ? "xg_" : "x_") + std::to_string(frame.id_)); + std::string str_pose = std::string("x_") + (frame.world_from_cam_groundtruth_ ? "g_" : "") + + (!frame.world_from_cam_initial_guess_ ? "norot_" : "") + + std::to_string(frame.id_); + viz_pose.emplace_back(Eigen::Isometry3d(world_from_cam_estimate->matrix()), str_pose); symbols_pose.push_back(cam_symbol); } - robot::visualization::viz_scene(viz_pose, std::vector(), - cv::viz::Color::brown(), true, true, - "Initial guesses in backend"); - - std::cout << "heartbeat pre lmks in graph" << std::endl; // add landmarks to graph std::vector symbols_lmks; + std::vector viz_pts; const FeatureTracks feature_tracks = frontend.feature_tracks(); for (size_t i = 0; i < feature_tracks.size(); i++) { std::vector world_from_lmk_cams; @@ -322,8 +387,9 @@ int main(int argc, const char **argv) { world_from_lmk_cams.push_back(world_from_cam_initial_guess[frame_id]); lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y); } - std::optional landmark_estimate = attempt_triangulate( - world_from_lmk_cams, lmk_observations, frames[0].K_); // all K are the same for now... + std::optional landmark_estimate = + attempt_triangulate(world_from_lmk_cams, lmk_observations, frames[0].K_, 2.0, + false); // all K are the same for now... if (landmark_estimate) { const gtsam::Symbol lmk_symbol(Backend::symbol_char_landmark, i); for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { @@ -333,33 +399,36 @@ int main(int argc, const char **argv) { } initial_estimate_.insert(lmk_symbol, *landmark_estimate); symbols_lmks.push_back(lmk_symbol); + viz_pts.emplace_back(*landmark_estimate, "lmk_" + std::to_string(i)); } - std::cout << "idx " << i << " out of " << feature_tracks.size() << std::endl; } - - std::cout << "heartbeat" << std::endl; + robot::visualization::viz_scene(viz_pose, viz_pts, cv::viz::Color::brown(), true, true, + "Initial guesses in backend"); // do global optimization const gtsam::Values result = optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 10); - // // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference - // double sum_traj_error = 0; - // double sum_rot_error = 0; - // for (size_t i = 0; i < symbols_pose.size(); i++) { - // const gtsam::Pose3 traj_pose = result.at(symbols_pose[i]); - // sum_traj_error += std::pow( - // (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2); - // sum_rot_error += - // rotation_error(references_world_from_cam[i], Eigen::Isometry3d(traj_pose.matrix())); - // } - // std::cout << "sum_rot_error: " << sum_rot_error << std::endl; - // double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); - // double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); - // std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; + // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference + double sum_traj_error = 0; + double sum_rot_error = 0; + // for (size_t i = 0; i < idx_grndtrth_frame.size(); i++) { + for (const size_t i_grndtrth_frame : idx_grndtrth_frame) { + const Frame &frame = frames[i_grndtrth_frame]; + const gtsam::Pose3 traj_pose = + result.at(gtsam::Symbol(Backend::symbol_char_pose, frame.id_)); + sum_traj_error += std::pow( + (frame.world_from_cam_groundtruth_->translation() - traj_pose.translation()).norm(), 2); + sum_rot_error += + rotation_error(Eigen::Isometry3d(frame.world_from_cam_groundtruth_->matrix()), + Eigen::Isometry3d(traj_pose.matrix())); + } + std::cout << "sum_rot_error: " << sum_rot_error << std::endl; + double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); + double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); + std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; std::cout << "about to visualize result" << std::endl; - result.print(); - graph_values(result, "Result", symbols_pose, std::vector()); - // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); + // result.print(); + graph_values(result, "Result", symbols_pose, symbols_lmks); } \ No newline at end of file From 3a5f6c6a38bd88311ed5f0d07dbe6ae31fdb3ee7 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Tue, 22 Jul 2025 18:01:07 -0400 Subject: [PATCH 38/45] WIP. need to fix backend adding frames --- experimental/learn_descriptors/BUILD | 55 ++-- experimental/learn_descriptors/backend.cc | 286 ++++++++++++++++-- experimental/learn_descriptors/backend.hh | 44 ++- .../learn_descriptors/feature_manager.hh | 49 --- .../learn_descriptors/feature_manager_test.cc | 68 ----- experimental/learn_descriptors/feature_set.hh | 78 ----- experimental/learn_descriptors/frame.hh | 21 +- experimental/learn_descriptors/frontend.cc | 120 ++++---- experimental/learn_descriptors/frontend.hh | 6 +- experimental/learn_descriptors/gps_data.hh | 83 +++-- experimental/learn_descriptors/image_point.hh | 1 + .../image_point_four_seasons.cc | 4 +- .../image_point_four_seasons.hh | 21 +- .../learn_descriptors/refactor_sfm_mvp.cc | 63 +--- .../structure_from_motion.cc | 200 ++++-------- .../structure_from_motion.hh | 54 +--- .../structure_from_motion_example.cc | 75 +++++ 17 files changed, 606 insertions(+), 622 deletions(-) delete mode 100644 experimental/learn_descriptors/feature_manager.hh delete mode 100644 experimental/learn_descriptors/feature_manager_test.cc delete mode 100644 experimental/learn_descriptors/feature_set.hh create mode 100644 experimental/learn_descriptors/structure_from_motion_example.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index bb369d6d1..63a162d08 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -26,8 +26,10 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":backend", - ":feature_manager", ":frontend", + ":frame", + ":image_point", + "//common:check", "//visualization/opencv:opencv_viz", "@eigen", "@gtsam", @@ -280,30 +282,6 @@ cc_library( ], ) -cc_library( - name = "feature_manager", - hdrs = ["feature_manager.hh"], - visibility = ["//visibility:public"], - deps = [ - ":feature_set", - "@gtsam", - "@opencv", - ], -) - -cc_test( - name = "feature_manager_test", - srcs = ["feature_manager_test.cc"], - deps = [ - ":feature_manager", - ":spatial_test_scene_cube", - "@com_google_googletest//:gtest_main", - "@eigen", - "@gtsam", - "@opencv", - ], -) - cc_library( name = "spatial_test_scene", srcs = ["spatial_test_scene.cc"], @@ -383,6 +361,7 @@ cc_library( deps = [ ":structure_from_motion_types", ":frame", + "//common:check", "//common/geometry:camera", "@boost//:smart_ptr", "@gtsam", @@ -476,4 +455,30 @@ cc_binary( "@opencv", ":camera_calibration" ], +) + +cc_binary( + name = "structure_from_motion_example", + srcs = ["structure_from_motion_example.cc"], + copts = [ + "-Wno-error=unused-parameter", + "-Wno-error=unused-function", + ], + deps = [ + ":structure_from_motion", + ":frontend", + ":backend", + ":four_seasons_parser", + ":frame", + ":image_point", + ":image_point_four_seasons", + ":spatial_test_scene_cube", + ":structure_from_motion_types", + ":camera_calibration", + "//visualization/opencv:opencv_viz", + "@boost//:smart_ptr", + "@cxxopts", + "@eigen", + "@opencv", + ], ) \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index 30706673a..682c29ec3 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -1,49 +1,93 @@ #include "experimental/learn_descriptors/backend.hh" +#include +#include #include #include #include #include #include "boost/make_shared.hpp" +#include "common/check.hh" #include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "gtsam/geometry/Cal3_S2.h" +#include "gtsam/geometry/Point2.h" +#include "gtsam/geometry/Point3.h" +#include "gtsam/geometry/Pose3.h" +#include "gtsam/geometry/Rot3.h" #include "gtsam/geometry/triangulation.h" +#include "gtsam/inference/Symbol.h" #include "gtsam/linear/NoiseModel.h" #include "gtsam/navigation/GPSFactor.h" #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" +#include "gtsam/nonlinear/NonlinearFactorGraph.h" +#include "gtsam/nonlinear/Values.h" #include "gtsam/slam/BetweenFactor.h" #include "gtsam/slam/PriorFactor.h" #include "gtsam/slam/ProjectionFactor.h" namespace robot::experimental::learn_descriptors { -void Backend::solve_graph() { - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); - result_ = optimizer.optimize(); -} - -void Backend::solve_graph(const int num_steps, - std::optional inter_debug_func) { - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; // We'll manually step it - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); +std::optional Backend::attempt_triangulate( + const std::vector& cam_poses, const std::vector& cam_obs, + gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error, const bool verbose) { + gtsam::Point3 p_lmk_in_world; + if (cam_poses.size() > 2) { + try { + // Attempt triangulation using DLT (or the GTSAM provided method) + p_lmk_in_world = gtsam::triangulatePoint3( + cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - double prev_error = optimizer.error(); - for (int i = 0; i < num_steps; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); + } catch (const std::exception& e) { + // Handle the exception gracefully by logging and retaining the previous + // estimate or discard + if (verbose) { + std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: " + << e.what() << ". Discarding involved keypoints." << std::endl; + } + return std::nullopt; + } + } else { + return std::nullopt; + } + // Optional: perform an explicit cheirality check + for (const auto& pose : cam_poses) { + // Transform point to the camera coordinate system. + // transformTo() converts a world point to the camera frame. + gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); + if (p_cam_lmk.z() <= 0) { // Check that the depth is positive + return std::nullopt; + } + } - if (inter_debug_func) { - (*inter_debug_func)(optimizer.values(), i); + // Cheirality & reprojection checks + for (size_t i = 0; i < cam_poses.size(); ++i) { + const auto& pose = cam_poses[i]; + // Cheirality + gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); + if (p_cam.z() <= 0) { + if (verbose) { + std::cerr << "[attempt_triangulate] point behind camera " << i + << " (z=" << p_cam.z() << ")\n"; + } + return std::nullopt; } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; + // Reprojection error + if (max_reproj_error > 0) { + gtsam::PinholeCamera cam(pose, *K); + const auto reproj = cam.project(p_lmk_in_world); + const double err = (reproj - cam_obs[i]).norm(); + if (err > max_reproj_error) { + if (verbose) { + std::cerr << "[attempt_triangulate] reprojection error too large on view " << i + << " (" << err << " px)\n"; + } + return std::nullopt; + } } } - result_ = optimizer.values(); + return p_lmk_in_world; } gtsam::Rot3 average_rotations(const std::vector& rotations, int max_iter = 10) { @@ -140,17 +184,193 @@ void Backend::populate_rotation_estimate(std::vector& frames) { frame_tree.emplace(frame.id_, frame_vertex); } - // gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); - // gtsam::Values result = optimizer.optimize(); - - // // Output results and assign results to frames - // for (const gtsam::Symbol& symbol_rotation : symbols_frames) { - // gtsam::Rot3 w_from_frame = result.at(symbol_rotation); - // frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = - // w_from_frame; - // // std::cout << "Rotation " << symbol_rotation.string() << ": " - // // << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" << - // std::endl; - // } + gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + gtsam::Values result = optimizer.optimize(); + + // Output results and assign results to frames + for (const gtsam::Symbol& symbol_rotation : symbols_frames) { + gtsam::Rot3 w_from_frame = result.at(symbol_rotation); + frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame; + } +} + +void Backend::populate_rotation_estimate(std::vector& shared_frames) { + struct FrameVertex; + using SharedFrameVertex = std::shared_ptr; + struct FrameVertex { + Frame& frame; + std::optional w_from_this; + std::unordered_map + neighbor_id_to_rot; // rot is frame_from_neighbor_frame + std::unordered_set parents; + std::unordered_set children; + }; + std::unordered_map frame_tree; + for (SharedFrame& shared_frame : shared_frames) { + frame_tree.emplace(shared_frame->id_, std::make_shared(*shared_frame)); + } + // populate all frames and add children to neighbors + std::queue q; // indices of Frame in frames + std::unordered_set seen_frame_id; + q.push(shared_frames.front()->id_); + + gtsam::NonlinearFactorGraph graph; + gtsam::Values initial; + // TODO: somehow get more informative/more robust noise values + // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength + gtsam::noiseModel::Diagonal::shared_ptr noise_rotation = + gtsam::noiseModel::Isotropic::Sigma(3, 0.1); // radians + + std::vector symbols_frames; + while (!q.empty()) { + Frame frame = *shared_frames[q.front()]; + q.pop(); + seen_frame_id.insert(frame.id_); + SharedFrameVertex frame_vertex = frame_tree.at(frame.id_); + if (seen_frame_id.size() == 1) { // make the first frame rotation identity unless the + // initial guess is set otherwise + frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_ + ? *frame_vertex->frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(); + graph.add(gtsam::PriorFactor( + gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_), + *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3))); + } + frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this; + std::cout << frame.to_string() << std::endl; + + const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_); + symbols_frames.push_back(symbol_frame); + initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_); + for (const auto& [other_frame_id, frame_from_other_frame] : + frame.frame_from_other_frames_) { + if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) { + q.push(other_frame_id); + seen_frame_id.insert(other_frame_id); + } + frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame); + SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id); + frame_vertex->children.insert(other_frame_vertex); + other_frame_vertex->parents.insert(frame_vertex); + other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_, + frame_from_other_frame.inverse()); + if (!other_frame_vertex->w_from_this) { + other_frame_vertex->w_from_this = + *frame_vertex->w_from_this * frame_from_other_frame; + other_frame_vertex->frame.world_from_cam_initial_guess_ = + other_frame_vertex->w_from_this; + } + + graph.add(gtsam::BetweenFactor( + symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id), + frame_from_other_frame.inverse(), noise_rotation)); + } + frame_tree.emplace(frame.id_, frame_vertex); + } + + gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); + gtsam::Values result = optimizer.optimize(); + + // Output results and assign results to frames + for (const gtsam::Symbol& symbol_rotation : symbols_frames) { + gtsam::Rot3 w_from_frame = result.at(symbol_rotation); + frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame; + } +} + +void Backend::calculate_initial_values() { populate_rotation_estimate(shared_frames_); } + +void Backend::populate_graph(const FeatureTracks& feature_tracks) { + ROBOT_CHECK(shared_frames_.size() > 3); + ROBOT_CHECK(shared_frames_[0]->world_from_cam_groundtruth_, + "We're assuming the first camera has groundtruth for now"); + + shared_frames_[0]->world_from_cam_initial_guess_ = + shared_frames_[0]->world_from_cam_groundtruth_->rotation(); // align rotation + + Backend::populate_rotation_estimate(shared_frames_); + + // add first cam as prior + Eigen::Vector3d cam0_in_w = *shared_frames_.front()->cam_in_world_interpolated_guess_; + const gtsam::Pose3 w_from_cam0_init_estimate(*shared_frames_[0]->world_from_cam_initial_guess_, + gtsam::Point3()); + const gtsam::Symbol symbol_cam0(symbol_char_pose, 0); + graph_.add(gtsam::PriorFactor( + symbol_cam0, w_from_cam0_init_estimate, + noise_tight_prior)); // currently assuming that we have groundtruth on the first pose + initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate); + + // add gps factors + for (size_t i = 1; i < shared_frames_.size(); i++) { + const Frame& frame = *shared_frames_[i]; + gtsam::Pose3 world_from_cam_estimate( + frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_ + : gtsam::Rot3::Identity(), + *shared_frames_[i]->cam_in_world_interpolated_guess_ - cam0_in_w); + world_from_cam_initial_estimates_[frame.id_] = world_from_cam_estimate; + const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_); + initial_estimate_.insert(cam_symbol, world_from_cam_estimate); + world_from_cam_initial_estimates_.emplace(frame.id_, world_from_cam_estimate); + if (frame.cam_in_world_initial_guess_) { + gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( + frame.translation_covariance_in_cam_ + ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), + std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), + std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) + : gps_sigmas_fallback); + graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); + } + } + + // add landmarks + for (size_t i = 0; i < feature_tracks.size(); i++) { + std::vector world_from_lmk_cams; + std::vector lmk_observations; + for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) { + world_from_lmk_cams.push_back(world_from_cam_initial_estimates_[frame_id]); + lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y); + } + std::optional landmark_estimate = + attempt_triangulate(world_from_lmk_cams, lmk_observations, shared_frames_[0]->K_, 2.0, + false); // all K are the same for now... + if (landmark_estimate) { + const gtsam::Symbol lmk_symbol(symbol_char_landmark, i); + for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) { + graph_.add(gtsam::GenericProjectionFactor( + gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise_, + gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol, shared_frames_[0]->K_)); + } + initial_estimate_.insert(lmk_symbol, *landmark_estimate); + lmk_initial_estimates_.emplace(i, *landmark_estimate); + } + } +} + +void Backend::solve_graph() { + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); + result_ = optimizer.optimize(); +} + +void Backend::solve_graph(const int num_epochs, + std::optional iter_debug_func) { + gtsam::LevenbergMarquardtParams params; + params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. + params.maxIterations = 1; // We'll manually step it + gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); + + double prev_error = optimizer.error(); + for (int i = 0; i < num_epochs; i++) { + optimizer.iterate(); + double curr_error = optimizer.error(); + + if (iter_debug_func) { + (*iter_debug_func)(optimizer.values(), i); + } + if (std::abs(prev_error - curr_error) < 1e-6) { + std::cout << "Converged at iteration " << i << "\n"; + break; + } + } + result_ = optimizer.values(); } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh index 9d7d403d9..50a934699 100644 --- a/experimental/learn_descriptors/backend.hh +++ b/experimental/learn_descriptors/backend.hh @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include #include @@ -27,26 +29,56 @@ class Backend { static constexpr char symbol_char_landmark = 'l'; static constexpr char symbol_char_cam_cal = 'k'; - void solve_graph(); - typedef int epoch; - using graph_step_debug_func = std::function; - void solve_graph(const int num_steps, - std::optional inter_debug_func = std::nullopt); - + static std::optional attempt_triangulate( + const std::vector &cam_poses, const std::vector &cam_obs, + gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error = 2.0, + const bool verbose = true); static gtsam::Rot3 average_rotations(const std::vector &rotations, int max_iter = 10); // use rotation averaging to set the rotation initial guess for each frame in the world frame. // the world frame will be the first frame in the vector. deadreckon_incrementally won't // optimize if true static void populate_rotation_estimate(std::vector &frames); + // use rotation averaging to set the rotation initial guess for each frame in the world frame. + // the world frame will be the first frame in the vector. deadreckon_incrementally won't + // optimize if true + static void populate_rotation_estimate(std::vector &shared_frames); + void add_frames(std::vector &shared_frames) { + shared_frames_.reserve(shared_frames_.size() + shared_frames.size()); + shared_frames_.insert(shared_frames_.end(), shared_frames.begin(), shared_frames.end()); + }; + void calculate_initial_values(); + void populate_graph(const FeatureTracks &feature_tracks); + void solve_graph(); + typedef int epoch; + using graph_step_debug_func = std::function; + void solve_graph(const int num_epochs, + std::optional iter_debug_func = std::nullopt); + // void imshow_keypoints(const size_t img_id, bool viz = true, bool viz_all = false); + + const std::vector &shared_frames() const { return shared_frames_; }; const gtsam::Values ¤t_initial_values() const { return initial_estimate_; }; const gtsam::Values &result() const { return result_; }; private: + std::vector shared_frames_; + gtsam::Values initial_estimate_; gtsam::Values result_; gtsam::NonlinearFactorGraph graph_; + + std::unordered_map world_from_cam_initial_estimates_; + std::unordered_map lmk_initial_estimates_; + + gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas( + (gtsam::Vector(6) << 0, 0, 0, // rotation stdev in radians + 1e-6, 1e-6, 1e-6 // translation stdev in meters + ) + .finished()); + gtsam::Vector3 gps_sigmas_fallback{0.5, 0.5, 0.5}; + gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = + gtsam::noiseModel::Isotropic::Sigma(2, 1.0); }; using Landmark = gtsam::Point3; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_manager.hh b/experimental/learn_descriptors/feature_manager.hh deleted file mode 100644 index d11fb8132..000000000 --- a/experimental/learn_descriptors/feature_manager.hh +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -#include "experimental/learn_descriptors/feature_set.hh" -#include "gtsam/inference/Symbol.h" -#include "opencv2/opencv.hpp" - -namespace robot::experimental::learn_descriptors { -class FeatureManager { - public: - void append_img_data(const std::vector &kpts, const cv::Mat &descriptors) { - feature_sets_.push_back(FeatureSet(kpts, descriptors)); - }; - void insert_symbol(const size_t idx_img, const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) { - feature_sets_[idx_img].insert_symbol(kpt, symbol); - if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) { - symbols_to_idx_img_.at(symbol).push_back(idx_img); - } else { - symbols_to_idx_img_.emplace(symbol, std::vector{idx_img}); - } - }; - - std::optional get_symbol(const size_t idx_img, const cv::KeyPoint &kpt) { - return feature_sets_[idx_img].get_symbol(kpt); - }; - - size_t get_num_images_added() { return feature_sets_.size(); }; - - std::unordered_map> get_added_symbols() { - return symbols_to_idx_img_; - }; - - bool get_idxs_for_symbol(const gtsam::Symbol &symbol, std::vector &out_cam_idxs) { - if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) { - out_cam_idxs = symbols_to_idx_img_.at(symbol); - return true; - } - return false; - } - - private: - std::vector feature_sets_; - std::unordered_map> symbols_to_idx_img_; -}; -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_manager_test.cc b/experimental/learn_descriptors/feature_manager_test.cc deleted file mode 100644 index b258f558a..000000000 --- a/experimental/learn_descriptors/feature_manager_test.cc +++ /dev/null @@ -1,68 +0,0 @@ -#include "experimental/learn_descriptors/feature_manager.hh" - -#include "Eigen/Dense" -#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" -#include "gtest/gtest.h" -#include "gtsam/geometry/Cal3_S2.h" -#include "gtsam/geometry/PinholeCamera.h" -#include "gtsam/geometry/Point3.h" -#include "gtsam/geometry/Pose3.h" -#include "gtsam/geometry/Rot3.h" -#include "opencv2/opencv.hpp" - -namespace robot::experimental::learn_descriptors { -TEST(feature_manager_test, test) { - FeatureManager feature_manager; - SpatialTestSceneCube test_cube(1.f); - - const size_t img_width = 640; - const size_t img_height = 480; - const double fx = 500.0; - const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; - - gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); - - Eigen::Matrix3d rotation0( - Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * - Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix()); - gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0)); - gtsam::PinholeCamera camera0(pose0, *K); - - gtsam::Pose3 pose1( - gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * - rotation0), - gtsam::Point3(0, 4, 0)); - gtsam::PinholeCamera camera1(pose1, *K); - - gtsam::Pose3 pose2( - gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * - rotation0), - gtsam::Point3(0, 4, 0)); - gtsam::PinholeCamera camera2(pose1, *K); - - std::vector> cameras{camera0, camera1}; - - // std::vector projected_pixels = - // test_cube.get_projected_pixels(camera0, cv::Size(img_width, img_height)); - // cv::Mat image(480, 640, CV_8UC3, cv::Scalar(255, 255, 255)); - // for (const SpatialTestScene::ProjectedPoint &projected_pt : projected_pixels) { - // const Eigen::Vector2d &pt = projected_pt.pixel; - // cv::circle(image, cv::Point2i(static_cast(pt[0]), static_cast(pt[1])), 4, - // cv::Scalar(0, 0, 0)); - // } - // cv::imshow("projected cubes", image); - // cv::waitKey(0); - - // std::vector, cv::Size>> cams{ - // std::pair, cv::Size>(camera0, - // cv::Size(img_width, - // img_height)), - // std::pair, cv::Size>(camera1, - // cv::Size(img_width, - // img_height))}; - // std::vector> - // correspondences = test_cube.get_corresponding_pixels(cams); -} -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/feature_set.hh b/experimental/learn_descriptors/feature_set.hh deleted file mode 100644 index 24db3ee0e..000000000 --- a/experimental/learn_descriptors/feature_set.hh +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#include "gtsam/geometry/Pose3.h" -#include "gtsam/inference/Symbol.h" -#include "opencv2/opencv.hpp" - -namespace std { -template <> -struct hash { - size_t operator()(const cv::KeyPoint &kp) const { - size_t h1 = hash()(kp.pt.x); - size_t h2 = hash()(kp.pt.y); - size_t h3 = hash()(kp.size); - size_t h4 = hash()(kp.angle); - size_t h5 = hash()(kp.response); - size_t h6 = hash()(kp.octave); - size_t h7 = hash()(kp.class_id); - - return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6); - } -}; -template <> -struct hash { - size_t operator()(const gtsam::Symbol &s) const noexcept { - return std::hash()(s.key()); - } -}; -} // namespace std -struct KeyPointEqual { - bool operator()(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) const { - return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle && - kp1.response == kp2.response && kp1.octave == kp2.octave && - kp1.class_id == kp2.class_id; - } -}; - -namespace robot::experimental::learn_descriptors { -class FeatureSet { - public: - struct keypoints_descriptors { - keypoints_descriptors(const std::vector &keypoints, - const cv::Mat &descriptors) - : kpts(keypoints), descriptors(descriptors) {} - std::vector kpts; - cv::Mat descriptors; - }; - - FeatureSet(const std::vector &kpts, const cv::Mat &descriptors) - : kpts_descriptors_(keypoints_descriptors(kpts, descriptors)){}; - ~FeatureSet() = default; - - void insert_symbol(const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) { - if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) { - std::stringstream error_msg; - error_msg << "Keypoint already has symbol! Keypoint has symbol: " - << kpt_to_symbol_.at(kpt); - throw std::runtime_error(error_msg.str()); - } - kpt_to_symbol_.emplace(kpt, symbol); - }; - std::optional get_symbol(const cv::KeyPoint &kpt) { - if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) { - return kpt_to_symbol_.at(kpt); - } - return std::nullopt; - }; - - private: - keypoints_descriptors kpts_descriptors_; - std::unordered_map, KeyPointEqual> - kpt_to_symbol_; - std::unordered_map symbol_to_kpt_; -}; -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh index 0d8390295..0c5535899 100644 --- a/experimental/learn_descriptors/frame.hh +++ b/experimental/learn_descriptors/frame.hh @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -21,9 +22,22 @@ class Frame { : id_(id), seq_(seq), undistorted_img_(img), - K_(std::move(K)), + K_(K), kpts_(kpts), descriptors_(descriptors) {} + Frame(const Frame& other) + : id_(other.id_), + seq_(other.seq_), + undistorted_img_(other.undistorted_img_.clone()), + K_(other.K_), + kpts_(other.kpts_), + descriptors_(other.descriptors_.clone()), + frame_from_other_frames_(other.frame_from_other_frames_), + world_from_cam_groundtruth_(other.world_from_cam_groundtruth_), + cam_in_world_initial_guess_(other.cam_in_world_initial_guess_), + cam_in_world_interpolated_guess_(other.cam_in_world_interpolated_guess_), + world_from_cam_initial_guess_(other.world_from_cam_initial_guess_), + translation_covariance_in_cam_(other.translation_covariance_in_cam_) {} std::optional velocity_to(const Frame& other_frame) const { ROBOT_CHECK(other_frame.seq_ >= seq_); @@ -97,8 +111,11 @@ class Frame { std::unordered_map frame_from_other_frames_; // map of relative rotation for this_frame_from_frame_[FrameId] std::optional world_from_cam_groundtruth_; - std::optional cam_in_world_initial_guess_; + std::optional + cam_in_world_initial_guess_; // intended for data from sensors (GPS) + std::optional cam_in_world_interpolated_guess_; std::optional world_from_cam_initial_guess_; std::optional translation_covariance_in_cam_; }; +using SharedFrame = std::shared_ptr; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index e42fedb39..c6dffd66b 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -168,9 +168,9 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) { } void Frontend::populate_frames(bool verbose) { - FrameId id = frames_.size(); + FrameId id = shared_frames_.size(); for (const ImageAndPoint &img_and_pt : images_and_points_) { - const std::shared_ptr shared_img_pt = img_and_pt.shared_img_pt; + const std::shared_ptr &shared_img_pt = img_and_pt.shared_img_pt; if (verbose) { std::cout << shared_img_pt->to_string() << std::endl; } @@ -205,17 +205,23 @@ void Frontend::populate_frames(bool verbose) { if (maybe_translation_covariance_in_cam) { frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam; } - frames_.push_back(frame); + if (id == 0) { + std::cout << (frame.world_from_cam_groundtruth_ ? "first frame has grnd" + : "first frame doesn't have grnd") + << std::endl; + } + shared_frames_.push_back(std::make_shared(frame)); id++; } + interpolate_frames(); } void Frontend::match_frames_and_build_tracks() { if (params_.exhaustive) { - for (size_t i = 0; i < frames_.size() - 1; i++) { - for (size_t j = i + 1; j < frames_.size(); j++) { - std::vector matches = - compute_matches(frames_[i].descriptors(), frames_[j].descriptors()); + for (size_t i = 0; i < shared_frames_.size() - 1; i++) { + for (size_t j = i + 1; j < shared_frames_.size(); j++) { + std::vector matches = compute_matches(shared_frames_[i]->descriptors(), + shared_frames_[j]->descriptors()); enforce_bijective_buffer_matches(matches); if (matches.size() < 5) { continue; @@ -223,12 +229,12 @@ void Frontend::match_frames_and_build_tracks() { std::vector cv_kpts_1; std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames_[i].keypoint()) { + for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_1.push_back(cv_kpt); } - for (const KeypointCV &kpt : frames_[j].keypoint()) { + for (const KeypointCV &kpt : shared_frames_[j]->keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_2.push_back(cv_kpt); @@ -244,40 +250,34 @@ void Frontend::match_frames_and_build_tracks() { if (!scale_cam0_from_cam1) { continue; } - // ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_, - // "This rotation should be populated."); - // this could use some work to verify the quality of the output, particularly inside - // of estiamte_cam0_from_cam1 - // also, at the moment I am not accounting for any covariance between the gps - // measurement and the unit translation vector from estimate_cam0_from_cam1 - - frames_[i].frame_from_other_frames_.emplace( + + shared_frames_[i]->frame_from_other_frames_.emplace( j, gtsam::Rot3(scale_cam0_from_cam1->linear().matrix())); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames_[j].keypoint()[match.trainIdx]; + const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = shared_frames_[j]->keypoint()[match.trainIdx]; - auto key = std::make_pair(frames_[i].id_, kpt_cam0); + auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0); if (lmk_id_map_.find(key) != lmk_id_map_.end()) { const size_t id = lmk_id_map_.at(key); ROBOT_CHECK(id < feature_tracks_.size(), "lmk_id_map_ id's shuold not exceed feature_tracks_ size!"); - feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0); - feature_tracks_[id].obs_.emplace_back(frames_[j].id_, kpt_cam1); + feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0); + feature_tracks_[id].obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1); } else { FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames_[j].id_, kpt_cam1); + feature_track.obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1); feature_tracks_.push_back(feature_track); - lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0), + lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0), feature_tracks_.size() - 1); } } } } } else { // successive only - for (size_t i = 0; i < frames_.size() - 1; i++) { - std::vector matches = - compute_matches(frames_[i].descriptors(), frames_[i + 1].descriptors()); + for (size_t i = 0; i < shared_frames_.size() - 1; i++) { + std::vector matches = compute_matches(shared_frames_[i]->descriptors(), + shared_frames_[i + 1]->descriptors()); enforce_bijective_buffer_matches(matches); if (matches.size() < 5) { continue; @@ -285,12 +285,12 @@ void Frontend::match_frames_and_build_tracks() { std::vector cv_kpts_1; std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames_[i].keypoint()) { + for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_1.push_back(cv_kpt); } - for (const KeypointCV &kpt : frames_[i + 1].keypoint()) { + for (const KeypointCV &kpt : shared_frames_[i + 1]->keypoint()) { cv::KeyPoint cv_kpt; cv_kpt.pt = kpt; cv_kpts_2.push_back(cv_kpt); @@ -301,28 +301,28 @@ void Frontend::match_frames_and_build_tracks() { if (!scale_cam0_from_cam1) { continue; } - ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_, + ROBOT_CHECK(shared_frames_[i]->world_from_cam_initial_guess_, "This rotation should be populated."); - frames_[i + 1].world_from_cam_initial_guess_.emplace( - frames_[i].world_from_cam_initial_guess_->matrix() * + shared_frames_[i + 1]->world_from_cam_initial_guess_.emplace( + shared_frames_[i]->world_from_cam_initial_guess_->matrix() * scale_cam0_from_cam1->linear().matrix()); for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames_[i + 1].keypoint()[match.trainIdx]; + const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx]; + const KeypointCV kpt_cam1 = shared_frames_[i + 1]->keypoint()[match.trainIdx]; - auto key = std::make_pair(frames_[i].id_, kpt_cam0); + auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0); if (lmk_id_map_.find(key) != lmk_id_map_.end()) { const size_t id = lmk_id_map_.at(key); ROBOT_CHECK(id < feature_tracks_.size(), "lmk_id_map_ id's shuold not exceed feature_tracks_ size!"); - feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0); - feature_tracks_[id].obs_.emplace_back(frames_[i + 1].id_, kpt_cam1); + feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0); + feature_tracks_[id].obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1); } else { FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames_[i + 1].id_, kpt_cam1); + feature_track.obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1); feature_tracks_.push_back(feature_track); - lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0), + lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0), feature_tracks_.size() - 1); } } @@ -331,48 +331,42 @@ void Frontend::match_frames_and_build_tracks() { std::cout << "done processing matches" << std::endl; } -std::optional> Frontend::interpolated_initial_translations() { +void Frontend::interpolate_frames() { std::vector frames_with_guess; - for (Frame &frame : - frames_) { // this assumes that frames are ordered by seq (time), later on may have to - // think about this more if trying to run pipeline on unordered images or sets - // whose capture times are on different days - if (frame.cam_in_world_initial_guess_) { - frames_with_guess.push_back(&frame); + for (SharedFrame &shared_frame : shared_frames_) { // this assumes that frames are ordered by + // seq (time), later on may have to + // think about this more if trying to run pipeline on unordered images or sets + // whose capture times are on different days + if (shared_frame->cam_in_world_initial_guess_) { + frames_with_guess.push_back(&(*shared_frame)); } } - if (frames_with_guess.size() < 2) return std::nullopt; - std::vector interpolated_init_guesses; - interpolated_init_guesses.reserve(frames_.size()); + if (frames_with_guess.size() < 2) return; size_t idx_guess = 0; - - for (const Frame &frame : frames_) { + for (SharedFrame &shared_frame : shared_frames_) { Eigen::Vector3d interpolated; // advance to next guess window if needed while (idx_guess + 1 < frames_with_guess.size() - 1 && - frame.seq_ > frames_with_guess[idx_guess + 1]->seq_) { + shared_frame->seq_ > frames_with_guess[idx_guess + 1]->seq_) { idx_guess++; } - if (frame.cam_in_world_initial_guess_) { - interpolated = *frame.cam_in_world_initial_guess_; + if (shared_frame->cam_in_world_initial_guess_) { + interpolated = *shared_frame->cam_in_world_initial_guess_; } else { Eigen::Vector3d v_a_to_b = *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]); - double dt = (frame.seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) * 1e-9 * - (frames_with_guess[idx_guess]->seq_ > frame.seq_ - ? frames_with_guess[idx_guess]->seq_ - frame.seq_ - : frame.seq_ - frames_with_guess[idx_guess]->seq_); + double dt = (shared_frame->seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) * + 1e-9 * + (frames_with_guess[idx_guess]->seq_ > shared_frame->seq_ + ? frames_with_guess[idx_guess]->seq_ - shared_frame->seq_ + : shared_frame->seq_ - frames_with_guess[idx_guess]->seq_); interpolated = *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_a_to_b * dt; // add sign back to dt - std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl; - std::cout << "\tdt: " << dt << std::endl; - std::cout << "\tv_a_to_b: " << v_a_to_b << std::endl; } - interpolated_init_guesses.push_back(interpolated); + shared_frame->cam_in_world_interpolated_guess_ = interpolated; } - return interpolated_init_guesses; } std::pair, cv::Mat> Frontend::extract_features(const cv::Mat &img) const { diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh index ce2f5c43d..e381d8089 100644 --- a/experimental/learn_descriptors/frontend.hh +++ b/experimental/learn_descriptors/frontend.hh @@ -34,7 +34,7 @@ class Frontend { void populate_frames(bool verbose = false); void match_frames_and_build_tracks(); - std::optional> interpolated_initial_translations(); + void interpolate_frames(); void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); }; void add_images(const std::vector &img_and_pts) { @@ -47,7 +47,7 @@ class Frontend { const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; }; const FeatureTracks &feature_tracks() const { return feature_tracks_; }; const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; }; - std::vector &frames() { return frames_; }; + std::vector &frames() { return shared_frames_; }; std::pair, cv::Mat> extract_features(const cv::Mat &img) const; std::vector compute_matches(const cv::Mat &descriptors1, @@ -81,6 +81,6 @@ class Frontend { std::vector images_and_points_; FeatureTracks feature_tracks_; FrameLandmarkIdMap lmk_id_map_; - std::vector frames_; + std::vector shared_frames_; }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh index 34f89b9b3..52d900be1 100644 --- a/experimental/learn_descriptors/gps_data.hh +++ b/experimental/learn_descriptors/gps_data.hh @@ -3,14 +3,16 @@ #include #include #include +#include +#include #include "Eigen/Core" namespace robot::experimental::learn_descriptors { struct GPSData { struct Uncertainty { - double sigma_latitude_deg; - double sigma_longitude_deg; + double sigma_latitude_m; + double sigma_longitude_m; double sigma_altitude_m; double orientation_deg; double rms_range_error_m; @@ -18,57 +20,45 @@ struct GPSData { // diagonal latitude, longitude, altitude covariance in meters squared Eigen::Matrix3d to_LLA_covariance() const { Eigen::Matrix3d cov = Eigen::Matrix3d::Zero(); - cov(0, 0) = sigma_latitude_deg * sigma_latitude_deg; - cov(1, 1) = sigma_longitude_deg * sigma_longitude_deg; + cov(0, 0) = sigma_latitude_m * sigma_latitude_m; + cov(1, 1) = sigma_longitude_m * sigma_longitude_m; cov(2, 2) = sigma_altitude_m * sigma_altitude_m; return cov; } - // Convert lat/lon covariance to ENU at given latitude - Eigen::Matrix3d to_ENU_covariance(double lat_deg) const { - // –– Named conversion/constants –– + // Convert lat/lon covariance to ENU + Eigen::Matrix3d to_ENU_covariance() const { static constexpr double DEG2RAD = M_PI / 180.0; - static constexpr double ORIENTATION_OFFSET_DEG = - 90.0; // for CW‐from‐North → CCW‐from‐East + static constexpr double ORIENT_OFFSET_DEG = 90.0; // CW‑from‑North → CCW‑from‑East - // WGS‑84 approximate “meters per degree” polynomial coefficients - static constexpr double MERIDIONAL_A0 = 111132.92; - static constexpr double MERIDIONAL_A2 = -559.82; - static constexpr double MERIDIONAL_A4 = 1.175; - static constexpr double PARALLEL_B0 = 111412.84; - static constexpr double PARALLEL_B2 = -93.5; + // 1) build the 2×2 variance ellipse in metres² + Eigen::Matrix2d D; + D << sigma_latitude_m * sigma_latitude_m, 0.0, 0.0, + sigma_longitude_m * sigma_longitude_m; - // 1) get LLA covariance in degrees² - Eigen::Matrix3d lla_cov = to_LLA_covariance(); - - // 2) convert latitude to radians - double phi = lat_deg * DEG2RAD; - - // 3) compute meters/degree at this latitude - double kLat = MERIDIONAL_A0 + MERIDIONAL_A2 * std::cos(2.0 * phi) + - MERIDIONAL_A4 * std::cos(4.0 * phi); - double kLon = PARALLEL_B0 * std::cos(phi) + PARALLEL_B2 * std::cos(3.0 * phi); - - // 4) build the Jacobian for degree→meter scaling - Eigen::Matrix2d J; - J << kLat, 0.0, 0.0, kLon; - - // 5) scale the 2×2 lat/lon covariance into meters² - Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>(); - Eigen::Matrix2d cov_m = J * D * J.transpose(); - - // 6) rotate that ellipse into ENU axes (θ = CCW from East) - double theta = (ORIENTATION_OFFSET_DEG - orientation_deg) * DEG2RAD; + // 2) rotate that into ENU axes + double theta = (ORIENT_OFFSET_DEG - orientation_deg) * DEG2RAD; Eigen::Matrix2d R; R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta); - Eigen::Matrix2d horz_cov = R * cov_m * R.transpose(); - // 7) assemble full 3×3 ENU covariance - Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero(); - enu_cov.topLeftCorner<2, 2>() = horz_cov; - enu_cov(2, 2) = lla_cov(2, 2); // altitude variance already in m² + Eigen::Matrix2d horz = R * D * R.transpose(); + + // 3) assemble full 3×3 ENU covariance + Eigen::Matrix3d enu = Eigen::Matrix3d::Zero(); + enu.topLeftCorner<2, 2>() = horz; + enu(2, 2) = sigma_altitude_m * sigma_altitude_m; + return enu; + } - return enu_cov; + std::string to_string() const { + std::stringstream ss; + ss << "Uncertainty:"; + ss << "\n\tsigma_lat_deg: " << sigma_latitude_m; + ss << "\n\tsigma_lon_deg: " << sigma_longitude_m; + ss << "\n\tsigma_altitude_m: " << sigma_altitude_m; + ss << "\n\torientation_deg: " << orientation_deg; + ss << "\n\trms_range_error_m: " << rms_range_error_m; + return ss.str(); } }; size_t seq; // time in nanoseconds, may differ from image capturetime @@ -76,5 +66,14 @@ struct GPSData { double longitude; std::optional altitude; // meters above sea level std::optional uncertainty; + std::string to_string() const { + std::stringstream ss; + ss << "GPS Data: "; + ss << "\n\tseq: " << seq; + ss << "\n\tlla: " << latitude << ", " << longitude << ", " + << (altitude ? std::to_string(*altitude) : std::string("N/A")); + ss << "\n" << (uncertainty ? uncertainty->to_string() : "Uncertainty: N/A"); + return ss.str(); + } }; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index f5b9a681b..b9fae431c 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -68,4 +68,5 @@ struct ImagePoint { private: std::optional cam_in_world_; }; +using SharedImagePoint = std::shared_ptr; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc index 6cdec9d5f..1d5643abd 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.cc +++ b/experimental/learn_descriptors/image_point_four_seasons.cc @@ -45,8 +45,8 @@ std::optional ImagePointFourSeasons::translation_covariance_in_ std::optional ImagePointFourSeasons::gps_covariance_in_world() const { if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt; - const Eigen::Matrix3d ENU_covariance( - gps_gcs->uncertainty->to_ENU_covariance(gps_gcs->latitude)); + const Eigen::Matrix3d ENU_covariance(gps_gcs->uncertainty->to_ENU_covariance()); + std::cout << "image point " << id << " ENU_covariance: " << ENU_covariance << std::endl; const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix()); const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose()); return w_covariance; diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh index 368fe8351..95401d885 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.hh +++ b/experimental/learn_descriptors/image_point_four_seasons.hh @@ -72,26 +72,7 @@ struct ImagePointFourSeasons : ImagePoint { } else { ss << "N/A"; } - ss << "\n\tgps_gcs: "; - if (gps_gcs) { - ss << "\n\t\tseq: " << gps_gcs->seq; - ss << "\n\t\tval: " << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t"; - if (gps_gcs->altitude) { - ss << *(gps_gcs->altitude); - } else { - ss << "alt N/A"; - } - ss << "\n\t\tsigma: "; - if (gps_gcs->uncertainty) { - ss << gps_gcs->uncertainty->sigma_latitude_deg << "\t" - << gps_gcs->uncertainty->sigma_longitude_deg << "\t" - << gps_gcs->uncertainty->sigma_altitude_m; - } else { - ss << "N/A"; - } - } else { - ss << "N/A"; - } + ss << "\n" << (gps_gcs ? gps_gcs->to_string() : "GPS Data: N/A"); return ss.str(); } diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc index ef87991f1..054c8a197 100644 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc @@ -207,42 +207,20 @@ int main(int argc, const char **argv) { true, false}; Frontend frontend(params); - std::vector idx_img_pts; for (size_t i = 633; i < 646; i += 2) { frontend.add_image( ImageAndPoint{parser.load_image(i), std::make_shared(parser.get_image_point(i))}); - idx_img_pts.push_back(i); + const ImagePointFourSeasons img_pt = parser.get_image_point(i); + std::cout << img_pt.to_string() << std::endl; } - // for (size_t i = 581; i < 750; i += 5) { - // frontend.add_image( - // ImageAndPoint{parser.load_image(i), - // std::make_shared(parser.get_image_point(i))}); - // idx_img_pts.push_back(i); - // } frontend.populate_frames(); frontend.match_frames_and_build_tracks(); - std::cout << "first frame translation: " << *frontend.frames()[0].cam_in_world_initial_guess_ - << std::endl; - std::cout << "ground truth translation: " - << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl; - + // visualization std::vector viz_poses_init; - std::optional first_point; std::optional first_grnd_trth; - (void)first_point; for (const Frame &frame : frontend.frames()) { - // if (frame.cam_in_world_initial_guess_) { - // Eigen::Isometry3d world_from_cam_init; - // world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix(); - // if (!first_point) first_point = *frame.cam_in_world_initial_guess_; - // world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - - // *first_point; std::cout << "world_from_cam_init: " << - // world_from_cam_init.matrix() << std::endl; viz_poses_init.emplace_back( - // world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, - // frame.id_).string()); - // } if (frame.world_from_cam_groundtruth_) { std::cout << "adding a ground truth frame to viz!" << std::endl; Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix()); @@ -280,34 +258,13 @@ int main(int argc, const char **argv) { frames[0].world_from_cam_initial_guess_ = frames[0].world_from_cam_groundtruth_->rotation(); // make sure first idx has grnd trth - // // let's examine the rotations to the first thingy - // std::vector viz_5_pt_rot; - // for (const auto &[frame_id, rotation] : frames[0].frame_from_other_frames_) { - // viz_5_pt_rot.emplace_back( - // Eigen::Isometry3d(gtsam::Pose3(rotation, gtsam::Point3()).matrix()), - // "rot_to_" + std::to_string(frame_id)); - // } - // robot::visualization::viz_scene(viz_5_pt_rot, std::vector(), - // cv::viz::Color::brown(), true, true, - // "Frontend initial guesses"); - Backend::populate_rotation_estimate(frames); std::vector symbols_pose; std::unordered_map world_from_cam_initial_guess; std::optional> interpolated_cam_in_w = frontend.interpolated_initial_translations(); ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w); - // std::vector viz_pts_interpolated; - // for (size_t i = 0; i < interpolated_cam_in_w->size(); i++) { - // viz_pts_interpolated.emplace_back( - // (*interpolated_cam_in_w)[i] - interpolated_cam_in_w->front(), - // "pt_" + std::to_string(i)); - // std::cout << "pt_" + std::to_string(i) << ": " << (*interpolated_cam_in_w)[i] << - // std::endl; - // } - // robot::visualization::viz_scene(std::vector(), - // viz_pts_interpolated, cv::viz::Color::brown(), true, true, - // "Initial guesses in backend"); + Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front(); std::optional grndtrth_cam0_in_w; std::vector idx_grndtrth_frame; @@ -361,11 +318,11 @@ int main(int argc, const char **argv) { << std::endl; } gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( - // frame.translation_covariance_in_cam_ - false ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), - std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), - std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) - : gps_sigmas_fallback); + frame.translation_covariance_in_cam_ + ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), + std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), + std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) + : gps_sigmas_fallback); graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); std::cout << "adding gps factor!" << std::endl; } @@ -412,7 +369,6 @@ int main(int argc, const char **argv) { // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference double sum_traj_error = 0; double sum_rot_error = 0; - // for (size_t i = 0; i < idx_grndtrth_frame.size(); i++) { for (const size_t i_grndtrth_frame : idx_grndtrth_frame) { const Frame &frame = frames[i_grndtrth_frame]; const gtsam::Pose3 traj_pose = @@ -429,6 +385,5 @@ int main(int argc, const char **argv) { std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; std::cout << "about to visualize result" << std::endl; - // result.print(); graph_values(result, "Result", symbols_pose, symbols_lmks); } \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index c0c39d2a8..a749b6098 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -1,160 +1,82 @@ #include "experimental/learn_descriptors/structure_from_motion.hh" #include +#include #include #include +#include "common/check.hh" #include "common/geometry/camera.hh" #include "common/geometry/translate_types.hh" +#include "experimental/learn_descriptors/backend.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" #include "gtsam/geometry/Point2.h" +#include "opencv2/opencv.hpp" +#include "opencv2/viz.hpp" #include "visualization/opencv/opencv_viz.hh" -namespace fs = std::filesystem; - -namespace geom = robot::geometry; - namespace robot::experimental::learn_descriptors { - -const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() { - Eigen::Isometry3d transform = Eigen::Isometry3d::Identity(); - transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) * - Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1))) - .toRotationMatrix(); - return transform; -}(); - -std::string pose_to_string(gtsam::Pose3 pose) { - std::stringstream ss; - ss << pose; - return ss.str(); -}; - -const gtsam::Pose3 StructureFromMotion::default_initial_pose( - StructureFromMotion::T_symlake_boat_cam.matrix()); - -StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, - gtsam::Cal3_S2 K, Eigen::Matrix D, - gtsam::Pose3 initial_pose, - Frontend::MatcherType frontend_matcher) - : feature_manager_(std::make_shared()), - initial_pose_(initial_pose), - frontend_(frontend_extractor, frontend_matcher), - backend_(boost::make_shared(K)) { - // frontend_ = Frontend(frontend_extractor, frontend_matcher); - // backend_ = Backend(K); - // backend_ = Backend(feature_manager_, K); - - K_ = (cv::Mat_(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1); - D_ = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - - set_initial_pose(initial_pose); -} - -void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) { - backend_.add_prior_factor(gtsam::Symbol(Backend::symbol_char_pose, 0), initial_pose, - gtsam::noiseModel::Isotropic::Sigma(6, 0)); +void StructureFromMotion::add_image_point(const cv::Mat &img_undistorted, + const SharedImagePoint &image_point) { + frontend_.add_image(ImageAndPoint{img_undistorted, image_point}); } - -void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam) { - cv::Mat img_undistorted; - cv::undistort(img, img_undistorted, K_, D_); - std::pair, cv::Mat> keypoints_and_descriptors = - frontend_.extract_features(img); - feature_manager_->append_img_data(keypoints_and_descriptors.first, - keypoints_and_descriptors.second); - keypoint_to_landmarks_.push_back(std::unordered_map()); - const size_t idx_img_current = get_num_images_added(); - // const size_t idx_img_current = feature_manager_->get_num_images_added() - 1; - std::cout << "current index " << idx_img_current << std::endl; - if (idx_img_current > 0) { - std::vector matches = - compute_matches(img_keypoints_and_descriptors_.back().second, - keypoints_and_descriptors.second, Frontend::enforce_bijective_matches); - - const gtsam::Symbol sym_T_w_c0(Backend::symbol_char_pose, idx_img_current - 1); - const gtsam::Symbol sym_T_w_c1(Backend::symbol_char_pose, idx_img_current); - - backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), - T_world_cam.rotation()); - - for (const cv::DMatch match : matches) { - const cv::KeyPoint kpt_cam0 = - img_keypoints_and_descriptors_.back().first[match.queryIdx]; - const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx]; - - size_t idx; - unsigned char chr; - - auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0); - if (maybe_symbol0) { - idx = (*maybe_symbol0).index(); - chr = (*maybe_symbol0).chr(); - } else { - gtsam::Symbol symbol_temp = - gtsam::Symbol(Backend::symbol_char_landmark, landmark_count_); - idx = symbol_temp.index(); - chr = symbol_temp.chr(); - landmark_count_++; - feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp); - } - gtsam::Symbol symbol_lmk = gtsam::Symbol(chr, idx); - std::cout << "value: char: " << chr << " , idx: " << idx << ". " << symbol_lmk - << std::endl; - feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk); - - const Backend::Landmark landmark_cam_0( - symbol_lmk, sym_T_w_c0, - gtsam::Point2( - static_cast( - img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x), - static_cast( - img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)), - backend_.get_K(), 3.0); - - const Backend::Landmark landmark_cam_1( - symbol_lmk, sym_T_w_c1, - gtsam::Point2( - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.x), - static_cast(keypoints_and_descriptors.first[match.trainIdx].pt.y)), - backend_.get_K(), 3.0); - - backend_.add_landmark(landmark_cam_0); - backend_.add_landmark(landmark_cam_1); - } +void StructureFromMotion::add_image_points( + const std::vector &imgs_undistorted, + const std::vector &shared_image_points) { + ROBOT_CHECK(imgs_undistorted.size() == shared_image_points.size()); + std::vector img_and_pts; + img_and_pts.reserve(shared_image_points.size()); + for (size_t i = 0; i < shared_image_points.size(); i++) { + img_and_pts.emplace_back(imgs_undistorted[i], shared_image_points[i]); } - img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors); + frontend_.add_images(img_and_pts); } -std::vector StructureFromMotion::compute_matches( - const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, - std::optional post_process_func) { - std::vector matches = frontend_.compute_matches(descriptors_1, descriptors_2); - if (post_process_func) { - (*post_process_func)(matches); - } - matches_.push_back(matches); - return matches; +void StructureFromMotion::solve_structure( + const int num_steps, std::optional iter_debug_func) { + // FRONTEND + frontend_.populate_frames(); + ROBOT_CHECK(frontend_.frames().size() > 2); + frontend_.match_frames_and_build_tracks(); + ROBOT_CHECK(frontend_.frames().front()->world_from_cam_groundtruth_, + "Assuming that our first frame has groundtruth for now.", + *frontend_.frames().front()->world_from_cam_groundtruth_); + // align rotation via the initial guess + std::cout << "sfm heartbeat" << std::endl; + frontend_.frames().front()->world_from_cam_initial_guess_ = + frontend_.frames().front()->world_from_cam_groundtruth_->rotation(); + std::cout << "sfm heartbeat 2" << std::endl; + std::cout << "sfm heartbeat 3" << std::endl; + std::cout << "sfm heartbeat 4" << std::endl; + + // BACKEND + backend_.add_frames(frontend_.frames()); + std::cout << "backend_.shared_frames.size(): " << backend_.shared_frames().size() << std::endl; + std::cout << "sfm heartbeat 5" << std::endl; + backend_.calculate_initial_values(); + std::cout << "backend.initial_values.size(): " << backend_.current_initial_values().size() + << std::endl; + std::cout << "sfm heartbeat 6" << std::endl; + backend_.solve_graph(10, iter_debug_func); + std::cout << "sfm heartbeat 7" << std::endl; } -void StructureFromMotion::graph_values(const gtsam::Values &values, - const std::string &window_name) { - std::vector final_poses; - std::vector final_lmks; - for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) { - final_poses.emplace_back( - values.at(gtsam::Symbol(get_backend().symbol_char_pose, i)).matrix()); +void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name, + const cv::viz::Color color) { + std::vector final_poses; + std::vector final_lmks; + for (const auto &key_value : values) { + gtsam::Key key = key_value.key; + gtsam::Symbol symbol(key); + if (values.exists(key)) { + const gtsam::Pose3 &pose = values.at(key); + final_poses.emplace_back(Eigen::Isometry3d(pose.matrix()), symbol.string()); + } else if (values.exists(key)) { + const gtsam::Point3 &pt = values.at(key); + final_lmks.emplace_back(pt, symbol.string()); + } } - // for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) { - // if (!values.exists(lmk_symbol)) { - // std::cout << "WTF " << lmk_symbol << std::endl; - // } - // final_lmks.emplace_back(values.at(lmk_symbol)); - // } - geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, window_name); -} - -void StructureFromMotion::solve_structure( - const int num_steps, std::optional inter_debug_func) { - backend_.solve_graph(num_steps, inter_debug_func); + robot::visualization::viz_scene(final_poses, final_lmks, color, true, true, window_name); } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh index 90b6bcfbd..93ffe0ddb 100644 --- a/experimental/learn_descriptors/structure_from_motion.hh +++ b/experimental/learn_descriptors/structure_from_motion.hh @@ -5,65 +5,43 @@ #include #include #include +#include #include #include #include "Eigen/Dense" #include "common/geometry/camera.hh" #include "experimental/learn_descriptors/backend.hh" -#include "experimental/learn_descriptors/feature_manager.hh" +#include "experimental/learn_descriptors/frame.hh" #include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/image_point.hh" #include "gtsam/geometry/Pose3.h" #include "gtsam/inference/Symbol.h" #include "gtsam/nonlinear/Values.h" #include "opencv2/opencv.hpp" +#include "opencv2/viz.hpp" namespace robot::experimental::learn_descriptors { class StructureFromMotion { public: - static const Eigen::Isometry3d T_symlake_boat_cam; - static const gtsam::Pose3 default_initial_pose; - /** - * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3) - */ - StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, - Eigen::Matrix D, - gtsam::Pose3 initial_pose = default_initial_pose, - Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN); - ~StructureFromMotion(){}; + StructureFromMotion(FrontendParams frontend_params) : frontend_(Frontend(frontend_params)){}; - void set_initial_pose(gtsam::Pose3 initial_pose); - void add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam); - void solve_structure() { backend_.solve_graph(); }; + static void graph_values(const gtsam::Values &values, + const std::string &window_name = "Viz Structure", + const cv::viz::Color color = cv::viz::Color::brown()); + + void add_image_point(const cv::Mat &img_undistorted, const SharedImagePoint &image_point); + void add_image_points(const std::vector &imgs_undistorted, + const std::vector &image_points); void solve_structure( const int num_steps, - std::optional inter_debug_func = std::nullopt); - const gtsam::Values &get_structure_result() { return backend_.get_result(); }; - using match_function = std::function &)>; - std::vector compute_matches( - const cv::Mat &descriptors_1, const cv::Mat &descriptors_2, - std::optional post_process_func = std::nullopt); - void graph_values(const gtsam::Values &values, const std::string &window_name = "graph values"); + std::optional iter_debug_func = std::nullopt); + const gtsam::Values &result() { return backend_.result(); }; - Frontend get_frontend() { return frontend_; }; - Backend get_backend() { return backend_; } - size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); }; - size_t get_landmark_count() { return landmark_count_; }; - const std::vector> compute_matches() { return matches_; }; + Frontend frontend() { return frontend_; }; + Backend backend() { return backend_; } private: - std::shared_ptr feature_manager_; - - gtsam::Pose3 initial_pose_; - std::vector, cv::Mat>> img_keypoints_and_descriptors_; - std::vector> matches_; - std::vector> landmarks_; - std::vector> keypoint_to_landmarks_; - size_t landmark_count_ = 0; - - cv::Mat K_; - cv::Mat D_; - Frontend frontend_; Backend backend_; }; diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc new file mode 100644 index 000000000..2be249af3 --- /dev/null +++ b/experimental/learn_descriptors/structure_from_motion_example.cc @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "cxxopts.hpp" +#include "experimental/learn_descriptors/backend.hh" +#include "experimental/learn_descriptors/four_seasons_parser.hh" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" +#include "experimental/learn_descriptors/frontend_definitions.hh" +#include "experimental/learn_descriptors/image_point_four_seasons.hh" +#include "experimental/learn_descriptors/structure_from_motion.hh" +#include "experimental/learn_descriptors/structure_from_motion_types.hh" +#include "opencv2/opencv.hpp" +#include "visualization/opencv/opencv_viz.hh" + +int main(int argc, const char **argv) { + using namespace robot::experimental::learn_descriptors; + // clang-format off + cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser"); + options.add_options() + ("data_dir", "Path to dataset root directory", cxxopts::value()) + ("calibration_dir", "Path to dataset calibration directory", cxxopts::value()) + ("help", "Print usage"); + // clang-format on + + auto args = options.parse(argc, argv); + + const auto check_required = [&](const std::string &opt) { + if (args.count(opt) == 0) { + std::cout << "Missing " << opt << " argument" << std::endl; + std::cout << options.help() << std::endl; + std::exit(1); + } + }; + + if (args.count("help")) { + std::cout << options.help() << std::endl; + return 0; + } + + check_required("data_dir"); + check_required("calibration_dir"); + + const std::filesystem::path path_data = args["data_dir"].as(); + const std::filesystem::path path_calibration = args["calibration_dir"].as(); + FourSeasonsParser parser(path_data, path_calibration); + + FrontendParams frontend_params{FrontendParams::ExtractorType::SIFT, + FrontendParams::MatcherType::KNN, true, false}; + StructureFromMotion sfm(frontend_params); + + for (size_t i = 633; i < 646; i += 2) { + const ImagePointFourSeasons img_pt = parser.get_image_point(i); + sfm.add_image_point(parser.load_image(i), std::make_shared(img_pt)); + std::cout << img_pt.to_string() << std::endl; + } + + std::cout << "heartbeat" << std::endl; + using epoch = size_t; + Backend::graph_step_debug_func graph_itr_debug_func = [&](const gtsam::Values &vals, + const epoch iter) { + std::cout << "iteration " << iter << " complete!"; + std::string window_name = "Iteration_" + std::to_string(iter); + StructureFromMotion::graph_values(vals, window_name); + }; + // sfm.solve_structure(10, graph_itr_debug_func); + sfm.solve_structure(10); + std::cout << "heartbeat 2" << std::endl; + StructureFromMotion::graph_values(sfm.result()); + std::cout << "heartbeat 3" << std::endl; +} \ No newline at end of file From 7c53d8ef6fac345381536b49ee958216082d946d Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Wed, 23 Jul 2025 15:57:49 -0400 Subject: [PATCH 39/45] WIP. SFM works on short to medium sequences. Likely need to do some sort of incremental initialization and incremental optimization(s) --- experimental/learn_descriptors/backend.cc | 97 ++++++++++++++----- experimental/learn_descriptors/backend.hh | 20 ++-- experimental/learn_descriptors/frontend.cc | 8 +- .../image_point_four_seasons.cc | 1 - .../structure_from_motion.cc | 25 ++--- .../structure_from_motion_example.cc | 12 ++- 6 files changed, 107 insertions(+), 56 deletions(-) diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index 682c29ec3..225aadda5 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -278,46 +278,83 @@ void Backend::populate_rotation_estimate(std::vector& shared_frames } } -void Backend::calculate_initial_values() { populate_rotation_estimate(shared_frames_); } +void Backend::calculate_initial_values(bool interpolate_gps) { + populate_rotation_estimate(shared_frames_); + if (interpolate_gps) { + std::vector frames_with_gps; + for (SharedFrame& shared_frame : shared_frames_) { // this assumes that frames are ordered + // by seq (time), later on may have to + // think about this more if trying to run pipeline on unordered images or sets + // whose capture times are on different days + if (shared_frame->cam_in_world_initial_guess_) { + frames_with_gps.push_back(shared_frame); + } + } + if (frames_with_gps.size() < 2) return; + size_t idx_gps = 0; + for (SharedFrame& shared_frame : shared_frames_) { + // advance to next guess window if needed + while (idx_gps + 1 < frames_with_gps.size() - 1 && + shared_frame->seq_ > frames_with_gps[idx_gps + 1]->seq_) { + idx_gps++; + } + + if (shared_frame->cam_in_world_initial_guess_) { + continue; + } + ROBOT_CHECK( + shared_frame->cam_in_world_interpolated_guess_, + "Currently assuming that the frontend populated interpolated initial guesses"); + Eigen::Matrix3d interpolated_covariance( + *shared_frame->translation_covariance_in_cam_ * + 100.0); // very jank for now, not true interpolation + shared_frame->translation_covariance_in_cam_ = interpolated_covariance; + shared_frame->cam_in_world_initial_guess_ = + shared_frame->cam_in_world_interpolated_guess_; + } + } +} void Backend::populate_graph(const FeatureTracks& feature_tracks) { ROBOT_CHECK(shared_frames_.size() > 3); - ROBOT_CHECK(shared_frames_[0]->world_from_cam_groundtruth_, + ROBOT_CHECK(shared_frames_.front()->world_from_cam_groundtruth_, "We're assuming the first camera has groundtruth for now"); - shared_frames_[0]->world_from_cam_initial_guess_ = - shared_frames_[0]->world_from_cam_groundtruth_->rotation(); // align rotation + shared_frames_.front()->world_from_cam_initial_guess_ = + shared_frames_.front() + ->world_from_cam_groundtruth_->rotation(); // align rotation with groundtruth Backend::populate_rotation_estimate(shared_frames_); // add first cam as prior Eigen::Vector3d cam0_in_w = *shared_frames_.front()->cam_in_world_interpolated_guess_; - const gtsam::Pose3 w_from_cam0_init_estimate(*shared_frames_[0]->world_from_cam_initial_guess_, - gtsam::Point3()); + const gtsam::Pose3 w_from_cam0_init_estimate( + *shared_frames_.front()->world_from_cam_initial_guess_, gtsam::Point3{0, 0, 0}); const gtsam::Symbol symbol_cam0(symbol_char_pose, 0); graph_.add(gtsam::PriorFactor( symbol_cam0, w_from_cam0_init_estimate, - noise_tight_prior)); // currently assuming that we have groundtruth on the first pose + noise_tight_prior_)); // currently assuming that we have groundtruth on the first pose initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate); - // add gps factors + // add gps factors and populate initial pose values for (size_t i = 1; i < shared_frames_.size(); i++) { const Frame& frame = *shared_frames_[i]; gtsam::Pose3 world_from_cam_estimate( frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_ : gtsam::Rot3::Identity(), *shared_frames_[i]->cam_in_world_interpolated_guess_ - cam0_in_w); - world_from_cam_initial_estimates_[frame.id_] = world_from_cam_estimate; - const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_); - initial_estimate_.insert(cam_symbol, world_from_cam_estimate); world_from_cam_initial_estimates_.emplace(frame.id_, world_from_cam_estimate); + + const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_); + initial_estimate_.insert(cam_symbol, world_from_cam_initial_estimates_[frame.id_]); if (frame.cam_in_world_initial_guess_) { gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( frame.translation_covariance_in_cam_ ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) - : gps_sigmas_fallback); + : gps_sigmas_fallback_); + std::cout << "gps noise " << i << gps_noise->sigmas() << std::endl; graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); } } @@ -338,7 +375,8 @@ void Backend::populate_graph(const FeatureTracks& feature_tracks) { for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) { graph_.add(gtsam::GenericProjectionFactor( gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise_, - gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol, shared_frames_[0]->K_)); + gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol, + shared_frames_[0]->K_)); // all K are the same for now... } initial_estimate_.insert(lmk_symbol, *landmark_estimate); lmk_initial_estimates_.emplace(i, *landmark_estimate); @@ -346,11 +384,6 @@ void Backend::populate_graph(const FeatureTracks& feature_tracks) { } } -void Backend::solve_graph() { - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_); - result_ = optimizer.optimize(); -} - void Backend::solve_graph(const int num_epochs, std::optional iter_debug_func) { gtsam::LevenbergMarquardtParams params; @@ -359,18 +392,36 @@ void Backend::solve_graph(const int num_epochs, gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); double prev_error = optimizer.error(); + double lowest_error = prev_error; + gtsam::Values best_values = optimizer.values(); for (int i = 0; i < num_epochs; i++) { optimizer.iterate(); double curr_error = optimizer.error(); + if (curr_error < lowest_error) { + lowest_error = curr_error; + best_values = optimizer.values(); + } if (iter_debug_func) { (*iter_debug_func)(optimizer.values(), i); } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } + // if (std::abs(prev_error - curr_error) < 1e-6) { + // std::cout << "Converged at iteration " << i << "\n"; + // break; + // } } - result_ = optimizer.values(); + result_ = best_values; +} + +void Backend::clear() { + shared_frames_.clear(); + shared_frames_.shrink_to_fit(); + + initial_estimate_ = gtsam::Values(); + result_ = gtsam::Values(); + graph_ = gtsam::NonlinearFactorGraph(); + + std::unordered_map().swap(world_from_cam_initial_estimates_); + std::unordered_map().swap(lmk_initial_estimates_); } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh index 50a934699..5b5dc66ed 100644 --- a/experimental/learn_descriptors/backend.hh +++ b/experimental/learn_descriptors/backend.hh @@ -48,14 +48,13 @@ class Backend { shared_frames_.reserve(shared_frames_.size() + shared_frames.size()); shared_frames_.insert(shared_frames_.end(), shared_frames.begin(), shared_frames.end()); }; - void calculate_initial_values(); + void calculate_initial_values(bool interpolate_gps = true); void populate_graph(const FeatureTracks &feature_tracks); - void solve_graph(); typedef int epoch; using graph_step_debug_func = std::function; void solve_graph(const int num_epochs, std::optional iter_debug_func = std::nullopt); - // void imshow_keypoints(const size_t img_id, bool viz = true, bool viz_all = false); + void clear(); const std::vector &shared_frames() const { return shared_frames_; }; const gtsam::Values ¤t_initial_values() const { return initial_estimate_; }; @@ -71,14 +70,15 @@ class Backend { std::unordered_map world_from_cam_initial_estimates_; std::unordered_map lmk_initial_estimates_; - gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas( - (gtsam::Vector(6) << 0, 0, 0, // rotation stdev in radians - 1e-6, 1e-6, 1e-6 // translation stdev in meters - ) - .finished()); - gtsam::Vector3 gps_sigmas_fallback{0.5, 0.5, 0.5}; + gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior_ = + gtsam::noiseModel::Diagonal::Sigmas((gtsam::Vector(6) << 1e-3, 1e-3, + 1e-3, // rotation stdev in radians + 1e-3, 1e-3, 1e-3 // translation stdev in meters + ) + .finished()); + gtsam::Vector3 gps_sigmas_fallback_{0.5, 0.5, 0.5}; gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); + gtsam::noiseModel::Isotropic::Sigma(2, 10.0); }; using Landmark = gtsam::Point3; } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index c6dffd66b..3a3b6af92 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -168,6 +168,9 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) { } void Frontend::populate_frames(bool verbose) { + shared_frames_.clear(); + shared_frames_.shrink_to_fit(); + shared_frames_.reserve(images_and_points_.size()); FrameId id = shared_frames_.size(); for (const ImageAndPoint &img_and_pt : images_and_points_) { const std::shared_ptr &shared_img_pt = img_and_pt.shared_img_pt; @@ -205,11 +208,6 @@ void Frontend::populate_frames(bool verbose) { if (maybe_translation_covariance_in_cam) { frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam; } - if (id == 0) { - std::cout << (frame.world_from_cam_groundtruth_ ? "first frame has grnd" - : "first frame doesn't have grnd") - << std::endl; - } shared_frames_.push_back(std::make_shared(frame)); id++; } diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc index 1d5643abd..7c2829e94 100644 --- a/experimental/learn_descriptors/image_point_four_seasons.cc +++ b/experimental/learn_descriptors/image_point_four_seasons.cc @@ -46,7 +46,6 @@ std::optional ImagePointFourSeasons::translation_covariance_in_ std::optional ImagePointFourSeasons::gps_covariance_in_world() const { if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt; const Eigen::Matrix3d ENU_covariance(gps_gcs->uncertainty->to_ENU_covariance()); - std::cout << "image point " << id << " ENU_covariance: " << ENU_covariance << std::endl; const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix()); const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose()); return w_covariance; diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index a749b6098..8afcba047 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -43,23 +43,16 @@ void StructureFromMotion::solve_structure( "Assuming that our first frame has groundtruth for now.", *frontend_.frames().front()->world_from_cam_groundtruth_); // align rotation via the initial guess - std::cout << "sfm heartbeat" << std::endl; frontend_.frames().front()->world_from_cam_initial_guess_ = frontend_.frames().front()->world_from_cam_groundtruth_->rotation(); - std::cout << "sfm heartbeat 2" << std::endl; - std::cout << "sfm heartbeat 3" << std::endl; - std::cout << "sfm heartbeat 4" << std::endl; // BACKEND + backend_.clear(); backend_.add_frames(frontend_.frames()); - std::cout << "backend_.shared_frames.size(): " << backend_.shared_frames().size() << std::endl; - std::cout << "sfm heartbeat 5" << std::endl; backend_.calculate_initial_values(); - std::cout << "backend.initial_values.size(): " << backend_.current_initial_values().size() - << std::endl; - std::cout << "sfm heartbeat 6" << std::endl; + backend_.populate_graph(frontend_.feature_tracks()); + graph_values(backend_.current_initial_values(), "viz init values"); backend_.solve_graph(10, iter_debug_func); - std::cout << "sfm heartbeat 7" << std::endl; } void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name, @@ -69,13 +62,21 @@ void StructureFromMotion::graph_values(const gtsam::Values &values, const std::s for (const auto &key_value : values) { gtsam::Key key = key_value.key; gtsam::Symbol symbol(key); - if (values.exists(key)) { + try { const gtsam::Pose3 &pose = values.at(key); final_poses.emplace_back(Eigen::Isometry3d(pose.matrix()), symbol.string()); - } else if (values.exists(key)) { + continue; + } catch (const gtsam::ValuesIncorrectType &) { + } + + try { const gtsam::Point3 &pt = values.at(key); final_lmks.emplace_back(pt, symbol.string()); + continue; + } catch (const gtsam::ValuesIncorrectType &) { } + + std::cerr << "Key " << symbol << " is neither Pose3 nor Point3.\n"; } robot::visualization::viz_scene(final_poses, final_lmks, color, true, true, window_name); } diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc index 2be249af3..95c45a650 100644 --- a/experimental/learn_descriptors/structure_from_motion_example.cc +++ b/experimental/learn_descriptors/structure_from_motion_example.cc @@ -53,13 +53,16 @@ int main(int argc, const char **argv) { FrontendParams::MatcherType::KNN, true, false}; StructureFromMotion sfm(frontend_params); - for (size_t i = 633; i < 646; i += 2) { + // for (size_t i = 633; i < 1000; i += 1) { + // const ImagePointFourSeasons img_pt = parser.get_image_point(i); + // sfm.add_image_point(parser.load_image(i), + // std::make_shared(img_pt)); + // } + for (size_t i = 835; i < 900; i += 4) { const ImagePointFourSeasons img_pt = parser.get_image_point(i); sfm.add_image_point(parser.load_image(i), std::make_shared(img_pt)); - std::cout << img_pt.to_string() << std::endl; } - std::cout << "heartbeat" << std::endl; using epoch = size_t; Backend::graph_step_debug_func graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter) { @@ -67,9 +70,8 @@ int main(int argc, const char **argv) { std::string window_name = "Iteration_" + std::to_string(iter); StructureFromMotion::graph_values(vals, window_name); }; + // passing graph_itr_debug_func will execute the function after each optimization epoch // sfm.solve_structure(10, graph_itr_debug_func); sfm.solve_structure(10); - std::cout << "heartbeat 2" << std::endl; StructureFromMotion::graph_values(sfm.result()); - std::cout << "heartbeat 3" << std::endl; } \ No newline at end of file From 672efcc529891650d61f76b0e8b4e26c00acd19f Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 24 Jul 2025 11:50:37 -0400 Subject: [PATCH 40/45] clean up --- experimental/learn_descriptors/BUILD | 84 -- experimental/learn_descriptors/backend.cc | 85 -- experimental/learn_descriptors/backend.hh | 7 +- .../get_colmap_groundtruth.cc | 82 -- .../learn_descriptors/incremental_sfm_mvp.cc | 793 ------------------ experimental/learn_descriptors/map.hh | 4 - experimental/learn_descriptors/map.proto | 15 - .../learn_descriptors/refactor_sfm_mvp.cc | 389 --------- experimental/learn_descriptors/sfm_mvp.cc | 685 --------------- .../structure_from_motion_example.cc | 10 +- 10 files changed, 5 insertions(+), 2149 deletions(-) delete mode 100644 experimental/learn_descriptors/get_colmap_groundtruth.cc delete mode 100644 experimental/learn_descriptors/incremental_sfm_mvp.cc delete mode 100644 experimental/learn_descriptors/map.hh delete mode 100644 experimental/learn_descriptors/map.proto delete mode 100644 experimental/learn_descriptors/refactor_sfm_mvp.cc delete mode 100644 experimental/learn_descriptors/sfm_mvp.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 63a162d08..4a1fd3419 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -251,27 +251,6 @@ cc_library( ], ) -cc_test( - name = "sfm_mvp", - srcs = ["sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-variable", - ], - deps = [ - ":frame", - ":frontend", - ":spatial_test_scene_cube", - ":structure_from_motion_types", - ":symphony_lake_parser", - "//visualization/opencv:opencv_viz", - "@com_google_googletest//:gtest_main", - "@eigen", - "@gtsam", - "@opencv", - ], -) - cc_library( name = "feature_set", hdrs = ["feature_set.hh"], @@ -385,42 +364,6 @@ cc_test( ], ) -cc_binary( - name = "get_colmap_groundtruth", - srcs = ["get_colmap_groundtruth.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - ":symphony_lake_parser", - ], -) - -cc_binary( - name = "incremental_sfm_mvp", - srcs = ["incremental_sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-function", - ], - deps = [ - ":four_seasons_parser", - ":frame", - ":frontend", - ":spatial_test_scene_cube", - ":structure_from_motion_types", - "//common/geometry:camera", - "//common/gps:frame_translation", - "//visualization/opencv:opencv_viz", - "@boost//:smart_ptr", - "@cxxopts", - "@eigen", - "@gtsam", - "@opencv", - ":camera_calibration" - ], -) - cc_library( name = "camera_calibration", hdrs = ["camera_calibration.hh"], @@ -430,33 +373,6 @@ cc_library( ] ) -cc_binary( - name = "refactor_sfm_mvp", - srcs = ["refactor_sfm_mvp.cc"], - copts = [ - "-Wno-error=unused-parameter", - "-Wno-error=unused-function", - ], - deps = [ - ":backend", - ":four_seasons_parser", - ":frame", - ":frontend", - ":image_point", - ":image_point_four_seasons", - ":spatial_test_scene_cube", - ":structure_from_motion_types", - "//common/geometry:camera", - "//visualization/opencv:opencv_viz", - "@boost//:smart_ptr", - "@cxxopts", - "@eigen", - "@gtsam", - "@opencv", - ":camera_calibration" - ], -) - cc_binary( name = "structure_from_motion_example", srcs = ["structure_from_motion_example.cc"], diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc index 225aadda5..37a9c7a4d 100644 --- a/experimental/learn_descriptors/backend.cc +++ b/experimental/learn_descriptors/backend.cc @@ -109,91 +109,6 @@ gtsam::Rot3 average_rotations(const std::vector& rotations, int max return mean; } -void Backend::populate_rotation_estimate(std::vector& frames) { - struct FrameVertex; - using SharedFrameVertex = std::shared_ptr; - struct FrameVertex { - // std::unique_ptr frame; - Frame& frame; - std::optional w_from_this; - std::unordered_map - neighbor_id_to_rot; // rot is frame_from_neighbor_frame - std::unordered_set parents; - std::unordered_set children; - }; - std::unordered_map frame_tree; - for (Frame& frame : frames) { - frame_tree.emplace(frame.id_, std::make_shared(frame)); - } - // populate all frames and add children to neighbors - std::queue q; // indices of Frame in frames - std::unordered_set seen_frame_id; - q.push(frames.front().id_); - - gtsam::NonlinearFactorGraph graph; - gtsam::Values initial; - // TODO: somehow get more informative/more robust noise values - // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength - gtsam::noiseModel::Diagonal::shared_ptr noise_rotation = - gtsam::noiseModel::Isotropic::Sigma(3, 0.1); // radians - - std::vector symbols_frames; - while (!q.empty()) { - Frame frame = frames[q.front()]; - q.pop(); - seen_frame_id.insert(frame.id_); - SharedFrameVertex frame_vertex = frame_tree.at(frame.id_); - if (seen_frame_id.size() == 1) { // make the first frame rotation identity unless the - // initial guess is set otherwise - frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_ - ? *frame_vertex->frame.world_from_cam_initial_guess_ - : gtsam::Rot3::Identity(); - graph.add(gtsam::PriorFactor( - gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_), - *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3))); - } - frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this; - std::cout << frame.to_string() << std::endl; - - const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_); - symbols_frames.push_back(symbol_frame); - initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_); - for (const auto& [other_frame_id, frame_from_other_frame] : - frame.frame_from_other_frames_) { - if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) { - q.push(other_frame_id); - seen_frame_id.insert(other_frame_id); - } - frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame); - SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id); - frame_vertex->children.insert(other_frame_vertex); - other_frame_vertex->parents.insert(frame_vertex); - other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_, - frame_from_other_frame.inverse()); - if (!other_frame_vertex->w_from_this) { - other_frame_vertex->w_from_this = - *frame_vertex->w_from_this * frame_from_other_frame; - other_frame_vertex->frame.world_from_cam_initial_guess_ = - other_frame_vertex->w_from_this; - } - - graph.add(gtsam::BetweenFactor( - symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id), - frame_from_other_frame.inverse(), noise_rotation)); - } - frame_tree.emplace(frame.id_, frame_vertex); - } - - gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial); - gtsam::Values result = optimizer.optimize(); - - // Output results and assign results to frames - for (const gtsam::Symbol& symbol_rotation : symbols_frames) { - gtsam::Rot3 w_from_frame = result.at(symbol_rotation); - frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame; - } -} - void Backend::populate_rotation_estimate(std::vector& shared_frames) { struct FrameVertex; using SharedFrameVertex = std::shared_ptr; diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh index 5b5dc66ed..6f9a8c0c7 100644 --- a/experimental/learn_descriptors/backend.hh +++ b/experimental/learn_descriptors/backend.hh @@ -36,12 +36,7 @@ class Backend { static gtsam::Rot3 average_rotations(const std::vector &rotations, int max_iter = 10); // use rotation averaging to set the rotation initial guess for each frame in the world frame. - // the world frame will be the first frame in the vector. deadreckon_incrementally won't - // optimize if true - static void populate_rotation_estimate(std::vector &frames); - // use rotation averaging to set the rotation initial guess for each frame in the world frame. - // the world frame will be the first frame in the vector. deadreckon_incrementally won't - // optimize if true + // the world frame will be the first frame in the vector static void populate_rotation_estimate(std::vector &shared_frames); void add_frames(std::vector &shared_frames) { diff --git a/experimental/learn_descriptors/get_colmap_groundtruth.cc b/experimental/learn_descriptors/get_colmap_groundtruth.cc deleted file mode 100644 index 7115daf0d..000000000 --- a/experimental/learn_descriptors/get_colmap_groundtruth.cc +++ /dev/null @@ -1,82 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "experimental/learn_descriptors/symphony_lake_parser.hh" - -namespace ld = robot::experimental::learn_descriptors; - -struct ImagePose { - std::string filename; - double X; - double Y; - double Z; - double heading; -}; - -void writeCsv(const std::string& path, const std::vector& rows) { - std::ofstream out(path); - if (!out) { - throw std::runtime_error("Failed to open " + path); - } - - out << "filename,X,Y,Z,heading\n"; - - out << std::fixed << std::setprecision(6); - for (const auto& r : rows) { - out << r.filename << ',' << r.X << ',' << r.Y << ',' << r.Z << ',' << r.heading << '\n'; - } -} - -void writeTxt(const std::string& path, const std::vector& rows) { - std::ofstream out(path); - if (!out) { - throw std::runtime_error("Failed to open " + path); - } - - out << std::fixed << std::setprecision(6); - for (const auto& r : rows) { - out << r.filename << ' ' << r.X << ' ' << r.Y << ' ' << r.Z << ' ' << r.heading << '\n'; - } -} - -int main() { - ld::DataParser data_parser("/home/pizzaroll04/Documents/datasets", - std::vector{"symphony_lake_building_test"}); - - const symphony_lake_dataset::SurveyVector& survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey& survey = survey_vector.get(0); - - std::vector poses; - for (int i = 0; i < 718; i++) { - symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(i); - const Eigen::Isometry3d T_wrld_cam = - data_parser.get_world_from_boat(img_pt) * data_parser.get_boat_from_camera(img_pt); - ImagePose img_pose; - - std::ostringstream oss; - oss << std::setw(4) << std::setfill('0') << i; - img_pose.filename = oss.str() + ".jpg"; - img_pose.X = T_wrld_cam.translation().x(); - img_pose.Y = T_wrld_cam.translation().y(); - img_pose.Z = T_wrld_cam.translation().z(); - Eigen::Matrix3d R = T_wrld_cam.linear(); - img_pose.heading = std::atan2(R(1, 0), R(0, 0)); - - poses.push_back(img_pose); - } - - try { - writeTxt("/home/pizzaroll04/Documents/datasets/symphony_lake_building_test/ref_images.txt", - poses); - std::cout << "Wrote " << poses.size() << " rows to ref_images.csv\n"; - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; - return 1; - } - - return 0; -} diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc deleted file mode 100644 index 4906f0ff1..000000000 --- a/experimental/learn_descriptors/incremental_sfm_mvp.cc +++ /dev/null @@ -1,793 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "Eigen/Core" -#include "Eigen/Geometry" -#include "common/geometry/camera.hh" -#include "common/gps/frame_translation.hh" -#include "cxxopts.hpp" -#include "experimental/learn_descriptors/camera_calibration.hh" -#include "experimental/learn_descriptors/four_seasons_parser.hh" -#include "experimental/learn_descriptors/frame.hh" -#include "experimental/learn_descriptors/frontend.hh" -#include "experimental/learn_descriptors/frontend_definitions.hh" -#include "experimental/learn_descriptors/structure_from_motion_types.hh" -#include "gtsam/geometry/Cal3_S2.h" -#include "gtsam/geometry/Point2.h" -#include "gtsam/geometry/Point3.h" -#include "gtsam/geometry/Pose3.h" -#include "gtsam/geometry/Rot3.h" -#include "gtsam/geometry/triangulation.h" -#include "gtsam/inference/Symbol.h" -#include "gtsam/linear/NoiseModel.h" -#include "gtsam/navigation/GPSFactor.h" -#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" -#include "gtsam/nonlinear/NonlinearFactorGraph.h" -#include "gtsam/nonlinear/Values.h" -#include "gtsam/slam/BetweenFactor.h" -#include "gtsam/slam/PriorFactor.h" -#include "gtsam/slam/ProjectionFactor.h" -#include "visualization/opencv/opencv_viz.hh" - -namespace std { -template <> -struct hash { - size_t operator()(const gtsam::Symbol &s) const { return hash()(s.key()); } -}; -} // namespace std - -namespace detail_sfm { - -std::optional attempt_triangulate(const std::vector &cam_poses, - const std::vector &cam_obs, - gtsam::Cal3_S2::shared_ptr K, - const double max_reproj_error = 2.0, - const bool verbose = true) { - gtsam::Point3 p_lmk_in_world; - if (cam_poses.size() >= 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - p_lmk_in_world = gtsam::triangulatePoint3( - cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - - } catch (const gtsam::TriangulationCheiralityException &e) { - // Handle the exception gracefully by logging and retaining the previous - // estimate. - if (verbose) - std::cerr << "attempt_triangulation failed. Likely cheirality exception: " - << e.what() << ". Discarding involved keypoints." << std::endl; - } - } - // Optional: perform an explicit cheirality check - for (const auto &pose : cam_poses) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); - if (p_cam_lmk.z() <= 0) { // Check that the depth is positive - return std::nullopt; - } - } - - // Cheirality & reprojection checks - for (size_t i = 0; i < cam_poses.size(); ++i) { - const auto &pose = cam_poses[i]; - // Cheirality - gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); - if (p_cam.z() <= 0) { - if (verbose) { - std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z() - << ")\n"; - } - return std::nullopt; - } - // Reprojection error - if (max_reproj_error > 0) { - gtsam::PinholeCamera cam(pose, *K); - const auto reproj = cam.project(p_lmk_in_world); - const double err = (reproj - cam_obs[i]).norm(); - if (err > max_reproj_error) { - if (verbose) { - std::cerr << "[triangulate] reprojection error too large on view " << i << " (" - << err << " px)\n"; - } - return std::nullopt; - } - } - } - - return p_lmk_in_world; -} - -// bool attempt_triangulate(const std::vector &cam_poses, -// const std::vector &cam_obs, -// gtsam::Cal3_S2::shared_ptr K, gtsam::Point3 &out_p_world_landmark, -// const bool verbose = true) { -// bool valid = true; -// if (cam_poses.size() >= 2) { -// try { -// // Attempt triangulation using DLT (or the GTSAM provided method) -// gtsam::Point3 p_lmk_in_world = gtsam::triangulatePoint3( -// cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - -// out_p_world_landmark = p_lmk_in_world; - -// // Optional: perform an explicit cheirality check -// for (const auto &pose : cam_poses) { -// // Transform point to the camera coordinate system. -// // transformTo() converts a world point to the camera frame. -// gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); -// if (p_cam_lmk.z() <= 0) { // Check that the depth is positive -// valid = false; -// break; -// } -// } -// } catch (const gtsam::TriangulationCheiralityException &e) { -// // Handle the exception gracefully by logging and retaining the previous -// // estimate. -// if (verbose) -// std::cerr << "attempt_triangulation failed. Likely cheirality exception: " -// << e.what() << ". Keeping initial landmark estimate." << std::endl; -// } -// } else { -// valid = false; -// } -// return valid; -// } - -void graph_values(const gtsam::Values &values, const std::string &window_name, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; - for (const gtsam::Symbol &symbol_pose : symbols_pose) { - final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), - symbol_pose.string()); - } - for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { - if (!values.exists(symbol_lmk)) { - std::cout << "WTF " << symbol_lmk << std::endl; - } - final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); - } - std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; - robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, - window_name); -} - -gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks, - const int num_epochs = 5, bool viz_itr = false) { - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; - gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); - - double prev_error = optimizer.error(); - typedef int epoch; - std::function &, - const std::vector &)> - graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::cout << "iteration " << iter << " complete!"; - std::string window_name = "Iteration_" + std::to_string(iter); - graph_values(vals, window_name, symbols_pose, symbols_landmarks); - }; - - for (int i = 0; i < num_epochs; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (viz_itr) { - graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } - } - return optimizer.values(); -} - -gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { - if (poses.empty()) throw std::runtime_error("Empty pose vector"); - - gtsam::Pose3 mean = poses[0]; - - for (int iter = 0; iter < maxIter; ++iter) { - gtsam::Vector6 total = gtsam::Vector6::Zero(); - - for (const auto &pose : poses) { - gtsam::Pose3 delta = mean.between(pose); - total += gtsam::Pose3::Logmap(delta); - } - - total /= poses.size(); - mean = mean.compose(gtsam::Pose3::Expmap(total)); - } - - return mean; -} - -gtsam::Point3 averagePoints(const std::vector &points) { - if (points.empty()) throw std::runtime_error("Empty point vector"); - gtsam::Point3 sum(0, 0, 0); - for (const auto &pt : points) sum += pt; - return sum / points.size(); -} - -gtsam::Point3 get_variance(const std::vector &points) { - const gtsam::Point3 mean = averagePoints(points); - gtsam::Point3 var(0, 0, 0); - for (const gtsam::Point3 &pt : points) { - var += (mean - pt).array().square().matrix(); - } - return var / points.size(); -} - -double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) { - Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation(); - - // 2. Compute trace and clamp to [-1,1] for numerical safety - double tr = R_err.trace(); - double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0)); - - // 3. Recover angle (in radians) - return std::acos(cos_theta); -} - -// const std::vector get_absolute_trajectory_error(const std::vector &) -// {} -}; // namespace detail_sfm - -int main(int argc, const char **argv) { - using namespace robot::experimental::learn_descriptors; - // clang-format off - cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser"); - options.add_options() - ("data_dir", "Path to dataset root directory", cxxopts::value()) - ("calibration_dir", "Path to dataset calibration directory", cxxopts::value()) - ("help", "Print usage"); - // clang-format on - - auto args = options.parse(argc, argv); - - const auto check_required = [&](const std::string &opt) { - if (args.count(opt) == 0) { - std::cout << "Missing " << opt << " argument" << std::endl; - std::cout << options.help() << std::endl; - std::exit(1); - } - }; - - if (args.count("help")) { - std::cout << options.help() << std::endl; - return 0; - } - - check_required("data_dir"); - check_required("calibration_dir"); - - const std::filesystem::path path_data = args["data_dir"].as(); - const std::filesystem::path path_calibration = args["calibration_dir"].as(); - FourSeasonsParser parser(path_data, path_calibration); - - std::cout << "incremental_sfm_mvp!" << std::endl; - // const std::vector indices{581, 609, - // 633}; // indices with all data fields on neighborhood_5_train - // const std::vector indices{581, 593, 609, 621, 633, 636, 639, 642}; // indices with - // get indices with fully populated img_pt containing full gps data and reference - const std::vector indices = [&parser]() -> std::vector { - std::vector tmp; - for (size_t i = 0; i < parser.num_images(); i++) { - const ImagePointFourSeasons img_pt = parser.get_image_point(i); - if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs && - img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude) - tmp.push_back(i); - } - return tmp; - }(); - constexpr bool visualize = false; - // all data fields on neighborhood_5_train const std::vector indices = []() { - // std::vector tmp; - // for (int i = 660; i < 681; i += 10) { - // tmp.push_back(i); - // } - // return tmp; - // }(); - // FourSeasonsParser parser() - // const size_t img_width = img_pt_first.width, img_height = - // img_pt_first.height; - const std::shared_ptr cal_parser_left_cam = - parser.camera_calibration(); - gtsam::Cal3_S2::shared_ptr K = - boost::make_shared(cal_parser_left_cam->fx, cal_parser_left_cam->fy, 0, - cal_parser_left_cam->cx, cal_parser_left_cam->cy); - cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); - cv::Mat D_mat = (cv::Mat_(4, 1) << cal_parser_left_cam->k1, cal_parser_left_cam->k2, - cal_parser_left_cam->k3, cal_parser_left_cam->k4); - - // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // gtsam::Pose3(T_world_camera0.matrix())); - FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, - true, false}; - Frontend frontend(params); - - gtsam::Values initial_estimate_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::Vector6 prior_sigmas; - prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; - gtsam::Vector3 gps_sigmas; - gps_sigmas << 2.1, 2.1, 0.1; - gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = - gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); - gtsam::noiseModel::Diagonal::shared_ptr gps_noise = - gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); - - // populate frames and cam_poses - std::vector frames; - FrameId id = 0; - std::unordered_map - id_to_initial_world_from_cam; // these are for initial guesses!! - Eigen::Vector3d t_first_cam_in_world; // to create our own world - std::vector references_world_from_cam; - std::vector viz_references_world_from_cam; - Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity(); - scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) = - parser.gnss_scale(); - std::vector> kpt_list; - for (const int &idx : indices) { - const ImagePointFourSeasons img_pt = parser.get_image_point(idx); - std::cout << img_pt.to_string() << std::endl; - // - Eigen::Isometry3d world_from_cam; - if (id == 0) { - // for now we will use an informative prior based on reference - world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * - img_pt.AS_w_from_gnss_cam->matrix()) - .matrix()); - t_first_cam_in_world = world_from_cam.translation(); - } else if (img_pt.gps_gcs) { - const Eigen::Vector3d gps_gcs( - img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude, - img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0); - const Eigen::Vector3d p_gps_in_ECEF = robot::gps::ecef_from_lla(gps_gcs); - const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(), - p_gps_in_ECEF.z(), 1.0); - world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * - img_pt.AS_w_from_gnss_cam->matrix()) - .matrix()); - // std::cout << "ok" << std::endl; - // world_from_cam.translation() = - // ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * - // p_gps_in_ECEF_hom) - // .head<3>(); - - // Eigen:: - } - world_from_cam.translation() = world_from_cam.translation() - t_first_cam_in_world; - id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix()); - - // populate frame - cv::Mat img = parser.load_image(idx); - cv::Mat img_undistorted; - cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat, K_mat, img.size()); - - // std::stringstream ss; - // ss << "image " << idx << " of size " - // << "(" << img_undistorted.cols << ", " << img_undistorted.rows << ")"; - // cv::imshow(ss.str(), img_undistorted); - // cv::waitKey(0); - // cv::destroyWindow(ss.str()); - - std::optional frame; - if (img_pt.AS_w_from_gnss_cam) { - Eigen::Isometry3d world_from_cam_reference = - Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference * - img_pt.AS_w_from_gnss_cam->matrix())); - world_from_cam_reference.translation() = - world_from_cam_reference.translation() - t_first_cam_in_world; - viz_references_world_from_cam.emplace_back(world_from_cam_reference, - "img " + std::to_string(idx)); - references_world_from_cam.push_back(world_from_cam_reference); - frame.emplace(id, img_undistorted, K, gtsam::Pose3(world_from_cam_reference.matrix())); - } else { - frame.emplace(id, img_undistorted, K); - std::clog << "Warning: no reference data at img_pt " << idx << std::endl; - } - - std::pair, cv::Mat> kpts_descs = - frontend.extract_features(img_undistorted); - std::cout << "kpts_descs.size(): " << kpts_descs.first.size() << std::endl; - kpt_list.push_back(kpts_descs.first); - KeypointsCV kpts; - for (const cv::KeyPoint &kpt : kpts_descs.first) { - kpts.push_back(kpt.pt); - } - // std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl; - frame->add_keypoints(kpts); - frame->assign_descriptors(kpts_descs.second); - - frames.push_back(*frame); - - id++; - } - if (visualize) - robot::visualization::viz_scene(viz_references_world_from_cam, - std::vector(), - cv::viz::Color::brown(), true, true, "References"); - - // populate feature_tracks and lmk_id_map - // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute - // matches - FeatureTracks feature_tracks; - FrameLandmarkIdMap lmk_id_map; - LandmarkId lmk_id = 0; - constexpr bool exhaustive = true; - if (exhaustive) { - for (size_t i = 0; i < indices.size() - 1; i += 4) { - // std::cout << "i: " << i << std::endl; - for (size_t j = i + 1; j < indices.size(); j++) { - // std::cout << "j: " << j << std::endl; - std::vector matches = - frontend.compute_matches(frames[i].descriptors(), frames[j].descriptors()); - // DIAL TO MESS WITH - frontend.enforce_bijective_buffer_matches(matches); - - if (matches.size() < 5) { - continue; - } - - std::vector cv_kpts_1; - std::vector cv_kpts_2; - for (const KeypointCV &kpt : frames[i].keypoint()) { - cv::KeyPoint cv_kpt; - cv_kpt.pt = kpt; - cv_kpts_1.push_back(cv_kpt); - } - for (const KeypointCV &kpt : frames[j].keypoint()) { - cv::KeyPoint cv_kpt; - cv_kpt.pt = kpt; - cv_kpts_2.push_back(cv_kpt); - } - Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix()); - // std::cout << "heartbeat" << std::endl; - std::optional scale_cam0_from_cam1 = - robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat); - if (!scale_cam0_from_cam1) { - continue; - } - world_from_camj.linear() = - (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) * - *scale_cam0_from_cam1) - .linear(); - // std::cout << "heartbeat 2" << std::endl; - id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix()); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[j].keypoint()[match.trainIdx]; - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - lmk_id++; - } - } - } - } - std::cout << "done processing matches" << std::endl; - } else { // successive only - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = - frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); - frontend.enforce_bijective_buffer_matches(matches); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - lmk_id++; - } - } - } - } - std::vector viz_world_from_cam_init; - for (const auto &[id, world_from_cam] : id_to_initial_world_from_cam) { - viz_world_from_cam_init.emplace_back(Eigen::Isometry3d(world_from_cam.matrix()), - std::to_string(id)); - } - - // std::cout << "pre heartbeat" << std::endl; - if (visualize) - robot::visualization::viz_scene( - viz_world_from_cam_init, std::vector(), - cv::viz::Color::brown(), true, true, "world_from_cam_estimates"); - // std::cout << "heartbeat" << std::endl; - - // TRIANGULATE all of the points (for initial guess) - std::unordered_map lmk_triangulated_map; // points are kpt_in_world - std::vector triangulated_lmks_viz; - std::vector triangulated_lmks; - for (const std::pair lmk_feat : feature_tracks) { - std::vector world_from_cams; - std::vector feat_kpts; - const LandmarkId lmk_id = lmk_feat.first; - const FeatureTrack feature_track = lmk_feat.second; - // gtsam::Point3 kpt_in_world(0, 0, 0); - for (const std::pair &feat_track : feature_track.obs_) { - world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]); - feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); - } - std::optional kpt_in_world = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, false); - // bool triangulate_success = - // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false); - if (kpt_in_world) { - lmk_triangulated_map.emplace(lmk_id, *kpt_in_world); - triangulated_lmks.push_back(*kpt_in_world); - triangulated_lmks_viz.emplace_back(*kpt_in_world, "lmk " + std::to_string(lmk_id)); - } else { - std::cout << "triangulation not success, leaving it out." << std::endl; - continue; - } - } - std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl; - if (visualize) - robot::visualization::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz, - cv::viz::Color::brown(), true, true, - "Unfiltered, triangulated points"); - - // filter points via variance - // TODO: filter based on quality and/or quantity of matches - std::cout << "triangulated_lmks.size(): " << triangulated_lmks.size() << std::endl; - const gtsam::Point3 variance_pts = detail_sfm::get_variance(triangulated_lmks); - std::cout << "heart beat 2" << std::endl; - const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); - const gtsam::Point3 mean_pts = detail_sfm::averagePoints(triangulated_lmks); - std::unordered_map lmk_triangulated_map_filtered; - std::vector filtered_points; - std::cout << "original var " << variance_pts << std::endl; - for (const std::pair lmk_id_pt : lmk_triangulated_map) { - const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix(); - if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) { - lmk_triangulated_map_filtered.emplace(lmk_id_pt); - filtered_points.push_back(lmk_id_pt.second); - } - } - std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl; - std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl; - if (visualize) - robot::visualization::viz_scene(std::vector(), filtered_points, - cv::viz::Color::brown(), true, true, - "Filtered, triangulated points"); - - // ############# BACKEND ############### - - // add filtered points to graph - std::unordered_map> symbols_poses_values_iter; - std::unordered_map> symbols_landmarks_values_iter; - std::vector symbols_pose; - std::vector symbols_landmarks; - for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) { - // LandmarkId lmk_id = lmk_id_pt.first; - // const gtsam::Point3 kpt_in_world = lmk_id_pt.second; - FeatureTrack feature_track = feature_tracks.at(lmk_id); - const gtsam::Symbol symbol_lmk('l', lmk_id); - for (const auto &[frame_id, obs] : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world); - symbols_landmarks.push_back(symbol_lmk); - symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{kpt_in_world}); - graph_.emplace_shared>( - gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id), - symbol_lmk, K); - } - } - - // add gps factors - graph_.emplace_shared>( - gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise); - initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0)); - symbols_pose.push_back(gtsam::Symbol('x', 0)); - symbols_poses_values_iter.emplace( - gtsam::Symbol('x', 0), std::vector{id_to_initial_world_from_cam.at(0)}); - for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) { - if (frame_id == 0) { - continue; - } - const gtsam::Symbol key_cam('x', frame_id); - symbols_pose.push_back(key_cam); - gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation(); - graph_.emplace_shared(key_cam, p_cam_in_world, gps_noise); - initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam); - symbols_poses_values_iter.emplace(key_cam, - std::vector{estimate_world_from_cam}); - // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world)); - } - - // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); - std::cout << "heart beat 3" << std::endl; - - constexpr bool local_optimizations = false; - if (local_optimizations) { - // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number - // of matches - // do local optimizations and add to iter cache - // const int window = 2; - std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl; - for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) { - std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl; - } - for (size_t i = 0; i < indices.size() - 1; i++) { - std::cout << "Local optimization " << i << std::endl; - gtsam::Values local_estimate_; - gtsam::NonlinearFactorGraph local_graph_; - - std::vector symbols_poses{gtsam::Symbol('x', i), - gtsam::Symbol('x', i + 1)}; - std::vector symbols_lmks; - - local_graph_.emplace_shared>( - gtsam::PriorFactor( - symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise)); - std::cout << "fuck" << std::endl; - local_graph_.emplace_shared>( - gtsam::PriorFactor( - symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise)); - std::cout << "mega fuck" << std::endl; - local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i)); - local_estimate_.insert_or_assign(symbols_poses[1], - id_to_initial_world_from_cam.at(i + 1)); - - std::vector matches = - frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); - // DIAL TO MESS WITH - frontend.enforce_bijective_matches(matches); - std::vector world_from_cams{id_to_initial_world_from_cam.at(i), - id_to_initial_world_from_cam.at(i + 1)}; - - std::vector viz_world_from_cams{ - {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"}, - {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}}; - std::vector viz_lmks; - for (const cv::DMatch match : matches) { - std::vector feat_kpts; - const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; - feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); - feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); - - const std::pair key_lmk_id = - std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) { - LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id); - if (lmk_triangulated_map_filtered.find(match_lmk_id) == - lmk_triangulated_map_filtered.end()) { - continue; - } - std::cout << "good" << std::endl; - // do nothing - // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - std::cerr << "ERROR: this shouldn't happen right?" << std::endl; - FeatureTrack feature_track(frames[i].id_, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.push_back(feature_track); - lmk_id_map.emplace(key_lmk_id, lmk_id); - lmk_id++; - } - std::cout << "oog" << std::endl; - const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id)); - // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != - // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { - // std::cerr << "UH OH" << std::endl; - // } else { - // std::cout << "cool" << std::endl; - // } - symbols_lmks.push_back(symbol_lmk); - local_graph_ - .emplace_shared>( - feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K); - local_graph_ - .emplace_shared>( - feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K); - - std::optional kpt_in_world = - detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K); - // gtsam::Point3 kpt_in_world; - // bool triangulate_success = - // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world); - if (kpt_in_world) { - local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world); - viz_lmks.emplace_back(*kpt_in_world); - if (symbols_landmarks_values_iter.find(symbol_lmk) != - symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world); - } else { - symbols_landmarks_values_iter.emplace( - symbol_lmk, std::vector{*kpt_in_world}); - } - } - } - std::cout << "setup complete!" << std::endl; - if (visualize) - robot::visualization::viz_scene(viz_world_from_cams, viz_lmks, - cv::viz::Color::brown(), true, true, - "Local Optimization " + std::to_string(i)); - - const gtsam::Values symbols_result_local = detail_sfm::optimize_graph( - local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); - - for (const gtsam::Symbol &symbol_pose : symbols_poses) { - const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); - symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); - } - for (const gtsam::Symbol &symbol_lmk : symbols_lmks) { - const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); - symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); - } - } - - std::cout << "\nLocal Optimizations Complete!\n" << std::endl; - - for (std::pair> sym_pose : - symbols_poses_values_iter) { - initial_estimate_.insert_or_assign(sym_pose.first, - detail_sfm::averagePoses(sym_pose.second)); - } - for (std::pair> sym_lmk : - symbols_landmarks_values_iter) { - initial_estimate_.insert_or_assign(sym_lmk.first, - detail_sfm::averagePoints(sym_lmk.second)); - } - } - - // do global optimization - const gtsam::Values result = - detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0); - - // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference - double sum_traj_error = 0; - double sum_rot_error = 0; - for (size_t i = 0; i < symbols_pose.size(); i++) { - const gtsam::Pose3 traj_pose = result.at(symbols_pose[i]); - sum_traj_error += std::pow( - (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2); - sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i], - Eigen::Isometry3d(traj_pose.matrix())); - } - std::cout << "sum_rot_error: " << sum_rot_error << std::endl; - double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); - double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); - std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; - - std::cout << "about to visualize result" << std::endl; - result.print(); - detail_sfm::graph_values(result, "Result", symbols_pose, std::vector()); - // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks); -} \ No newline at end of file diff --git a/experimental/learn_descriptors/map.hh b/experimental/learn_descriptors/map.hh deleted file mode 100644 index a22c1e5c7..000000000 --- a/experimental/learn_descriptors/map.hh +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -// #include "Eigen/" - -namespace robot::experimental::learn_descriptors {} \ No newline at end of file diff --git a/experimental/learn_descriptors/map.proto b/experimental/learn_descriptors/map.proto deleted file mode 100644 index 67d0c5db0..000000000 --- a/experimental/learn_descriptors/map.proto +++ /dev/null @@ -1,15 +0,0 @@ - -syntax = "proto3"; - -package robot.experimental.learn_descriptors; - -message Map { - message Quaternion { - // Represents a quaternion as a + bi + cj + dk - double a = 1; - double b = 2; - double c = 3; - double d = 4; - } - Quaternion quaternion = 1; -} diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc deleted file mode 100644 index 054c8a197..000000000 --- a/experimental/learn_descriptors/refactor_sfm_mvp.cc +++ /dev/null @@ -1,389 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Eigen/Core" -#include "Eigen/Geometry" -#include "common/geometry/camera.hh" -#include "cxxopts.hpp" -#include "experimental/learn_descriptors/backend.hh" -#include "experimental/learn_descriptors/camera_calibration.hh" -#include "experimental/learn_descriptors/four_seasons_parser.hh" -#include "experimental/learn_descriptors/frame.hh" -#include "experimental/learn_descriptors/frontend.hh" -#include "experimental/learn_descriptors/frontend_definitions.hh" -#include "experimental/learn_descriptors/image_point_four_seasons.hh" -#include "experimental/learn_descriptors/structure_from_motion_types.hh" -#include "gtsam/geometry/Cal3_S2.h" -#include "gtsam/geometry/Point2.h" -#include "gtsam/geometry/Point3.h" -#include "gtsam/geometry/Pose3.h" -#include "gtsam/geometry/Rot3.h" -#include "gtsam/geometry/triangulation.h" -#include "gtsam/inference/Symbol.h" -#include "gtsam/linear/NoiseModel.h" -#include "gtsam/navigation/GPSFactor.h" -#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" -#include "gtsam/nonlinear/NonlinearFactorGraph.h" -#include "gtsam/nonlinear/Values.h" -#include "gtsam/slam/BetweenFactor.h" -#include "gtsam/slam/PriorFactor.h" -#include "gtsam/slam/ProjectionFactor.h" -#include "opencv2/opencv.hpp" -#include "visualization/opencv/opencv_viz.hh" - -std::optional attempt_triangulate(const std::vector &cam_poses, - const std::vector &cam_obs, - gtsam::Cal3_S2::shared_ptr K, - const double max_reproj_error = 2.0, - const bool verbose = true) { - gtsam::Point3 p_lmk_in_world; - if (cam_poses.size() > 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - p_lmk_in_world = gtsam::triangulatePoint3( - cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - - } catch (const std::exception &e) { - // Handle the exception gracefully by logging and retaining the previous - // estimate or discard - if (verbose) { - std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: " - << e.what() << ". Discarding involved keypoints." << std::endl; - } - return std::nullopt; - } - } else { - return std::nullopt; - } - // Optional: perform an explicit cheirality check - for (const auto &pose : cam_poses) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world); - if (p_cam_lmk.z() <= 0) { // Check that the depth is positive - return std::nullopt; - } - } - - // Cheirality & reprojection checks - for (size_t i = 0; i < cam_poses.size(); ++i) { - const auto &pose = cam_poses[i]; - // Cheirality - gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world); - if (p_cam.z() <= 0) { - if (verbose) { - std::cerr << "[attempt_triangulate] point behind camera " << i - << " (z=" << p_cam.z() << ")\n"; - } - return std::nullopt; - } - // Reprojection error - if (max_reproj_error > 0) { - gtsam::PinholeCamera cam(pose, *K); - const auto reproj = cam.project(p_lmk_in_world); - const double err = (reproj - cam_obs[i]).norm(); - if (err > max_reproj_error) { - if (verbose) { - std::cerr << "[attempt_triangulate] reprojection error too large on view " << i - << " (" << err << " px)\n"; - } - return std::nullopt; - } - } - } - return p_lmk_in_world; -} - -void graph_values(const gtsam::Values &values, const std::string &window_name, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; - for (const gtsam::Symbol &symbol_pose : symbols_pose) { - final_poses.emplace_back(Eigen::Isometry3d(values.at(symbol_pose).matrix()), - symbol_pose.string()); - } - for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { - if (!values.exists(symbol_lmk)) { - std::cout << "WTF " << symbol_lmk << std::endl; - } - final_lmks.emplace_back(values.at(symbol_lmk), symbol_lmk.string()); - } - std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl; - robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true, - window_name); -} - -gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks, - const int num_epochs = 5, bool viz_itr = false) { - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; - gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); - - double prev_error = optimizer.error(); - typedef int epoch; - std::function &, - const std::vector &)> - graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::cout << "iteration " << iter << " complete!"; - std::string window_name = "Iteration_" + std::to_string(iter); - graph_values(vals, window_name, symbols_pose, symbols_landmarks); - }; - - for (int i = 0; i < num_epochs; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (viz_itr) { - graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } - } - return optimizer.values(); -} - -double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) { - Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation(); - - // 2. Compute trace and clamp to [-1,1] for numerical safety - double tr = R_err.trace(); - double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0)); - - // 3. Recover angle (in radians) - return std::acos(cos_theta); -} - -int main(int argc, const char **argv) { - using namespace robot::experimental::learn_descriptors; - // clang-format off - cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser"); - options.add_options() - ("data_dir", "Path to dataset root directory", cxxopts::value()) - ("calibration_dir", "Path to dataset calibration directory", cxxopts::value()) - ("help", "Print usage"); - // clang-format on - - auto args = options.parse(argc, argv); - - const auto check_required = [&](const std::string &opt) { - if (args.count(opt) == 0) { - std::cout << "Missing " << opt << " argument" << std::endl; - std::cout << options.help() << std::endl; - std::exit(1); - } - }; - - if (args.count("help")) { - std::cout << options.help() << std::endl; - return 0; - } - - check_required("data_dir"); - check_required("calibration_dir"); - - const std::filesystem::path path_data = args["data_dir"].as(); - const std::filesystem::path path_calibration = args["calibration_dir"].as(); - FourSeasonsParser parser(path_data, path_calibration); - - std::cout << "refactor_sfm_mvp!" << std::endl; - - FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN, - true, false}; - Frontend frontend(params); - - for (size_t i = 633; i < 646; i += 2) { - frontend.add_image( - ImageAndPoint{parser.load_image(i), - std::make_shared(parser.get_image_point(i))}); - const ImagePointFourSeasons img_pt = parser.get_image_point(i); - std::cout << img_pt.to_string() << std::endl; - } - frontend.populate_frames(); - frontend.match_frames_and_build_tracks(); - - // visualization - std::vector viz_poses_init; - std::optional first_grnd_trth; - for (const Frame &frame : frontend.frames()) { - if (frame.world_from_cam_groundtruth_) { - std::cout << "adding a ground truth frame to viz!" << std::endl; - Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix()); - if (!first_grnd_trth) first_grnd_trth = w_from_cam_grnd_trth; - w_from_cam_grnd_trth.translation() -= first_grnd_trth->translation(); - viz_poses_init.emplace_back(w_from_cam_grnd_trth, - "x_grnd_" + std::to_string(frame.id_)); - }; - } - - std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl; - robot::visualization::viz_scene(viz_poses_init, std::vector(), - cv::viz::Color::brown(), true, true, - "Frontend initial guesses"); - - // ############# BACKEND ############### - constexpr bool use_grndtrth_only = false; - gtsam::Values initial_estimate_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = gtsam::noiseModel::Isotropic::Sigma( - 2, 1.0); // should probably mess with this, could also use essential matrix values to guide - // this potentially - gtsam::Vector6 prior_sigmas; - prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; - gtsam::Vector3 gps_sigmas_fallback; - gps_sigmas_fallback << 1.5, 1.5, 1.5; - gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = - gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); - gtsam::noiseModel::Diagonal::shared_ptr gps_noise = - gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas_fallback); - - // add gps factors - std::vector &frames = frontend.frames(); - frames[0].world_from_cam_initial_guess_ = - frames[0].world_from_cam_groundtruth_->rotation(); // make sure first idx has grnd trth - - Backend::populate_rotation_estimate(frames); - std::vector symbols_pose; - std::unordered_map world_from_cam_initial_guess; - std::optional> interpolated_cam_in_w = - frontend.interpolated_initial_translations(); - ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w); - - Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front(); - std::optional grndtrth_cam0_in_w; - std::vector idx_grndtrth_frame; - std::vector viz_pose; - auto noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas( - (gtsam::Vector(6) << 0, 0, 0, // rotation stdev in radians - 1e-6, 1e-6, 1e-6 // translation stdev in meters - ) - .finished()); - const gtsam::Pose3 w_from_cam0_init_estimate(*frames[0].world_from_cam_initial_guess_, - gtsam::Point3()); - const gtsam::Symbol symbol_cam0(Backend::symbol_char_pose, 0); - graph_.add(gtsam::PriorFactor( - symbol_cam0, w_from_cam0_init_estimate, - noise_tight_prior)); // currently assuming that we have groundtruth on the first pose - initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate); - std::string str_pose0 = - std::string("x_") + (frames[0].world_from_cam_groundtruth_ ? "g_" : "") + - (!frames[0].world_from_cam_initial_guess_ ? "norot_" : "") + std::to_string(frames[0].id_); - viz_pose.emplace_back(Eigen::Isometry3d(w_from_cam0_init_estimate.matrix()), str_pose0); - symbols_pose.push_back(symbol_cam0); - for (size_t i = 1; i < frames.size(); i++) { - const Frame &frame = frames[i]; - std::optional world_from_cam_estimate; - if (use_grndtrth_only && !frame.world_from_cam_groundtruth_) { - continue; - } else if (use_grndtrth_only && frame.world_from_cam_groundtruth_) { - if (!grndtrth_cam0_in_w) - grndtrth_cam0_in_w = frame.world_from_cam_groundtruth_->translation(); - world_from_cam_estimate = - gtsam::Pose3(frame.world_from_cam_groundtruth_->rotation(), - gtsam::Point3(frame.world_from_cam_groundtruth_->translation() - - *grndtrth_cam0_in_w)); - idx_grndtrth_frame.push_back(i); - std::cout << "populating groundtruth pose: " << world_from_cam_estimate->translation() - << std::endl; - } else { - world_from_cam_estimate = gtsam::Pose3(frame.world_from_cam_initial_guess_ - ? *frame.world_from_cam_initial_guess_ - : gtsam::Rot3::Identity(), - (*interpolated_cam_in_w)[i] - cam0_in_w); - } - std::cout << "world_from_cam_estimate_" << i << ": " << *world_from_cam_estimate - << std::endl; - world_from_cam_initial_guess[frame.id_] = *world_from_cam_estimate; - const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_); - initial_estimate_.insert(cam_symbol, *world_from_cam_estimate); - if (frame.cam_in_world_initial_guess_) { - if (frame.translation_covariance_in_cam_) { - std::cout << "translation covariance: " << *frame.translation_covariance_in_cam_ - << std::endl; - } - gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas( - frame.translation_covariance_in_cam_ - ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)), - std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)), - std::sqrt((*frame.translation_covariance_in_cam_)(2, 2))) - : gps_sigmas_fallback); - graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise)); - std::cout << "adding gps factor!" << std::endl; - } - std::string str_pose = std::string("x_") + (frame.world_from_cam_groundtruth_ ? "g_" : "") + - (!frame.world_from_cam_initial_guess_ ? "norot_" : "") + - std::to_string(frame.id_); - viz_pose.emplace_back(Eigen::Isometry3d(world_from_cam_estimate->matrix()), str_pose); - symbols_pose.push_back(cam_symbol); - } - - // add landmarks to graph - std::vector symbols_lmks; - std::vector viz_pts; - const FeatureTracks feature_tracks = frontend.feature_tracks(); - for (size_t i = 0; i < feature_tracks.size(); i++) { - std::vector world_from_lmk_cams; - std::vector lmk_observations; - for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { - world_from_lmk_cams.push_back(world_from_cam_initial_guess[frame_id]); - lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y); - } - std::optional landmark_estimate = - attempt_triangulate(world_from_lmk_cams, lmk_observations, frames[0].K_, 2.0, - false); // all K are the same for now... - if (landmark_estimate) { - const gtsam::Symbol lmk_symbol(Backend::symbol_char_landmark, i); - for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) { - graph_.add(gtsam::GenericProjectionFactor( - gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise, - gtsam::Symbol(Backend::symbol_char_pose, frame_id), lmk_symbol, frames[0].K_)); - } - initial_estimate_.insert(lmk_symbol, *landmark_estimate); - symbols_lmks.push_back(lmk_symbol); - viz_pts.emplace_back(*landmark_estimate, "lmk_" + std::to_string(i)); - } - } - robot::visualization::viz_scene(viz_pose, viz_pts, cv::viz::Color::brown(), true, true, - "Initial guesses in backend"); - - // do global optimization - const gtsam::Values result = - optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 10); - - // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference - double sum_traj_error = 0; - double sum_rot_error = 0; - for (const size_t i_grndtrth_frame : idx_grndtrth_frame) { - const Frame &frame = frames[i_grndtrth_frame]; - const gtsam::Pose3 traj_pose = - result.at(gtsam::Symbol(Backend::symbol_char_pose, frame.id_)); - sum_traj_error += std::pow( - (frame.world_from_cam_groundtruth_->translation() - traj_pose.translation()).norm(), 2); - sum_rot_error += - rotation_error(Eigen::Isometry3d(frame.world_from_cam_groundtruth_->matrix()), - Eigen::Isometry3d(traj_pose.matrix())); - } - std::cout << "sum_rot_error: " << sum_rot_error << std::endl; - double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size()); - double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size()); - std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl; - - std::cout << "about to visualize result" << std::endl; - graph_values(result, "Result", symbols_pose, symbols_lmks); -} \ No newline at end of file diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc deleted file mode 100644 index 9771f2a29..000000000 --- a/experimental/learn_descriptors/sfm_mvp.cc +++ /dev/null @@ -1,685 +0,0 @@ -#include -#include - -#include "Eigen/Dense" -#include "boost/make_shared.hpp" -#include "experimental/learn_descriptors/frame.hh" -#include "experimental/learn_descriptors/frontend.hh" -#include "experimental/learn_descriptors/frontend_definitions.hh" -#include "experimental/learn_descriptors/structure_from_motion_types.hh" -#include "experimental/learn_descriptors/symphony_lake_parser.hh" -#include "gtest/gtest.h" -#include "gtsam/geometry/Cal3_S2.h" -#include "gtsam/geometry/Point2.h" -#include "gtsam/geometry/Point3.h" -#include "gtsam/geometry/Pose3.h" -#include "gtsam/geometry/Rot3.h" -#include "gtsam/geometry/triangulation.h" -#include "gtsam/inference/Symbol.h" -#include "gtsam/linear/NoiseModel.h" -#include "gtsam/navigation/GPSFactor.h" -#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h" -#include "gtsam/nonlinear/NonlinearFactorGraph.h" -#include "gtsam/nonlinear/Values.h" -#include "gtsam/slam/BetweenFactor.h" -#include "gtsam/slam/PriorFactor.h" -#include "gtsam/slam/ProjectionFactor.h" -#include "opencv2/opencv.hpp" -#include "visualization/opencv/opencv_viz.hh" - -namespace std { -template <> -struct hash { - size_t operator()(const gtsam::Symbol &s) const { return hash()(s.key()); } -}; -} // namespace std - -namespace robot::experimental::learn_descriptors { -class SFMMvpHelper { - public: - // struct SymbolHasher { - // size_t operator()(const gtsam::Symbol &s) const { return std::hash()(s.key()); - // } - // }; - - static bool attempt_triangulate(const std::vector &cam_poses, - const std::vector &cam_obs, - gtsam::Cal3_S2::shared_ptr K, - gtsam::Point3 &out_p_world_landmark) { - bool valid = true; - if (cam_poses.size() >= 2) { - try { - // Attempt triangulation using DLT (or the GTSAM provided method) - gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3( - cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end())); - - out_p_world_landmark = p_world_lmk; - - // Optional: perform an explicit cheirality check - for (const auto &pose : cam_poses) { - // Transform point to the camera coordinate system. - // transformTo() converts a world point to the camera frame. - gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk); - if (p_cam_lmk.z() <= 0) { // Check that the depth is positive - valid = false; - break; - } - } - } catch (const gtsam::TriangulationCheiralityException &e) { - // Handle the exception gracefully by logging and retaining the previous - // estimate. - std::cerr << "Triangulation Cheirality Exception: " << e.what() - << ". Keeping initial landmark estimate." << std::endl; - } - } else { - valid = false; - } - return valid; - } - - static void graph_values(const gtsam::Values &values, const std::string &window_name, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::vector final_poses; - std::vector final_lmks; - for (const gtsam::Symbol &symbol_pose : symbols_pose) { - final_poses.emplace_back(values.at(symbol_pose).matrix()); - } - for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) { - if (!values.exists(symbol_lmk)) { - std::cout << "WTF " << symbol_lmk << std::endl; - } - final_lmks.emplace_back(values.at(symbol_lmk)); - } - geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, - window_name); - } - - static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, - const gtsam::Values &values, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks, - bool viz_itr = false) { - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; - gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params); - - double prev_error = optimizer.error(); - const int num_steps = 5; - typedef int epoch; - std::function &, - const std::vector &)> - graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::cout << "iteration " << iter << " complete!"; - std::string window_name = "Iteration_" + std::to_string(iter); - SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks); - }; - - for (int i = 0; i < num_steps; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (viz_itr) { - graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } - } - return optimizer.values(); - } - - static gtsam::Pose3 averagePoses(const std::vector &poses, int maxIter = 10) { - if (poses.empty()) throw std::runtime_error("Empty pose vector"); - - gtsam::Pose3 mean = poses[0]; - - for (int iter = 0; iter < maxIter; ++iter) { - gtsam::Vector6 total = gtsam::Vector6::Zero(); - - for (const auto &pose : poses) { - gtsam::Pose3 delta = mean.between(pose); - total += gtsam::Pose3::Logmap(delta); - } - - total /= poses.size(); - mean = mean.compose(gtsam::Pose3::Expmap(total)); - } - - return mean; - } - - static gtsam::Point3 averagePoints(const std::vector &points) { - if (points.empty()) throw std::runtime_error("Empty point vector"); - gtsam::Point3 sum(0, 0, 0); - for (const auto &pt : points) sum += pt; - return sum / points.size(); - } - - static gtsam::Point3 get_variance(const std::vector &points) { - const gtsam::Point3 mean = SFMMvpHelper::averagePoints(points); - gtsam::Point3 var(0, 0, 0); - for (const gtsam::Point3 &pt : points) { - var += (mean - pt).array().square().matrix(); - } - return var / points.size(); - } -}; - -TEST(SFMMvp, sfm_building_manual_global) { - const std::vector indices{60, 80, 100, 120, 140}; - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - - // const size_t img_width = img_pt_first.width, img_height = - // img_pt_first.height; - const double fx = img_pt_first.fx, fy = img_pt_first.fy; - const double cx = img_pt_first.cx, cy = img_pt_first.cy; - gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); - Eigen::Matrix D = - (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, - SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - .finished(); - cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); - cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - - // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0 - // * boat_from_camera - Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first); - Eigen::Isometry3d world_from_boat0; - world_from_boat0.linear() = earth_from_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = - world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first); - // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // gtsam::Pose3(T_world_camera0.matrix())); - Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); - - gtsam::Values initial_estimate_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::Vector6 prior_sigmas; - prior_sigmas << 0.04, 0.04, 0.09, 0.01, 0.01, 0.01; - gtsam::Vector3 gps_sigmas; - gps_sigmas << 2.1, 2.1, 0.1; - gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = - gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); - gtsam::noiseModel::Diagonal::shared_ptr gps_noise = - gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); - - // populate frames and cam_poses - std::vector frames; - FrameId id = 0; - std::unordered_map cam_pose; - std::vector cam_isometries; - for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - cv::Mat img_undistorted; - cv::undistort(img, img_undistorted, K_mat, D_mat); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - - Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt); - world_from_boat.translation() -= earth_from_boat0.translation(); - Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt); - gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); - cam_pose.emplace(id, T_world_cam_gtsam); - cam_isometries.push_back(T_world_cam); - - // graph_.emplace_shared>(gtsam::Symbol('x', id), - // T_world_cam_gtsam, pose_noise); - - Frame frame{id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())}; - - std::pair, cv::Mat> kpts_descs = - frontend.extract_features(img_undistorted); - KeypointsCV kpts; - for (const cv::KeyPoint &kpt : kpts_descs.first) { - kpts.push_back(kpt.pt); - } - frame.add_keypoints(kpts); - frame.assign_descriptors(kpts_descs.second); - - frames.push_back(frame); - - id++; - } - - geometry::viz_scene(cam_isometries, std::vector()); - - // populate feature_tracks and lmk_id_map - FeatureTracks feature_tracks; - FrameLandmarkIdMap lmk_id_map; - LandmarkId lmk_id = 0; - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = - frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); - // frontend.enforce_bijective_matches(matches); - frontend.enforce_bijective_buffer_matches(matches); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; - - auto key = std::make_pair(i, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(i + 1, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id); - lmk_id++; - } - } - } - - // populate graph - std::vector symbols_pose; - std::vector symbols_landmarks; - for (const std::pair lmk_feat : feature_tracks) { - std::vector feat_cam_poses; - std::vector feat_kpts; - LandmarkId lmk_id = lmk_feat.first; - FeatureTrack feature_track = lmk_feat.second; - const gtsam::Symbol symbol_lmk('l', lmk_id); - symbols_landmarks.push_back(symbol_lmk); - for (const std::pair &feat_track : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0)); - graph_.emplace_shared>( - gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, - gtsam::Symbol('x', feat_track.first), symbol_lmk, K); - - feat_cam_poses.push_back(cam_pose[feat_track.first]); - feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); - } - gtsam::Point3 p_wrld_kpt; - bool triangulate_success = - SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); - if (triangulate_success) { - initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - } - } - graph_.emplace_shared>(gtsam::Symbol('x', 0), cam_pose.at(0), - prior_pose_noise); - initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); - symbols_pose.push_back(gtsam::Symbol('x', 0)); - for (const std::pair cam_id_pose : cam_pose) { - if (cam_id_pose.first == 0) { - continue; - } - const gtsam::Symbol key_cam('x', cam_id_pose.first); - symbols_pose.push_back(key_cam); - gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); - graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); - initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); - // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); - } - - // initial_estimate_.print("huh"); - SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks); - - gtsam::LevenbergMarquardtParams params; - params.setVerbosityLM("SUMMARY"); // or "TERMINATION", "TRYLAMBDA", etc. - params.maxIterations = 1; // We'll manually step it - gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params); - - double prev_error = optimizer.error(); - const int num_steps = 5; - const bool viz_itr = true; - typedef int epoch; - std::function &, - const std::vector &)> - graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter, - const std::vector &symbols_pose, - const std::vector &symbols_landmarks) { - std::cout << "iteration " << iter << " complete!"; - std::string window_name = "Iteration_" + std::to_string(iter); - SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks); - }; - - SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose, symbols_landmarks); - for (int i = 0; i < num_steps; i++) { - optimizer.iterate(); - double curr_error = optimizer.error(); - - if (viz_itr) { - graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks); - } - if (std::abs(prev_error - curr_error) < 1e-6) { - std::cout << "Converged at iteration " << i << "\n"; - break; - } - } - const gtsam::Values result = optimizer.values(); -} - -TEST(SFMMvp, sfm_building_manual_incremental) { - const std::vector indices{60, 80, 100, 120, 140}; - // const std::vector indices = []() { - // std::vector tmp; - // for (int i = 0; i < 200; i += 10) { - // tmp.push_back(i); - // } - // return tmp; - // }(); - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - - // const size_t img_width = img_pt_first.width, img_height = - // img_pt_first.height; - const double fx = img_pt_first.fx, fy = img_pt_first.fy; - const double cx = img_pt_first.cx, cy = img_pt_first.cy; - gtsam::Cal3_S2::shared_ptr K = boost::make_shared(fx, fy, 0, cx, cy); - Eigen::Matrix D = - (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, - SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - .finished(); - cv::Mat K_mat = (cv::Mat_(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1); - cv::Mat D_mat = (cv::Mat_(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0)); - - // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0 - Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first); - Eigen::Isometry3d world_from_boat0; - world_from_boat0.linear() = earth_from_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = - world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first); - // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - // gtsam::Pose3(T_world_camera0.matrix())); - Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN); - - gtsam::Values initial_estimate_; - gtsam::NonlinearFactorGraph graph_; - - gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = - gtsam::noiseModel::Isotropic::Sigma(2, 1.0); - gtsam::Vector6 prior_sigmas; - prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1; - gtsam::Vector3 gps_sigmas; - gps_sigmas << 2.1, 2.1, 0.1; - gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise = - gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas); - gtsam::noiseModel::Diagonal::shared_ptr gps_noise = - gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas); - - // populate frames and cam_poses - std::vector frames; - FrameId id = 0; - std::unordered_map cam_pose; - std::vector cam_isometries; - for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - cv::Mat img_undistorted; - cv::undistort(img, img_undistorted, K_mat, D_mat); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - - Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt); - world_from_boat.translation() -= earth_from_boat0.translation(); - Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt); - gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix()); - cam_pose.emplace(id, T_world_cam_gtsam); - cam_isometries.push_back(T_world_cam); - - Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())); - - std::pair, cv::Mat> kpts_descs = - frontend.extract_features(img_undistorted); - KeypointsCV kpts; - for (const cv::KeyPoint &kpt : kpts_descs.first) { - kpts.push_back(kpt.pt); - } - frame.add_keypoints(kpts); - frame.assign_descriptors(kpts_descs.second); - - frames.push_back(frame); - - id++; - } - - geometry::viz_scene(cam_isometries, std::vector()); - - // populate feature_tracks and lmk_id_map - FeatureTracks feature_tracks; - FrameLandmarkIdMap lmk_id_map; - LandmarkId lmk_id = 0; - for (size_t i = 0; i < indices.size() - 1; i++) { - std::vector matches = - frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); - frontend.enforce_bijective_buffer_matches(matches); - for (const cv::DMatch match : matches) { - const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0); - feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - } else { - FeatureTrack feature_track(i, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id); - lmk_id++; - } - } - } - - // triangulate all of the points - - std::unordered_map lmk_triangulated_map; - std::vector triangulated_lmks; - for (const std::pair lmk_feat : feature_tracks) { - std::vector feat_cam_poses; - std::vector feat_kpts; - LandmarkId lmk_id = lmk_feat.first; - FeatureTrack feature_track = lmk_feat.second; - gtsam::Point3 p_wrld_kpt(0, 0, 0); - for (const std::pair &feat_track : feature_track.obs_) { - feat_cam_poses.push_back(cam_pose[feat_track.first]); - feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y)); - } - bool triangulate_success = - SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); - if (triangulate_success) { - lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt); - triangulated_lmks.push_back(p_wrld_kpt); - } else { - continue; - } - } - // filter points - geometry::viz_scene(std::vector(), triangulated_lmks, - cv::viz::Color::black(), true, true, "Unfiltered points"); - const gtsam::Point3 variance_pts = SFMMvpHelper::get_variance(triangulated_lmks); - const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix(); - const gtsam::Point3 mean_pts = SFMMvpHelper::averagePoints(triangulated_lmks); - std::unordered_map lmk_triangulated_map_filtered; - std::vector filtered_points; - std::cout << "original var " << variance_pts << std::endl; - for (const std::pair lmk_id_pt : lmk_triangulated_map) { - const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix(); - if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) { - lmk_triangulated_map_filtered.emplace(lmk_id_pt); - filtered_points.push_back(lmk_id_pt.second); - } - } - std::cout << "filtered variance " << SFMMvpHelper::get_variance(filtered_points) << std::endl; - geometry::viz_scene(std::vector(), filtered_points, cv::viz::Color::black(), - true, true, "Unfiltered points"); - - // add filtered points to graph - std::unordered_map> symbols_poses_values_iter; - std::unordered_map> symbols_landmarks_values_iter; - std::vector symbols_pose; - std::vector symbols_landmarks; - for (const std::pair lmk_id_pt : lmk_triangulated_map_filtered) { - LandmarkId lmk_id = lmk_id_pt.first; - const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second; - FeatureTrack feature_track = feature_tracks.at(lmk_id); - const gtsam::Symbol symbol_lmk('l', lmk_id); - for (const std::pair &feat_track : feature_track.obs_) { - initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{p_wrld_kpt}); - graph_.emplace_shared>( - gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise, - gtsam::Symbol('x', feat_track.first), symbol_lmk, K); - } - } - graph_.emplace_shared>(gtsam::Symbol('x', 0), cam_pose.at(0), - prior_pose_noise); - initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0)); - symbols_pose.push_back(gtsam::Symbol('x', 0)); - symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0), - std::vector{cam_pose.at(0)}); - for (const std::pair cam_id_pose : cam_pose) { - if (cam_id_pose.first == 0) { - continue; - } - const gtsam::Symbol key_cam('x', cam_id_pose.first); - symbols_pose.push_back(key_cam); - gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation(); - graph_.emplace_shared(key_cam, p_wrld_cam, gps_noise); - initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second); - symbols_poses_values_iter.emplace(key_cam, std::vector{cam_id_pose.second}); - // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam)); - } - - // SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, - // symbols_landmarks); - - // do local optimizations and add to iter cache - // const int window = 2; - std::cout << "length of cam_poses: " << cam_pose.size() << std::endl; - for (const auto &id_pose : cam_pose) { - std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl; - } - for (size_t i = 0; i < indices.size() - 1; i++) { - std::cout << "Local optimization " << i << std::endl; - gtsam::Values local_estimate_; - gtsam::NonlinearFactorGraph local_graph_; - - std::vector poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)}; - std::vector lmks; - - local_graph_.emplace_shared>( - gtsam::PriorFactor(poses[0], cam_pose.at(i), prior_pose_noise)); - std::cout << "fuck" << std::endl; - local_graph_.emplace_shared>( - gtsam::PriorFactor(poses[1], cam_pose.at(i + 1), prior_pose_noise)); - std::cout << "mega fuck" << std::endl; - local_estimate_.insert_or_assign(poses[0], cam_pose.at(i)); - local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1)); - - std::vector matches = - frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors()); - frontend.enforce_bijective_matches(matches); - std::vector feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)}; - - std::vector viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()), - Eigen::Isometry3d(feat_cam_poses[1].matrix())}; - std::vector viz_lmks; - for (const cv::DMatch match : matches) { - std::vector feat_kpts; - const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx]; - const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx]; - feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y); - feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y); - - auto key = std::make_pair(frames[i].id_, kpt_cam0); - if (lmk_id_map.find(key) != lmk_id_map.end()) { - auto id = lmk_id_map.at(key); - if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) { - continue; - } - std::cout << "good" << std::endl; - // do nothing - // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0); - // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1); - } else { - std::cerr << "this shouldn't happen right?" << std::endl; - FeatureTrack feature_track(frames[i].id_, kpt_cam0); - feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1); - feature_tracks.emplace(lmk_id, feature_track); - lmk_id_map.emplace(key, lmk_id); - lmk_id++; - } - std::cout << "oog" << std::endl; - const gtsam::Symbol symbol_lmk = - gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))); - // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) != - // gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) { - // std::cerr << "UH OH" << std::endl; - // } else { - // std::cout << "cool" << std::endl; - // } - lmks.push_back(symbol_lmk); - local_graph_ - .emplace_shared>( - feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K); - local_graph_ - .emplace_shared>( - feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K); - - gtsam::Point3 p_wrld_kpt; - bool triangulate_success = - SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt); - if (triangulate_success) { - local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt); - viz_lmks.push_back(p_wrld_kpt); - } - if (symbols_landmarks_values_iter.find(symbol_lmk) != - symbols_landmarks_values_iter.end()) { - symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt); - } else { - symbols_landmarks_values_iter.emplace(symbol_lmk, - std::vector{p_wrld_kpt}); - } - } - std::cout << "setup complete!" << std::endl; - // geometry::viz_scene(viz_poses, viz_lmks, true, true, - // "Local Optimization " + std::to_string(i)); - - const gtsam::Values symbols_result_local = SFMMvpHelper::optimize_graph( - local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false); - - for (const gtsam::Symbol &symbol_pose : poses) { - const gtsam::Pose3 T_wrld_cam = symbols_result_local.at(symbol_pose); - symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam); - } - for (const gtsam::Symbol &symbol_lmk : lmks) { - const gtsam::Point3 p_wrld_lmk = symbols_result_local.at(symbol_lmk); - symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk); - } - } - - std::cout << "\nLocal Optimizations Complete!\n" << std::endl; - - for (std::pair> sym_pose : symbols_poses_values_iter) { - initial_estimate_.insert_or_assign(sym_pose.first, - SFMMvpHelper::averagePoses(sym_pose.second)); - } - for (std::pair> sym_lmk : - symbols_landmarks_values_iter) { - initial_estimate_.insert_or_assign(sym_lmk.first, - SFMMvpHelper::averagePoints(sym_lmk.second)); - } - - // do global optimization - const gtsam::Values result = - SFMMvpHelper::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks); - std::cout << "about to visualize result" << std::endl; - SFMMvpHelper::graph_values(result, "Result", symbols_pose, symbols_landmarks); -} - -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc index 95c45a650..a6989ca95 100644 --- a/experimental/learn_descriptors/structure_from_motion_example.cc +++ b/experimental/learn_descriptors/structure_from_motion_example.cc @@ -53,12 +53,10 @@ int main(int argc, const char **argv) { FrontendParams::MatcherType::KNN, true, false}; StructureFromMotion sfm(frontend_params); - // for (size_t i = 633; i < 1000; i += 1) { - // const ImagePointFourSeasons img_pt = parser.get_image_point(i); - // sfm.add_image_point(parser.load_image(i), - // std::make_shared(img_pt)); - // } - for (size_t i = 835; i < 900; i += 4) { + // short sequences (maybe about a doezen points that have decent overlap and aren't spaced too + // far apart on the order of maybe a meter) perform ok. many improvements needed for the overall + // system + for (size_t i = 835; i < 855; i += 2) { const ImagePointFourSeasons img_pt = parser.get_image_point(i); sfm.add_image_point(parser.load_image(i), std::make_shared(img_pt)); } From a17767f081be4775566b5d508fafa77e01c8df49 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 24 Jul 2025 12:47:13 -0400 Subject: [PATCH 41/45] bug fixes --- experimental/learn_descriptors/BUILD | 55 +------ .../learn_descriptors/backend_test.cc | 56 ------- .../learn_descriptors/four_seasons_parser.hh | 2 +- .../four_seasons_parser_example_viz.cc | 9 +- .../learn_descriptors/frontend_test.cc | 43 +++--- .../structure_from_motion_test.cc | 140 ------------------ 6 files changed, 30 insertions(+), 275 deletions(-) delete mode 100644 experimental/learn_descriptors/backend_test.cc delete mode 100644 experimental/learn_descriptors/structure_from_motion_test.cc diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD index 6f710c081..3f1229b82 100644 --- a/experimental/learn_descriptors/BUILD +++ b/experimental/learn_descriptors/BUILD @@ -37,22 +37,6 @@ cc_library( ], ) -cc_test( - name = "structure_from_motion_test", - srcs = ["structure_from_motion_test.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - ":frame", - ":structure_from_motion", - ":symphony_lake_parser", - "//common/geometry:translate_types", - "//visualization/opencv:opencv_viz", - "@com_google_googletest//:gtest_main", - ], -) - cc_library( name = "symphony_lake_parser", srcs = ["symphony_lake_parser.cc"], @@ -166,20 +150,8 @@ cc_library( deps = [ ":camera_calibration", ":gps_data", - "//common/liegroups:se3", "@com_google_absl//absl/strings:str_format", - "@geographiclib", "@nmea", - ], - "@eigen", - "@opencv//:opencv", - "//common/liegroups:se3", - "//common/time:robot_time", - "@com_google_absl//absl/strings:str_format", - ":image_point", - "@nmea", - "@geographiclib", - "//common:check" ] ) @@ -262,16 +234,6 @@ cc_library( ], ) -cc_library( - name = "feature_set", - hdrs = ["feature_set.hh"], - visibility = ["//visibility:public"], - deps = [ - "@gtsam", - "@opencv", - ], -) - cc_library( name = "spatial_test_scene", srcs = ["spatial_test_scene.cc"], @@ -332,6 +294,7 @@ cc_test( name = "frontend_test", srcs = ["frontend_test.cc"], deps = [ + ":frame", ":frontend", ":image_point", "@com_google_googletest//:gtest_main", @@ -359,22 +322,6 @@ cc_library( ], ) -cc_test( - name = "backend_test", - srcs = ["backend_test.cc"], - copts = [ - "-Wno-error=unused-parameter", - ], - deps = [ - ":backend", - ":feature_manager", - ":spatial_test_scene_cube", - "//visualization/opencv:opencv_viz", - "@com_google_googletest//:gtest_main", - "@opencv", - ], -) - cc_library( name = "camera_calibration", hdrs = ["camera_calibration.hh"], diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc deleted file mode 100644 index 48fe2c450..000000000 --- a/experimental/learn_descriptors/backend_test.cc +++ /dev/null @@ -1,56 +0,0 @@ -#include "experimental/learn_descriptors/backend.hh" - -#include - -#include "experimental/learn_descriptors/feature_manager.hh" -#include "experimental/learn_descriptors/spatial_test_scene_cube.hh" -#include "gtest/gtest.h" -#include "visualization/opencv/opencv_viz.hh" - -namespace robot::experimental::learn_descriptors { -TEST(BackendTest, cube) { - // SpatialTestSceneCube test_scene(1.0f); - - const size_t img_width = 640; - const size_t img_height = 480; - const cv::Size img_size(640, 480); - const double fx = 500.0; - const double fy = fx; - const double cx = img_width / 2.0; - const double cy = img_height / 2.0; - gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy)); - - // Backend backend(std::make_shared(), *K); - // Backend backend(K); - // test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K); - - // std::vector> cameras = test_scene.cameras()(); - // std::vector camera_poses; - // camera_poses.reserve(cameras.size()); - // for (const gtsam::PinholeCamera &T_world_cam : cameras) { - // camera_poses.emplace_back(T_world_cam.pose().matrix()); - // } - - // backend.add_prior_factor(gtsam::Symbol(backend.symbol_char_cam_cal, 0), camera_poses.front(), - // backend.get_pose_noise()); - // for (size_t i = 1; i < camera_poses.size(); i++) { - // backend.add_factor_GPS(gtsam::Symbol(backend.symbol_char_cam_cal, i), - // camera_poses[i].translation(), backend.get_gps_noise()); - - // std::pair, cv::Size> cam_and_img_sizes[2] = { - // std::pair, cv::Size>(cameras[i - 1], img_size), - // std::pair, cv::Size>(cameras[i], img_size)}; - // std::vector> - // lmks = test_scene.get_corresponding_pixels(cam_and_img_sizes); - // for (const &std::pair < SpatialTestScene::ProjectedPoint, - // SpatialTestScene::ProjectedPoint >> lmk : lmks) { - // // // Backend::Landmark() - // } - // } - - // geometry::viz_scene(camera_poses, test_scene.points()); -} - -TEST(BackendTest, test_rotation_estimation) {} -} // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh index 2d188f1b3..4458215c8 100644 --- a/experimental/learn_descriptors/four_seasons_parser.hh +++ b/experimental/learn_descriptors/four_seasons_parser.hh @@ -17,7 +17,7 @@ class FourSeasonsParser : std::enable_shared_from_this { public: static constexpr double CAM_HZ = 30.0; static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ; - + FourSeasonsParser(const std::filesystem::path& root_dir, const std::filesystem::path& calibration_dir); std::shared_ptr get_shared() { return shared_from_this(); } diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index 771ab7d77..ae9036342 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -106,7 +106,7 @@ int main(int argc, const char** argv) { Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix())); viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i)); Eigen::Isometry3d frontend_w_from_cam_groundtruth( - frontend.frames()[i].world_from_cam_groundtruth_->matrix()); + frontend.frames()[i]->world_from_cam_groundtruth_->matrix()); ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()), frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix()); // Eigen::Isometry3d frontend_w_from_cam_groundtruth(*img_pt.world_from_cam_ground_truth()); @@ -139,8 +139,9 @@ int main(int argc, const char** argv) { altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude); } if (!first_gps_to_cam) { - first_gps_to_cam = frontend.frames()[i].world_from_cam_groundtruth_->translation() - - *frontend.frames()[i].cam_in_world_initial_guess_; + first_gps_to_cam = + frontend.frames()[i]->world_from_cam_groundtruth_->translation() - + *frontend.frames()[i]->cam_in_world_initial_guess_; } gcs_coordinate.z() += *altitude_gps_from_gnss_cam; std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs @@ -158,7 +159,7 @@ int main(int argc, const char** argv) { Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() - cam_from_gps.translation(); viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i)); viz_points.emplace_back( - *frontend.frames()[i].cam_in_world_initial_guess_ + *first_gps_to_cam, + *frontend.frames()[i]->cam_in_world_initial_guess_ + *first_gps_to_cam, "x_frontend_cam_gps" + std::to_string(i)); const Eigen::Vector3d gps_from_ref_in_world = gps_in_w.head<3>() - w_from_gnss_cam.translation(); diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc index a04a443ca..c5046fd1f 100644 --- a/experimental/learn_descriptors/frontend_test.cc +++ b/experimental/learn_descriptors/frontend_test.cc @@ -7,6 +7,8 @@ #include #include "Eigen/Core" +#include "experimental/learn_descriptors/frame.hh" +#include "experimental/learn_descriptors/frontend.hh" #include "experimental/learn_descriptors/image_point.hh" #include "gtest/gtest.h" #include "opencv2/opencv.hpp" @@ -51,10 +53,10 @@ TEST(FrontendTest, pipeline_sweep) { cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size()); cv::warpAffine(image_1, image_2, translation_mat, image_1.size()); - cv::Mat img_test_disp; - cv::hconcat(image_1, image_2, img_test_disp); - cv::imshow("Test", img_test_disp); - cv::waitKey(1000); + // cv::Mat img_test_disp; + // cv::hconcat(image_1, image_2, img_test_disp); + // cv::imshow("Test", img_test_disp); + // cv::waitKey(1000); FrontendParams::ExtractorType extractor_types[2] = {FrontendParams::ExtractorType::SIFT, FrontendParams::ExtractorType::ORB}; @@ -70,7 +72,7 @@ TEST(FrontendTest, pipeline_sweep) { std::vector matches; cv::Mat img_keypoints_out_1(height, width, CV_8UC3), img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3); - cv::Mat img_display_test; + // cv::Mat img_display_test; for (FrontendParams::ExtractorType extractor_type : extractor_types) { for (FrontendParams::MatcherType matcher_type : matcher_types) { printf("started frontend combination: (%d, %d)\n", static_cast(extractor_type), @@ -93,15 +95,15 @@ TEST(FrontendTest, pipeline_sweep) { img_keypoints_out_2); frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2, keypoints_descriptors_pair_2.first, matches, img_matches_out); - cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); - cv::vconcat(img_display_test, img_matches_out, img_display_test); - std::stringstream text; - text << "Extractor " << static_cast(extractor_type) << ", matcher " - << static_cast(matcher_type); - cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), - cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); - cv::imshow("Keypoints and Matches Output.", img_display_test); - std::cout << "Hold spacebar to pause." << std::endl; + // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test); + // cv::vconcat(img_display_test, img_matches_out, img_display_test); + // std::stringstream text; + // text << "Extractor " << static_cast(extractor_type) << ", matcher " + // << static_cast(matcher_type); + // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50), + // cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA); + // cv::imshow("Keypoints and Matches Output.", img_display_test); + // std::cout << "Hold spacebar to pause." << std::endl; while (cv::waitKey(1000) == 32) { } printf("completed frontend combination: (%d, %d)\n", static_cast(extractor_type), @@ -182,14 +184,15 @@ TEST(FrontendTest, interpolate_frames) { time += dt; } frontend.populate_frames(); - std::optional> interpolated_pts = - frontend.interpolated_initial_translations(); - ROBOT_CHECK(interpolated_pts); - ROBOT_CHECK(interpolated_pts->size() == img_pts.size()); + frontend.interpolate_frames(); + + const std::vector &shared_frames = frontend.frames(); constexpr double TOL = 1e-5; for (size_t i = 0; i < pts_in_world.size(); i++) { - ROBOT_CHECK(pts_in_world[i].isApprox((*interpolated_pts)[i], TOL), i, pts_in_world[i], - (*interpolated_pts)[i]); + const Frame &frame = *shared_frames[i]; + ROBOT_CHECK(frame.cam_in_world_interpolated_guess_); + ROBOT_CHECK(pts_in_world[i].isApprox(*frame.cam_in_world_interpolated_guess_, TOL), i, + pts_in_world[i], *frame.cam_in_world_interpolated_guess_); } } } // namespace robot::experimental::learn_descriptors \ No newline at end of file diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc deleted file mode 100644 index 3063c2234..000000000 --- a/experimental/learn_descriptors/structure_from_motion_test.cc +++ /dev/null @@ -1,140 +0,0 @@ -#include "experimental/learn_descriptors/structure_from_motion.hh" - -#include -#include -#include - -#include "Eigen/Geometry" -#include "common/geometry/translate_types.hh" -#include "experimental/learn_descriptors/frame.hh" -#include "experimental/learn_descriptors/symphony_lake_parser.hh" -#include "gtest/gtest.h" -#include "visualization/opencv/opencv_viz.hh" - -class GtsamTestHelper { - public: - static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) { - return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height; - } -}; - -namespace robot::experimental::learn_descriptors { -TEST(StructureFromMotiontest, sfm_snippet_small) { - const std::vector indices{120, 130}; // 0-199 - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - - // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; - const double fx = img_pt_first.fx, fy = img_pt_first.fy; - const double cx = img_pt_first.cx, cy = img_pt_first.cy; - gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - Eigen::Matrix D = - (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, - SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - .finished(); - - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera - // T_earth_boat0 = - Eigen::Isometry3d T_earth_world = DataParser::get_world_from_boat(img_pt_first); - Eigen::Isometry3d T_world_camera0 = DataParser::get_boat_from_camera(img_pt_first); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - gtsam::Pose3(T_world_camera0.matrix())); - - std::vector img_vector; - for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - img_vector.push_back(img); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - Eigen::Isometry3d T_earth_boat = DataParser::get_world_from_boat(img_pt); - Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat; - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt); - - sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); - } - - const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); - - sfm.graph_values(initial_values, "initial_values"); - - std::cout << "Solving for structure!" << std::endl; - - sfm.solve_structure(); - - std::cout << "Solution complete." << std::endl; - - const gtsam::Values result_values = sfm.get_structure_result(); - sfm.graph_values(result_values, "result_values"); -} - -TEST(StructureFromMotiontest, sfm_building) { - // indices 0-199 - const std::vector indices = []() { - std::vector tmp; - for (int i = 0; i < 200; i += 10) { - tmp.push_back(i); - } - return tmp; - }(); - DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser(); - const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys(); - const symphony_lake_dataset::Survey &survey = survey_vector.get(0); - const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front()); - - // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height; - const double fx = img_pt_first.fx, fy = img_pt_first.fy; - const double cx = img_pt_first.cx, cy = img_pt_first.cy; - gtsam::Cal3_S2 K(fx, fy, 0, cx, cy); - Eigen::Matrix D = - (Eigen::Matrix() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2, - SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3) - .finished(); - - // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera - Eigen::Isometry3d T_earth_boat0 = DataParser::get_world_from_boat(img_pt_first); - Eigen::Isometry3d T_world_boat0; - T_world_boat0.linear() = T_earth_boat0.linear(); - Eigen::Isometry3d T_world_camera0 = - T_world_boat0 * DataParser::get_boat_from_camera(img_pt_first); - StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, - gtsam::Pose3(T_world_camera0.matrix())); - - for (const int &idx : indices) { - const cv::Mat img = survey.loadImageByImageIndex(idx); - const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx); - - Eigen::Isometry3d T_world_boat = DataParser::get_world_from_boat(img_pt); - T_world_boat.translation() -= T_earth_boat0.translation(); - - Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt); - - sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix())); - } - - const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values(); - sfm.graph_values(initial_values, "initial values"); - - std::cout << "Solving for structure!" << std::endl; - - Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals, - const Backend::epoch iter) { - std::cout << "iteration " << iter << " complete!"; - std::string window_name = "Iteration_" + std::to_string(iter); - sfm.graph_values(vals, window_name); - }; - sfm.solve_structure(5, solve_iter_debug_func); - - std::cout << "Solution complete." << std::endl; - - const gtsam::Values result_values = sfm.get_structure_result(); - sfm.graph_values(result_values, "optimized values"); -} - -TEST(StructureFromMotiontest, random_test) { - std::cout << DataParser::T_boat_gps.matrix() << std::endl; - geometry::viz_scene( - std::vector{DataParser::T_boat_imu, DataParser::T_boat_gps}, - std::vector{DataParser::t_boat_cam}); -} -} // namespace robot::experimental::learn_descriptors From 04fc7bce78c7ee65b8c68f93a9ade7b4a13789a3 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 24 Jul 2025 13:53:42 -0400 Subject: [PATCH 42/45] fix test errors --- common/geometry/BUILD | 10 ---------- common/geometry/camera_test.cc | 6 ------ common/geometry/trajectory_evaluation.hh | 17 ----------------- 3 files changed, 33 deletions(-) delete mode 100644 common/geometry/trajectory_evaluation.hh diff --git a/common/geometry/BUILD b/common/geometry/BUILD index 6b11a2b02..2c750e8b0 100644 --- a/common/geometry/BUILD +++ b/common/geometry/BUILD @@ -66,13 +66,3 @@ cc_test( ":translate_types" ] ) - -cc_library( - name = "trajectory_evaluation", - hdrs = ["trajectory_evaluation.hh"], - srcs = ["trajectory_evaluation.cc"], - visibility = ["//visibility:public"], - deps = [ - "@eigen", - ] -) \ No newline at end of file diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc index 4d8b810fa..04c7411f2 100644 --- a/common/geometry/camera_test.cc +++ b/common/geometry/camera_test.cc @@ -76,7 +76,6 @@ TEST(CameraTest, test_estimate_pose) { cv::Mat K = (cv::Mat_(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); Eigen::Matrix3d K_eig = cv_to_eigen_mat(K); - std::vector poses; Eigen::Matrix3d R_world_from_cam0( Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * @@ -84,7 +83,6 @@ TEST(CameraTest, test_estimate_pose) { Eigen::Isometry3d world_from_cam0 = Eigen::Isometry3d::Identity(); world_from_cam0.linear() = R_world_from_cam0; world_from_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2); - poses.push_back(world_from_cam0); Eigen::Matrix3d R_world_45deg( Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix()); @@ -92,7 +90,6 @@ TEST(CameraTest, test_estimate_pose) { world_from_cam1.linear() = R_world_45deg * world_from_cam0.linear(); world_from_cam1.translation() = R_world_45deg * (world_from_cam0.translation() - p_world_cube_center) + p_world_cube_center; - poses.push_back(world_from_cam1); std::vector kpts0; std::vector kpts1; @@ -127,9 +124,6 @@ TEST(CameraTest, test_estimate_pose) { EXPECT_TRUE(cam0_from_cam1_estimate->translation().isApprox(cam0_from_cam1.translation(), 0.000001)); EXPECT_TRUE(cam0_from_cam1_estimate->linear().isApprox(cam0_from_cam1.linear(), 0.001)); - - poses.emplace_back(world_from_cam0 * *cam0_from_cam1_estimate); - viz_scene(poses, p_W_cube); } } } // namespace robot::geometry \ No newline at end of file diff --git a/common/geometry/trajectory_evaluation.hh b/common/geometry/trajectory_evaluation.hh deleted file mode 100644 index c3832359a..000000000 --- a/common/geometry/trajectory_evaluation.hh +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include -#include - -#include "Eigen/Core" - -namespace robot::geometry { -const std::vector get_absolute_trajectory_error(const std::vector &traj1, - const std::vector &traj2) { - assert(traj1.size() == traj2.size()); - std::vector result; - for (size_t i = 0; i < traj2.size(); i++) { - result.push_back((traj2[i] - traj1[i]).norm()); - } - return result; -} -} // namespace robot::geometry \ No newline at end of file From 1fc67df1b7052238653df01e610a018996ac7122 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 24 Jul 2025 14:32:02 -0400 Subject: [PATCH 43/45] clean up includes --- .../learn_descriptors/four_seasons_parser.cc | 14 ++------------ .../four_seasons_parser_detail.cc | 6 ------ .../four_seasons_parser_example_viz.cc | 15 --------------- experimental/learn_descriptors/frontend.cc | 3 --- .../learn_descriptors/frontend_definitions.hh | 1 + experimental/learn_descriptors/image_point.hh | 5 +++-- .../learn_descriptors/learn_descriptors.cc | 3 ++- .../learn_descriptors/spatial_test_scene_cube.hh | 1 - .../learn_descriptors/structure_from_motion.cc | 1 + .../structure_from_motion_example.cc | 2 +- 10 files changed, 10 insertions(+), 41 deletions(-) diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc index 50d52e5f5..61ef35e18 100644 --- a/experimental/learn_descriptors/four_seasons_parser.cc +++ b/experimental/learn_descriptors/four_seasons_parser.cc @@ -1,15 +1,11 @@ #include "experimental/learn_descriptors/four_seasons_parser.hh" #include -#include -#include #include #include -#include #include #include #include -#include #include #include @@ -72,10 +68,7 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::stod( parsed_line_gnss_poses[static_cast(txt_parser_help::GPSIdx::QUAT_Z)])); img_pt.AS_w_from_gnss_cam = liegroups::SE3(R_gps_cam_from_AS_w, t_gps_cam_from_AS_w); - } // else { - // std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id - // << std::endl; - // } + } if (vio_poses_time_map.find(time_key) != vio_poses_time_map.end()) { const std::vector& parsed_line_vio = vio_poses_time_map.at(time_key); Eigen::Vector3d t_AS_w_from_vio_cam( @@ -90,10 +83,7 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir, std::stod( parsed_line_vio[static_cast(txt_parser_help::ResultIdx::QUAT_Z)])); img_pt.AS_w_from_vio_cam = liegroups::SE3(R_AS_w_from_vio_cam, t_AS_w_from_vio_cam); - } // else { - // std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id - // << std::endl; - // } + } img_pt_vector_.push_back(img_pt); id++; diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc index 65dac2893..b41e71113 100644 --- a/experimental/learn_descriptors/four_seasons_parser_detail.cc +++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc @@ -17,12 +17,6 @@ #include "nmea/sentence.hpp" namespace robot::experimental::learn_descriptors::detail::four_seasons_parser { - -// template -// size_t abs_diff(const T& a, const T& b) { -// return a > b ? a - b : b - a; -// } - namespace txt_parser_help { std::vector parse_line_adv(const std::string& line, const std::string& delim) { diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc index ae9036342..4e52440d2 100644 --- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc +++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc @@ -58,7 +58,6 @@ int main(int argc, const char** argv) { (void)NUM_IMAGES; auto start = std::chrono::high_resolution_clock::now(); for (size_t i = 0; i < NUM_IMAGES; i += 1) { - // for (const size_t i : std::vector{74, 82, 92}) { const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); frontend.add_image(lrn_desc::ImageAndPoint{ parser.load_image(i), std::make_shared(img_pt)}); @@ -72,13 +71,6 @@ int main(int argc, const char** argv) { std::chrono::high_resolution_clock::now() - start); std::cout << "done populating frames! took " << duration.count() << " milliseconds" << std::endl; - // frontend.match_frames_and_build_tracks(); - // start = std::chrono::high_resolution_clock::now(); - // duration = std::chrono::duration_cast( - // std::chrono::high_resolution_clock::now() - start); - // std::cout << "done matching and building tracks! took " << duration.count() << " - // milliseconds" - // << std::endl; ROBOT_CHECK(parser.num_images() != 0); @@ -93,9 +85,7 @@ int main(int argc, const char** argv) { std::optional first_gps_to_cam; std::optional altitude_gps_from_gnss_cam; std::vector gps_ns_delta_from_shutter; - // double ate_cam = 0.0, ate_gps = 0.0; for (size_t i = 0; i < NUM_IMAGES; i += 1) { - // for (const size_t i : std::vector{74, 82, 92}) { const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i); std::cout << img_pt.to_string() << std::endl; if (!img_pt.AS_w_from_gnss_cam) { @@ -109,12 +99,8 @@ int main(int argc, const char** argv) { frontend.frames()[i]->world_from_cam_groundtruth_->matrix()); ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()), frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix()); - // Eigen::Isometry3d frontend_w_from_cam_groundtruth(*img_pt.world_from_cam_ground_truth()); viz_frames.emplace_back(frontend_w_from_cam_groundtruth, "x_frontend_ref_" + std::to_string(i)); - // std::cout << "frontend_w_from_cam_groundtruth: " << - // frontend_w_from_cam_groundtruth.matrix() - // << std::endl; if (img_pt.gps_gcs) { if (img_pt.gps_gcs->seq > img_pt.seq) { gps_ns_delta_from_shutter.push_back( @@ -166,7 +152,6 @@ int main(int argc, const char** argv) { std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl; } } - // std::cout << "ate_cam: " << ate_cam << "\tate_gps: " << ate_gps << std::endl; const double sum = std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0); diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc index 3a3b6af92..32af22642 100644 --- a/experimental/learn_descriptors/frontend.cc +++ b/experimental/learn_descriptors/frontend.cc @@ -129,9 +129,6 @@ double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T // 3. Recover angle (in radians) return std::acos(cos_theta); } - -// const std::vector get_absolute_trajectory_error(const std::vector &) -// {} } // namespace detail_sfm namespace robot::experimental::learn_descriptors { diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh index 1a96aa6bb..6204af219 100644 --- a/experimental/learn_descriptors/frontend_definitions.hh +++ b/experimental/learn_descriptors/frontend_definitions.hh @@ -2,6 +2,7 @@ #include #include +#include #include #include "experimental/learn_descriptors/structure_from_motion_types.hh" diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh index b9fae431c..6e9273eed 100644 --- a/experimental/learn_descriptors/image_point.hh +++ b/experimental/learn_descriptors/image_point.hh @@ -17,8 +17,9 @@ namespace robot::experimental::learn_descriptors { struct ImagePoint { virtual ~ImagePoint() = default; - size_t id; - size_t seq; // for time. TODO: make a time struct + size_t id; // idx for DB + size_t seq; // time in nanoseconds of image capture (also could be name of image; this is also + // time since unix epoch) std::shared_ptr K; void set_cam_in_world(const Eigen::Vector3d& cam_in_world) { cam_in_world_ = cam_in_world; }; virtual std::optional world_from_cam_ground_truth() const { diff --git a/experimental/learn_descriptors/learn_descriptors.cc b/experimental/learn_descriptors/learn_descriptors.cc index 94ba9833a..ee8ac0055 100644 --- a/experimental/learn_descriptors/learn_descriptors.cc +++ b/experimental/learn_descriptors/learn_descriptors.cc @@ -1,6 +1,7 @@ #include "experimental/learn_descriptors/learn_descriptors.hh" -#include +#include +#include namespace robot::experimental::learn_descriptors { void hello_world(const std::string &in) { std::cout << in << std::endl; } diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh index 1dd08b190..0d74262ac 100644 --- a/experimental/learn_descriptors/spatial_test_scene_cube.hh +++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh @@ -1,5 +1,4 @@ #pragma once -#include #include "Eigen/Core" #include "experimental/learn_descriptors/spatial_test_scene.hh" diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc index 8afcba047..ae46c2966 100644 --- a/experimental/learn_descriptors/structure_from_motion.cc +++ b/experimental/learn_descriptors/structure_from_motion.cc @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc index a6989ca95..2dce0047e 100644 --- a/experimental/learn_descriptors/structure_from_motion_example.cc +++ b/experimental/learn_descriptors/structure_from_motion_example.cc @@ -1,7 +1,7 @@ #include #include -#include #include +#include #include "Eigen/Core" #include "Eigen/Geometry" From 02bf477af97a892a5ec604196f8e8cbc90367076 Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 24 Jul 2025 14:49:04 -0400 Subject: [PATCH 44/45] fix include --- experimental/learn_descriptors/learn_descriptors.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/learn_descriptors/learn_descriptors.cc b/experimental/learn_descriptors/learn_descriptors.cc index ee8ac0055..f2938510f 100644 --- a/experimental/learn_descriptors/learn_descriptors.cc +++ b/experimental/learn_descriptors/learn_descriptors.cc @@ -1,6 +1,6 @@ #include "experimental/learn_descriptors/learn_descriptors.hh" -#include +#include #include namespace robot::experimental::learn_descriptors { From f5fbfc0893933d0318117460564b20b6ab566afb Mon Sep 17 00:00:00 2001 From: Nicolaniello Buono Date: Thu, 24 Jul 2025 15:37:24 -0400 Subject: [PATCH 45/45] fixed exception handle type --- common/geometry/camera.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc index 38733d6f4..46ddb340a 100644 --- a/common/geometry/camera.cc +++ b/common/geometry/camera.cc @@ -16,7 +16,6 @@ Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic) { Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point) { return K * p_cam_point; - // return (K * p_cam_point).head<2>(); } Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog) { @@ -41,24 +40,21 @@ std::optional estimate_cam0_from_cam1(const std::vector= 5); - // std::cout << "heartbeat 3" << std::endl; try { cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0); cv::Mat R_c1_c0, t_c1_c0; - // std::cout << "heartbest 4" << std::endl; ROBOT_CHECK(!E.empty(), "Essential matrix is empty."); // TOOD: handle multiple returned candidate E matrices better (they are stacked on top of // each other in E) if (E.rows > 3) { E = E.rowRange(0, 3); } - // std::cout << E << std::endl; cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0); result.linear() = cv_to_eigen_mat(R_c1_c0); result.translation() = cv_to_eigen_mat(t_c1_c0); result = result.inverse(); return result; - } catch (const std::exception e) { + } catch (const std::exception &e) { std::cerr << "Failed to estimate pose up to scale cam0_from_cam1.\n" << e.what() << std::endl; return std::nullopt;