From aa384702dbd881e0d9eac28246454492b42d3586 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 13 Dec 2024 10:43:27 -0500
Subject: [PATCH 01/45] committing progress

---
 experimental/learn_descriptors/BUILD          |  28 +-
 .../structure_from_motion.cc                  | 250 ++++++++++++++++++
 .../structure_from_motion.hh                  | 124 +++++++++
 ..._test.cc => structure_from_motion_test.cc} |  43 ++-
 .../learn_descriptors/symphony_lake_parser.cc |   3 +-
 .../learn_descriptors/symphony_lake_parser.hh |  75 +++++-
 .../symphony_lake_parser_test.cc              |  45 ++--
 .../learn_descriptors/visual_odometry.cc      | 118 ---------
 .../learn_descriptors/visual_odometry.hh      |  58 ----
 9 files changed, 530 insertions(+), 214 deletions(-)
 create mode 100644 experimental/learn_descriptors/structure_from_motion.cc
 create mode 100644 experimental/learn_descriptors/structure_from_motion.hh
 rename experimental/learn_descriptors/{visual_odometry_test.cc => structure_from_motion_test.cc} (77%)
 delete mode 100644 experimental/learn_descriptors/visual_odometry.cc
 delete mode 100644 experimental/learn_descriptors/visual_odometry.hh

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index e0f417bd7..91088abdf 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -17,19 +17,25 @@ cc_test(
 )
 
 cc_library(
-  name = "visual_odometry",
-  hdrs = ["visual_odometry.hh"],
+  name = "structure_from_motion",
+  hdrs = ["structure_from_motion.hh"],
   visibility = ["//visibility:public"],
-  srcs = ["visual_odometry.cc"],
+  srcs = ["structure_from_motion.cc"],
   deps = [
-    "@opencv//:opencv"
+    "@opencv//:opencv",
+    "@gtsam//:gtsam",
+    "@eigen"
   ]
 )
 
 cc_library(
   name = "symphony_lake_parser",
   hdrs = ["symphony_lake_parser.hh"],
-  copts = ["-Wno-unused-parameter"],
+  copts = [
+    "-Wno-unused-parameter",
+    "-std=c++20",
+  ],
+  data = ["@symphony_lake_snippet"],
   visibility = ["//visibility:public"],
   srcs = ["symphony_lake_parser.cc"],
   deps = [
@@ -39,11 +45,15 @@ cc_library(
 )
 
 cc_test(
-  name = "visual_odometry_test",
-  srcs = ["visual_odometry_test.cc"],  
+  name = "structure_from_motion_test",
+  srcs = ["structure_from_motion_test.cc"],  
+  copts = [
+    "-Wno-unused-parameter",
+  ],
   deps = [
     "@com_google_googletest//:gtest_main",
-    ":visual_odometry"
+    ":symphony_lake_parser",
+    ":structure_from_motion"
   ]
 )
 
@@ -51,7 +61,7 @@ cc_test(
   name = "symphony_lake_parser_test",
   srcs = ["symphony_lake_parser_test.cc"],
   copts = ["-Wno-unused-parameter"],
-  data = ["@symphony_lake_snippet"],
+  # data = ["@symphony_lake_snippet"],
   deps = [
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
new file mode 100644
index 000000000..544b0728e
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -0,0 +1,250 @@
+#include "experimental/learn_descriptors/structure_from_motion.hh"
+
+namespace robot::experimental::learn_descriptors {
+StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose,
+                               Frontend::MatcherType frontend_matcher) {
+    frontend_ = Frontend(frontend_extractor, frontend_matcher);
+    set_initial_pose(initial_pose);    
+    backend_ = Backend(K);
+}
+
+void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
+    backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose);
+}
+
+void StructureFromMotion::add_image(const cv::Mat &img) {
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img);   
+    landmarks_.push_back(std::vector<Backend::Landmark>());
+    if (get_num_images_added() > 0) {
+        std::vector<cv::DMatch> matches = frontend_.get_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);        
+        Frontend::enforce_bijective_matches(matches);
+
+        matches_.push_back(matches);
+        backend_.add_between_factor(
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
+            gtsam::Pose3::Identity());
+        for (const cv::DMatch match : matches) {
+            Backend::Landmark landmark_cam_1(
+                gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
+                gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()-1),
+                gtsam::Point2(
+                    static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
+                    static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) 
+                )
+            );
+            Backend::Landmark landmark_cam_2(
+                gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
+                gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()),
+                gtsam::Point2(
+                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
+                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)
+                )
+            );
+            landmark_count_++;
+            landmarks_[get_num_images_added()-1].push_back(landmark_cam_1);
+            landmarks_[get_num_images_added()].push_back(landmark_cam_2);
+            backend_.add_landmark(landmark_cam_1);
+            backend_.add_landmark(landmark_cam_2);
+        }        
+    }
+    img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
+}
+
+Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
+    extractor_type_ = frontend_algorithm;
+    matcher_type_ = frontend_matcher;
+
+    switch (extractor_type_) {
+        case ExtractorType::SIFT:
+            feature_extractor_ = cv::SIFT::create();
+            break;
+        case ExtractorType::ORB:
+            feature_extractor_ = cv::ORB::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+    switch (matcher_type_) {
+        case MatcherType::BRUTE_FORCE:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case MatcherType::KNN:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case MatcherType::FLANN:
+            if (frontend_algorithm == ExtractorType::ORB) {
+                throw std::invalid_argument("FLANN can not be used with ORB.");
+            }
+            descriptor_matcher_ = cv::FlannBasedMatcher::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+}
+
+std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
+    const cv::Mat &img) const {
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+    switch (extractor_type_) {
+        default:  // the opencv extractors have the same function signature
+            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
+            break;
+    }
+    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
+}
+
+std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
+                                              const cv::Mat &descriptors2) const {
+    std::vector<cv::DMatch> matches;
+    switch (matcher_type_) {
+        case MatcherType::BRUTE_FORCE:
+            get_brute_matches(descriptors1, descriptors2, matches);
+            break;
+        case MatcherType::KNN:
+            get_KNN_matches(descriptors1, descriptors2, matches);
+            break;
+        case MatcherType::FLANN:
+            get_FLANN_matches(descriptors1, descriptors2, matches);
+            break;
+        default:
+            break;
+    }
+    std::sort(matches.begin(), matches.end());
+    return matches;
+}
+
+void Frontend::threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold) {
+    matches.erase(
+        std::remove_if(matches.begin(), matches.end(), [dist_threshhold](const cv::DMatch& match) {
+            return match.distance > dist_threshhold;
+        }),
+        matches.end());
+}
+
+void Frontend::enforce_bijective_matches(std::vector<cv::DMatch>& matches) {    
+    std::unordered_map<int, cv::DMatch> bestQueryMatch;
+    std::unordered_map<int, cv::DMatch> bestTrainMatch;
+    
+    for (const auto& match : matches) {
+        int queryIdx = match.queryIdx;
+        int trainIdx = match.trainIdx;
+        
+        if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || 
+            match.distance < bestQueryMatch[queryIdx].distance) {
+            bestQueryMatch[queryIdx] = match;
+        }
+        
+        if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || 
+            match.distance < bestTrainMatch[trainIdx].distance) {
+            bestTrainMatch[trainIdx] = match;
+        }
+    }
+    
+    matches.erase(
+        std::remove_if(
+            matches.begin(), matches.end(),
+            [&bestQueryMatch, &bestTrainMatch](const cv::DMatch& match) {
+                int queryIdx = match.queryIdx;
+                int trainIdx = match.trainIdx;
+
+                return bestQueryMatch[queryIdx].trainIdx != trainIdx ||
+                       bestTrainMatch[trainIdx].queryIdx != queryIdx;
+            }),
+        matches.end());
+}
+
+
+bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                 std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
+        return false;
+    }
+    matches_out.clear();
+    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
+    return true;
+}
+
+bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::KNN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    // Lowe's Ratio Test
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+
+bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                 std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::FLANN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+
+Backend::Backend() {
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+    
+    initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
+}
+
+Backend::Backend(gtsam::Cal3_S2 K) {
+    initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
+}
+
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) {
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, pose_noise_);
+    initial_estimate_.insert(symbol, value);
+}
+
+void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value) {
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, pose_noise_);
+    initial_estimate_.insert(symbol_2, initial_estimate_.at<gtsam::Pose3>(symbol_2).compose(value));
+}
+
+void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
+    for (const Landmark &landmark : landmarks) {
+        add_landmark(landmark);
+    }
+}
+
+void Backend::add_landmark(const Landmark &landmark) {
+    graph_.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
+        landmark.projection, measurement_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0) 
+    );
+    initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess);
+}
+
+void Backend::solve_graph() {
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
+    result_ = optimizer.optimize();
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
new file mode 100644
index 000000000..a2185a909
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -0,0 +1,124 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+
+#include "Eigen/Core"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/GeneralSFMFactor.h"
+#include "gtsam/slam/PriorFactor.h"
+#include "gtsam/slam/BetweenFactor.h"
+#include "gtsam/slam/ProjectionFactor.h"
+
+#include <set>
+#include <map>
+#include <unordered_map>
+#include <utility>
+
+namespace robot::experimental::learn_descriptors {
+class Frontend {
+   public:
+    enum class ExtractorType { SIFT, ORB };
+    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
+
+    Frontend(){};
+    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
+    ~Frontend(){};
+
+    const ExtractorType& get_extractor_type() const { return extractor_type_; };
+    const MatcherType& get_matcher_type() const { return matcher_type_; };
+
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
+        const cv::Mat &img) const;
+    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
+                                        const cv::Mat &descriptors2) const;
+    static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
+    static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
+    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
+                               cv::Mat img_keypoints_out) {
+        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
+                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+    }
+    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
+                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
+                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
+        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
+    }    
+
+   private:
+    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                           std::vector<cv::DMatch> &matches_out) const;
+    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                         std::vector<cv::DMatch> &matches_out) const;
+    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                           std::vector<cv::DMatch> &matches_out) const;
+    ExtractorType extractor_type_;
+    MatcherType matcher_type_;
+
+    cv::Ptr<cv::Feature2D> feature_extractor_;
+    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
+};
+class Backend {
+    public:
+        static const char pose_symbol_char = 'x';
+        static const char landmark_symbol_char = 'l';
+        static const char camera_symbol_char = 'k';
+
+        struct Landmark {
+            Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, const gtsam::Point2 &projection, const gtsam::Point3 &initial_guess=gtsam::Point3::Identity()) 
+                : lmk_factor_symbol(lmk_factor_symbol), cam_pose_symbol(cam_pose_symbol), projection(projection), initial_guess(initial_guess) {};
+            const gtsam::Symbol lmk_factor_symbol; 
+            const gtsam::Symbol cam_pose_symbol;
+            const gtsam::Point2 projection;
+            const gtsam::Point3 initial_guess;
+        };
+
+        Backend();
+        Backend(gtsam::Cal3_S2 K);
+        ~Backend(){};
+    
+        void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value);
+        void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value);
+
+        void add_landmarks(const std::vector<Landmark> &landmarks);
+        void add_landmark(const Landmark &landmark);
+
+        void solve_graph();
+        const gtsam::Values& get_result() const {
+            return result_;
+        };
+    private:                
+        gtsam::Values initial_estimate_;
+        gtsam::Values result_;
+        gtsam::NonlinearFactorGraph graph_;
+
+        gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 1.0);        
+        gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
+};
+class StructureFromMotion {
+   public:
+    StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= gtsam::Pose3::Identity(),
+                   Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
+    ~StructureFromMotion(){};
+    
+    void set_initial_pose(gtsam::Pose3 initial_pose);
+    void add_image(const cv::Mat &img);
+    void solve_structure() { backend_.get_result(); };
+    const gtsam::Values& get_structure_result() { return backend_.get_result(); };
+
+    Frontend get_frontend() { return frontend_; };
+    Backend get_backend() { return backend_; }
+    size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); };
+   private:
+    std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
+    std::vector<std::vector<cv::DMatch>> matches_;
+    std::vector<std::vector<Backend::Landmark>> landmarks_;
+    size_t landmark_count_;
+
+    Frontend frontend_;
+    Backend backend_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/visual_odometry_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
similarity index 77%
rename from experimental/learn_descriptors/visual_odometry_test.cc
rename to experimental/learn_descriptors/structure_from_motion_test.cc
index b1803317e..e7b4488fc 100644
--- a/experimental/learn_descriptors/visual_odometry_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -1,4 +1,5 @@
-#include "experimental/learn_descriptors/visual_odometry.hh"
+#include "experimental/learn_descriptors/structure_from_motion.hh"
+#include "experimental/learn_descriptors/symphony_lake_parser.hh"
 
 #include <iostream>
 #include <sstream>
@@ -6,7 +7,7 @@
 #include "gtest/gtest.h"
 
 namespace robot::experimental::learn_descriptors {
-TEST(VIO_TEST, frontend_pipeline_sweep) {
+TEST(SFM_TEST, frontend_pipeline_sweep) {
     const size_t width = 640;
     const size_t height = 480;
 
@@ -109,4 +110,42 @@ TEST(VIO_TEST, frontend_pipeline_sweep) {
         }
     }
 }
+TEST(SFM_TEST, structure_from_motion) {
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);     
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+
+    cv::Mat image;
+    for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
+        const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
+        for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+            image = survey.loadImageByImageIndex(j);
+            sfm.add_image(image);
+        }
+    }
+    // DataParser::Generator<cv::Mat>::iterator img_itr = generator.begin();
+    // while (img_itr != generator.end()) {
+    //     if ((*img_itr).empty()) {
+    //         continue;
+    //     }
+    //     std::cout << "ooga booga" << std::endl;
+    //     std::cout << "image is empty: " << (*img_itr).empty() << std::endl;
+    //     cv::imshow("ooga", *img_itr);
+    //     cv::waitKey(1000);
+    //     sfm.add_image(*img_itr);
+    //     ++img_itr;
+    // }
+    sfm.solve_structure();
+    gtsam::Values output = sfm.get_structure_result();    
+}
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 5f93ab174..790932fab 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -6,7 +6,8 @@
 
 namespace robot::experimental::learn_descriptors {
 DataParser::DataParser(const std::filesystem::path &image_root_dir,
-                       const std::vector<std::string> &survey_list) {
+                       const std::vector<std::string> &survey_list) : 
+                       image_root_dir_(image_root_dir) , survey_list_(survey_list) {
     ROBOT_CHECK(std::filesystem::exists(image_root_dir), "Image root dir does not exist!",
                 image_root_dir);
     surveys_.load(image_root_dir.string(), survey_list);
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index 021ebd3f2..a2aa67add 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -1,21 +1,88 @@
 #pragma once
-#pragma once
-#include <string.h>
-
 #include <filesystem>
+#include <string>
+#include <iostream>
+#include <coroutine>
+#include <vector>
 
 #include "symphony_lake_dataset/SurveyVector.h"
 
 namespace robot::experimental::learn_descriptors {
 class DataParser {
    public:
+   template <typename T>
+    struct Generator {
+        struct promise_type {
+            T value;
+            std::suspend_always yield_value(T v) {
+                value = v;
+                return {};
+            }
+            std::suspend_always initial_suspend() { return {}; }
+            std::suspend_always final_suspend() noexcept { return {}; }
+            Generator get_return_object() { return Generator{handle_type::from_promise(*this)}; }
+            void return_void() {}
+            void unhandled_exception() { std::terminate(); }
+        };
+
+        using handle_type = std::coroutine_handle<promise_type>;
+
+        Generator(handle_type h) : handle(h) {}
+        ~Generator() { if (handle) handle.destroy(); }
+
+        struct iterator {
+            handle_type handle;
+            bool operator!=(std::default_sentinel_t) const { return !handle.done(); }
+            iterator& operator++() { handle.resume(); return *this; }
+            T operator*() const { return handle.promise().value; }
+        };
+
+        iterator begin() { return iterator{handle}; }
+        std::default_sentinel_t end() { return {}; }
+
+    private:
+        handle_type handle;
+    };
+
+    Generator<cv::Mat> image_generator(
+        const symphony_lake_dataset::SurveyVector& survey_vector) {
+
+        for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
+            const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
+            for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {                
+                co_yield survey.loadImageByImageIndex(j);
+            }
+        }
+    }
+
     DataParser(const std::filesystem::path &image_root_dir,
                const std::vector<std::string> &survey_list);
     ~DataParser();
 
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
-
+    Generator<cv::Mat> create_img_generator() { return image_generator(surveys_); };
    private:
+    std::filesystem::path image_root_dir_;
+    std::vector<std::string> survey_list_;
     symphony_lake_dataset::SurveyVector surveys_;
 };
+class SymphonyLakeDatasetTestHelper {
+   public:
+    static constexpr const char* test_image_root_dir = "external/symphony_lake_snippet/symphony_lake";
+    static constexpr std::array<const char*, 1> test_survey_list = {"140106_snippet"};
+    static bool images_equal(cv::Mat img1, cv::Mat img2) {
+        if (img1.size() != img2.size() || img1.type() != img2.type()) {
+            return false;
+        }
+        cv::Mat diff;
+        cv::absdiff(img1, img2, diff);
+        diff = diff.reshape(1);
+        return cv::countNonZero(diff) == 0;
+    }
+    static DataParser get_test_parser() { return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); };
+    static std::filesystem::path get_test_iamge_root_dir() { return std::filesystem::path(test_image_root_dir); };
+    static std::vector<std::string> get_test_survey_list() {
+        return std::vector<std::string>(test_survey_list.begin(), test_survey_list.end());
+    };
+};
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index e17ff8022..e8169b8da 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -14,24 +14,8 @@ namespace robot::experimental::learn_descriptors {
 namespace {
 bool is_test() { return std::getenv("BAZEL_TEST") != nullptr; }
 }  // namespace
-
-class SymphonyLakeDatasetTestHelper {
-   public:
-    static bool images_equal(cv::Mat img1, cv::Mat img2) {
-        if (img1.size() != img2.size() || img1.type() != img2.type()) {
-            return false;
-        }
-        cv::Mat diff;
-        cv::absdiff(img1, img2, diff);
-        diff = diff.reshape(1);
-        return cv::countNonZero(diff) == 0;
-    }
-};
 TEST(SymphonyLakeParserTest, snippet_140106) {
-    const std::filesystem::path image_root_dir = "external/symphony_lake_snippet/symphony_lake";
-    const std::vector<std::string> survey_list{"140106_snippet"};
-
-    DataParser data_parser = DataParser(image_root_dir, survey_list);
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
 
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
 
@@ -53,15 +37,32 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
             std::string target_img_name_str = target_img_name.str();
             target_img_name_str.replace(0, target_img_name_str.size() - target_img_name_length, "");
             std::filesystem::path target_img_dir =
-                image_root_dir / survey_list[i] / "0027" / target_img_name_str;
+                SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() / SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" / target_img_name_str;
             target_img = cv::imread(target_img_dir.string());
 
             EXPECT_TRUE(SymphonyLakeDatasetTestHelper::images_equal(image, target_img));
-            if (!is_test()) {
-                cv::imshow("Symphony Dataset Image", image);
-                cv::waitKey(2);
-            }
+            // if (!is_test()) {
+            //     cv::imshow("Symphony Dataset Image", image);
+            //     cv::waitKey(2);
+            // }
+            // cv::imshow("Symphony Dataset Image", image);
+            // cv::waitKey(200);
         }
     }
 }
+TEST(SymphonyLakeParserTest, snippet_140106_generator_test) {
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    DataParser::Generator<cv::Mat> generator = (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator();
+    DataParser::Generator<cv::Mat>::iterator img_iter = generator.begin();
+    while (img_iter != generator.end()) {
+        cv::Mat img = *img_iter;
+        // if (!is_test()) {
+        //     cv::imshow("Symphony Dataset Image", img);
+        //     cv::waitKey(200);
+        // }
+        // cv::imshow("Symphony Dataset Image", img);
+        // cv::waitKey(200);
+        ++img_iter;
+    }
+}
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/visual_odometry.cc b/experimental/learn_descriptors/visual_odometry.cc
deleted file mode 100644
index 360473208..000000000
--- a/experimental/learn_descriptors/visual_odometry.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-#include "experimental/learn_descriptors/visual_odometry.hh"
-
-namespace robot::experimental::learn_descriptors {
-VisualOdometry::VisualOdometry(Frontend::ExtractorType frontend_extractor,
-                               Frontend::MatcherType frontend_matcher) {
-    frontend_ = Frontend(frontend_extractor, frontend_matcher);
-}
-
-Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
-    extractor_type_ = frontend_algorithm;
-    matcher_type_ = frontend_matcher;
-
-    switch (extractor_type_) {
-        case ExtractorType::SIFT:
-            feature_extractor_ = cv::SIFT::create();
-            break;
-        case ExtractorType::ORB:
-            feature_extractor_ = cv::ORB::create();
-            break;
-        default:
-            // Error handling needed?
-            break;
-    }
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
-            break;
-        case MatcherType::KNN:
-            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
-            break;
-        case MatcherType::FLANN:
-            if (frontend_algorithm == ExtractorType::ORB) {
-                throw std::invalid_argument("FLANN can not be used with ORB.");
-            }
-            descriptor_matcher_ = cv::FlannBasedMatcher::create();
-            break;
-        default:
-            // Error handling needed?
-            break;
-    }
-}
-
-std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
-    const cv::Mat &img) const {
-    std::vector<cv::KeyPoint> keypoints;
-    cv::Mat descriptors;
-    switch (extractor_type_) {
-        default:  // the opencv extractors have the same function signature
-            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
-            break;
-    }
-    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
-}
-
-std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
-                                              const cv::Mat &descriptors2) const {
-    std::vector<cv::DMatch> matches;
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            get_brute_matches(descriptors1, descriptors2, matches);
-            break;
-        case MatcherType::KNN:
-            get_KNN_matches(descriptors1, descriptors2, matches);
-            break;
-        case MatcherType::FLANN:
-            get_FLANN_matches(descriptors1, descriptors2, matches);
-            break;
-        default:
-            break;
-    }
-    std::sort(matches.begin(), matches.end());
-    return matches;
-}
-
-bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
-        return false;
-    }
-    matches_out.clear();
-    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
-    return true;
-}
-
-bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                               std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::KNN) {
-        return false;
-    }
-    std::vector<std::vector<cv::DMatch>> knn_matches;
-    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
-    const float ratio_thresh = 0.7f;
-    matches_out.clear();
-    for (size_t i = 0; i < knn_matches.size(); i++) {
-        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
-            matches_out.push_back(knn_matches[i][0]);
-        }
-    }
-    return true;
-}
-
-bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::FLANN) {
-        return false;
-    }
-    std::vector<std::vector<cv::DMatch>> knn_matches;
-    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
-    const float ratio_thresh = 0.7f;
-    matches_out.clear();
-    for (size_t i = 0; i < knn_matches.size(); i++) {
-        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
-            matches_out.push_back(knn_matches[i][0]);
-        }
-    }
-    return true;
-}
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/visual_odometry.hh b/experimental/learn_descriptors/visual_odometry.hh
deleted file mode 100644
index b64434d2f..000000000
--- a/experimental/learn_descriptors/visual_odometry.hh
+++ /dev/null
@@ -1,58 +0,0 @@
-#pragma once
-
-#include <opencv2/opencv.hpp>
-
-namespace robot::experimental::learn_descriptors {
-class Frontend {
-   public:
-    enum class ExtractorType { SIFT, ORB };
-    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
-
-    Frontend(){};
-    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
-    ~Frontend(){};
-
-    ExtractorType get_extractor_type() const { return extractor_type_; };
-    MatcherType get_matcher_type() const { return matcher_type_; };
-
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
-        const cv::Mat &img) const;
-    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
-                                        const cv::Mat &descriptors2) const;
-
-    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
-                               cv::Mat img_keypoints_out) {
-        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
-                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
-    }
-    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
-                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
-                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
-        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
-    }
-
-   private:
-    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                         std::vector<cv::DMatch> &matches_out) const;
-    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    ExtractorType extractor_type_;
-    MatcherType matcher_type_;
-
-    cv::Ptr<cv::Feature2D> feature_extractor_;
-    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
-};
-class VisualOdometry {
-   public:
-    VisualOdometry(Frontend::ExtractorType frontend_extractor,
-                   Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
-    ~VisualOdometry(){};
-
-   private:
-    cv::Mat prev_image_;
-
-    Frontend frontend_;
-};
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file

From fc37811b061bbee9cd22e51f66babeba54470e02 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 13 Dec 2024 18:51:21 -0500
Subject: [PATCH 02/45] Uploading progress. Need to approach initial values
 better to avoid landmark not in camera error

---
 .../structure_from_motion.cc                  | 20 ++++++++++++++-----
 .../structure_from_motion.hh                  |  3 ++-
 .../structure_from_motion_test.cc             |  8 ++++++--
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 544b0728e..76142ada2 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -4,8 +4,10 @@ namespace robot::experimental::learn_descriptors {
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose,
                                Frontend::MatcherType frontend_matcher) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
-    set_initial_pose(initial_pose);    
     backend_ = Backend(K);
+
+    set_initial_pose(initial_pose);   
+    // backend_.get_current_initial_values().print("Ooga: ");
 }
 
 void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
@@ -27,7 +29,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
         for (const cv::DMatch match : matches) {
             Backend::Landmark landmark_cam_1(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()-1),
+                gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
                 gtsam::Point2(
                     static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
                     static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) 
@@ -35,7 +37,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
             );
             Backend::Landmark landmark_cam_2(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                gtsam::Symbol(Backend::camera_symbol_char, get_num_images_added()),
+                gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
                 gtsam::Point2(
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)
@@ -223,11 +225,15 @@ Backend::Backend(gtsam::Cal3_S2 K) {
 void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) {
     graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, pose_noise_);
     initial_estimate_.insert(symbol, value);
+    // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
+    // initial_estimate_.print("values after adding prior: ");
 }
 
 void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value) {
     graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, pose_noise_);
-    initial_estimate_.insert(symbol_2, initial_estimate_.at<gtsam::Pose3>(symbol_2).compose(value));
+    // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << std::endl;
+    // initial_estimate_.print("values when adding between factor: ");
+    initial_estimate_.insert(symbol_2, initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
 }
 
 void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
@@ -240,7 +246,11 @@ void Backend::add_landmark(const Landmark &landmark) {
     graph_.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
         landmark.projection, measurement_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0) 
     );
-    initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess);
+    if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) {
+        initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess);
+    } else {
+        initial_estimate_.update(landmark.lmk_factor_symbol, landmark.initial_guess);
+    }
 }
 
 void Backend::solve_graph() {
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index a2185a909..7c2c859aa 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -87,6 +87,7 @@ class Backend {
         void add_landmark(const Landmark &landmark);
 
         void solve_graph();
+        const gtsam::Values& get_current_initial_values() const { return initial_estimate_; };
         const gtsam::Values& get_result() const {
             return result_;
         };
@@ -106,7 +107,7 @@ class StructureFromMotion {
     
     void set_initial_pose(gtsam::Pose3 initial_pose);
     void add_image(const cv::Mat &img);
-    void solve_structure() { backend_.get_result(); };
+    void solve_structure() { backend_.solve_graph(); };
     const gtsam::Values& get_structure_result() { return backend_.get_result(); };
 
     Frontend get_frontend() { return frontend_; };
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index e7b4488fc..b7d617b59 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -122,7 +122,7 @@ TEST(SFM_TEST, structure_from_motion) {
 
     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);     
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
+    // DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
 
     cv::Mat image;
@@ -146,6 +146,10 @@ TEST(SFM_TEST, structure_from_motion) {
     //     ++img_itr;
     // }
     sfm.solve_structure();
-    gtsam::Values output = sfm.get_structure_result();    
+    // gtsam::Values output = sfm.get_structure_result();    
+    // output.print("result values: ");
+    // for (size_t i = 0; i < sfm.get_num_images_added(); i++) {
+        
+    // }
 }
 }  // namespace robot::experimental::learn_descriptors

From 166d40d824698b16a5e766f7a931d0c06c2f6edc Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Mon, 23 Dec 2024 14:54:08 -0500
Subject: [PATCH 03/45] uploading progress

---
 experimental/learn_descriptors/BUILD          |  3 ++-
 .../learn_descriptors/symphony_lake_parser.cc | 19 +++++++++++++++++++
 .../learn_descriptors/symphony_lake_parser.hh |  9 +++++++--
 .../symphony_lake_parser_test.cc              |  5 ++++-
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 91088abdf..4c203ade2 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -40,7 +40,8 @@ cc_library(
   srcs = ["symphony_lake_parser.cc"],
   deps = [
     "@symphony_lake_parser",
-    "//common:check"
+    "//common:check",
+    "@eigen"
   ]
 )
 
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 790932fab..c2c134273 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -13,4 +13,23 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir,
     surveys_.load(image_root_dir.string(), survey_list);
 }
 DataParser::~DataParser() {}
+
+Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, bool use_compass) {
+    const symphony_lake_dataset::ImagePoint img_point = surveys_.get(survey_idx).getImagePoint(img_idx);
+    Eigen::Vector3d t;
+    Eigen::Matrix3d R;        
+    Eigen::Affine3d T_world_camera;
+    if (use_gps) {
+        t[0] = img_point.x;
+        t[1] = img_point.y;
+    }
+    // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core)
+    double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan;    
+    Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY()));
+    Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity();
+    Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()));
+    R = R_x * R_y * R_z;
+    T_world_camera = t * R;
+    return T_world_camera;
+}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index a2aa67add..6dc74c7d3 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -5,8 +5,11 @@
 #include <coroutine>
 #include <vector>
 
+#include "symphony_lake_dataset/ImagePoint.h"
 #include "symphony_lake_dataset/SurveyVector.h"
 
+#include "Eigen/Dense"
+
 namespace robot::experimental::learn_descriptors {
 class DataParser {
    public:
@@ -53,12 +56,14 @@ class DataParser {
                 co_yield survey.loadImageByImageIndex(j);
             }
         }
-    }
+    }    
 
     DataParser(const std::filesystem::path &image_root_dir,
                const std::vector<std::string> &survey_list);
     ~DataParser();
 
+    Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, bool use_compass = false);    
+
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
     Generator<cv::Mat> create_img_generator() { return image_generator(surveys_); };
    private:
@@ -78,7 +83,7 @@ class SymphonyLakeDatasetTestHelper {
         cv::absdiff(img1, img2, diff);
         diff = diff.reshape(1);
         return cv::countNonZero(diff) == 0;
-    }
+    };
     static DataParser get_test_parser() { return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); };
     static std::filesystem::path get_test_iamge_root_dir() { return std::filesystem::path(test_image_root_dir); };
     static std::vector<std::string> get_test_survey_list() {
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index e8169b8da..5ede85062 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -27,7 +27,10 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
     printf("Press 'q' in graphic window to quit\n");
     for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
         const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-        for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+        for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {     
+            // img_point contains all the csv data entries       
+            const symphony_lake_dataset::ImagePoint img_point = survey.getImagePoint(j);
+            (void)img_point; // suppress unused variable
             image = survey.loadImageByImageIndex(j);
 
             // get the target image

From aa4a77a74e8ee7e83befacc960a861b4ca80af2e Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 21 Jan 2025 17:38:03 -0800
Subject: [PATCH 04/45] Uploading progress

---
 experimental/learn_descriptors/BUILD          |   3 +-
 .../structure_from_motion.cc                  | 120 +++++++++++++++++-
 .../structure_from_motion.hh                  |  24 +++-
 .../structure_from_motion_test.cc             |  85 ++++++++++++-
 .../learn_descriptors/symphony_lake_parser.cc |   5 +-
 5 files changed, 223 insertions(+), 14 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 4c203ade2..a9d8e2224 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -24,7 +24,8 @@ cc_library(
   deps = [
     "@opencv//:opencv",
     "@gtsam//:gtsam",
-    "@eigen"
+    "@eigen",
+    "@nlohmann_json//:json",
   ]
 )
 
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 76142ada2..87c82864b 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -1,6 +1,54 @@
 #include "experimental/learn_descriptors/structure_from_motion.hh"
+#include <filesystem>
+#include <sstream>
+
+namespace fs = std::filesystem;
 
 namespace robot::experimental::learn_descriptors {
+
+const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() {
+    Eigen::Affine3d transform = Eigen::Affine3d::Identity();
+    transform.translate(Eigen::Vector3d::Zero());
+    transform.rotate(Eigen::Matrix3d(
+        Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1,0,0)) *
+        Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0,1,0)))
+    );
+
+    // std::stringstream ss;
+    // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()), gtsam::Point3(transform.translation()));
+    // json json_obj;
+    // json_obj["T_symlake_boat_cam"] = ss.str();
+    // fs::path output_dir = "output";
+    // fs::path output_file = output_dir / "output_sfm.json";
+    // if (!fs::exists(output_dir)) {
+    //     fs::create_directory(output_dir);
+    // }
+    // std::ofstream file(output_file);
+    // if (file.is_open()) {
+    //     file << json_obj.dump(4);
+    //     file.close();
+    // }
+
+    return transform;
+}();
+
+std::string pose_to_string(gtsam::Pose3 pose) {
+    std::stringstream ss;
+    ss << pose;
+    return ss.str();
+}
+
+const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3(
+    gtsam::Rot3(T_symlake_boat_cam.rotation()), 
+    T_symlake_boat_cam.translation());
+// gtsam::Pose3(
+//     gtsam::Rot3(Eigen::Matrix3d(
+//         Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() *
+//         Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 1, 0)).toRotationMatrix()
+//     )),
+//     gtsam::Point3::Identity()
+// );
+
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose,
                                Frontend::MatcherType frontend_matcher) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
@@ -22,18 +70,46 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
         Frontend::enforce_bijective_matches(matches);
 
         matches_.push_back(matches);
+        gtsam::Pose3 between_value = gtsam::Pose3::Identity();
         backend_.add_between_factor(
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
-            gtsam::Pose3::Identity());
+            between_value);        
+        gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0,0,1));
+        gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at<gtsam::Pose3>(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1));      
+        gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at<gtsam::Pose3>(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()));      
+        int count = 0;
         for (const cv::DMatch match : matches) {
+            if (count < 1) {
+                std::cout << "hello???" << std::endl;
+                json json_obj;
+                json_obj["add_image_1"] = {
+                    {"T_world_cam1", pose_to_string(T_world_cam1)},
+                    {"T_world_cam2", pose_to_string(T_world_cam1)},
+                    {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)},
+                    {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}
+                };
+                fs::path output_dir = "output";
+                fs::path output_file = output_dir / "output_sfm.json";
+                if (!fs::exists(output_dir)) {
+                    fs::create_directory(output_dir);
+                }
+                std::ofstream file(output_file);
+                if (file.is_open()) {
+                    file << json_obj.dump(4);
+                    file.close();
+                }
+            }
+            count++;
+            std::cout << "T_world_landmark" <<  T_world_cam1 * T_cam_landmark << std::endl;
             Backend::Landmark landmark_cam_1(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
                 gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
                 gtsam::Point2(
                     static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
                     static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) 
-                )
+                ),
+                (T_world_cam1 * T_cam_landmark).translation()
             );
             Backend::Landmark landmark_cam_2(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
@@ -41,14 +117,19 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
                 gtsam::Point2(
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)
-                )
+                ),
+                (T_world_cam2 * T_cam_landmark).translation()
             );
-            landmark_count_++;
             landmarks_[get_num_images_added()-1].push_back(landmark_cam_1);
             landmarks_[get_num_images_added()].push_back(landmark_cam_2);
             backend_.add_landmark(landmark_cam_1);
             backend_.add_landmark(landmark_cam_2);
-        }        
+            landmark_count_++;
+        }      
+        std::cout << "number of landmarks: " << count << std::endl;  
+        std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl;
+        backend_.get_current_initial_values().print("Current initial values: ");
+        std::cout << "landmark_count_: " << landmark_count_ << std::endl;
     }
     img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
 }
@@ -257,4 +338,33 @@ void Backend::solve_graph() {
     gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
     result_ = optimizer.optimize();
 }
+
+// json SFM_Logger::gtsam_pose3_to_json(const gtsam::Pose3 &pose) {
+//     json json_obj;  
+//     json_obj = {
+//         {},
+
+//     };
+
+//     return json_obj;
+// }
+
+// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path &output_path) {
+//     json json_obj;
+//     json_obj["add_image_1"] = {
+//         {"T_world_cam1", pose_to_string(T_world_cam1)},
+//         {"T_world_cam2", pose_to_string(T_world_cam1)},
+//         {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)},
+//         {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}
+//     };
+//     if (!fs::exists(output_path)) {
+//         fs::create_directory(output_path);
+//     }
+//     std::ofstream file(output_file);
+//     if (file.is_open()) {
+//         file << json_obj.dump(4);
+//         file.close();
+//     }
+// }
+
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 7c2c859aa..b2bbfdefa 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -18,6 +18,10 @@
 #include <unordered_map>
 #include <utility>
 
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::json;
+
 namespace robot::experimental::learn_descriptors {
 class Frontend {
    public:
@@ -63,9 +67,9 @@ class Frontend {
 };
 class Backend {
     public:
-        static const char pose_symbol_char = 'x';
-        static const char landmark_symbol_char = 'l';
-        static const char camera_symbol_char = 'k';
+        static constexpr char pose_symbol_char = 'x';
+        static constexpr char landmark_symbol_char = 'l';
+        static constexpr char camera_symbol_char = 'k';
 
         struct Landmark {
             Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, const gtsam::Point2 &projection, const gtsam::Point3 &initial_guess=gtsam::Point3::Identity()) 
@@ -101,7 +105,9 @@ class Backend {
 };
 class StructureFromMotion {
    public:
-    StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= gtsam::Pose3::Identity(),
+    static const Eigen::Affine3d T_symlake_boat_cam;
+    static const gtsam::Pose3 default_initial_pose; 
+    StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= default_initial_pose,
                    Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
     ~StructureFromMotion(){};
     
@@ -113,13 +119,21 @@ class StructureFromMotion {
     Frontend get_frontend() { return frontend_; };
     Backend get_backend() { return backend_; }
     size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); };
+    size_t get_landmark_count() { return landmark_count_; };
    private:
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
     std::vector<std::vector<cv::DMatch>> matches_;
     std::vector<std::vector<Backend::Landmark>> landmarks_;
-    size_t landmark_count_;
+    size_t landmark_count_ = 0;
 
     Frontend frontend_;
     Backend backend_;
 };
+// class SFM_Logger {
+//     public:
+//         // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose);
+//         static const json eigen_mat_to_json(const Eigen::Matrix &mat);
+//         static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose);
+//         static const void values_to_json(const gtsam::Values &values, const std::filesystem::path &file);
+// };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index b7d617b59..c1e1266ec 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -110,6 +110,85 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
         }
     }
 }
+
+TEST(SFM_TEST, frontend_snippet_two) {
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180);
+    const cv::Mat image_1 = survey.loadImageByImageIndex(180);
+    const cv::Mat image_2 = survey.loadImageByImageIndex(185);
+
+    const size_t img_width = image_point.width, img_height = image_point.height;
+    const double fx = image_point.fx, fy = image_point.fy;        
+    const double cx = image_point.cx, cy = image_point.cy;    
+    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);  
+    Frontend frontend = sfm.get_frontend();
+
+    cv::Mat img_keypoints_out_1(img_height, img_width, CV_8UC3),
+        img_keypoints_out_2(img_height, img_width, CV_8UC3), img_matches_out(img_height, 2 * img_width, CV_8UC3);
+    cv::Mat img_display_test;
+
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat>keypts_descriptors_1 = frontend.get_keypoints_and_descriptors(image_1);
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat>keypts_descriptors_2 = frontend.get_keypoints_and_descriptors(image_2);
+    std::cout << "num of keypoints " << keypts_descriptors_1.first.size() << std::endl;
+    frontend.draw_keypoints(image_1, keypts_descriptors_1.first,
+                            img_keypoints_out_1);
+    frontend.draw_keypoints(image_2, keypts_descriptors_2.first,
+                            img_keypoints_out_2);
+    std::vector<cv::DMatch> matches = frontend.get_matches(keypts_descriptors_1.second, keypts_descriptors_2.second);
+    frontend.draw_matches(image_1, keypts_descriptors_1.first, image_2,
+                            keypts_descriptors_2.first, matches, img_matches_out);
+
+    cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+    cv::vconcat(img_display_test, img_matches_out, img_display_test);
+    cv::Mat og_images;
+    cv::hconcat(image_1, image_2, og_images);
+    cv::vconcat(og_images, img_display_test, img_display_test);
+    std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl;
+    cv::imshow("Keypoints and Matches Output.", img_display_test);
+    std::cout << "Press spacebar to pause." << std::endl;
+    while (cv::waitKey(1000) != 32) {}
+}
+
+TEST(SFM_TEST, sfm_snippet_two) {
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180);
+    const std::vector<cv::Mat> images = {
+        survey.loadImageByImageIndex(180),
+        survey.loadImageByImageIndex(185)
+    };
+
+    // const size_t img_width = image_point.width, img_height = image_point.height;
+    const double fx = image_point.fx, fy = image_point.fy;        
+    const double cx = image_point.cx, cy = image_point.cy;    
+    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);  
+
+    std::cout << "sfm.landmark_count_: " << sfm.get_landmark_count() << std::endl;
+
+    for (const cv::Mat &image : images) {
+        sfm.add_image(image);
+    }
+
+    // sfm.solve_structure();
+
+    // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+    // cv::vconcat(img_display_test, img_matches_out, img_display_test);
+    // cv::Mat og_images;
+    // cv::hconcat(image_1, image_2, og_images);
+    // cv::vconcat(og_images, img_display_test, img_display_test);
+    // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl;
+    // cv::imshow("Keypoints and Matches Output.", img_display_test);
+    // std::cout << "Press spacebar to pause." << std::endl;
+    // while (cv::waitKey(1000) != 32) {}    
+}
+
 TEST(SFM_TEST, structure_from_motion) {
     const size_t img_width = 640;
     const size_t img_height = 480;
@@ -128,7 +207,11 @@ TEST(SFM_TEST, structure_from_motion) {
     cv::Mat image;
     for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
         const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-        for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+        // for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+        //     image = survey.loadImageByImageIndex(j);
+            // sfm.add_image(image);
+        // }
+        for (int j = 0; j < 2; j++) {
             image = survey.loadImageByImageIndex(j);
             sfm.add_image(image);
         }
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index c2c134273..5f210578b 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -28,8 +28,9 @@ Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx
     Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY()));
     Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity();
     Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()));
-    R = R_x * R_y * R_z;
-    T_world_camera = t * R;
+    R = R_x * R_y * R_z;    
+    T_world_camera.translate(t);
+    T_world_camera.rotate(R);    
     return T_world_camera;
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file

From d14b504a4235ee5a938f0e92d9a7d51af5ef9fd0 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 23 Jan 2025 12:10:31 -0800
Subject: [PATCH 05/45] including matrix_to_proto as a dependency doesn't work
 for some reason

---
 experimental/learn_descriptors/BUILD          |  11 +-
 experimental/learn_descriptors/gtsam_test.cc  |  17 ++-
 experimental/learn_descriptors/opencv_viz.cc  | 119 +++++++++++++++
 .../learn_descriptors/opencv_viz_example.cc   |  83 +++++++++++
 .../structure_from_motion.cc                  |  12 +-
 .../structure_from_motion_test.cc             | 139 +++++++++++-------
 6 files changed, 322 insertions(+), 59 deletions(-)
 create mode 100644 experimental/learn_descriptors/opencv_viz.cc
 create mode 100644 experimental/learn_descriptors/opencv_viz_example.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index a9d8e2224..83a43f8d1 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -55,7 +55,8 @@ cc_test(
   deps = [
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
-    ":structure_from_motion"
+    ":structure_from_motion",
+    "//common/math:matrix_to_proto"
   ]
 )
 
@@ -80,3 +81,11 @@ cc_test(
     ":learn_descriptors"  
   ]
 )
+
+cc_binary(
+  name = "opencv_viz_example",
+  srcs = ["opencv_viz_example.cc"],
+  deps = [
+    "@opencv//:opencv"
+  ]
+)
diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc
index 53ee67b49..82ac8bf6a 100644
--- a/experimental/learn_descriptors/gtsam_test.cc
+++ b/experimental/learn_descriptors/gtsam_test.cc
@@ -87,7 +87,7 @@ TEST(GtsamTesting, gtsam_simple_cube) {
     gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
     gtsam::Values result = optimizer.optimize();
 
-    // result.print("Optimized Results:\n");
+    result.print("Optimized Results:\n");
 
     constexpr double TOL = 1e-3;
     for (size_t i = 0; i < poses.size(); i++) {
@@ -103,5 +103,20 @@ TEST(GtsamTesting, gtsam_simple_cube) {
         EXPECT_NEAR(cube_W[i][1], result_point[1], TOL);
         EXPECT_NEAR(cube_W[i][2], result_point[2], TOL);
     }
+
+    // // Setup
+    // const Eigen::Matrix3d in{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}};
+
+    // // Action
+    // proto::Matrix proto;
+    // pack_into(in, &proto);
+    // const Eigen::Matrix3d out = unpack_from<Eigen::Matrix3d>(proto);
+
+    // // Verification
+    // EXPECT_EQ(in.rows(), out.rows());
+    // EXPECT_EQ(in.cols(), out.cols());
+    // for (int i = 0; i < in.rows(); i++) {
+    //     EXPECT_EQ(in(i), out(i));
+    // }
 }
 }  // namespace robot::experimental::gtsam_testing
\ No newline at end of file
diff --git a/experimental/learn_descriptors/opencv_viz.cc b/experimental/learn_descriptors/opencv_viz.cc
new file mode 100644
index 000000000..f61dd508d
--- /dev/null
+++ b/experimental/learn_descriptors/opencv_viz.cc
@@ -0,0 +1,119 @@
+#include <vector>
+
+#include "eigen3/Eigen/Core"
+#include "opencv2/viz.hpp"
+
+namespace opencv_viz {
+template <typename Derived>
+cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase<Derived>& eigenMatrix) {
+    int cvType;
+    if constexpr (std::is_same_v<typename Derived::Scalar, double>) {
+        cvType = CV_64F; // 64-bit floating point
+    } else if constexpr (std::is_same_v<typename Derived::Scalar, float>) {
+        cvType = CV_32F; // 32-bit floating point
+    } else if constexpr (std::is_same_v<typename Derived::Scalar, int>) {
+        cvType = CV_32S; // 32-bit signed integer
+    } else {
+        static_assert(!std::is_same_v<Derived, Derived>, "Unsupported data type in Eigen matrix");
+    }
+    return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, const_cast<void*>(static_cast<const void*>(eigenMatrix.derived().data()))).clone();
+}
+cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
+    // Ensure R is a valid rotation matrix
+    CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001);
+    
+    double trace = R(0, 0) + R(1, 1) + R(2, 2);
+    double theta = std::acos((trace - 1) / 2.0);
+
+    if (std::abs(theta) < 1e-6) {
+        return cv::Vec3d(0, 0, 0);
+    }
+
+    double sinTheta = std::sin(theta);
+    cv::Vec3d axis(
+        (R(2, 1) - R(1, 2)) / (2.0 * sinTheta),
+        (R(0, 2) - R(2, 0)) / (2.0 * sinTheta),
+        (R(1, 0) - R(0, 1)) / (2.0 * sinTheta)
+    );
+
+    return axis * theta;
+}
+void viz_scene(const std::vector<Eigen::MatrixXd> &poses, const std::vector<Eigen::Vector3d> &points) {
+    cv::viz::Viz3d window("Viz Scene");
+
+    std::vector<cv::Affine3d> cv_poses;
+    constexpr double pose_size = 0.2;    
+    for (unsigned int i = 0; i < poses.size(); i++) {
+        const Eigen::MatrixXd &pose = poses[i];
+        cv::Affine3d cv_pose(
+            eigen_to_cv_mat(pose.block(0,0,3,3)) ,
+            cv::Affine3d::Vec3{pose(0,3), pose(1,3), pose(2,3)}            
+        );
+        window.showWidget(
+            "pose_" + std::to_string(i), 
+            cv::viz::WCoordinateSystem(pose_size),
+            cv_pose
+            );
+    }
+    constexpr double point_radius = 0.2;
+    constexpr int sphere_res = 10;
+    const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
+    for (unsigned int i = 0; i < points.size(); i++) {
+        const Eigen::Vector3d &point = points[i];
+        window.showWidget(
+            "translation_" + std::to_string(i),
+            cv::viz::WSphere(
+                cv::Point3d(point[0], point[1], point[2]),
+                point_radius,
+                sphere_res,
+                point_color
+            )
+        );
+    }
+
+    window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+
+    constexpr unsigned int num_cells = 6;
+    constexpr double units = 1.0;
+    const cv::viz::Color color = cv::viz::Color::white();
+    const cv::Vec2i cells = cv::Vec2i::all(num_cells);
+    const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
+    window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
+    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(1,0,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
+    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(0,1,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
+    window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50)));
+    
+    constexpr bool ALWAYS_FACE_CAMERA = true;
+    constexpr double TEXT_SCALE = 0.1;
+    window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3),
+                                                  TEXT_SCALE, ALWAYS_FACE_CAMERA));
+
+    constexpr bool FIXED_TEXT = false;
+    cv::Affine3d world_from_fixed_text(
+        cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation
+        {0.2, 0.4, 0.6} // translation
+    );
+
+    window.showWidget(
+        "text_3d_fixed",
+        cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
+        world_from_fixed_text
+    );
+    constexpr double COORD_SCALE = 0.2;
+    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), world_from_fixed_text);
+
+    constexpr double CIRCLE_RADIUS_M = 0.5;
+    cv::Affine3d world_from_circle(
+        cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation
+        {0.0, 0.0, 1.5} // translation
+    );
+    window.showWidget(
+        "circle",
+        cv::viz::WCircle(CIRCLE_RADIUS_M),
+        world_from_circle
+    );
+
+    window.spin();
+}
+}
+
diff --git a/experimental/learn_descriptors/opencv_viz_example.cc b/experimental/learn_descriptors/opencv_viz_example.cc
new file mode 100644
index 000000000..c37c3ba4f
--- /dev/null
+++ b/experimental/learn_descriptors/opencv_viz_example.cc
@@ -0,0 +1,83 @@
+#include "opencv2/viz.hpp"
+#include "opencv_viz.cc"
+#include "eigen/Eigen/Geometry"
+
+void demo() {
+ cv::viz::Viz3d window("My Window");
+
+    window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+
+    constexpr unsigned int num_cells = 6;
+    constexpr double units = 1.0;
+    const cv::viz::Color color = cv::viz::Color::white();
+    const cv::Vec2i cells = cv::Vec2i::all(num_cells);
+    const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
+    window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
+    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(1,0,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
+    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(0,1,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
+    window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50)));
+    constexpr bool ALWAYS_FACE_CAMERA = true;
+    constexpr double TEXT_SCALE = 0.1;
+    window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3),
+                                                  TEXT_SCALE, ALWAYS_FACE_CAMERA));
+
+    constexpr bool FIXED_TEXT = false;
+    cv::Affine3d world_from_fixed_text(
+        cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation
+        {0.2, 0.4, 0.6} // translation
+    );
+
+    window.showWidget(
+        "text_3d_fixed",
+        cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
+        world_from_fixed_text
+    );
+    constexpr double COORD_SCALE = 0.2;
+    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), world_from_fixed_text);
+
+    constexpr double CIRCLE_RADIUS_M = 0.5;
+    cv::Affine3d world_from_circle(
+        cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation
+        {0.0, 0.0, 1.5} // translation
+    );
+    window.showWidget(
+        "circle",
+        cv::viz::WCircle(CIRCLE_RADIUS_M),
+        world_from_circle
+    );
+
+    window.spin();
+}
+
+void demo_cube() {
+    std::vector<Eigen::Vector3d> cube_W;
+    float cube_size = 1.0f;
+    cube_W.push_back(Eigen::Vector3d(0, 0, 0));
+    cube_W.push_back(Eigen::Vector3d(cube_size, 0, 0));
+    cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, 0));
+    cube_W.push_back(Eigen::Vector3d(0, cube_size, 0));
+    cube_W.push_back(Eigen::Vector3d(0, 0, cube_size));
+    cube_W.push_back(Eigen::Vector3d(cube_size, 0, cube_size));
+    cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
+    cube_W.push_back(Eigen::Vector3d(0, cube_size, cube_size));
+
+    std::vector<cv::Affine3d> poses;
+
+    Eigen::Matrix3d rotation0(
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    cv::Affine3d pose0(opencv_viz::eigen_to_cv_mat(rotation0), cv::Affine3d::Vec3{4, 0, 0});
+
+    // Eigen::Matrix3d
+    // cv::Affine3d pose1(
+    //     opencv_viz::eigen_to_cv_mat(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0),
+    //     Eigen::Vector3d(0, 4, 0));
+    // gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(pose1, *K);
+    // graph.emplace_shared<gtsam::PriorFactor<cv::Affine3d>>(gtsam::Symbol('x', 1), pose1, poseNoise);
+    // poses.push_back(pose1);
+    // cameras.push_back(camera1);
+}
+
+int main() {
+    demo();
+}
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 87c82864b..d19634ef1 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -81,7 +81,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
         int count = 0;
         for (const cv::DMatch match : matches) {
             if (count < 1) {
-                std::cout << "hello???" << std::endl;
+                // std::cout << "hello???" << std::endl;
                 json json_obj;
                 json_obj["add_image_1"] = {
                     {"T_world_cam1", pose_to_string(T_world_cam1)},
@@ -101,7 +101,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
                 }
             }
             count++;
-            std::cout << "T_world_landmark" <<  T_world_cam1 * T_cam_landmark << std::endl;
+            // std::cout << "T_world_landmark" <<  T_world_cam1 * T_cam_landmark << std::endl;
             Backend::Landmark landmark_cam_1(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
                 gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
@@ -126,10 +126,10 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
             backend_.add_landmark(landmark_cam_2);
             landmark_count_++;
         }      
-        std::cout << "number of landmarks: " << count << std::endl;  
-        std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl;
-        backend_.get_current_initial_values().print("Current initial values: ");
-        std::cout << "landmark_count_: " << landmark_count_ << std::endl;
+        // std::cout << "number of landmarks: " << count << std::endl;  
+        // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl;
+        // backend_.get_current_initial_values().print("Current initial values: ");
+        // std::cout << "landmark_count_: " << landmark_count_ << std::endl;
     }
     img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
 }
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index c1e1266ec..20a034e0d 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -1,5 +1,6 @@
 #include "experimental/learn_descriptors/structure_from_motion.hh"
 #include "experimental/learn_descriptors/symphony_lake_parser.hh"
+#include "common/math/matrix_to_proto.hh"
 
 #include <iostream>
 #include <sstream>
@@ -142,15 +143,15 @@ TEST(SFM_TEST, frontend_snippet_two) {
     frontend.draw_matches(image_1, keypts_descriptors_1.first, image_2,
                             keypts_descriptors_2.first, matches, img_matches_out);
 
-    cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-    cv::vconcat(img_display_test, img_matches_out, img_display_test);
-    cv::Mat og_images;
-    cv::hconcat(image_1, image_2, og_images);
-    cv::vconcat(og_images, img_display_test, img_display_test);
-    std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl;
-    cv::imshow("Keypoints and Matches Output.", img_display_test);
-    std::cout << "Press spacebar to pause." << std::endl;
-    while (cv::waitKey(1000) != 32) {}
+    // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+    // cv::vconcat(img_display_test, img_matches_out, img_display_test);
+    // cv::Mat og_images;
+    // cv::hconcat(image_1, image_2, og_images);
+    // cv::vconcat(og_images, img_display_test, img_display_test);
+    // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl;
+    // cv::imshow("Keypoints and Matches Output.", img_display_test);
+    // std::cout << "Press spacebar to pause." << std::endl;
+    // while (cv::waitKey(1000) != 32) {}
 }
 
 TEST(SFM_TEST, sfm_snippet_two) {
@@ -176,6 +177,42 @@ TEST(SFM_TEST, sfm_snippet_two) {
         sfm.add_image(image);
     }
 
+    gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
+    std::vector<Eigen::MatrixXd> poses;
+    std::vector<proto::Matrix> protos;
+    for (size_t i = 0; i < images.size(); i++) {
+        gtsam::Pose3 pose = initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
+        Eigen::MatrixXd eigen_affine(4,4);
+        gtsam::Matrix3 rot_mat = pose.rotation().matrix();
+        gtsam::Point3 translation = pose.translation();
+        for (int j = 0; j < 3; j++) {
+            for (int k = 0; k < 3; k++) {
+                eigen_affine(j,k) = rot_mat(j,k);
+            }
+            eigen_affine(3,j) = translation(j);
+        }
+        eigen_affine(3,3) = 1;
+        poses.push_back(eigen_affine);
+        protos.emplace_shared(proto::Matrix());
+        pack_into(eigen_affine, &protos.back());
+    }
+    std::cout << poses.front() << std::endl;
+    
+
+    // const Eigen::Vector3d in(1.0, 2.0, 3.0);
+    // // Action
+
+    // // proto::Matrix proto;
+    // // pack_into(in, &proto);
+    // // const Eigen::Vector3d out = unpack_from<Eigen::Vector3d>(proto);
+
+    // // const Eigen::VectorXd in{{1.0, 2.0, 3.0}};
+
+    // // // Action
+    // // proto::Matrix proto;
+    // // pack_into(in, &proto);
+    // // const Eigen::VectorXd out = unpack_from<Eigen::VectorXd>(proto);
+
     // sfm.solve_structure();
 
     // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
@@ -189,50 +226,50 @@ TEST(SFM_TEST, sfm_snippet_two) {
     // while (cv::waitKey(1000) != 32) {}    
 }
 
-TEST(SFM_TEST, structure_from_motion) {
-    const size_t img_width = 640;
-    const size_t img_height = 480;
-    const double fx = 500.0;
-    const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
+// TEST(SFM_TEST, structure_from_motion) {
+//     const size_t img_width = 640;
+//     const size_t img_height = 480;
+//     const double fx = 500.0;
+//     const double fy = fx;
+//     const double cx = img_width / 2.0;
+//     const double cy = img_height / 2.0;
 
-    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+//     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
 
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);     
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    // DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+//     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);     
+//     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+//     // DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
+//     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
 
-    cv::Mat image;
-    for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
-        const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-        // for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
-        //     image = survey.loadImageByImageIndex(j);
-            // sfm.add_image(image);
-        // }
-        for (int j = 0; j < 2; j++) {
-            image = survey.loadImageByImageIndex(j);
-            sfm.add_image(image);
-        }
-    }
-    // DataParser::Generator<cv::Mat>::iterator img_itr = generator.begin();
-    // while (img_itr != generator.end()) {
-    //     if ((*img_itr).empty()) {
-    //         continue;
-    //     }
-    //     std::cout << "ooga booga" << std::endl;
-    //     std::cout << "image is empty: " << (*img_itr).empty() << std::endl;
-    //     cv::imshow("ooga", *img_itr);
-    //     cv::waitKey(1000);
-    //     sfm.add_image(*img_itr);
-    //     ++img_itr;
-    // }
-    sfm.solve_structure();
-    // gtsam::Values output = sfm.get_structure_result();    
-    // output.print("result values: ");
-    // for (size_t i = 0; i < sfm.get_num_images_added(); i++) {
+//     cv::Mat image;
+//     for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
+//         const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
+//         // for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+//         //     image = survey.loadImageByImageIndex(j);
+//             // sfm.add_image(image);
+//         // }
+//         for (int j = 0; j < 2; j++) {
+//             image = survey.loadImageByImageIndex(j);
+//             sfm.add_image(image);
+//         }
+//     }
+//     // DataParser::Generator<cv::Mat>::iterator img_itr = generator.begin();
+//     // while (img_itr != generator.end()) {
+//     //     if ((*img_itr).empty()) {
+//     //         continue;
+//     //     }
+//     //     std::cout << "ooga booga" << std::endl;
+//     //     std::cout << "image is empty: " << (*img_itr).empty() << std::endl;
+//     //     cv::imshow("ooga", *img_itr);
+//     //     cv::waitKey(1000);
+//     //     sfm.add_image(*img_itr);
+//     //     ++img_itr;
+//     // }
+//     sfm.solve_structure();
+//     // gtsam::Values output = sfm.get_structure_result();    
+//     // output.print("result values: ");
+//     // for (size_t i = 0; i < sfm.get_num_images_added(); i++) {
         
-    // }
-}
+//     // }
+// }
 }  // namespace robot::experimental::learn_descriptors

From da30081faaa1cc33193310c4b25b793d0d3f0305 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Wed, 5 Feb 2025 15:03:16 -0500
Subject: [PATCH 06/45] progress

---
 common/geometry/BUILD                         |  28 +++
 common/geometry/opencv_viz.cc                 |  79 +++++++
 common/geometry/opencv_viz.hh                 |  11 +
 common/geometry/opencv_viz_example.cc         |  46 ++++
 common/geometry/opencv_viz_test.cc            |  89 ++++++++
 experimental/learn_descriptors/BUILD          |   3 +-
 experimental/learn_descriptors/gtsam_test.cc  |  94 +++++++-
 experimental/learn_descriptors/opencv_viz.cc  |  84 +++----
 .../learn_descriptors/opencv_viz_example.cc   |  45 ++--
 .../structure_from_motion.cc                  | 176 ++++++++-------
 .../structure_from_motion.hh                  | 120 +++++-----
 .../structure_from_motion_test.cc             | 210 ++++++++++++------
 .../learn_descriptors/symphony_lake_parser.cc |  18 +-
 .../learn_descriptors/symphony_lake_parser.hh |  45 ++--
 .../symphony_lake_parser_test.cc              |  13 +-
 15 files changed, 742 insertions(+), 319 deletions(-)
 create mode 100644 common/geometry/opencv_viz.cc
 create mode 100644 common/geometry/opencv_viz.hh
 create mode 100644 common/geometry/opencv_viz_example.cc
 create mode 100644 common/geometry/opencv_viz_test.cc

diff --git a/common/geometry/BUILD b/common/geometry/BUILD
index 8468ebe13..0a28b1e20 100644
--- a/common/geometry/BUILD
+++ b/common/geometry/BUILD
@@ -18,3 +18,31 @@ cc_test(
     "@com_google_googletest//:gtest_main",
   ]
 )
+
+cc_library(
+  name = "opencv_viz",
+  hdrs = ["opencv_viz.hh"],
+  srcs = ["opencv_viz.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@eigen",
+    "@opencv//:opencv"
+  ]
+)
+
+cc_test(
+  name = "opencv_viz_test",
+  srcs = ["opencv_viz_test.cc"],
+  deps = [
+    ":opencv_viz",
+    "@com_google_googletest//:gtest_main",
+  ]
+)
+
+cc_binary(
+  name = "opencv_viz_example",
+  srcs = ["opencv_viz_example.cc"],
+  deps = [
+    "@opencv//:opencv"
+  ]
+)
diff --git a/common/geometry/opencv_viz.cc b/common/geometry/opencv_viz.cc
new file mode 100644
index 000000000..48675e239
--- /dev/null
+++ b/common/geometry/opencv_viz.cc
@@ -0,0 +1,79 @@
+#include "common/geometry/opencv_viz.hh"
+
+#include <iostream>
+
+namespace robot::geometry::opencv_viz {
+template <typename Derived>
+cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase<Derived> &eigenMatrix) {
+    int cvType;
+    if constexpr (std::is_same_v<typename Derived::Scalar, double>) {
+        cvType = CV_64F;
+    } else if constexpr (std::is_same_v<typename Derived::Scalar, float>) {
+        cvType = CV_32F;
+    } else if constexpr (std::is_same_v<typename Derived::Scalar, int>) {
+        cvType = CV_32S;
+    } else {
+        static_assert(!std::is_same_v<Derived, Derived>, "Unsupported data type in Eigen matrix");
+    }
+    return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType,
+                   const_cast<void *>(static_cast<const void *>(eigenMatrix.derived().data())))
+        .clone();
+}
+cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
+    // Ensure R is a valid rotation matrix
+    CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001);
+
+    double trace = R(0, 0) + R(1, 1) + R(2, 2);
+    double theta = std::acos((trace - 1) / 2.0);
+
+    if (std::abs(theta) < 1e-6) {
+        return cv::Vec3d(0, 0, 0);
+    }
+
+    double sinTheta = std::sin(theta);
+    cv::Vec3d axis((R(2, 1) - R(1, 2)) / (2.0 * sinTheta), (R(0, 2) - R(2, 0)) / (2.0 * sinTheta),
+                   (R(1, 0) - R(0, 1)) / (2.0 * sinTheta));
+
+    return axis * theta;
+}
+
+void viz_scene(const std::vector<Eigen::Isometry3d> &poses,
+               const std::vector<Eigen::Vector3d> &points, const bool show_grid) {
+    cv::viz::Viz3d window("Viz Scene");
+
+    constexpr double pose_size = .5;
+    for (unsigned int i = 0; i < poses.size(); i++) {
+        cv::Affine3d cv_pose(eigen_to_cv_mat(Eigen::Matrix4d(poses[i].matrix().transpose())));
+        window.showWidget("rigid_transform_" + std::to_string(i),
+                          cv::viz::WCoordinateSystem(pose_size), cv_pose);
+    }
+    constexpr double point_radius = 0.08;
+    constexpr int sphere_res = 10;
+    const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
+    for (unsigned int i = 0; i < points.size(); i++) {
+        const Eigen::Vector3d &point = points[i];
+        window.showWidget("point_" + std::to_string(i),
+                          cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius,
+                                           sphere_res, point_color));
+    }
+
+    window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+
+    if (show_grid) {
+        constexpr unsigned int num_cells = 6;
+        constexpr double units = 1.0;
+        const cv::viz::Color color = cv::viz::Color::gray();
+        const cv::Vec2i cells = cv::Vec2i::all(num_cells);
+        const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
+        window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
+        window.showWidget("yz_grid",
+                          cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0),
+                                         cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+        window.showWidget("xz_grid",
+                          cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0),
+                                         cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    }
+
+    window.spin();
+}
+}  // namespace robot::geometry::opencv_viz
diff --git a/common/geometry/opencv_viz.hh b/common/geometry/opencv_viz.hh
new file mode 100644
index 000000000..d5530ecef
--- /dev/null
+++ b/common/geometry/opencv_viz.hh
@@ -0,0 +1,11 @@
+#pragma once
+#include <vector>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "opencv2/viz.hpp"
+
+namespace robot::geometry::opencv_viz {
+void viz_scene(const std::vector<Eigen::Isometry3d> &poses,
+               const std::vector<Eigen::Vector3d> &points, const bool show_grid = true);
+}
\ No newline at end of file
diff --git a/common/geometry/opencv_viz_example.cc b/common/geometry/opencv_viz_example.cc
new file mode 100644
index 000000000..6a045b0bc
--- /dev/null
+++ b/common/geometry/opencv_viz_example.cc
@@ -0,0 +1,46 @@
+#include "opencv2/viz.hpp"
+
+void demo() {
+    cv::viz::Viz3d window("My Window");
+
+    window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+
+    constexpr unsigned int num_cells = 6;
+    constexpr double units = 1.0;
+    const cv::viz::Color color = cv::viz::Color::white();
+    const cv::Vec2i cells = cv::Vec2i::all(num_cells);
+    const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
+    window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
+    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50)));
+    constexpr bool ALWAYS_FACE_CAMERA = true;
+    constexpr double TEXT_SCALE = 0.1;
+    window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3),
+                                                  TEXT_SCALE, ALWAYS_FACE_CAMERA));
+
+    constexpr bool FIXED_TEXT = false;
+    cv::Affine3d world_from_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0},  // rotation
+                                       {0.2, 0.4, 0.6}                        // translation
+    );
+
+    window.showWidget(
+        "text_3d_fixed",
+        cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
+        world_from_fixed_text);
+    constexpr double COORD_SCALE = 0.2;
+    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE),
+                      world_from_fixed_text);
+
+    constexpr double CIRCLE_RADIUS_M = 0.5;
+    cv::Affine3d world_from_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0},  // rotation
+                                   {0.0, 0.0, 1.5}                     // translation
+    );
+    window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_from_circle);
+
+    window.spin();
+}
+
+int main() { demo(); }
diff --git a/common/geometry/opencv_viz_test.cc b/common/geometry/opencv_viz_test.cc
new file mode 100644
index 000000000..e5e8265af
--- /dev/null
+++ b/common/geometry/opencv_viz_test.cc
@@ -0,0 +1,89 @@
+#include "common/geometry/opencv_viz.hh"
+
+#include "gtest/gtest.h"
+
+namespace robot::geometry::opencv_viz {
+namespace {
+bool is_test() {
+    return std::getenv("BAZEL_TEST") != nullptr &&
+           std::getenv("BUILD_WORKSPACE_DIRECTORY") == nullptr;
+}
+}  // namespace
+
+TEST(OpencvVizTest, demo) {
+    cv::viz::Viz3d window("My Window");
+
+    window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+
+    constexpr unsigned int num_cells = 6;
+    constexpr double units = 1.0;
+    const cv::viz::Color color = cv::viz::Color::white();
+    const cv::Vec2i cells = cv::Vec2i::all(num_cells);
+    const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
+    window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
+    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50)));
+    constexpr bool ALWAYS_FACE_CAMERA = true;
+    constexpr double TEXT_SCALE = 0.1;
+    window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3),
+                                                  TEXT_SCALE, ALWAYS_FACE_CAMERA));
+
+    constexpr bool FIXED_TEXT = false;
+    cv::Affine3d world_to_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0},  // rotation
+                                     {0.2, 0.4, 0.6}                        // translation
+    );
+
+    window.showWidget(
+        "text_3d_fixed",
+        cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
+        world_to_fixed_text);
+    constexpr double COORD_SCALE = 0.2;
+    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE),
+                      world_to_fixed_text);
+
+    constexpr double CIRCLE_RADIUS_M = 0.5;
+    cv::Affine3d world_to_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0},  // rotation
+                                 {0.0, 0.0, 1.5}                     // translation
+    );
+    window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_to_circle);
+    if (!is_test()) {
+        window.spin();
+    }
+}
+
+TEST(OpencvVizTest, cube_test) {
+    std::vector<Eigen::Vector3d> cube_W;
+    float cube_size = 1.0f;
+    cube_W.push_back(Eigen::Vector3d(0, 0, 0));
+    cube_W.push_back(Eigen::Vector3d(cube_size, 0, 0));
+    cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, 0));
+    cube_W.push_back(Eigen::Vector3d(0, cube_size, 0));
+    cube_W.push_back(Eigen::Vector3d(0, 0, cube_size));
+    cube_W.push_back(Eigen::Vector3d(cube_size, 0, cube_size));
+    cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
+    cube_W.push_back(Eigen::Vector3d(0, cube_size, cube_size));
+
+    std::vector<Eigen::Isometry3d> poses;
+
+    Eigen::Matrix3d rotation0(
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    Eigen::Isometry3d pose0;
+    pose0.translation() = Eigen::Vector3d(4, 0, 0);
+    pose0.linear() = rotation0;
+    poses.push_back(pose0);
+
+    Eigen::Isometry3d pose1;
+    pose1.linear() =
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0;
+    pose1.translation() = Eigen::Vector3d(0, 4, 0);
+    poses.push_back(pose1);
+
+    if (!is_test()) {
+        viz_scene(poses, cube_W);
+    }
+}
+}  // namespace robot::geometry::opencv_viz
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 83a43f8d1..da4ec0163 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -56,7 +56,8 @@ cc_test(
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
     ":structure_from_motion",
-    "//common/math:matrix_to_proto"
+    "//common/math:matrix_to_proto",
+    "//common/geometry:opencv_viz"
   ]
 )
 
diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc
index 82ac8bf6a..96b643d03 100644
--- a/experimental/learn_descriptors/gtsam_test.cc
+++ b/experimental/learn_descriptors/gtsam_test.cc
@@ -8,6 +8,7 @@
 #include "gtsam/inference/Symbol.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/GeneralSFMFactor.h"
 #include "gtsam/slam/PriorFactor.h"
 #include "gtsam/slam/ProjectionFactor.h"
@@ -103,20 +104,89 @@ TEST(GtsamTesting, gtsam_simple_cube) {
         EXPECT_NEAR(cube_W[i][1], result_point[1], TOL);
         EXPECT_NEAR(cube_W[i][2], result_point[2], TOL);
     }
+}
+TEST(GtsamTesting, gtsam_simple_cube_2) {
+    std::vector<gtsam::Point3> cube_W;
+    float cube_size = 1.0f;
+    cube_W.push_back(gtsam::Point3(0, 0, 0));
+    cube_W.push_back(gtsam::Point3(cube_size, 0, 0));
+    cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0));
+    cube_W.push_back(gtsam::Point3(0, cube_size, 0));
+    cube_W.push_back(gtsam::Point3(0, 0, cube_size));
+    cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size));
+    cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size));
+    cube_W.push_back(gtsam::Point3(0, cube_size, cube_size));
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Values initial;
+    gtsam::NonlinearFactorGraph graph;
+
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+    initial.insert(gtsam::Symbol('K', 0), *K);
+
+    auto poseNoise = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
+    auto measurementNoise = gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    std::vector<gtsam::Pose3> poses;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras;
+
+    Eigen::Matrix3d rotation0(
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera0(pose0, *K);
+    graph.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), pose0, poseNoise);
+    poses.push_back(pose0);
+    cameras.push_back(camera0);
+
+    gtsam::Pose3 pose1(
+        gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+                    rotation0),
+        gtsam::Point3(0, 4, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(pose1, *K);
+    graph.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(
+        gtsam::Symbol('x', 0), gtsam::Symbol('x', 1), pose0.between(pose1), poseNoise);
+    poses.push_back(pose1);
+    cameras.push_back(camera1);
 
-    // // Setup
-    // const Eigen::Matrix3d in{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}};
+    for (size_t i = 0; i < poses.size(); i++) {
+        initial.insert(gtsam::Symbol('x', i), poses[i]);
+    }
+    for (size_t i = 0; i < cube_W.size(); i++) {
+        initial.insert(gtsam::Symbol('L', i), gtsam::Point3(0, 0, 0));
+        for (size_t j = 0; j < poses.size(); j++) {
+            gtsam::Point2 pixel_p_cube_C = cameras[j].project(cube_W[i]);
+            if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C, img_width, img_height)) {
+                graph.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
+                    pixel_p_cube_C, measurementNoise, gtsam::Symbol('x', j), gtsam::Symbol('L', i),
+                    gtsam::Symbol('K', 0));
+            }
+        }
+    }
 
-    // // Action
-    // proto::Matrix proto;
-    // pack_into(in, &proto);
-    // const Eigen::Matrix3d out = unpack_from<Eigen::Matrix3d>(proto);
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    gtsam::Values result = optimizer.optimize();
+
+    result.print("Optimized Results:\n");
 
-    // // Verification
-    // EXPECT_EQ(in.rows(), out.rows());
-    // EXPECT_EQ(in.cols(), out.cols());
-    // for (int i = 0; i < in.rows(); i++) {
-    //     EXPECT_EQ(in(i), out(i));
-    // }
+    constexpr double TOL = 1e-3;
+    for (size_t i = 0; i < poses.size(); i++) {
+        gtsam::Pose3 result_pose = result.at<gtsam::Pose3>(gtsam::Symbol('x', i));
+        EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL);
+        EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL);
+        EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL);
+        EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL));
+    }
+    for (size_t i = 0; i < cube_W.size(); i++) {
+        gtsam::Point3 result_point = result.at<gtsam::Point3>(gtsam::Symbol('L', i));
+        EXPECT_NEAR(cube_W[i][0], result_point[0], TOL);
+        EXPECT_NEAR(cube_W[i][1], result_point[1], TOL);
+        EXPECT_NEAR(cube_W[i][2], result_point[2], TOL);
+    }
 }
 }  // namespace robot::experimental::gtsam_testing
\ No newline at end of file
diff --git a/experimental/learn_descriptors/opencv_viz.cc b/experimental/learn_descriptors/opencv_viz.cc
index f61dd508d..f24726e92 100644
--- a/experimental/learn_descriptors/opencv_viz.cc
+++ b/experimental/learn_descriptors/opencv_viz.cc
@@ -5,23 +5,25 @@
 
 namespace opencv_viz {
 template <typename Derived>
-cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase<Derived>& eigenMatrix) {
+cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase<Derived> &eigenMatrix) {
     int cvType;
     if constexpr (std::is_same_v<typename Derived::Scalar, double>) {
-        cvType = CV_64F; // 64-bit floating point
+        cvType = CV_64F;  // 64-bit floating point
     } else if constexpr (std::is_same_v<typename Derived::Scalar, float>) {
-        cvType = CV_32F; // 32-bit floating point
+        cvType = CV_32F;  // 32-bit floating point
     } else if constexpr (std::is_same_v<typename Derived::Scalar, int>) {
-        cvType = CV_32S; // 32-bit signed integer
+        cvType = CV_32S;  // 32-bit signed integer
     } else {
         static_assert(!std::is_same_v<Derived, Derived>, "Unsupported data type in Eigen matrix");
     }
-    return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType, const_cast<void*>(static_cast<const void*>(eigenMatrix.derived().data()))).clone();
+    return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType,
+                   const_cast<void *>(static_cast<const void *>(eigenMatrix.derived().data())))
+        .clone();
 }
 cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     // Ensure R is a valid rotation matrix
     CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001);
-    
+
     double trace = R(0, 0) + R(1, 1) + R(2, 2);
     double theta = std::acos((trace - 1) / 2.0);
 
@@ -30,45 +32,32 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     }
 
     double sinTheta = std::sin(theta);
-    cv::Vec3d axis(
-        (R(2, 1) - R(1, 2)) / (2.0 * sinTheta),
-        (R(0, 2) - R(2, 0)) / (2.0 * sinTheta),
-        (R(1, 0) - R(0, 1)) / (2.0 * sinTheta)
-    );
+    cv::Vec3d axis((R(2, 1) - R(1, 2)) / (2.0 * sinTheta), (R(0, 2) - R(2, 0)) / (2.0 * sinTheta),
+                   (R(1, 0) - R(0, 1)) / (2.0 * sinTheta));
 
     return axis * theta;
 }
-void viz_scene(const std::vector<Eigen::MatrixXd> &poses, const std::vector<Eigen::Vector3d> &points) {
+void viz_scene(const std::vector<Eigen::MatrixXd> &poses,
+               const std::vector<Eigen::Vector3d> &points) {
     cv::viz::Viz3d window("Viz Scene");
 
     std::vector<cv::Affine3d> cv_poses;
-    constexpr double pose_size = 0.2;    
+    constexpr double pose_size = 0.2;
     for (unsigned int i = 0; i < poses.size(); i++) {
         const Eigen::MatrixXd &pose = poses[i];
-        cv::Affine3d cv_pose(
-            eigen_to_cv_mat(pose.block(0,0,3,3)) ,
-            cv::Affine3d::Vec3{pose(0,3), pose(1,3), pose(2,3)}            
-        );
-        window.showWidget(
-            "pose_" + std::to_string(i), 
-            cv::viz::WCoordinateSystem(pose_size),
-            cv_pose
-            );
+        cv::Affine3d cv_pose(eigen_to_cv_mat(pose.block(0, 0, 3, 3)),
+                             cv::Affine3d::Vec3{pose(0, 3), pose(1, 3), pose(2, 3)});
+        window.showWidget("pose_" + std::to_string(i), cv::viz::WCoordinateSystem(pose_size),
+                          cv_pose);
     }
     constexpr double point_radius = 0.2;
     constexpr int sphere_res = 10;
     const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
     for (unsigned int i = 0; i < points.size(); i++) {
         const Eigen::Vector3d &point = points[i];
-        window.showWidget(
-            "translation_" + std::to_string(i),
-            cv::viz::WSphere(
-                cv::Point3d(point[0], point[1], point[2]),
-                point_radius,
-                sphere_res,
-                point_color
-            )
-        );
+        window.showWidget("translation_" + std::to_string(i),
+                          cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius,
+                                           sphere_res, point_color));
     }
 
     window.showWidget("world_frame", cv::viz::WCoordinateSystem());
@@ -79,41 +68,36 @@ void viz_scene(const std::vector<Eigen::MatrixXd> &poses, const std::vector<Eige
     const cv::Vec2i cells = cv::Vec2i::all(num_cells);
     const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
     window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
-    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(1,0,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
-    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(0,1,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
+    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
     window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50)));
-    
+
     constexpr bool ALWAYS_FACE_CAMERA = true;
     constexpr double TEXT_SCALE = 0.1;
     window.showWidget("text_3d", cv::viz::WText3D("hello world 3d!", cv::Point3d(0.1, 0.2, 0.3),
                                                   TEXT_SCALE, ALWAYS_FACE_CAMERA));
 
     constexpr bool FIXED_TEXT = false;
-    cv::Affine3d world_from_fixed_text(
-        cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation
-        {0.2, 0.4, 0.6} // translation
+    cv::Affine3d world_from_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0},  // rotation
+                                       {0.2, 0.4, 0.6}                        // translation
     );
 
     window.showWidget(
         "text_3d_fixed",
         cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
-        world_from_fixed_text
-    );
+        world_from_fixed_text);
     constexpr double COORD_SCALE = 0.2;
-    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), world_from_fixed_text);
+    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE),
+                      world_from_fixed_text);
 
     constexpr double CIRCLE_RADIUS_M = 0.5;
-    cv::Affine3d world_from_circle(
-        cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation
-        {0.0, 0.0, 1.5} // translation
-    );
-    window.showWidget(
-        "circle",
-        cv::viz::WCircle(CIRCLE_RADIUS_M),
-        world_from_circle
+    cv::Affine3d world_from_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0},  // rotation
+                                   {0.0, 0.0, 1.5}                     // translation
     );
+    window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_from_circle);
 
     window.spin();
 }
-}
-
+}  // namespace opencv_viz
diff --git a/experimental/learn_descriptors/opencv_viz_example.cc b/experimental/learn_descriptors/opencv_viz_example.cc
index c37c3ba4f..bdbb28c95 100644
--- a/experimental/learn_descriptors/opencv_viz_example.cc
+++ b/experimental/learn_descriptors/opencv_viz_example.cc
@@ -1,9 +1,9 @@
+#include "eigen/Eigen/Geometry"
 #include "opencv2/viz.hpp"
 #include "opencv_viz.cc"
-#include "eigen/Eigen/Geometry"
 
 void demo() {
- cv::viz::Viz3d window("My Window");
+    cv::viz::Viz3d window("My Window");
 
     window.showWidget("world_frame", cv::viz::WCoordinateSystem());
 
@@ -13,8 +13,10 @@ void demo() {
     const cv::Vec2i cells = cv::Vec2i::all(num_cells);
     const cv::Vec2d cell_sizes = cv::Vec2d::all(units);
     window.showWidget("xy_grid", cv::viz::WGrid(cells, cell_sizes, color));
-    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(1,0,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
-    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0.,0.,0.), cv::Point3d(0,1,0), cv::Point3d(0,0,1), cells, cell_sizes, color));
+    window.showWidget("yz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(1, 0, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
+    window.showWidget("xz_grid", cv::viz::WGrid(cv::Point3d(0., 0., 0.), cv::Point3d(0, 1, 0),
+                                                cv::Point3d(0, 0, 1), cells, cell_sizes, color));
     window.showWidget("text_overlay", cv::viz::WText("hello world overlay!", cv::Point(20, 50)));
     constexpr bool ALWAYS_FACE_CAMERA = true;
     constexpr double TEXT_SCALE = 0.1;
@@ -22,29 +24,23 @@ void demo() {
                                                   TEXT_SCALE, ALWAYS_FACE_CAMERA));
 
     constexpr bool FIXED_TEXT = false;
-    cv::Affine3d world_from_fixed_text(
-        cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0}, // rotation
-        {0.2, 0.4, 0.6} // translation
+    cv::Affine3d world_from_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0},  // rotation
+                                       {0.2, 0.4, 0.6}                        // translation
     );
 
     window.showWidget(
         "text_3d_fixed",
         cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
-        world_from_fixed_text
-    );
+        world_from_fixed_text);
     constexpr double COORD_SCALE = 0.2;
-    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE), world_from_fixed_text);
+    window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE),
+                      world_from_fixed_text);
 
     constexpr double CIRCLE_RADIUS_M = 0.5;
-    cv::Affine3d world_from_circle(
-        cv::Affine3d::Vec3{0.0, 0.0, 0.0}, // rotation
-        {0.0, 0.0, 1.5} // translation
-    );
-    window.showWidget(
-        "circle",
-        cv::viz::WCircle(CIRCLE_RADIUS_M),
-        world_from_circle
+    cv::Affine3d world_from_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0},  // rotation
+                                   {0.0, 0.0, 1.5}                     // translation
     );
+    window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_from_circle);
 
     window.spin();
 }
@@ -70,14 +66,11 @@ void demo_cube() {
 
     // Eigen::Matrix3d
     // cv::Affine3d pose1(
-    //     opencv_viz::eigen_to_cv_mat(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0),
-    //     Eigen::Vector3d(0, 4, 0));
+    //     opencv_viz::eigen_to_cv_mat(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0,
+    //     1)).toRotationMatrix() * rotation0), Eigen::Vector3d(0, 4, 0));
     // gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(pose1, *K);
-    // graph.emplace_shared<gtsam::PriorFactor<cv::Affine3d>>(gtsam::Symbol('x', 1), pose1, poseNoise);
-    // poses.push_back(pose1);
-    // cameras.push_back(camera1);
+    // graph.emplace_shared<gtsam::PriorFactor<cv::Affine3d>>(gtsam::Symbol('x', 1), pose1,
+    // poseNoise); poses.push_back(pose1); cameras.push_back(camera1);
 }
 
-int main() {
-    demo();
-}
+int main() { demo(); }
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index d19634ef1..92e9bc0a7 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -1,4 +1,5 @@
 #include "experimental/learn_descriptors/structure_from_motion.hh"
+
 #include <filesystem>
 #include <sstream>
 
@@ -9,18 +10,14 @@ namespace robot::experimental::learn_descriptors {
 const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() {
     Eigen::Affine3d transform = Eigen::Affine3d::Identity();
     transform.translate(Eigen::Vector3d::Zero());
-    transform.rotate(Eigen::Matrix3d(
-        Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1,0,0)) *
-        Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0,1,0)))
-    );
+    transform.rotate(Eigen::Matrix3d(Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1, 0, 0)) *
+                                     Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0))));
 
     // std::stringstream ss;
-    // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()), gtsam::Point3(transform.translation()));
-    // json json_obj;
-    // json_obj["T_symlake_boat_cam"] = ss.str();
-    // fs::path output_dir = "output";
-    // fs::path output_file = output_dir / "output_sfm.json";
-    // if (!fs::exists(output_dir)) {
+    // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()),
+    // gtsam::Point3(transform.translation())); json json_obj; json_obj["T_symlake_boat_cam"] =
+    // ss.str(); fs::path output_dir = "output"; fs::path output_file = output_dir /
+    // "output_sfm.json"; if (!fs::exists(output_dir)) {
     //     fs::create_directory(output_dir);
     // }
     // std::ofstream file(output_file);
@@ -38,9 +35,8 @@ std::string pose_to_string(gtsam::Pose3 pose) {
     return ss.str();
 }
 
-const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3(
-    gtsam::Rot3(T_symlake_boat_cam.rotation()), 
-    T_symlake_boat_cam.translation());
+const gtsam::Pose3 StructureFromMotion::default_initial_pose =
+    gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation());
 // gtsam::Pose3(
 //     gtsam::Rot3(Eigen::Matrix3d(
 //         Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() *
@@ -49,12 +45,13 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose = gtsam::Pose3(
 //     gtsam::Point3::Identity()
 // );
 
-StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose,
-                               Frontend::MatcherType frontend_matcher) {
+StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
+                                         gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose,
+                                         Frontend::MatcherType frontend_matcher) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
     backend_ = Backend(K);
 
-    set_initial_pose(initial_pose);   
+    set_initial_pose(initial_pose);
     // backend_.get_current_initial_values().print("Ooga: ");
 }
 
@@ -63,21 +60,26 @@ void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
 }
 
 void StructureFromMotion::add_image(const cv::Mat &img) {
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors = frontend_.get_keypoints_and_descriptors(img);   
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
+        frontend_.get_keypoints_and_descriptors(img);
     landmarks_.push_back(std::vector<Backend::Landmark>());
     if (get_num_images_added() > 0) {
-        std::vector<cv::DMatch> matches = frontend_.get_matches(img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);        
+        std::vector<cv::DMatch> matches = frontend_.get_matches(
+            img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);
         Frontend::enforce_bijective_matches(matches);
 
         matches_.push_back(matches);
-        gtsam::Pose3 between_value = gtsam::Pose3::Identity();
+        gtsam::Pose3 between_value = get_backend().estimate_pose(
+            img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, matches,
+            get_backend().get_K());
         backend_.add_between_factor(
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
-            between_value);        
-        gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0,0,1));
-        gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at<gtsam::Pose3>(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1));      
-        gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at<gtsam::Pose3>(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()));      
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value);
+        gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0, 0, 1));
+        gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at<gtsam::Pose3>(
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1));
+        gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at<gtsam::Pose3>(
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()));
         int count = 0;
         for (const cv::DMatch match : matches) {
             if (count < 1) {
@@ -87,8 +89,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
                     {"T_world_cam1", pose_to_string(T_world_cam1)},
                     {"T_world_cam2", pose_to_string(T_world_cam1)},
                     {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)},
-                    {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}
-                };
+                    {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}};
                 fs::path output_dir = "output";
                 fs::path output_file = output_dir / "output_sfm.json";
                 if (!fs::exists(output_dir)) {
@@ -104,30 +105,29 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
             // std::cout << "T_world_landmark" <<  T_world_cam1 * T_cam_landmark << std::endl;
             Backend::Landmark landmark_cam_1(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()-1),
+                gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
                 gtsam::Point2(
-                    static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
-                    static_cast<double>(img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y) 
-                ),
-                (T_world_cam1 * T_cam_landmark).translation()
-            );
+                    static_cast<double>(
+                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
+                    static_cast<double>(
+                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
+                (T_world_cam1 * T_cam_landmark).translation());
             Backend::Landmark landmark_cam_2(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
                 gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
                 gtsam::Point2(
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
-                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)
-                ),
-                (T_world_cam2 * T_cam_landmark).translation()
-            );
-            landmarks_[get_num_images_added()-1].push_back(landmark_cam_1);
+                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
+                (T_world_cam2 * T_cam_landmark).translation());
+            landmarks_[get_num_images_added() - 1].push_back(landmark_cam_1);
             landmarks_[get_num_images_added()].push_back(landmark_cam_2);
             backend_.add_landmark(landmark_cam_1);
             backend_.add_landmark(landmark_cam_2);
             landmark_count_++;
-        }      
-        // std::cout << "number of landmarks: " << count << std::endl;  
-        // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl;
+        }
+        // std::cout << "number of landmarks: " << count << std::endl;
+        // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const
+        // auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl;
         // backend_.get_current_initial_values().print("Current initial values: ");
         // std::cout << "landmark_count_: " << landmark_count_ << std::endl;
     }
@@ -201,45 +201,42 @@ std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
 }
 
 void Frontend::threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold) {
-    matches.erase(
-        std::remove_if(matches.begin(), matches.end(), [dist_threshhold](const cv::DMatch& match) {
-            return match.distance > dist_threshhold;
-        }),
-        matches.end());
+    matches.erase(std::remove_if(matches.begin(), matches.end(),
+                                 [dist_threshhold](const cv::DMatch &match) {
+                                     return match.distance > dist_threshhold;
+                                 }),
+                  matches.end());
 }
 
-void Frontend::enforce_bijective_matches(std::vector<cv::DMatch>& matches) {    
+void Frontend::enforce_bijective_matches(std::vector<cv::DMatch> &matches) {
     std::unordered_map<int, cv::DMatch> bestQueryMatch;
     std::unordered_map<int, cv::DMatch> bestTrainMatch;
-    
-    for (const auto& match : matches) {
+
+    for (const auto &match : matches) {
         int queryIdx = match.queryIdx;
         int trainIdx = match.trainIdx;
-        
-        if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() || 
+
+        if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() ||
             match.distance < bestQueryMatch[queryIdx].distance) {
             bestQueryMatch[queryIdx] = match;
         }
-        
-        if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() || 
+
+        if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() ||
             match.distance < bestTrainMatch[trainIdx].distance) {
             bestTrainMatch[trainIdx] = match;
         }
     }
-    
-    matches.erase(
-        std::remove_if(
-            matches.begin(), matches.end(),
-            [&bestQueryMatch, &bestTrainMatch](const cv::DMatch& match) {
-                int queryIdx = match.queryIdx;
-                int trainIdx = match.trainIdx;
-
-                return bestQueryMatch[queryIdx].trainIdx != trainIdx ||
-                       bestTrainMatch[trainIdx].queryIdx != queryIdx;
-            }),
-        matches.end());
-}
 
+    matches.erase(std::remove_if(matches.begin(), matches.end(),
+                                 [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) {
+                                     int queryIdx = match.queryIdx;
+                                     int trainIdx = match.trainIdx;
+
+                                     return bestQueryMatch[queryIdx].trainIdx != trainIdx ||
+                                            bestTrainMatch[trainIdx].queryIdx != queryIdx;
+                                 }),
+                  matches.end());
+}
 
 bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                                  std::vector<cv::DMatch> &matches_out) const {
@@ -295,7 +292,7 @@ Backend::Backend() {
     const double cy = img_height / 2.0;
 
     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-    
+
     initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
 }
 
@@ -310,10 +307,12 @@ void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &
     // initial_estimate_.print("values after adding prior: ");
 }
 
-void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, pose_noise_);
-    // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 << std::endl;
-    // initial_estimate_.print("values when adding between factor: ");
+void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
+                                 const gtsam::Pose3 &value) {
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value,
+                                                              pose_noise_);
+    // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 <<
+    // std::endl; initial_estimate_.print("values when adding between factor: ");
     initial_estimate_.insert(symbol_2, initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
 }
 
@@ -325,8 +324,8 @@ void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
 
 void Backend::add_landmark(const Landmark &landmark) {
     graph_.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
-        landmark.projection, measurement_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0) 
-    );
+        landmark.projection, measurement_noise_, landmark.cam_pose_symbol,
+        landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0));
     if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) {
         initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess);
     } else {
@@ -334,13 +333,39 @@ void Backend::add_landmark(const Landmark &landmark) {
     }
 }
 
+gtsam::Pose3 Backend::estimate_pose(const std::vector<cv::KeyPoint> &kpts1,
+                                    const std::vector<cv::KeyPoint> &kpts2,
+                                    const std::vector<cv::DMatch> &matches,
+                                    const gtsam::Cal3_S2 &K) {
+    std::vector<cv::Point2f> pts1;
+    std::vector<cv::Point2f> pts2;
+    for (const cv::DMatch &match : matches) {
+        std::cout << "query point: " << kpts1[match.queryIdx].pt << std::endl;
+        std::cout << "trian point: " << kpts2[match.trainIdx].pt << std::endl;
+        pts1.push_back(kpts1[match.queryIdx].pt);
+        pts2.push_back(kpts2[match.trainIdx].pt);
+    }
+    cv::Mat cv_K = (cv::Mat_<double>(3, 3) << K.fx(), K.skew(), K.px(), 0, K.fy(), K.py(), 0, 0, 1);
+    cv::Mat E = cv::findEssentialMat(pts1, pts2, cv_K, cv::RANSAC, 0.999, 1.0);
+    cv::Mat R, t;
+    cv::recoverPose(E, pts1, pts2, cv_K, R, t);
+    gtsam::Point3 translation(t.at<double>(0, 0), t.at<double>(1, 0), t.at<double>(2, 0));
+    gtsam::Matrix3 mat_rot;
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
+            mat_rot(i, j) = R.at<double>(i, j);
+        }
+    }
+    return gtsam::Pose3(gtsam::Rot3(mat_rot), translation);
+}
+
 void Backend::solve_graph() {
     gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
     result_ = optimizer.optimize();
 }
 
 // json SFM_Logger::gtsam_pose3_to_json(const gtsam::Pose3 &pose) {
-//     json json_obj;  
+//     json json_obj;
 //     json_obj = {
 //         {},
 
@@ -349,7 +374,8 @@ void Backend::solve_graph() {
 //     return json_obj;
 // }
 
-// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path &output_path) {
+// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path
+// &output_path) {
 //     json json_obj;
 //     json_obj["add_image_1"] = {
 //         {"T_world_cam1", pose_to_string(T_world_cam1)},
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index b2bbfdefa..991aa2f68 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -1,6 +1,9 @@
 #pragma once
 
-#include <opencv2/opencv.hpp>
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <utility>
 
 #include "Eigen/Core"
 #include "gtsam/geometry/Point3.h"
@@ -8,17 +11,12 @@
 #include "gtsam/inference/Symbol.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/GeneralSFMFactor.h"
 #include "gtsam/slam/PriorFactor.h"
-#include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/ProjectionFactor.h"
-
-#include <set>
-#include <map>
-#include <unordered_map>
-#include <utility>
-
 #include "nlohmann/json.hpp"
+#include "opencv2/opencv.hpp"
 
 using json = nlohmann::json;
 
@@ -32,8 +30,8 @@ class Frontend {
     Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
     ~Frontend(){};
 
-    const ExtractorType& get_extractor_type() const { return extractor_type_; };
-    const MatcherType& get_matcher_type() const { return matcher_type_; };
+    const ExtractorType &get_extractor_type() const { return extractor_type_; };
+    const MatcherType &get_matcher_type() const { return matcher_type_; };
 
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
         const cv::Mat &img) const;
@@ -50,7 +48,7 @@ class Frontend {
                              const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
                              std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
         cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
-    }    
+    }
 
    private:
     bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
@@ -66,60 +64,75 @@ class Frontend {
     cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
 };
 class Backend {
-    public:
-        static constexpr char pose_symbol_char = 'x';
-        static constexpr char landmark_symbol_char = 'l';
-        static constexpr char camera_symbol_char = 'k';
-
-        struct Landmark {
-            Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol, const gtsam::Point2 &projection, const gtsam::Point3 &initial_guess=gtsam::Point3::Identity()) 
-                : lmk_factor_symbol(lmk_factor_symbol), cam_pose_symbol(cam_pose_symbol), projection(projection), initial_guess(initial_guess) {};
-            const gtsam::Symbol lmk_factor_symbol; 
-            const gtsam::Symbol cam_pose_symbol;
-            const gtsam::Point2 projection;
-            const gtsam::Point3 initial_guess;
-        };
-
-        Backend();
-        Backend(gtsam::Cal3_S2 K);
-        ~Backend(){};
-    
-        void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value);
-        void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2, const gtsam::Pose3 &value);
-
-        void add_landmarks(const std::vector<Landmark> &landmarks);
-        void add_landmark(const Landmark &landmark);
-
-        void solve_graph();
-        const gtsam::Values& get_current_initial_values() const { return initial_estimate_; };
-        const gtsam::Values& get_result() const {
-            return result_;
-        };
-    private:                
-        gtsam::Values initial_estimate_;
-        gtsam::Values result_;
-        gtsam::NonlinearFactorGraph graph_;
-
-        gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ = gtsam::noiseModel::Isotropic::Sigma(2, 1.0);        
-        gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
+   public:
+    static constexpr char pose_symbol_char = 'x';
+    static constexpr char landmark_symbol_char = 'l';
+    static constexpr char camera_symbol_char = 'k';
+
+    struct Landmark {
+        Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol,
+                 const gtsam::Point2 &projection,
+                 const gtsam::Point3 &initial_guess = gtsam::Point3::Identity())
+            : lmk_factor_symbol(lmk_factor_symbol),
+              cam_pose_symbol(cam_pose_symbol),
+              projection(projection),
+              initial_guess(initial_guess){};
+        const gtsam::Symbol lmk_factor_symbol;
+        const gtsam::Symbol cam_pose_symbol;
+        const gtsam::Point2 projection;
+        const gtsam::Point3 initial_guess;
+    };
+
+    Backend();
+    Backend(gtsam::Cal3_S2 K);
+    ~Backend(){};
+
+    void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value);
+    void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
+                            const gtsam::Pose3 &value);
+
+    void add_landmarks(const std::vector<Landmark> &landmarks);
+    void add_landmark(const Landmark &landmark);
+    gtsam::Pose3 estimate_pose(const std::vector<cv::KeyPoint> &kpts1,
+                               const std::vector<cv::KeyPoint> &kpts2,
+                               const std::vector<cv::DMatch> &matches, const gtsam::Cal3_S2 &K);
+
+    void solve_graph();
+    const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
+    const gtsam::Values &get_result() const { return result_; };
+    const gtsam::Cal3_S2 &get_K() const { return K_; };
+
+   private:
+    gtsam::Cal3_S2 K_;
+
+    gtsam::Values initial_estimate_;
+    gtsam::Values result_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
+        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
 };
 class StructureFromMotion {
    public:
     static const Eigen::Affine3d T_symlake_boat_cam;
-    static const gtsam::Pose3 default_initial_pose; 
-    StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose= default_initial_pose,
-                   Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
+    static const gtsam::Pose3 default_initial_pose;
+    StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K,
+                        gtsam::Pose3 initial_pose = default_initial_pose,
+                        Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
     ~StructureFromMotion(){};
-    
+
     void set_initial_pose(gtsam::Pose3 initial_pose);
     void add_image(const cv::Mat &img);
     void solve_structure() { backend_.solve_graph(); };
-    const gtsam::Values& get_structure_result() { return backend_.get_result(); };
+    const gtsam::Values &get_structure_result() { return backend_.get_result(); };
 
     Frontend get_frontend() { return frontend_; };
     Backend get_backend() { return backend_; }
     size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); };
     size_t get_landmark_count() { return landmark_count_; };
+
    private:
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
     std::vector<std::vector<cv::DMatch>> matches_;
@@ -134,6 +147,7 @@ class StructureFromMotion {
 //         // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose);
 //         static const json eigen_mat_to_json(const Eigen::Matrix &mat);
 //         static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose);
-//         static const void values_to_json(const gtsam::Values &values, const std::filesystem::path &file);
+//         static const void values_to_json(const gtsam::Values &values, const std::filesystem::path
+//         &file);
 // };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 20a034e0d..80ae60737 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -1,12 +1,21 @@
 #include "experimental/learn_descriptors/structure_from_motion.hh"
-#include "experimental/learn_descriptors/symphony_lake_parser.hh"
-#include "common/math/matrix_to_proto.hh"
 
 #include <iostream>
 #include <sstream>
 
+#include "Eigen/Geometry"
+#include "common/geometry/opencv_viz.hh"
+#include "common/math/matrix_to_proto.hh"
+#include "experimental/learn_descriptors/symphony_lake_parser.hh"
 #include "gtest/gtest.h"
 
+class GtsamTestHelper {
+   public:
+    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
+        return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height;
+    }
+};
+
 namespace robot::experimental::learn_descriptors {
 TEST(SFM_TEST, frontend_pipeline_sweep) {
     const size_t width = 640;
@@ -21,7 +30,7 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
     cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
     cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
     float com_x = 0.0f, com_y = 0.0f;
-    for (const cv::Point& rect_point : rect_points) {
+    for (const cv::Point &rect_point : rect_points) {
         com_x += rect_point.x;
         com_y += rect_point.y;
     }
@@ -71,7 +80,7 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
                    static_cast<int>(matcher_type));
             try {
                 frontend = Frontend(extractor_type, matcher_type);
-            } catch (const std::invalid_argument& e) {
+            } catch (const std::invalid_argument &e) {
                 assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
                 continue;
             }
@@ -112,46 +121,105 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
     }
 }
 
-TEST(SFM_TEST, frontend_snippet_two) {
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180);
-    const cv::Mat image_1 = survey.loadImageByImageIndex(180);
-    const cv::Mat image_2 = survey.loadImageByImageIndex(185);
-
-    const size_t img_width = image_point.width, img_height = image_point.height;
-    const double fx = image_point.fx, fy = image_point.fy;        
-    const double cx = image_point.cx, cy = image_point.cy;    
-    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+TEST(GtsamTesting, sfm_test_estimate_pose) {
+    std::vector<gtsam::Point3> cube_W;
+    float cube_size = 1.0f;
+    cube_W.push_back(gtsam::Point3(0, 0, 0));
+    cube_W.push_back(gtsam::Point3(cube_size, 0, 0));
+    cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0));
+    cube_W.push_back(gtsam::Point3(0, cube_size, 0));
+    cube_W.push_back(gtsam::Point3(0, 0, cube_size));
+    cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size));
+    cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size));
+    cube_W.push_back(gtsam::Point3(0, cube_size, cube_size));
+
+    Eigen::Matrix3d R_new_points(
+        Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+
+    const int initial_size = cube_W.size();
+    const gtsam::Point3 p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2);
+    for (int i = 0; i < initial_size; i++) {
+        cube_W.emplace_back(
+            R_new_points * (cube_W[i] - p_world_cube_center) + p_world_cube_center);
+    }
 
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);  
-    Frontend frontend = sfm.get_frontend();
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, *K);
+
+    std::vector<gtsam::Pose3> poses;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras;
+
+    Eigen::Matrix3d R_world_pose0(
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    gtsam::Pose3 T_world_pose0(gtsam::Rot3(R_world_pose0),
+                               gtsam::Point3(4, cube_size / 2, cube_size / 2));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera0(T_world_pose0, *K);
+    poses.push_back(T_world_pose0);
+    cameras.push_back(camera0);
+
+    Eigen::Matrix3d R_world_pose0_to_pose1(
+        Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix());
+    gtsam::Pose3 T_world_pose1(
+        gtsam::Rot3(R_world_pose0_to_pose1) * T_world_pose0.rotation(),
+        (R_world_pose0_to_pose1 * (T_world_pose0.translation() - p_world_cube_center) +
+         p_world_cube_center));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(T_world_pose1, *K);
+    poses.push_back(T_world_pose1);
+    cameras.push_back(camera1);
+
+    std::vector<Eigen::Isometry3d> isometries;
+    for (const gtsam::Pose3 &pose : poses) {
+        isometries.emplace_back(pose.matrix());
+    }
 
-    cv::Mat img_keypoints_out_1(img_height, img_width, CV_8UC3),
-        img_keypoints_out_2(img_height, img_width, CV_8UC3), img_matches_out(img_height, 2 * img_width, CV_8UC3);
-    cv::Mat img_display_test;
+    std::vector<cv::KeyPoint> kpts1;
+    std::vector<cv::KeyPoint> kpts2;
+    std::vector<cv::DMatch> matches;
 
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat>keypts_descriptors_1 = frontend.get_keypoints_and_descriptors(image_1);
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat>keypts_descriptors_2 = frontend.get_keypoints_and_descriptors(image_2);
-    std::cout << "num of keypoints " << keypts_descriptors_1.first.size() << std::endl;
-    frontend.draw_keypoints(image_1, keypts_descriptors_1.first,
-                            img_keypoints_out_1);
-    frontend.draw_keypoints(image_2, keypts_descriptors_2.first,
-                            img_keypoints_out_2);
-    std::vector<cv::DMatch> matches = frontend.get_matches(keypts_descriptors_1.second, keypts_descriptors_2.second);
-    frontend.draw_matches(image_1, keypts_descriptors_1.first, image_2,
-                            keypts_descriptors_2.first, matches, img_matches_out);
+    for (size_t i = 0; i < cube_W.size(); i++) {
+        gtsam::Point2 pixel_p_cube_C0 = cameras[0].project(cube_W[i]);
+        gtsam::Point2 pixel_p_cube_C1 = cameras[1].project(cube_W[i]);
+        if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C0, img_width, img_height) &&
+            GtsamTestHelper::pixel_in_range(pixel_p_cube_C1, img_width, img_height)) {
+            kpts1.push_back(cv::KeyPoint(pixel_p_cube_C0[0], pixel_p_cube_C0[1], 3));
+            kpts2.push_back(cv::KeyPoint(pixel_p_cube_C1[0], pixel_p_cube_C1[1], 3));
+            matches.emplace_back(cv::DMatch(kpts1.size() - 1, kpts2.size() - 1, 0));
+        }
+    }
 
-    // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-    // cv::vconcat(img_display_test, img_matches_out, img_display_test);
-    // cv::Mat og_images;
-    // cv::hconcat(image_1, image_2, og_images);
-    // cv::vconcat(og_images, img_display_test, img_display_test);
-    // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl;
-    // cv::imshow("Keypoints and Matches Output.", img_display_test);
-    // std::cout << "Press spacebar to pause." << std::endl;
-    // while (cv::waitKey(1000) != 32) {}
+    std::cout << "kpts1.size(): " << kpts1.size() << std::endl;
+    std::cout << "kpts2.size(): " << kpts2.size() << std::endl;
+
+    gtsam::Pose3 pose_diff_estimate = sfm.get_backend().estimate_pose(kpts1, kpts2, matches, *K);
+    gtsam::Pose3 pose_diff_groundtruth = T_world_pose0.between(T_world_pose1);
+    std::vector<gtsam::Pose3> result_poses = {T_world_pose0, T_world_pose0 * pose_diff_estimate};
+    std::cout << "pose_diff_estimate: " << pose_diff_estimate << std::endl;
+    std::cout << "pose_diff_ground_truth: " << pose_diff_groundtruth << std::endl;
+
+    isometries.emplace_back(result_poses.back().matrix());
+    robot::geometry::opencv_viz::viz_scene(isometries, cube_W);
+
+    // std::cout << result_poses[1] << std::endl;
+    // std::cout << pose1 << std::endl;
+    // std::cout << pose0 << std::endl;
+
+    constexpr double TOL = 1e-3;
+    for (size_t i = 0; i < poses.size(); i++) {
+        gtsam::Pose3 result_pose = result_poses[i];
+        EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL);
+        EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL);
+        EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL);
+        EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL));
+    }
 }
 
 TEST(SFM_TEST, sfm_snippet_two) {
@@ -159,59 +227,59 @@ TEST(SFM_TEST, sfm_snippet_two) {
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
     const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180);
-    const std::vector<cv::Mat> images = {
-        survey.loadImageByImageIndex(180),
-        survey.loadImageByImageIndex(185)
-    };
+    const std::vector<cv::Mat> images = {survey.loadImageByImageIndex(180),
+                                         survey.loadImageByImageIndex(185)};
 
     // const size_t img_width = image_point.width, img_height = image_point.height;
-    const double fx = image_point.fx, fy = image_point.fy;        
-    const double cx = image_point.cx, cy = image_point.cy;    
+    const double fx = image_point.fx, fy = image_point.fy;
+    const double cx = image_point.cx, cy = image_point.cy;
     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
 
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);  
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);
 
     std::cout << "sfm.landmark_count_: " << sfm.get_landmark_count() << std::endl;
 
     for (const cv::Mat &image : images) {
-        sfm.add_image(image);
+        sfm.add_image(
+            image);  //, gtsam::Pose3(gtsam::Rot3::Identity(), gtsam::Point3(0.05, 0, 0)));
     }
 
     gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
     std::vector<Eigen::MatrixXd> poses;
-    std::vector<proto::Matrix> protos;
+    std::vector<robot::math::proto::Matrix> protos;
     for (size_t i = 0; i < images.size(); i++) {
-        gtsam::Pose3 pose = initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
-        Eigen::MatrixXd eigen_affine(4,4);
+        gtsam::Pose3 pose =
+            initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
+        Eigen::MatrixXd eigen_affine(4, 4);
         gtsam::Matrix3 rot_mat = pose.rotation().matrix();
         gtsam::Point3 translation = pose.translation();
         for (int j = 0; j < 3; j++) {
             for (int k = 0; k < 3; k++) {
-                eigen_affine(j,k) = rot_mat(j,k);
+                eigen_affine(j, k) = rot_mat(j, k);
             }
-            eigen_affine(3,j) = translation(j);
+            eigen_affine(3, j) = translation(j);
         }
-        eigen_affine(3,3) = 1;
+        eigen_affine(3, 3) = 1;
         poses.push_back(eigen_affine);
-        protos.emplace_shared(proto::Matrix());
+        protos.emplace_back();
         pack_into(eigen_affine, &protos.back());
     }
     std::cout << poses.front() << std::endl;
-    
 
     // const Eigen::Vector3d in(1.0, 2.0, 3.0);
     // // Action
 
-    // // proto::Matrix proto;
-    // // pack_into(in, &proto);
-    // // const Eigen::Vector3d out = unpack_from<Eigen::Vector3d>(proto);
+    // robot::math::proto::Matrix proto;
+    // pack_into(in, &proto);
+    // const Eigen::Vector3d out = unpack_from<Eigen::Vector3d>(proto);
+    // std::cout << "vector out " << out << std::endl;
 
-    // // const Eigen::VectorXd in{{1.0, 2.0, 3.0}};
+    // const Eigen::VectorXd in{{1.0, 2.0, 3.0}};
 
-    // // // Action
-    // // proto::Matrix proto;
-    // // pack_into(in, &proto);
-    // // const Eigen::VectorXd out = unpack_from<Eigen::VectorXd>(proto);
+    // // Action
+    // robot::math::proto::Matrix proto;
+    // pack_into(in, &proto);
+    // const Eigen::VectorXd out = unpack_from<Eigen::VectorXd>(proto);
 
     // sfm.solve_structure();
 
@@ -220,10 +288,10 @@ TEST(SFM_TEST, sfm_snippet_two) {
     // cv::Mat og_images;
     // cv::hconcat(image_1, image_2, og_images);
     // cv::vconcat(og_images, img_display_test, img_display_test);
-    // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" << img_display_test.cols << std::endl;
-    // cv::imshow("Keypoints and Matches Output.", img_display_test);
-    // std::cout << "Press spacebar to pause." << std::endl;
-    // while (cv::waitKey(1000) != 32) {}    
+    // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" <<
+    // img_display_test.cols << std::endl; cv::imshow("Keypoints and Matches Output.",
+    // img_display_test); std::cout << "Press spacebar to pause." << std::endl; while
+    // (cv::waitKey(1000) != 32) {}
 }
 
 // TEST(SFM_TEST, structure_from_motion) {
@@ -236,7 +304,7 @@ TEST(SFM_TEST, sfm_snippet_two) {
 
 //     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
 
-//     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);     
+//     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);
 //     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
 //     // DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
 //     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
@@ -266,10 +334,10 @@ TEST(SFM_TEST, sfm_snippet_two) {
 //     //     ++img_itr;
 //     // }
 //     sfm.solve_structure();
-//     // gtsam::Values output = sfm.get_structure_result();    
+//     // gtsam::Values output = sfm.get_structure_result();
 //     // output.print("result values: ");
 //     // for (size_t i = 0; i < sfm.get_num_images_added(); i++) {
-        
+
 //     // }
 // }
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 5f210578b..377e367cb 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -6,31 +6,33 @@
 
 namespace robot::experimental::learn_descriptors {
 DataParser::DataParser(const std::filesystem::path &image_root_dir,
-                       const std::vector<std::string> &survey_list) : 
-                       image_root_dir_(image_root_dir) , survey_list_(survey_list) {
+                       const std::vector<std::string> &survey_list)
+    : image_root_dir_(image_root_dir), survey_list_(survey_list) {
     ROBOT_CHECK(std::filesystem::exists(image_root_dir), "Image root dir does not exist!",
                 image_root_dir);
     surveys_.load(image_root_dir.string(), survey_list);
 }
 DataParser::~DataParser() {}
 
-Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps, bool use_compass) {
-    const symphony_lake_dataset::ImagePoint img_point = surveys_.get(survey_idx).getImagePoint(img_idx);
+Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps,
+                                               bool use_compass) {
+    const symphony_lake_dataset::ImagePoint img_point =
+        surveys_.get(survey_idx).getImagePoint(img_idx);
     Eigen::Vector3d t;
-    Eigen::Matrix3d R;        
+    Eigen::Matrix3d R;
     Eigen::Affine3d T_world_camera;
     if (use_gps) {
         t[0] = img_point.x;
         t[1] = img_point.y;
     }
     // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core)
-    double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan;    
+    double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan;
     Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY()));
     Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity();
     Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()));
-    R = R_x * R_y * R_z;    
+    R = R_x * R_y * R_z;
     T_world_camera.translate(t);
-    T_world_camera.rotate(R);    
+    T_world_camera.rotate(R);
     return T_world_camera;
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index 6dc74c7d3..dd8db32be 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -1,19 +1,18 @@
 #pragma once
+#include <coroutine>
 #include <filesystem>
-#include <string>
 #include <iostream>
-#include <coroutine>
+#include <string>
 #include <vector>
 
+#include "Eigen/Dense"
 #include "symphony_lake_dataset/ImagePoint.h"
 #include "symphony_lake_dataset/SurveyVector.h"
 
-#include "Eigen/Dense"
-
 namespace robot::experimental::learn_descriptors {
 class DataParser {
    public:
-   template <typename T>
+    template <typename T>
     struct Generator {
         struct promise_type {
             T value;
@@ -31,41 +30,46 @@ class DataParser {
         using handle_type = std::coroutine_handle<promise_type>;
 
         Generator(handle_type h) : handle(h) {}
-        ~Generator() { if (handle) handle.destroy(); }
+        ~Generator() {
+            if (handle) handle.destroy();
+        }
 
         struct iterator {
             handle_type handle;
             bool operator!=(std::default_sentinel_t) const { return !handle.done(); }
-            iterator& operator++() { handle.resume(); return *this; }
+            iterator &operator++() {
+                handle.resume();
+                return *this;
+            }
             T operator*() const { return handle.promise().value; }
         };
 
         iterator begin() { return iterator{handle}; }
         std::default_sentinel_t end() { return {}; }
 
-    private:
+       private:
         handle_type handle;
     };
 
-    Generator<cv::Mat> image_generator(
-        const symphony_lake_dataset::SurveyVector& survey_vector) {
-
+    Generator<cv::Mat> image_generator(const symphony_lake_dataset::SurveyVector &survey_vector) {
         for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
             const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-            for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {                
+            for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
                 co_yield survey.loadImageByImageIndex(j);
             }
         }
-    }    
+    }
 
     DataParser(const std::filesystem::path &image_root_dir,
                const std::vector<std::string> &survey_list);
     ~DataParser();
 
-    Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false, bool use_compass = false);    
+    Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
+                                       bool use_compass = false);
 
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
     Generator<cv::Mat> create_img_generator() { return image_generator(surveys_); };
+
    private:
     std::filesystem::path image_root_dir_;
     std::vector<std::string> survey_list_;
@@ -73,8 +77,9 @@ class DataParser {
 };
 class SymphonyLakeDatasetTestHelper {
    public:
-    static constexpr const char* test_image_root_dir = "external/symphony_lake_snippet/symphony_lake";
-    static constexpr std::array<const char*, 1> test_survey_list = {"140106_snippet"};
+    static constexpr const char *test_image_root_dir =
+        "external/symphony_lake_snippet/symphony_lake";
+    static constexpr std::array<const char *, 1> test_survey_list = {"140106_snippet"};
     static bool images_equal(cv::Mat img1, cv::Mat img2) {
         if (img1.size() != img2.size() || img1.type() != img2.type()) {
             return false;
@@ -84,8 +89,12 @@ class SymphonyLakeDatasetTestHelper {
         diff = diff.reshape(1);
         return cv::countNonZero(diff) == 0;
     };
-    static DataParser get_test_parser() { return DataParser(get_test_iamge_root_dir(), get_test_survey_list()); };
-    static std::filesystem::path get_test_iamge_root_dir() { return std::filesystem::path(test_image_root_dir); };
+    static DataParser get_test_parser() {
+        return DataParser(get_test_iamge_root_dir(), get_test_survey_list());
+    };
+    static std::filesystem::path get_test_iamge_root_dir() {
+        return std::filesystem::path(test_image_root_dir);
+    };
     static std::vector<std::string> get_test_survey_list() {
         return std::vector<std::string>(test_survey_list.begin(), test_survey_list.end());
     };
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index 5ede85062..0b22cdd3c 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -27,10 +27,10 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
     printf("Press 'q' in graphic window to quit\n");
     for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
         const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-        for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {     
-            // img_point contains all the csv data entries       
+        for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+            // img_point contains all the csv data entries
             const symphony_lake_dataset::ImagePoint img_point = survey.getImagePoint(j);
-            (void)img_point; // suppress unused variable
+            (void)img_point;  // suppress unused variable
             image = survey.loadImageByImageIndex(j);
 
             // get the target image
@@ -40,7 +40,9 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
             std::string target_img_name_str = target_img_name.str();
             target_img_name_str.replace(0, target_img_name_str.size() - target_img_name_length, "");
             std::filesystem::path target_img_dir =
-                SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() / SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" / target_img_name_str;
+                SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() /
+                SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" /
+                target_img_name_str;
             target_img = cv::imread(target_img_dir.string());
 
             EXPECT_TRUE(SymphonyLakeDatasetTestHelper::images_equal(image, target_img));
@@ -55,7 +57,8 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
 }
 TEST(SymphonyLakeParserTest, snippet_140106_generator_test) {
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    DataParser::Generator<cv::Mat> generator = (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator();
+    DataParser::Generator<cv::Mat> generator =
+        (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator();
     DataParser::Generator<cv::Mat>::iterator img_iter = generator.begin();
     while (img_iter != generator.end()) {
         cv::Mat img = *img_iter;

From 5366913a7a979030ff93c9ca2722896d18f09ce9 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 6 Feb 2025 12:34:39 -0500
Subject: [PATCH 07/45] progress

---
 .../structure_from_motion.cc                  | 30 ++++++++++++-------
 .../structure_from_motion.hh                  | 18 ++++++++---
 .../structure_from_motion_test.cc             | 14 ++++-----
 3 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 92e9bc0a7..64fa76e1f 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -69,7 +69,7 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
         Frontend::enforce_bijective_matches(matches);
 
         matches_.push_back(matches);
-        gtsam::Pose3 between_value = get_backend().estimate_pose(
+        gtsam::Pose3 between_value = get_backend().estimate_c0_c1(
             img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, matches,
             get_backend().get_K());
         backend_.add_between_factor(
@@ -307,15 +307,23 @@ void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &
     // initial_estimate_.print("values after adding prior: ");
 }
 
+template <>
 void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
-                                 const gtsam::Pose3 &value) {
+                                 const gtsam::Pose3 &value, const gtsam::SharedNoiseModel model) {
     graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value,
-                                                              pose_noise_);
+                                                              model);
     // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 <<
     // std::endl; initial_estimate_.print("values when adding between factor: ");
     initial_estimate_.insert(symbol_2, initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
 }
 
+template <>
+void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
+                                 const gtsam::Rot3 &value, const gtsam::SharedNoiseModel &model) {
+    graph_.empalce_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
+    initial_estimate_.insert()
+}
+
 void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
     for (const Landmark &landmark : landmarks) {
         add_landmark(landmark);
@@ -324,7 +332,7 @@ void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
 
 void Backend::add_landmark(const Landmark &landmark) {
     graph_.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
-        landmark.projection, measurement_noise_, landmark.cam_pose_symbol,
+        landmark.projection, landmark_noise_, landmark.cam_pose_symbol,
         landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0));
     if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) {
         initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess);
@@ -333,7 +341,7 @@ void Backend::add_landmark(const Landmark &landmark) {
     }
 }
 
-gtsam::Pose3 Backend::estimate_pose(const std::vector<cv::KeyPoint> &kpts1,
+gtsam::Pose3 Backend::estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts1,
                                     const std::vector<cv::KeyPoint> &kpts2,
                                     const std::vector<cv::DMatch> &matches,
                                     const gtsam::Cal3_S2 &K) {
@@ -347,16 +355,16 @@ gtsam::Pose3 Backend::estimate_pose(const std::vector<cv::KeyPoint> &kpts1,
     }
     cv::Mat cv_K = (cv::Mat_<double>(3, 3) << K.fx(), K.skew(), K.px(), 0, K.fy(), K.py(), 0, 0, 1);
     cv::Mat E = cv::findEssentialMat(pts1, pts2, cv_K, cv::RANSAC, 0.999, 1.0);
-    cv::Mat R, t;
-    cv::recoverPose(E, pts1, pts2, cv_K, R, t);
-    gtsam::Point3 translation(t.at<double>(0, 0), t.at<double>(1, 0), t.at<double>(2, 0));
-    gtsam::Matrix3 mat_rot;
+    cv::Mat R_c1_c0, t_c1_c0;
+    cv::recoverPose(E, pts1, pts2, cv_K, R_c1_c0, t_c1_c0);
+    gtsam::Point3 t_c1_c0_eigen(t_c1_c0.at<double>(0, 0), t_c1_c0.at<double>(1, 0), t_c1_c0.at<double>(2, 0));
+    gtsam::Matrix3 R_c1_c0_eigen;
     for (int i = 0; i < 3; i++) {
         for (int j = 0; j < 3; j++) {
-            mat_rot(i, j) = R.at<double>(i, j);
+            R_c1_c0_eigen(i, j) = R_c1_c0.at<double>(i, j);
         }
     }
-    return gtsam::Pose3(gtsam::Rot3(mat_rot), translation);
+    return gtsam::Pose3(gtsam::Rot3(R_c1_c0_eigen), t_c1_c0_eigen).inverse();
 }
 
 void Backend::solve_graph() {
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 991aa2f68..8c98e5b0e 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -66,6 +66,8 @@ class Frontend {
 class Backend {
    public:
     static constexpr char pose_symbol_char = 'x';
+    static constexpr char pose_rot_symbol_char = 'r';
+    static constexpr char pose_bearing_symbol_char = 'b';
     static constexpr char landmark_symbol_char = 'l';
     static constexpr char camera_symbol_char = 'k';
 
@@ -88,16 +90,20 @@ class Backend {
     ~Backend(){};
 
     void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value);
+
+    template <typename T>
     void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
-                            const gtsam::Pose3 &value);
+                            const T &value, const gtsam::SharedNoiseModel &model);
+
 
     void add_landmarks(const std::vector<Landmark> &landmarks);
     void add_landmark(const Landmark &landmark);
-    gtsam::Pose3 estimate_pose(const std::vector<cv::KeyPoint> &kpts1,
+    gtsam::Pose3 estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts1,
                                const std::vector<cv::KeyPoint> &kpts2,
                                const std::vector<cv::DMatch> &matches, const gtsam::Cal3_S2 &K);
 
-    void solve_graph();
+    void solve_graph();    
+
     const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
     const gtsam::Values &get_result() const { return result_; };
     const gtsam::Cal3_S2 &get_K() const { return K_; };
@@ -109,10 +115,14 @@ class Backend {
     gtsam::Values result_;
     gtsam::NonlinearFactorGraph graph_;
 
-    gtsam::noiseModel::Isotropic::shared_ptr measurement_noise_ =
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
         gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
     gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
         gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
+    gtsam::noiseModel::Isotropic::shared_ptr rotation_noise_ = 
+        gtsam::noiseModel::Isotropic::Sigma(3, 0.1);
+    gtsam::noiseModel::Isotropic::shared_ptr pose_bearing_noise_ = 
+        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
 };
 class StructureFromMotion {
    public:
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 80ae60737..170debd8e 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -199,19 +199,15 @@ TEST(GtsamTesting, sfm_test_estimate_pose) {
     std::cout << "kpts1.size(): " << kpts1.size() << std::endl;
     std::cout << "kpts2.size(): " << kpts2.size() << std::endl;
 
-    gtsam::Pose3 pose_diff_estimate = sfm.get_backend().estimate_pose(kpts1, kpts2, matches, *K);
-    gtsam::Pose3 pose_diff_groundtruth = T_world_pose0.between(T_world_pose1);
-    std::vector<gtsam::Pose3> result_poses = {T_world_pose0, T_world_pose0 * pose_diff_estimate};
-    std::cout << "pose_diff_estimate: " << pose_diff_estimate << std::endl;
-    std::cout << "pose_diff_ground_truth: " << pose_diff_groundtruth << std::endl;
+    gtsam::Pose3 T_pose0_pose1_estimate = sfm.get_backend().estimate_c0_c1(kpts1, kpts2, matches, *K);
+    gtsam::Pose3 T_pose0_pose1 = T_world_pose0.between(T_world_pose1);
+    std::vector<gtsam::Pose3> result_poses = {T_world_pose0, T_world_pose0 * T_pose0_pose1_estimate};
+    std::cout << "pose_diff_estimate: " << T_pose0_pose1_estimate << std::endl;
+    std::cout << "pose_diff_ground_truth: " << T_pose0_pose1 << std::endl;
 
     isometries.emplace_back(result_poses.back().matrix());
     robot::geometry::opencv_viz::viz_scene(isometries, cube_W);
 
-    // std::cout << result_poses[1] << std::endl;
-    // std::cout << pose1 << std::endl;
-    // std::cout << pose0 << std::endl;
-
     constexpr double TOL = 1e-3;
     for (size_t i = 0; i < poses.size(); i++) {
         gtsam::Pose3 result_pose = result_poses[i];

From 6313e8750f8ee494cafbfae931d052dd35ea3c19 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 7 Mar 2025 01:54:20 -0500
Subject: [PATCH 08/45] hard coded symphony intrinsics and distortion, added
 some commons, did some refactoring, debugged some Backend issues. Next step:
 properly put cameras in graph in the world frame.

---
 common/geometry/BUILD                         |  50 ++++-
 common/geometry/camera.cc                     |  45 ++++
 common/geometry/camera.hh                     |  27 +++
 common/geometry/camera_test.cc                | 131 +++++++++++
 common/geometry/opencv_viz.cc                 |  39 ++--
 common/geometry/opencv_viz.hh                 |   7 +-
 common/geometry/opencv_viz_test.cc            |  28 +--
 common/geometry/translate_types.cc            |  34 +++
 common/geometry/translate_types.hh            |  10 +
 common/geometry/translate_types_test.cc       |  37 ++++
 experimental/learn_descriptors/BUILD          |  33 ++-
 .../structure_from_motion.cc                  | 204 ++++++++----------
 .../structure_from_motion.hh                  |  65 +++---
 .../structure_from_motion_test.cc             | 172 +++------------
 .../learn_descriptors/symphony_lake_parser.hh |   8 +
 15 files changed, 531 insertions(+), 359 deletions(-)
 create mode 100644 common/geometry/camera.cc
 create mode 100644 common/geometry/camera.hh
 create mode 100644 common/geometry/camera_test.cc
 create mode 100644 common/geometry/translate_types.cc
 create mode 100644 common/geometry/translate_types.hh
 create mode 100644 common/geometry/translate_types_test.cc

diff --git a/common/geometry/BUILD b/common/geometry/BUILD
index 0a28b1e20..319c5d930 100644
--- a/common/geometry/BUILD
+++ b/common/geometry/BUILD
@@ -21,12 +21,13 @@ cc_test(
 
 cc_library(
   name = "opencv_viz",
-  hdrs = ["opencv_viz.hh"],
+  hdrs = ["opencv_viz.hh"],  
   srcs = ["opencv_viz.cc"],
   visibility = ["//visibility:public"],
   deps = [
     "@eigen",
-    "@opencv//:opencv"
+    "@opencv//:opencv",
+    ":translate_types"
   ]
 )
 
@@ -46,3 +47,48 @@ cc_binary(
     "@opencv//:opencv"
   ]
 )
+
+cc_library(
+  name = "camera", 
+  hdrs = ["camera.hh"],  
+  srcs = ["camera.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@eigen",
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    ":translate_types"
+  ]
+)
+
+cc_test(
+  name = "camera_test", 
+  srcs = ["camera_test.cc"],
+  deps = [
+    ":camera",
+    "@com_google_googletest//:gtest_main",
+    ":translate_types",
+    ":opencv_viz"
+  ]
+)
+
+cc_library(
+  name = "translate_types",
+  hdrs = ["translate_types.hh"],
+  srcs = ["translate_types.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@eigen",     
+    "@opencv//:opencv"
+  ]
+)
+
+cc_test(
+  name = "translate_types_test", 
+  srcs = ["translate_types_test.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    ":translate_types",
+    "@com_google_googletest//:gtest_main"
+  ]
+)
\ No newline at end of file
diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
new file mode 100644
index 000000000..b13718570
--- /dev/null
+++ b/common/geometry/camera.cc
@@ -0,0 +1,45 @@
+#include "common/geometry/camera.hh"
+
+#include "common/geometry/translate_types.hh"
+
+namespace robot::geometry {
+Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic) {
+    Eigen::Matrix3d K;
+    K << intrinsic.fx(), intrinsic.skew(), intrinsic.px(), 0, intrinsic.fy(), intrinsic.py(), 0, 0,
+        1;
+    return K;
+}
+
+Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point) {
+    return K * p_cam_point;
+    // return (K * p_cam_point).head<2>();
+}
+
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog) {
+    return K.inverse() * pixel_homog;
+}
+
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
+                          const double depth) {
+    return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.);
+}
+
+Eigen::Isometry3d estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts0,
+                                 const std::vector<cv::KeyPoint> &kpts1,
+                                 const std::vector<cv::DMatch> &matches, const cv::Mat &K) {
+    Eigen::Isometry3d result;
+    std::vector<cv::Point2f> pts1;
+    std::vector<cv::Point2f> pts2;
+    for (const cv::DMatch &match : matches) {
+        pts1.push_back(kpts0[match.queryIdx].pt);
+        pts2.push_back(kpts1[match.trainIdx].pt);
+    }
+    cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0);
+    cv::Mat R_c1_c0, t_c1_c0;
+    cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0);
+    result.linear() = cv_to_eigen_mat(R_c1_c0);
+    result.translation() = cv_to_eigen_mat(t_c1_c0);
+    result = result.inverse();
+    return result;
+}
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh
new file mode 100644
index 000000000..82b74c818
--- /dev/null
+++ b/common/geometry/camera.hh
@@ -0,0 +1,27 @@
+#pragma once
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::geometry {
+Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic);
+
+/// @return Homogeneous pixel projection coordinates
+Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point);
+/**
+ * @param pixel_homog homogeneous pixel cordinates 3x1
+ * @return 3d coordinate in camera frame
+ */
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog);
+/**
+ * @param pixel_inhomog inhomogeneous pixel cordinates 2x1
+ * @return 3d coordinate in camera frame
+ */
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
+                          const double depth);
+
+Eigen::Isometry3d estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts0,
+                                 const std::vector<cv::KeyPoint> &kpts1,
+                                 const std::vector<cv::DMatch> &matches, const cv::Mat &K);
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc
new file mode 100644
index 000000000..1c4923b82
--- /dev/null
+++ b/common/geometry/camera_test.cc
@@ -0,0 +1,131 @@
+#include "common/geometry/camera.hh"
+
+#include "common/geometry/opencv_viz.hh"
+#include "common/geometry/translate_types.hh"
+#include "gtest/gtest.h"
+
+class CameraTestHelper {
+   public:
+    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
+        return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height;
+    }
+};
+
+namespace robot::geometry {
+TEST(CameraTest, test_get_intrinsics) {
+    const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5;
+    const gtsam::Cal3_S2 K(fx, fy, s, px, py);
+    const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K);
+
+    EXPECT_FLOAT_EQ(fx, intrinsic_mat(0, 0));
+    EXPECT_FLOAT_EQ(fy, intrinsic_mat(1, 1));
+    EXPECT_FLOAT_EQ(s, intrinsic_mat(0, 1));
+    EXPECT_FLOAT_EQ(px, intrinsic_mat(0, 2));
+    EXPECT_FLOAT_EQ(py, intrinsic_mat(1, 2));
+}
+
+TEST(CameraTest, test_projection) {
+    const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5;
+    const gtsam::Cal3_S2 K(fx, fy, s, px, py);
+    const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K);
+
+    const Eigen::Vector3d p_cam_lmk1(0.1, 0.15, 1.2);
+    const Eigen::Vector3d pxl_homog_cam_lmk1 = project(intrinsic_mat, p_cam_lmk1);
+    const Eigen::Vector2d pxl_inhomog_cam_lmk1 =
+        pxl_homog_cam_lmk1.head<2>() / pxl_homog_cam_lmk1(2);
+    const Eigen::Vector3d p_cam_lmk1_deproject = deproject(intrinsic_mat, pxl_homog_cam_lmk1);
+    const Eigen::Vector3d p_cam_lmk1_deproject_inhomog =
+        deproject(intrinsic_mat, pxl_inhomog_cam_lmk1, p_cam_lmk1(2));
+
+    EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject));
+    EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject_inhomog));
+}
+
+TEST(CameraTest, test_estimate_pose) {
+    std::vector<Eigen::Vector3d> p_W_cube;
+    float cube_size = 1.0f;
+    p_W_cube.push_back(Eigen::Vector3d(0, 0, 0));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, 0));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, 0));
+    p_W_cube.push_back(Eigen::Vector3d(0, cube_size, 0));
+    p_W_cube.push_back(Eigen::Vector3d(0, 0, cube_size));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, cube_size));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
+    p_W_cube.push_back(Eigen::Vector3d(0, cube_size, cube_size));
+
+    Eigen::Matrix3d R_new_points(
+        Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+
+    const int initial_size = p_W_cube.size();
+    const Eigen::Vector3d p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2);
+    for (const Eigen::Vector3d &point_W_cube : p_W_cube) {
+        p_W_cube.emplace_back(R_new_points * (point_W_cube - p_world_cube_center) +
+                              p_world_cube_center);
+    }
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    cv::Mat K = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
+
+    Eigen::Matrix3d K_eig = cv_to_eigen_mat(K);
+    std::vector<Eigen::Isometry3d> poses;
+
+    Eigen::Matrix3d R_world_cam0(
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    Eigen::Isometry3d T_world_cam0 = Eigen::Isometry3d::Identity();
+    T_world_cam0.linear() = R_world_cam0;
+    T_world_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2);
+    poses.push_back(T_world_cam0);
+
+    Eigen::Matrix3d R_world_cam0_to_cam1(
+        Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix());
+    Eigen::Isometry3d T_world_cam1;
+    T_world_cam1.linear() = R_world_cam0_to_cam1 * T_world_cam0.linear();
+    T_world_cam1.translation() =
+        R_world_cam0_to_cam1 * (T_world_cam0.translation() - p_world_cube_center) +
+        p_world_cube_center;
+    poses.push_back(T_world_cam1);
+
+    std::vector<cv::KeyPoint> kpts0;
+    std::vector<cv::KeyPoint> kpts1;
+    std::vector<cv::DMatch> matches;
+
+    for (const Eigen::Vector3d &point_W_cube : p_W_cube) {
+        Eigen::Vector3d p_cam0_cubep =
+            (T_world_cam0.inverse() *
+             Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.))
+                .head<3>();
+        Eigen::Vector3d p_cam1_cubep =
+            (T_world_cam1.inverse() *
+             Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.))
+                .head<3>();
+        Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cam0_cubep;
+        Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cam1_cubep;
+        Eigen::Vector2d pxl_c0_pcube = pxl_c0_pcube_homog.head<2>() / pxl_c0_pcube_homog(2);
+        Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2);
+        if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) &&
+            CameraTestHelper::pixel_in_range(pxl_c1_pcube, img_width, img_height)) {
+            kpts0.push_back(cv::KeyPoint(pxl_c0_pcube[0], pxl_c0_pcube[1], 3));
+            kpts1.push_back(cv::KeyPoint(pxl_c1_pcube[0], pxl_c1_pcube[1], 3));
+            matches.emplace_back(cv::DMatch(kpts0.size() - 1, kpts1.size() - 1, 0));
+        }
+    }
+
+    Eigen::Isometry3d T_cam0_cam1_estimate = estimate_c0_c1(kpts0, kpts1, matches, K);
+    Eigen::Isometry3d T_cam0_cam1_scale = T_world_cam0.inverse() * T_world_cam1;
+    T_cam0_cam1_scale.translation() /= T_cam0_cam1_scale.translation().norm();
+    EXPECT_TRUE(
+        T_cam0_cam1_estimate.translation().isApprox(T_cam0_cam1_scale.translation(), 0.000001));
+    EXPECT_TRUE(T_cam0_cam1_estimate.linear().isApprox(T_cam0_cam1_scale.linear(), 0.001));
+
+    poses.emplace_back(T_world_cam0 * T_cam0_cam1_estimate);
+    // viz_scene(poses, p_W_cube);
+}
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/opencv_viz.cc b/common/geometry/opencv_viz.cc
index 48675e239..66df62a9a 100644
--- a/common/geometry/opencv_viz.cc
+++ b/common/geometry/opencv_viz.cc
@@ -2,23 +2,9 @@
 
 #include <iostream>
 
-namespace robot::geometry::opencv_viz {
-template <typename Derived>
-cv::Mat eigen_to_cv_mat(const Eigen::MatrixBase<Derived> &eigenMatrix) {
-    int cvType;
-    if constexpr (std::is_same_v<typename Derived::Scalar, double>) {
-        cvType = CV_64F;
-    } else if constexpr (std::is_same_v<typename Derived::Scalar, float>) {
-        cvType = CV_32F;
-    } else if constexpr (std::is_same_v<typename Derived::Scalar, int>) {
-        cvType = CV_32S;
-    } else {
-        static_assert(!std::is_same_v<Derived, Derived>, "Unsupported data type in Eigen matrix");
-    }
-    return cv::Mat(eigenMatrix.rows(), eigenMatrix.cols(), cvType,
-                   const_cast<void *>(static_cast<const void *>(eigenMatrix.derived().data())))
-        .clone();
-}
+#include "common/geometry/translate_types.hh"
+
+namespace robot::geometry {
 cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     // Ensure R is a valid rotation matrix
     CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001);
@@ -37,27 +23,30 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     return axis * theta;
 }
 
-void viz_scene(const std::vector<Eigen::Isometry3d> &poses,
-               const std::vector<Eigen::Vector3d> &points, const bool show_grid) {
+void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
+               const std::vector<Eigen::Vector3d> &points_world, const bool show_grid,
+               const bool show_origin) {
     cv::viz::Viz3d window("Viz Scene");
 
     constexpr double pose_size = .5;
-    for (unsigned int i = 0; i < poses.size(); i++) {
-        cv::Affine3d cv_pose(eigen_to_cv_mat(Eigen::Matrix4d(poses[i].matrix().transpose())));
+    for (unsigned int i = 0; i < poses_world.size(); i++) {
+        cv::Affine3d cv_pose(eigen_mat_to_cv(Eigen::Matrix4d(poses_world[i].matrix())));
         window.showWidget("rigid_transform_" + std::to_string(i),
                           cv::viz::WCoordinateSystem(pose_size), cv_pose);
     }
     constexpr double point_radius = 0.08;
     constexpr int sphere_res = 10;
     const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
-    for (unsigned int i = 0; i < points.size(); i++) {
-        const Eigen::Vector3d &point = points[i];
+    for (unsigned int i = 0; i < points_world.size(); i++) {
+        const Eigen::Vector3d &point = points_world[i];
         window.showWidget("point_" + std::to_string(i),
                           cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius,
                                            sphere_res, point_color));
     }
 
-    window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+    if (show_origin) {
+        window.showWidget("world_frame", cv::viz::WCoordinateSystem());
+    }
 
     if (show_grid) {
         constexpr unsigned int num_cells = 6;
@@ -76,4 +65,4 @@ void viz_scene(const std::vector<Eigen::Isometry3d> &poses,
 
     window.spin();
 }
-}  // namespace robot::geometry::opencv_viz
+}  // namespace robot::geometry
diff --git a/common/geometry/opencv_viz.hh b/common/geometry/opencv_viz.hh
index d5530ecef..cdfcfe857 100644
--- a/common/geometry/opencv_viz.hh
+++ b/common/geometry/opencv_viz.hh
@@ -5,7 +5,8 @@
 #include "Eigen/Geometry"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry::opencv_viz {
-void viz_scene(const std::vector<Eigen::Isometry3d> &poses,
-               const std::vector<Eigen::Vector3d> &points, const bool show_grid = true);
+namespace robot::geometry {
+void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
+               const std::vector<Eigen::Vector3d> &points_world, const bool show_grid = true,
+               const bool show_origin = true);
 }
\ No newline at end of file
diff --git a/common/geometry/opencv_viz_test.cc b/common/geometry/opencv_viz_test.cc
index e5e8265af..3d618cc21 100644
--- a/common/geometry/opencv_viz_test.cc
+++ b/common/geometry/opencv_viz_test.cc
@@ -2,7 +2,7 @@
 
 #include "gtest/gtest.h"
 
-namespace robot::geometry::opencv_viz {
+namespace robot::geometry {
 namespace {
 bool is_test() {
     return std::getenv("BAZEL_TEST") != nullptr &&
@@ -68,22 +68,22 @@ TEST(OpencvVizTest, cube_test) {
 
     std::vector<Eigen::Isometry3d> poses;
 
-    Eigen::Matrix3d rotation0(
-        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
-        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
-    Eigen::Isometry3d pose0;
-    pose0.translation() = Eigen::Vector3d(4, 0, 0);
-    pose0.linear() = rotation0;
-    poses.push_back(pose0);
+    Eigen::Matrix3d R_W_cam0(
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    Eigen::Isometry3d T_W_cam0;
+    T_W_cam0.translation() = Eigen::Vector3d(4, 0, 0);
+    T_W_cam0.linear() = R_W_cam0;
+    poses.push_back(T_W_cam0);
 
-    Eigen::Isometry3d pose1;
-    pose1.linear() =
-        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * rotation0;
-    pose1.translation() = Eigen::Vector3d(0, 4, 0);
-    poses.push_back(pose1);
+    Eigen::Isometry3d T_W_cam1;
+    T_W_cam1.linear() =
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * R_W_cam0;
+    T_W_cam1.translation() = Eigen::Vector3d(0, 4, 0);
+    poses.push_back(T_W_cam1);
 
     if (!is_test()) {
         viz_scene(poses, cube_W);
     }
 }
-}  // namespace robot::geometry::opencv_viz
\ No newline at end of file
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/translate_types.cc b/common/geometry/translate_types.cc
new file mode 100644
index 000000000..8f940c348
--- /dev/null
+++ b/common/geometry/translate_types.cc
@@ -0,0 +1,34 @@
+#include "common/geometry/translate_types.hh"
+
+namespace robot::geometry {
+
+cv::Mat eigen_mat_to_cv(const Eigen::MatrixXd &matrix) {
+    cv::Mat result(matrix.rows(), matrix.cols(), CV_64F);  // Ensure correct type (double precision)
+    for (int i = 0; i < matrix.rows(); i++) {
+        for (int j = 0; j < matrix.cols(); j++) {
+            result.at<double>(i, j) = matrix(i, j);
+        }
+    }
+    return result;
+}
+
+cv::Mat eigen_vec_to_cv(const Eigen::VectorXd &vector) {
+    // cols should always be 1
+    cv::Mat result(vector.rows(), vector.cols(), CV_64F);
+    for (int i = 0; i < vector.rows(); i++) {
+        result.at<double>(i, 0) = vector(i);
+    }
+    return result;
+}
+
+Eigen::MatrixXd cv_to_eigen_mat(const cv::Mat &matrix) {
+    Eigen::MatrixXd result = Eigen::MatrixXd::Zero(matrix.rows, matrix.cols);
+    for (int i = 0; i < matrix.rows; i++) {
+        for (int j = 0; j < matrix.cols; j++) {
+            result(i, j) = matrix.at<double>(i, j);
+        }
+    }
+    return result;
+}
+
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/translate_types.hh b/common/geometry/translate_types.hh
new file mode 100644
index 000000000..6be817c13
--- /dev/null
+++ b/common/geometry/translate_types.hh
@@ -0,0 +1,10 @@
+#pragma once
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "opencv2/opencv.hpp"
+
+namespace robot::geometry {
+cv::Mat eigen_mat_to_cv(const Eigen::MatrixXd &matrix);
+cv::Mat eigen_vec_to_cv(const Eigen::VectorXd &vector);
+Eigen::MatrixXd cv_to_eigen_mat(const cv::Mat &matrix);
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/translate_types_test.cc b/common/geometry/translate_types_test.cc
new file mode 100644
index 000000000..67342ba21
--- /dev/null
+++ b/common/geometry/translate_types_test.cc
@@ -0,0 +1,37 @@
+#include "common/geometry/translate_types.hh"
+
+#include "gtest/gtest.h"
+
+namespace robot::geometry {
+TEST(TranslateTypesTest, eigen_mat_and_cv) {
+    int rows = 5, cols = 5;
+    Eigen::MatrixXd mat_eig = Eigen::MatrixXd::Random(rows, cols);
+    cv::Mat mat_cv_from_eig = eigen_mat_to_cv(mat_eig);
+    Eigen::MatrixXd mat_eig_from_cv = cv_to_eigen_mat(mat_cv_from_eig);
+    EXPECT_TRUE(static_cast<int>(mat_eig.rows()) == mat_cv_from_eig.rows);
+    EXPECT_TRUE(static_cast<int>(mat_eig.cols()) == mat_cv_from_eig.cols);
+    EXPECT_TRUE(static_cast<int>(mat_eig.rows()) == static_cast<int>(mat_eig_from_cv.rows()));
+    EXPECT_TRUE(static_cast<int>(mat_eig.cols()) == static_cast<int>(mat_eig_from_cv.cols()));
+    for (int i = 0; i < rows; i++) {
+        for (int j = 0; j < cols; j++) {
+            EXPECT_DOUBLE_EQ(mat_eig(i, j), mat_cv_from_eig.at<double>(i, j));
+            EXPECT_DOUBLE_EQ(mat_eig(i, j), mat_eig_from_cv(i, j));
+        }
+    }
+}
+
+TEST(TranslateTypesTest, eigen_vec_and_cv) {
+    int cols = 5, rows = 1;
+    Eigen::VectorXd vec_eig = Eigen::VectorXd::Random(5);
+    cv::Mat mat_cv_from_eig = eigen_vec_to_cv(vec_eig);
+    Eigen::VectorXd vec_eig_from_cv = Eigen::VectorXd(cv_to_eigen_mat(mat_cv_from_eig));
+    EXPECT_TRUE(static_cast<int>(vec_eig.rows()) == mat_cv_from_eig.rows);
+    EXPECT_TRUE(static_cast<int>(vec_eig.cols()) == mat_cv_from_eig.cols);
+    EXPECT_TRUE(static_cast<int>(vec_eig.rows()) == static_cast<int>(vec_eig_from_cv.rows()));
+    EXPECT_TRUE(static_cast<int>(vec_eig.cols()) == static_cast<int>(vec_eig_from_cv.cols()));
+    for (int i = 0; i < cols; i++) {
+        EXPECT_DOUBLE_EQ(vec_eig(i), mat_cv_from_eig.at<double>(i, 0));
+        EXPECT_DOUBLE_EQ(vec_eig(i), vec_eig_from_cv(i, 0));
+    }
+}
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index da4ec0163..2a2e1b14b 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -25,7 +25,22 @@ cc_library(
     "@opencv//:opencv",
     "@gtsam//:gtsam",
     "@eigen",
-    "@nlohmann_json//:json",
+    "//common/geometry:camera",
+    "//common/geometry:translate_types"
+  ]
+)
+
+cc_test(
+  name = "structure_from_motion_test",
+  srcs = ["structure_from_motion_test.cc"],  
+  copts = [
+    "-Wno-unused-parameter",
+  ],
+  deps = [
+    "@com_google_googletest//:gtest_main",
+    ":symphony_lake_parser",
+    ":structure_from_motion",
+    "//common/geometry:opencv_viz"
   ]
 )
 
@@ -46,26 +61,10 @@ cc_library(
   ]
 )
 
-cc_test(
-  name = "structure_from_motion_test",
-  srcs = ["structure_from_motion_test.cc"],  
-  copts = [
-    "-Wno-unused-parameter",
-  ],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":symphony_lake_parser",
-    ":structure_from_motion",
-    "//common/math:matrix_to_proto",
-    "//common/geometry:opencv_viz"
-  ]
-)
-
 cc_test(
   name = "symphony_lake_parser_test",
   srcs = ["symphony_lake_parser_test.cc"],
   copts = ["-Wno-unused-parameter"],
-  # data = ["@symphony_lake_snippet"],
   deps = [
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 64fa76e1f..a70df5c03 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -2,9 +2,19 @@
 
 #include <filesystem>
 #include <sstream>
+#include <stdexcept>
+
+#include "common/geometry/camera.hh"
+#include "common/geometry/translate_types.hh"
+#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/slam/BetweenFactor.h"
+#include "gtsam/slam/PriorFactor.h"
+#include "gtsam/slam/ProjectionFactor.h"
 
 namespace fs = std::filesystem;
 
+namespace geom = robot::geometry;
+
 namespace robot::experimental::learn_descriptors {
 
 const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() {
@@ -12,20 +22,6 @@ const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() {
     transform.translate(Eigen::Vector3d::Zero());
     transform.rotate(Eigen::Matrix3d(Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1, 0, 0)) *
                                      Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0))));
-
-    // std::stringstream ss;
-    // ss << gtsam::Pose3(gtsam::Rot3(transform.rotation()),
-    // gtsam::Point3(transform.translation())); json json_obj; json_obj["T_symlake_boat_cam"] =
-    // ss.str(); fs::path output_dir = "output"; fs::path output_file = output_dir /
-    // "output_sfm.json"; if (!fs::exists(output_dir)) {
-    //     fs::create_directory(output_dir);
-    // }
-    // std::ofstream file(output_file);
-    // if (file.is_open()) {
-    //     file << json_obj.dump(4);
-    //     file.close();
-    // }
-
     return transform;
 }();
 
@@ -35,6 +31,9 @@ std::string pose_to_string(gtsam::Pose3 pose) {
     return ss.str();
 }
 
+// const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() {
+//     Eigen::Isometry3d
+// }();
 const gtsam::Pose3 StructureFromMotion::default_initial_pose =
     gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation());
 // gtsam::Pose3(
@@ -46,13 +45,16 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose =
 // );
 
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
-                                         gtsam::Cal3_S2 K, gtsam::Pose3 initial_pose,
+                                         gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
+                                         gtsam::Pose3 initial_pose,
                                          Frontend::MatcherType frontend_matcher) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
     backend_ = Backend(K);
 
+    K_ = (cv::Mat_<double>(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1);
+    D_ = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
+
     set_initial_pose(initial_pose);
-    // backend_.get_current_initial_values().print("Ooga: ");
 }
 
 void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
@@ -60,49 +62,36 @@ void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
 }
 
 void StructureFromMotion::add_image(const cv::Mat &img) {
+    cv::Mat img_undistorted;
+    cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
         frontend_.get_keypoints_and_descriptors(img);
     landmarks_.push_back(std::vector<Backend::Landmark>());
     if (get_num_images_added() > 0) {
-        std::vector<cv::DMatch> matches = frontend_.get_matches(
-            img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);
-        Frontend::enforce_bijective_matches(matches);
-
-        matches_.push_back(matches);
-        gtsam::Pose3 between_value = get_backend().estimate_c0_c1(
-            img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first, matches,
-            get_backend().get_K());
+        std::vector<cv::DMatch> matches =
+            get_matches(img_keypoints_and_descriptors_.back().second,
+                        keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
+        // std::vector<cv::DMatch> matches = frontend_.get_matches(
+        //     img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);
+        // Frontend::enforce_bijective_matches(matches);
+        // matches_.push_back(matches);
+        gtsam::SharedNoiseModel pose_noise_model = gtsam::noiseModel::Diagonal::Sigmas(
+            (gtsam::Vector6() << 0.1, 0.1, 0.1, 0.01, 0.01, 0.01).finished());
+        gtsam::Pose3 between_value(
+            geom::estimate_c0_c1(
+                img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first,
+                matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K())))
+                .matrix());
         backend_.add_between_factor(
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value);
+            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value,
+            pose_noise_model);
         gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0, 0, 1));
         gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at<gtsam::Pose3>(
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1));
         gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at<gtsam::Pose3>(
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()));
-        int count = 0;
         for (const cv::DMatch match : matches) {
-            if (count < 1) {
-                // std::cout << "hello???" << std::endl;
-                json json_obj;
-                json_obj["add_image_1"] = {
-                    {"T_world_cam1", pose_to_string(T_world_cam1)},
-                    {"T_world_cam2", pose_to_string(T_world_cam1)},
-                    {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)},
-                    {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}};
-                fs::path output_dir = "output";
-                fs::path output_file = output_dir / "output_sfm.json";
-                if (!fs::exists(output_dir)) {
-                    fs::create_directory(output_dir);
-                }
-                std::ofstream file(output_file);
-                if (file.is_open()) {
-                    file << json_obj.dump(4);
-                    file.close();
-                }
-            }
-            count++;
-            // std::cout << "T_world_landmark" <<  T_world_cam1 * T_cam_landmark << std::endl;
             Backend::Landmark landmark_cam_1(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
                 gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
@@ -111,14 +100,14 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
                         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
                     static_cast<double>(
                         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
-                (T_world_cam1 * T_cam_landmark).translation());
+                backend_.get_K(), 3.0);
             Backend::Landmark landmark_cam_2(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
                 gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
                 gtsam::Point2(
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
-                (T_world_cam2 * T_cam_landmark).translation());
+                backend_.get_K(), 3.0);
             landmarks_[get_num_images_added() - 1].push_back(landmark_cam_1);
             landmarks_[get_num_images_added()].push_back(landmark_cam_2);
             backend_.add_landmark(landmark_cam_1);
@@ -134,6 +123,17 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
     img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
 }
 
+std::vector<cv::DMatch> StructureFromMotion::get_matches(
+    const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
+    std::optional<StructureFromMotion::MatchFunction> post_process_func) {
+    std::vector<cv::DMatch> matches = frontend_.get_matches(descriptors_1, descriptors_2);
+    if (post_process_func) {
+        (*post_process_func)(matches);
+    }
+    matches_.push_back(matches);
+    return matches;
+}
+
 Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
     extractor_type_ = frontend_algorithm;
     matcher_type_ = frontend_matcher;
@@ -292,36 +292,42 @@ Backend::Backend() {
     const double cy = img_height / 2.0;
 
     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-
+    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
     initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
 }
 
 Backend::Backend(gtsam::Cal3_S2 K) {
+    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
     initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
 }
 
 void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) {
     graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, pose_noise_);
-    initial_estimate_.insert(symbol, value);
+    initial_estimate_.insert_or_assign(symbol, value);
     // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
     // initial_estimate_.print("values after adding prior: ");
 }
 
 template <>
-void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
-                                 const gtsam::Pose3 &value, const gtsam::SharedNoiseModel model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value,
-                                                              model);
+void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &symbol_1,
+                                               const gtsam::Symbol &symbol_2,
+                                               const gtsam::Pose3 &value,
+                                               const gtsam::SharedNoiseModel &model) {
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, model);
     // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 <<
     // std::endl; initial_estimate_.print("values when adding between factor: ");
-    initial_estimate_.insert(symbol_2, initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
+    initial_estimate_.insert_or_assign(symbol_2,
+                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
 }
 
 template <>
-void Backend::add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
-                                 const gtsam::Rot3 &value, const gtsam::SharedNoiseModel &model) {
-    graph_.empalce_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
-    initial_estimate_.insert()
+void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
+                                              const gtsam::Symbol &symbol_2,
+                                              const gtsam::Rot3 &value,
+                                              const gtsam::SharedNoiseModel &model) {
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
+    initial_estimate_.insert_or_assign(symbol_2,
+                                       initial_estimate_.at<gtsam::Rot3>(symbol_1).compose(value));
 }
 
 void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
@@ -331,74 +337,32 @@ void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
 }
 
 void Backend::add_landmark(const Landmark &landmark) {
-    graph_.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
-        landmark.projection, landmark_noise_, landmark.cam_pose_symbol,
-        landmark.lmk_factor_symbol, gtsam::Symbol(camera_symbol_char, 0));
+    // graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+    //     landmark.projection, landmark_noise_, landmark.cam_pose_symbol,
+    //     landmark.lmk_factor_symbol, K_);
+    auto factor = boost::make_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+        landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
+        K_);
+    // Add the factor to the graph.
+    graph_.add(factor);
+    if (!initial_estimate_.exists(landmark.cam_pose_symbol)) {
+        throw std::runtime_error(
+            "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before "
+            "add_landmark is called.");
+    }
+    gtsam::Point3 p_world_lmk_estimate =
+        initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol)
+            .transformTo(landmark.p_cam_lmk_guess);
     if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) {
-        initial_estimate_.insert(landmark.lmk_factor_symbol, landmark.initial_guess);
+        initial_estimate_.insert(landmark.lmk_factor_symbol, p_world_lmk_estimate);
     } else {
-        initial_estimate_.update(landmark.lmk_factor_symbol, landmark.initial_guess);
+        initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
     }
 }
 
-gtsam::Pose3 Backend::estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts1,
-                                    const std::vector<cv::KeyPoint> &kpts2,
-                                    const std::vector<cv::DMatch> &matches,
-                                    const gtsam::Cal3_S2 &K) {
-    std::vector<cv::Point2f> pts1;
-    std::vector<cv::Point2f> pts2;
-    for (const cv::DMatch &match : matches) {
-        std::cout << "query point: " << kpts1[match.queryIdx].pt << std::endl;
-        std::cout << "trian point: " << kpts2[match.trainIdx].pt << std::endl;
-        pts1.push_back(kpts1[match.queryIdx].pt);
-        pts2.push_back(kpts2[match.trainIdx].pt);
-    }
-    cv::Mat cv_K = (cv::Mat_<double>(3, 3) << K.fx(), K.skew(), K.px(), 0, K.fy(), K.py(), 0, 0, 1);
-    cv::Mat E = cv::findEssentialMat(pts1, pts2, cv_K, cv::RANSAC, 0.999, 1.0);
-    cv::Mat R_c1_c0, t_c1_c0;
-    cv::recoverPose(E, pts1, pts2, cv_K, R_c1_c0, t_c1_c0);
-    gtsam::Point3 t_c1_c0_eigen(t_c1_c0.at<double>(0, 0), t_c1_c0.at<double>(1, 0), t_c1_c0.at<double>(2, 0));
-    gtsam::Matrix3 R_c1_c0_eigen;
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 3; j++) {
-            R_c1_c0_eigen(i, j) = R_c1_c0.at<double>(i, j);
-        }
-    }
-    return gtsam::Pose3(gtsam::Rot3(R_c1_c0_eigen), t_c1_c0_eigen).inverse();
-}
-
 void Backend::solve_graph() {
     gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
     result_ = optimizer.optimize();
 }
 
-// json SFM_Logger::gtsam_pose3_to_json(const gtsam::Pose3 &pose) {
-//     json json_obj;
-//     json_obj = {
-//         {},
-
-//     };
-
-//     return json_obj;
-// }
-
-// void SFM_Logger::values_to_json(const gtsam::Values &values, const std::filesystem::path
-// &output_path) {
-//     json json_obj;
-//     json_obj["add_image_1"] = {
-//         {"T_world_cam1", pose_to_string(T_world_cam1)},
-//         {"T_world_cam2", pose_to_string(T_world_cam1)},
-//         {"T_world_lmkcam1", pose_to_string(T_world_cam1 * T_cam_landmark)},
-//         {"T_world_lmkcam2", pose_to_string(T_world_cam2 * T_cam_landmark)}
-//     };
-//     if (!fs::exists(output_path)) {
-//         fs::create_directory(output_path);
-//     }
-//     std::ofstream file(output_file);
-//     if (file.is_open()) {
-//         file << json_obj.dump(4);
-//         file.close();
-//     }
-// }
-
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 8c98e5b0e..a1fb2ce27 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -6,20 +6,15 @@
 #include <utility>
 
 #include "Eigen/Core"
+#include "common/geometry/camera.hh"
 #include "gtsam/geometry/Point3.h"
 #include "gtsam/geometry/Pose3.h"
 #include "gtsam/inference/Symbol.h"
-#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/linear/NoiseModel.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
 #include "gtsam/nonlinear/Values.h"
-#include "gtsam/slam/BetweenFactor.h"
-#include "gtsam/slam/GeneralSFMFactor.h"
-#include "gtsam/slam/PriorFactor.h"
-#include "gtsam/slam/ProjectionFactor.h"
-#include "nlohmann/json.hpp"
 #include "opencv2/opencv.hpp"
 
-using json = nlohmann::json;
-
 namespace robot::experimental::learn_descriptors {
 class Frontend {
    public:
@@ -73,16 +68,17 @@ class Backend {
 
     struct Landmark {
         Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol,
-                 const gtsam::Point2 &projection,
-                 const gtsam::Point3 &initial_guess = gtsam::Point3::Identity())
+                 const gtsam::Point2 &projection, const gtsam::Cal3_S2 K,
+                 float initial_depth_guess = 5.0)
             : lmk_factor_symbol(lmk_factor_symbol),
               cam_pose_symbol(cam_pose_symbol),
               projection(projection),
-              initial_guess(initial_guess){};
+              p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K),
+                                                         projection, initial_depth_guess)){};
         const gtsam::Symbol lmk_factor_symbol;
         const gtsam::Symbol cam_pose_symbol;
         const gtsam::Point2 projection;
-        const gtsam::Point3 initial_guess;
+        const gtsam::Point3 p_cam_lmk_guess;
     };
 
     Backend();
@@ -95,21 +91,17 @@ class Backend {
     void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
                             const T &value, const gtsam::SharedNoiseModel &model);
 
-
     void add_landmarks(const std::vector<Landmark> &landmarks);
     void add_landmark(const Landmark &landmark);
-    gtsam::Pose3 estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts1,
-                               const std::vector<cv::KeyPoint> &kpts2,
-                               const std::vector<cv::DMatch> &matches, const gtsam::Cal3_S2 &K);
 
-    void solve_graph();    
+    void solve_graph();
 
     const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
     const gtsam::Values &get_result() const { return result_; };
-    const gtsam::Cal3_S2 &get_K() const { return K_; };
+    const gtsam::Cal3_S2 &get_K() const { return *K_; };
 
    private:
-    gtsam::Cal3_S2 K_;
+    gtsam::Cal3_S2::shared_ptr K_;
 
     gtsam::Values initial_estimate_;
     gtsam::Values result_;
@@ -118,17 +110,26 @@ class Backend {
     gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
         gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
     gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
-        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
-    gtsam::noiseModel::Isotropic::shared_ptr rotation_noise_ = 
-        gtsam::noiseModel::Isotropic::Sigma(3, 0.1);
-    gtsam::noiseModel::Isotropic::shared_ptr pose_bearing_noise_ = 
-        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
+        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
 };
+
+template <>
+void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Symbol &,
+                                               const gtsam::Pose3 &,
+                                               const gtsam::SharedNoiseModel &);
+template <>
+void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam::Symbol &,
+                                              const gtsam::Rot3 &, const gtsam::SharedNoiseModel &);
+
 class StructureFromMotion {
    public:
     static const Eigen::Affine3d T_symlake_boat_cam;
     static const gtsam::Pose3 default_initial_pose;
+    /**
+     * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3)
+     */
     StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K,
+                        Eigen::Matrix<double, 5, 1> D,
                         gtsam::Pose3 initial_pose = default_initial_pose,
                         Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
     ~StructureFromMotion(){};
@@ -137,11 +138,16 @@ class StructureFromMotion {
     void add_image(const cv::Mat &img);
     void solve_structure() { backend_.solve_graph(); };
     const gtsam::Values &get_structure_result() { return backend_.get_result(); };
+    using MatchFunction = std::function<void(std::vector<cv::DMatch> &)>;
+    std::vector<cv::DMatch> get_matches(
+        const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
+        std::optional<MatchFunction> post_process_func = std::nullopt);
 
     Frontend get_frontend() { return frontend_; };
     Backend get_backend() { return backend_; }
     size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); };
     size_t get_landmark_count() { return landmark_count_; };
+    const std::vector<std::vector<cv::DMatch>> get_matches() { return matches_; };
 
    private:
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
@@ -149,15 +155,10 @@ class StructureFromMotion {
     std::vector<std::vector<Backend::Landmark>> landmarks_;
     size_t landmark_count_ = 0;
 
+    cv::Mat K_;
+    cv::Mat D_;
+
     Frontend frontend_;
     Backend backend_;
 };
-// class SFM_Logger {
-//     public:
-//         // static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose);
-//         static const json eigen_mat_to_json(const Eigen::Matrix &mat);
-//         static const json gtsam_pose3_to_json(const gtsam::Pose3 &pose);
-//         static const void values_to_json(const gtsam::Values &values, const std::filesystem::path
-//         &file);
-// };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 170debd8e..82e4b84da 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -5,7 +5,6 @@
 
 #include "Eigen/Geometry"
 #include "common/geometry/opencv_viz.hh"
-#include "common/math/matrix_to_proto.hh"
 #include "experimental/learn_descriptors/symphony_lake_parser.hh"
 #include "gtest/gtest.h"
 
@@ -76,8 +75,8 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
     // cv::Mat img_display_test;
     for (Frontend::ExtractorType extractor_type : extractor_types) {
         for (Frontend::MatcherType matcher_type : matcher_types) {
-            printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
-                   static_cast<int>(matcher_type));
+            // printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+            //        static_cast<int>(matcher_type));
             try {
                 frontend = Frontend(extractor_type, matcher_type);
             } catch (const std::invalid_argument &e) {
@@ -105,8 +104,9 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
             // std::cout << "Press spacebar to pause." << std::endl;
             // while (cv::waitKey(1000) == 32) {
             // }
-            printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
-                   static_cast<int>(matcher_type));
+            // printf("completed frontend combination: (%d, %d)\n",
+            // static_cast<int>(extractor_type),
+            //        static_cast<int>(matcher_type));
             if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
                 for (const cv::DMatch match : matches) {
                     EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
@@ -121,103 +121,6 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
     }
 }
 
-TEST(GtsamTesting, sfm_test_estimate_pose) {
-    std::vector<gtsam::Point3> cube_W;
-    float cube_size = 1.0f;
-    cube_W.push_back(gtsam::Point3(0, 0, 0));
-    cube_W.push_back(gtsam::Point3(cube_size, 0, 0));
-    cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0));
-    cube_W.push_back(gtsam::Point3(0, cube_size, 0));
-    cube_W.push_back(gtsam::Point3(0, 0, cube_size));
-    cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size));
-    cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size));
-    cube_W.push_back(gtsam::Point3(0, cube_size, cube_size));
-
-    Eigen::Matrix3d R_new_points(
-        Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
-        Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
-
-    const int initial_size = cube_W.size();
-    const gtsam::Point3 p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2);
-    for (int i = 0; i < initial_size; i++) {
-        cube_W.emplace_back(
-            R_new_points * (cube_W[i] - p_world_cube_center) + p_world_cube_center);
-    }
-
-    const size_t img_width = 640;
-    const size_t img_height = 480;
-    const double fx = 500.0;
-    const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
-
-    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, *K);
-
-    std::vector<gtsam::Pose3> poses;
-    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras;
-
-    Eigen::Matrix3d R_world_pose0(
-        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
-        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
-    gtsam::Pose3 T_world_pose0(gtsam::Rot3(R_world_pose0),
-                               gtsam::Point3(4, cube_size / 2, cube_size / 2));
-    gtsam::PinholeCamera<gtsam::Cal3_S2> camera0(T_world_pose0, *K);
-    poses.push_back(T_world_pose0);
-    cameras.push_back(camera0);
-
-    Eigen::Matrix3d R_world_pose0_to_pose1(
-        Eigen::AngleAxis(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix());
-    gtsam::Pose3 T_world_pose1(
-        gtsam::Rot3(R_world_pose0_to_pose1) * T_world_pose0.rotation(),
-        (R_world_pose0_to_pose1 * (T_world_pose0.translation() - p_world_cube_center) +
-         p_world_cube_center));
-    gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(T_world_pose1, *K);
-    poses.push_back(T_world_pose1);
-    cameras.push_back(camera1);
-
-    std::vector<Eigen::Isometry3d> isometries;
-    for (const gtsam::Pose3 &pose : poses) {
-        isometries.emplace_back(pose.matrix());
-    }
-
-    std::vector<cv::KeyPoint> kpts1;
-    std::vector<cv::KeyPoint> kpts2;
-    std::vector<cv::DMatch> matches;
-
-    for (size_t i = 0; i < cube_W.size(); i++) {
-        gtsam::Point2 pixel_p_cube_C0 = cameras[0].project(cube_W[i]);
-        gtsam::Point2 pixel_p_cube_C1 = cameras[1].project(cube_W[i]);
-        if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C0, img_width, img_height) &&
-            GtsamTestHelper::pixel_in_range(pixel_p_cube_C1, img_width, img_height)) {
-            kpts1.push_back(cv::KeyPoint(pixel_p_cube_C0[0], pixel_p_cube_C0[1], 3));
-            kpts2.push_back(cv::KeyPoint(pixel_p_cube_C1[0], pixel_p_cube_C1[1], 3));
-            matches.emplace_back(cv::DMatch(kpts1.size() - 1, kpts2.size() - 1, 0));
-        }
-    }
-
-    std::cout << "kpts1.size(): " << kpts1.size() << std::endl;
-    std::cout << "kpts2.size(): " << kpts2.size() << std::endl;
-
-    gtsam::Pose3 T_pose0_pose1_estimate = sfm.get_backend().estimate_c0_c1(kpts1, kpts2, matches, *K);
-    gtsam::Pose3 T_pose0_pose1 = T_world_pose0.between(T_world_pose1);
-    std::vector<gtsam::Pose3> result_poses = {T_world_pose0, T_world_pose0 * T_pose0_pose1_estimate};
-    std::cout << "pose_diff_estimate: " << T_pose0_pose1_estimate << std::endl;
-    std::cout << "pose_diff_ground_truth: " << T_pose0_pose1 << std::endl;
-
-    isometries.emplace_back(result_poses.back().matrix());
-    robot::geometry::opencv_viz::viz_scene(isometries, cube_W);
-
-    constexpr double TOL = 1e-3;
-    for (size_t i = 0; i < poses.size(); i++) {
-        gtsam::Pose3 result_pose = result_poses[i];
-        EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL);
-        EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL);
-        EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL);
-        EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL));
-    }
-}
-
 TEST(SFM_TEST, sfm_snippet_two) {
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
@@ -230,64 +133,41 @@ TEST(SFM_TEST, sfm_snippet_two) {
     const double fx = image_point.fx, fy = image_point.fy;
     const double cx = image_point.cx, cy = image_point.cy;
     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+    Eigen::Matrix<double, 5, 1> D =
+        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
+         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+            .finished();
 
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);
-
-    std::cout << "sfm.landmark_count_: " << sfm.get_landmark_count() << std::endl;
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D);
 
     for (const cv::Mat &image : images) {
-        sfm.add_image(
-            image);  //, gtsam::Pose3(gtsam::Rot3::Identity(), gtsam::Point3(0.05, 0, 0)));
+        sfm.add_image(image);
     }
 
-    gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
-    std::vector<Eigen::MatrixXd> poses;
-    std::vector<robot::math::proto::Matrix> protos;
+    const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
+    std::vector<Eigen::Isometry3d> poses_world;
     for (size_t i = 0; i < images.size(); i++) {
         gtsam::Pose3 pose =
             initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
-        Eigen::MatrixXd eigen_affine(4, 4);
-        gtsam::Matrix3 rot_mat = pose.rotation().matrix();
-        gtsam::Point3 translation = pose.translation();
-        for (int j = 0; j < 3; j++) {
-            for (int k = 0; k < 3; k++) {
-                eigen_affine(j, k) = rot_mat(j, k);
+        poses_world.emplace_back(pose.matrix());
+    }
+    std::vector<Eigen::Vector3d> points_world;
+    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
+        // NOTE: this j for lmk_symbol is wrong.
+        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
+            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+            if (initial_values.exists(lmk_symbol)) {
+                points_world.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
+            } else {
+                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
             }
-            eigen_affine(3, j) = translation(j);
         }
-        eigen_affine(3, 3) = 1;
-        poses.push_back(eigen_affine);
-        protos.emplace_back();
-        pack_into(eigen_affine, &protos.back());
     }
-    std::cout << poses.front() << std::endl;
-
-    // const Eigen::Vector3d in(1.0, 2.0, 3.0);
-    // // Action
+    // std::cout << poses_world.front() << std::endl;
 
-    // robot::math::proto::Matrix proto;
-    // pack_into(in, &proto);
-    // const Eigen::Vector3d out = unpack_from<Eigen::Vector3d>(proto);
-    // std::cout << "vector out " << out << std::endl;
-
-    // const Eigen::VectorXd in{{1.0, 2.0, 3.0}};
-
-    // // Action
-    // robot::math::proto::Matrix proto;
-    // pack_into(in, &proto);
-    // const Eigen::VectorXd out = unpack_from<Eigen::VectorXd>(proto);
+    robot::geometry::viz_scene(poses_world, points_world, true, true);
 
     // sfm.solve_structure();
-
-    // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-    // cv::vconcat(img_display_test, img_matches_out, img_display_test);
-    // cv::Mat og_images;
-    // cv::hconcat(image_1, image_2, og_images);
-    // cv::vconcat(og_images, img_display_test, img_display_test);
-    // std::cout << "og_images.cols: " << og_images.cols << ". img_display_test.cols" <<
-    // img_display_test.cols << std::endl; cv::imshow("Keypoints and Matches Output.",
-    // img_display_test); std::cout << "Press spacebar to pause." << std::endl; while
-    // (cv::waitKey(1000) != 32) {}
 }
 
 // TEST(SFM_TEST, structure_from_motion) {
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index dd8db32be..fd361013b 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -10,6 +10,14 @@
 #include "symphony_lake_dataset/SurveyVector.h"
 
 namespace robot::experimental::learn_descriptors {
+class SymphonyLakeCamParams {
+   public:
+    constexpr static double fx = 759.308, fy = 690.44;
+    constexpr static double px = 370.915, py = 250.909;
+
+    constexpr static double k1 = -0.302805, k2 = 0.171088, k3 = 0.0;
+    constexpr static double p1 = 0.001151, p2 = -0.00038;
+};
 class DataParser {
    public:
     template <typename T>

From 4fc09c583f0bfdc44c54674fb4799a125e66f0ce Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 7 Mar 2025 14:19:04 -0500
Subject: [PATCH 09/45] fixed up poses in graph. need to figure out
 triangulation and maybe tranlsation scaling next.

---
 experimental/learn_descriptors/BUILD          |   5 +-
 .../structure_from_motion.cc                  |  64 +++++---
 .../structure_from_motion.hh                  |   3 +-
 .../structure_from_motion_test.cc             | 140 ++++++++++++------
 4 files changed, 149 insertions(+), 63 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 2a2e1b14b..7220c019b 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -21,6 +21,9 @@ cc_library(
   hdrs = ["structure_from_motion.hh"],
   visibility = ["//visibility:public"],
   srcs = ["structure_from_motion.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
   deps = [
     "@opencv//:opencv",
     "@gtsam//:gtsam",
@@ -34,7 +37,7 @@ cc_test(
   name = "structure_from_motion_test",
   srcs = ["structure_from_motion_test.cc"],  
   copts = [
-    "-Wno-unused-parameter",
+    "-Wno-error=unused-parameter",
   ],
   deps = [
     "@com_google_googletest//:gtest_main",
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index a70df5c03..d18db1b58 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -6,6 +6,7 @@
 
 #include "common/geometry/camera.hh"
 #include "common/geometry/translate_types.hh"
+#include "gtsam/geometry/triangulation.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/PriorFactor.h"
@@ -17,11 +18,10 @@ namespace geom = robot::geometry;
 
 namespace robot::experimental::learn_descriptors {
 
-const Eigen::Affine3d StructureFromMotion::T_symlake_boat_cam = []() {
-    Eigen::Affine3d transform = Eigen::Affine3d::Identity();
-    transform.translate(Eigen::Vector3d::Zero());
-    transform.rotate(Eigen::Matrix3d(Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(1, 0, 0)) *
-                                     Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0))));
+const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() {
+    Eigen::Isometry3d transform = Eigen::Isometry3d::Identity();    
+    transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) *
+                                     Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix();
     return transform;
 }();
 
@@ -34,8 +34,9 @@ std::string pose_to_string(gtsam::Pose3 pose) {
 // const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() {
 //     Eigen::Isometry3d
 // }();
-const gtsam::Pose3 StructureFromMotion::default_initial_pose =
-    gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation());
+const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion::T_symlake_boat_cam.matrix());
+// const gtsam::Pose3 StructureFromMotion::default_initial_pose =
+//     gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation());
 // gtsam::Pose3(
 //     gtsam::Rot3(Eigen::Matrix3d(
 //         Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() *
@@ -86,7 +87,6 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value,
             pose_noise_model);
-        gtsam::Pose3 T_cam_landmark(gtsam::Rot3::Identity(), gtsam::Point3(0, 0, 1));
         gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at<gtsam::Pose3>(
             gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1));
         gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at<gtsam::Pose3>(
@@ -293,12 +293,12 @@ Backend::Backend() {
 
     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
     K_ = boost::make_shared<gtsam::Cal3_S2>(K);
-    initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
+    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
 }
 
 Backend::Backend(gtsam::Cal3_S2 K) {
     K_ = boost::make_shared<gtsam::Cal3_S2>(K);
-    initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
+    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
 }
 
 void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) {
@@ -330,6 +330,35 @@ void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
                                        initial_estimate_.at<gtsam::Rot3>(symbol_1).compose(value));
 }
 
+std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(const gtsam::Symbol &lmk_symbol) {
+    std::vector<gtsam::Pose3> cam_poses;
+    std::vector<gtsam::Point2> observations;
+
+    // Iterate over all factors in the graph
+    for (const auto& factor : graph_) {
+        auto projFactor = boost::dynamic_pointer_cast<
+            gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(factor);
+        
+        if (projFactor && projFactor->keys().at(1) == lmk_symbol) {
+            // Get the camera pose symbol
+            gtsam::Symbol cameraSymbol = projFactor->keys().at(0);
+
+            // Retrieve the camera pose from values
+            if (!initial_estimate_.exists(cameraSymbol)) continue;
+            gtsam::Pose3 cameraPose = initial_estimate_.at<gtsam::Pose3>(cameraSymbol);
+
+            // Get the 2D observation (keypoint measurement)
+            gtsam::Point2 observation = projFactor->measured();
+
+            // Store the pose and corresponding observation
+            observations.push_back(observation);
+            cam_poses.push_back(cameraPose);
+        }
+    }
+    return std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>>(cam_poses, observations);
+}
+
+
 void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
     for (const Landmark &landmark : landmarks) {
         add_landmark(landmark);
@@ -337,14 +366,8 @@ void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
 }
 
 void Backend::add_landmark(const Landmark &landmark) {
-    // graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-    //     landmark.projection, landmark_noise_, landmark.cam_pose_symbol,
-    //     landmark.lmk_factor_symbol, K_);
-    auto factor = boost::make_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-        landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
+    graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
         K_);
-    // Add the factor to the graph.
-    graph_.add(factor);
     if (!initial_estimate_.exists(landmark.cam_pose_symbol)) {
         throw std::runtime_error(
             "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before "
@@ -352,12 +375,17 @@ void Backend::add_landmark(const Landmark &landmark) {
     }
     gtsam::Point3 p_world_lmk_estimate =
         initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol)
-            .transformTo(landmark.p_cam_lmk_guess);
+            * landmark.p_cam_lmk_guess;
     if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) {
         initial_estimate_.insert(landmark.lmk_factor_symbol, p_world_lmk_estimate);
     } else {
         initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
     }
+    // std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol);
+    // if (lmk_obs.first.size() >= 2) {
+    //     p_world_lmk_estimate = gtsam::triangulatePoint3(lmk_obs.first, K_, gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
+    //     initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+    // }
 }
 
 void Backend::solve_graph() {
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index a1fb2ce27..e7d0c8e7e 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -91,6 +91,7 @@ class Backend {
     void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
                             const T &value, const gtsam::SharedNoiseModel &model);
 
+    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(const gtsam::Symbol &lmk_symbol);
     void add_landmarks(const std::vector<Landmark> &landmarks);
     void add_landmark(const Landmark &landmark);
 
@@ -123,7 +124,7 @@ void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam
 
 class StructureFromMotion {
    public:
-    static const Eigen::Affine3d T_symlake_boat_cam;
+    static const Eigen::Isometry3d T_symlake_boat_cam;    
     static const gtsam::Pose3 default_initial_pose;
     /**
      * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3)
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 82e4b84da..a9494c5fb 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -121,13 +121,27 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
     }
 }
 
-TEST(SFM_TEST, sfm_snippet_two) {
+// TEST(SFM_TEST, boat_and_cam_frames) {    
+//     // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the water/hull
+//     Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam;
+//     T_boat_cam.translation() = Eigen::Vector3d::Ones();
+//     geometry::viz_scene(std::vector<Eigen::Isometry3d>{T_boat_cam}, std::vector<Eigen::Vector3d>());
+// }
+
+TEST(SFM_TEST, sfm_snippet_small) {
+    const std::vector<int> indices {120, 190}; // 0-199
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(180);
-    const std::vector<cv::Mat> images = {survey.loadImageByImageIndex(180),
-                                         survey.loadImageByImageIndex(185)};
+    const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(indices.front());
+    std::vector<cv::Mat> images;
+
+    for (const int &idx : indices) {
+        images.push_back(survey.loadImageByImageIndex(idx));
+    }
+    
+    // = {survey.loadImageByImageIndex(idx_1),
+    //                                      survey.loadImageByImageIndex(idx_2)};
 
     // const size_t img_width = image_point.width, img_height = image_point.height;
     const double fx = image_point.fx, fy = image_point.fy;
@@ -165,55 +179,95 @@ TEST(SFM_TEST, sfm_snippet_two) {
     }
     // std::cout << poses_world.front() << std::endl;
 
-    robot::geometry::viz_scene(poses_world, points_world, true, true);
+    geometry::viz_scene(poses_world, points_world, true, false);
 
-    // sfm.solve_structure();
+    sfm.solve_structure();
+
+    const gtsam::Values result_values = sfm.get_structure_result();
+    std::vector<Eigen::Isometry3d> final_poses;
+    std::vector<Eigen::Vector3d> final_lmks;
+    for (size_t i = 0; i < images.size(); i++) {
+        final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
+    }
+    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
+        // NOTE: this j for lmk_symbol is wrong.
+        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
+            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+            if (initial_values.exists(lmk_symbol)) {
+                final_lmks.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
+            } else {
+                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
+            }
+        }
+    }
+    geometry::viz_scene(final_poses, final_lmks, true, false);
 }
 
-// TEST(SFM_TEST, structure_from_motion) {
-//     const size_t img_width = 640;
-//     const size_t img_height = 480;
-//     const double fx = 500.0;
-//     const double fy = fx;
-//     const double cx = img_width / 2.0;
-//     const double cy = img_height / 2.0;
+// TEST(SFM_TEST, sfm_building) {
+//     const int idx_start = 50, idx_end = 120, idx_skip = 2;
+//     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+//     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+//     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+//     const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(idx_start);
 
+//     // const size_t img_width = image_point.width, img_height = image_point.height;
+//     const double fx = image_point.fx, fy = image_point.fy;
+//     const double cx = image_point.cx, cy = image_point.cy;
 //     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+//     Eigen::Matrix<double, 5, 1> D =
+//         (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
+//             SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+//             .finished();
 
-//     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K);
-//     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-//     // DataParser::Generator<cv::Mat> generator = data_parser.create_img_generator();
-//     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+//     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D);
+//     cv::Mat image;    
+//     for (int j = idx_start; j < idx_end; j+=idx_skip) {
+//         image = survey.loadImageByImageIndex(j);
+//         sfm.add_image(image);
+//     }
 
-//     cv::Mat image;
-//     for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
-//         const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-//         // for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
-//         //     image = survey.loadImageByImageIndex(j);
-//             // sfm.add_image(image);
-//         // }
-//         for (int j = 0; j < 2; j++) {
-//             image = survey.loadImageByImageIndex(j);
-//             sfm.add_image(image);
+//     const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
+//     std::vector<Eigen::Isometry3d> poses_world;
+//     for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) {
+//         gtsam::Pose3 pose =
+//             initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
+//         poses_world.emplace_back(pose.matrix());
+//     }
+//     std::vector<Eigen::Vector3d> points_world;
+//     for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
+//         // NOTE: this j for lmk_symbol is wrong.
+//         for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
+//             gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+//             if (initial_values.exists(lmk_symbol)) {
+//                 points_world.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
+//             } else {
+//                 std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
+//             }
 //         }
 //     }
-//     // DataParser::Generator<cv::Mat>::iterator img_itr = generator.begin();
-//     // while (img_itr != generator.end()) {
-//     //     if ((*img_itr).empty()) {
-//     //         continue;
-//     //     }
-//     //     std::cout << "ooga booga" << std::endl;
-//     //     std::cout << "image is empty: " << (*img_itr).empty() << std::endl;
-//     //     cv::imshow("ooga", *img_itr);
-//     //     cv::waitKey(1000);
-//     //     sfm.add_image(*img_itr);
-//     //     ++img_itr;
-//     // }
+//     // std::cout << poses_world.front() << std::endl;
+
+//     geometry::viz_scene(poses_world, points_world, true, false);
+
 //     sfm.solve_structure();
-//     // gtsam::Values output = sfm.get_structure_result();
-//     // output.print("result values: ");
-//     // for (size_t i = 0; i < sfm.get_num_images_added(); i++) {
 
-//     // }
+//     const gtsam::Values result_values = sfm.get_structure_result();
+//     std::vector<Eigen::Isometry3d> final_poses;
+//     std::vector<Eigen::Vector3d> final_lmks;
+//     for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) {
+//         final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
+//     }
+//     for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
+//         // NOTE: this j for lmk_symbol is wrong.
+//         for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
+//             gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+//             if (initial_values.exists(lmk_symbol)) {
+//                 final_lmks.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
+//             } else {
+//                 std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
+//             }
+//         }
+//     }
+//     geometry::viz_scene(final_poses, final_lmks, true, false);
 // }
 }  // namespace robot::experimental::learn_descriptors

From 697f16d9ee06630588993a28220577160fabd0e2 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Sun, 16 Mar 2025 22:17:10 -0400
Subject: [PATCH 10/45] MVP GPSFactor + GenericProjectionFactor sfm. Also
 figured out Transforms. Next: map lmk_symbols to landmarks to handle more
 than 2 images properly.

---
 experimental/learn_descriptors/BUILD          |   6 +-
 .../structure_from_motion.cc                  | 128 ++++++++++++------
 .../structure_from_motion.hh                  |  26 +++-
 .../structure_from_motion_test.cc             |  60 ++++----
 .../learn_descriptors/symphony_lake_parser.cc |  52 +++++++
 .../learn_descriptors/symphony_lake_parser.hh |  14 ++
 .../symphony_lake_parser_test.cc              |  46 +++++--
 7 files changed, 249 insertions(+), 83 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 7220c019b..977347d42 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -43,7 +43,8 @@ cc_test(
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
     ":structure_from_motion",
-    "//common/geometry:opencv_viz"
+    "//common/geometry:opencv_viz",
+    "//common/geometry:translate_types"
   ]
 )
 
@@ -71,7 +72,8 @@ cc_test(
   deps = [
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
-    "//common:check"
+    "//common:check",
+    "//common/geometry:opencv_viz"
   ]
 )
 
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index d18db1b58..1c3d2bd43 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -21,7 +21,7 @@ namespace robot::experimental::learn_descriptors {
 const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() {
     Eigen::Isometry3d transform = Eigen::Isometry3d::Identity();    
     transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) *
-                                     Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix();
+                                     Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix();
     return transform;
 }();
 
@@ -29,7 +29,7 @@ std::string pose_to_string(gtsam::Pose3 pose) {
     std::stringstream ss;
     ss << pose;
     return ss.str();
-}
+};
 
 // const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() {
 //     Eigen::Isometry3d
@@ -48,7 +48,7 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
                                          gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
                                          gtsam::Pose3 initial_pose,
-                                         Frontend::MatcherType frontend_matcher) {
+                                         Frontend::MatcherType frontend_matcher) : initial_pose_(initial_pose) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
     backend_ = Backend(K);
 
@@ -59,10 +59,10 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac
 }
 
 void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
-    backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose);
+    backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose, gtsam::noiseModel::Isotropic::Sigma(6, 0));
 }
 
-void StructureFromMotion::add_image(const cv::Mat &img) {
+void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam) {
     cv::Mat img_undistorted;
     cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
@@ -76,42 +76,49 @@ void StructureFromMotion::add_image(const cv::Mat &img) {
         //     img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);
         // Frontend::enforce_bijective_matches(matches);
         // matches_.push_back(matches);
-        gtsam::SharedNoiseModel pose_noise_model = gtsam::noiseModel::Diagonal::Sigmas(
-            (gtsam::Vector6() << 0.1, 0.1, 0.1, 0.01, 0.01, 0.01).finished());
-        gtsam::Pose3 between_value(
-            geom::estimate_c0_c1(
-                img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first,
-                matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K())))
-                .matrix());
-        backend_.add_between_factor(
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), between_value,
-            pose_noise_model);
-        gtsam::Pose3 T_world_cam1 = backend_.get_current_initial_values().at<gtsam::Pose3>(
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1));
-        gtsam::Pose3 T_world_cam2 = backend_.get_current_initial_values().at<gtsam::Pose3>(
-            gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()));
+        // gtsam::Rot3 R_c0_c1(
+        //     geom::estimate_c0_c1(
+        //         img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first,
+        //         matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear());
+
+        const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, get_num_images_added() - 1);
+        const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, get_num_images_added());
+        // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gps_prior, backend_.get_translation_noise());
+        // gtsam::Pose3 T_w_c0 = backend_.get_current_initial_values().at<gtsam::Pose3>(sym_T_w_c0);
+        // gtsam::Pose3 T_c0_c1(
+        //     R_c0_c1, 
+        //     t_world_gps - T_w_c0.translation());
+        // backend_.add_between_factor(
+        //     sym_T_w_c0,
+        //     sym_T_w_c1, 
+        //     T_c0_c1,
+        //     backend_.get_pose_noise());        
+        backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation());
+
         for (const cv::DMatch match : matches) {
-            Backend::Landmark landmark_cam_1(
+            // indexing each landmark symbol with landmark_count_ could create issues later on.
+            // lmks indices will be duplicated with greater than 2 images
+            // make map from lmk to idx?
+            Backend::Landmark landmark_cam_0(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added() - 1),
+                sym_T_w_c0,
                 gtsam::Point2(
                     static_cast<double>(
                         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
                     static_cast<double>(
                         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
                 backend_.get_K(), 3.0);
-            Backend::Landmark landmark_cam_2(
+            Backend::Landmark landmark_cam_1(
                 gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()),
+                sym_T_w_c1,
                 gtsam::Point2(
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
                 backend_.get_K(), 3.0);
-            landmarks_[get_num_images_added() - 1].push_back(landmark_cam_1);
-            landmarks_[get_num_images_added()].push_back(landmark_cam_2);
+            landmarks_[get_num_images_added() - 1].push_back(landmark_cam_0);
+            landmarks_[get_num_images_added()].push_back(landmark_cam_1);
+            backend_.add_landmark(landmark_cam_0);
             backend_.add_landmark(landmark_cam_1);
-            backend_.add_landmark(landmark_cam_2);
             landmark_count_++;
         }
         // std::cout << "number of landmarks: " << count << std::endl;
@@ -301,13 +308,24 @@ Backend::Backend(gtsam::Cal3_S2 K) {
     // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
 }
 
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value) {
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, pose_noise_);
+template <>
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, const gtsam::SharedNoiseModel &noise) {
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, noise);
     initial_estimate_.insert_or_assign(symbol, value);
     // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
     // initial_estimate_.print("values after adding prior: ");
 }
 
+template <>
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, const gtsam::SharedNoiseModel &noise) {
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Point3>>(symbol, value, noise);
+    gtsam::Rot3 R;
+    if (initial_estimate_.exists(symbol)) {        
+        R = initial_estimate_.at<gtsam::Pose3>(symbol).rotation();                
+    }     
+    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value));
+}
+
 template <>
 void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &symbol_1,
                                                const gtsam::Symbol &symbol_2,
@@ -325,9 +343,16 @@ void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
                                               const gtsam::Symbol &symbol_2,
                                               const gtsam::Rot3 &value,
                                               const gtsam::SharedNoiseModel &model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);    
     initial_estimate_.insert_or_assign(symbol_2,
-                                       initial_estimate_.at<gtsam::Rot3>(symbol_1).compose(value));
+                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(gtsam::Pose3(value, gtsam::Point3::Zero())));
+}
+
+void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, const gtsam::SharedNoiseModel &model,
+        const gtsam::Rot3 & R_world_cam) {
+    
+    graph_.emplace_shared<gtsam::GPSFactor>(symbol, t_world_cam, model);
+    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam));
 }
 
 std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(const gtsam::Symbol &lmk_symbol) {
@@ -376,16 +401,39 @@ void Backend::add_landmark(const Landmark &landmark) {
     gtsam::Point3 p_world_lmk_estimate =
         initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol)
             * landmark.p_cam_lmk_guess;
-    if (!initial_estimate_.exists(landmark.lmk_factor_symbol)) {
-        initial_estimate_.insert(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-    } else {
-        initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+    initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+
+    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol);
+    if (lmk_obs.first.size() >= 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_world_lmk_estimate = gtsam::triangulatePoint3(
+                lmk_obs.first, K_, 
+                gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
+            
+            // Optional: perform an explicit cheirality check
+            bool valid = true;
+            for (const auto &pose : lmk_obs.first) {
+                // Transform point to the camera coordinate system.
+                // transformTo() converts a world point to the camera frame.
+                gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate);
+                if (p_cam.z() <= 0) {  // Check that the depth is positive
+                    valid = false;
+                    break;
+                }
+            }
+            
+            if (valid) {
+                initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+            } else {
+                std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." << std::endl;
+            }
+        } catch (const gtsam::TriangulationCheiralityException &e) {
+            // Handle the exception gracefully by logging and retaining the previous estimate.
+            std::cerr << "Triangulation Cheirality Exception: " << e.what() 
+                      << ". Keeping initial landmark estimate." << std::endl;
+        }
     }
-    // std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol);
-    // if (lmk_obs.first.size() >= 2) {
-    //     p_world_lmk_estimate = gtsam::triangulatePoint3(lmk_obs.first, K_, gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
-    //     initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-    // }
 }
 
 void Backend::solve_graph() {
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index e7d0c8e7e..86c9ce42c 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -13,6 +13,7 @@
 #include "gtsam/linear/NoiseModel.h"
 #include "gtsam/nonlinear/NonlinearFactorGraph.h"
 #include "gtsam/nonlinear/Values.h"
+#include "gtsam/navigation/GPSFactor.h"
 #include "opencv2/opencv.hpp"
 
 namespace robot::experimental::learn_descriptors {
@@ -62,6 +63,7 @@ class Backend {
    public:
     static constexpr char pose_symbol_char = 'x';
     static constexpr char pose_rot_symbol_char = 'r';
+    static constexpr char pose_translation_symbol_char = 't';
     static constexpr char pose_bearing_symbol_char = 'b';
     static constexpr char landmark_symbol_char = 'l';
     static constexpr char camera_symbol_char = 'k';
@@ -85,11 +87,16 @@ class Backend {
     Backend(gtsam::Cal3_S2 K);
     ~Backend(){};
 
-    void add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value);
+    template <typename T>
+    void add_prior_factor(const gtsam::Symbol &symbol, const T &value, const gtsam::SharedNoiseModel & model);
 
     template <typename T>
     void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
                             const T &value, const gtsam::SharedNoiseModel &model);
+    
+
+    void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, const gtsam::SharedNoiseModel& model, 
+        const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity());
 
     std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(const gtsam::Symbol &lmk_symbol);
     void add_landmarks(const std::vector<Landmark> &landmarks);
@@ -101,6 +108,11 @@ class Backend {
     const gtsam::Values &get_result() const { return result_; };
     const gtsam::Cal3_S2 &get_K() const { return *K_; };
 
+    const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; };
+    const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; };
+    const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; };
+    const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; };
+
    private:
     gtsam::Cal3_S2::shared_ptr K_;
 
@@ -112,8 +124,17 @@ class Backend {
         gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
     gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
         gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
+    gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
+    gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = gtsam::noiseModel::Isotropic::Sigma(3, 2.);
 };
 
+template<>
+void Backend::add_prior_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Pose3 &, const gtsam::SharedNoiseModel &);
+
+template <>
+void Backend::add_prior_factor<gtsam::Point3>(const gtsam::Symbol &, const gtsam::Point3 &, const gtsam::SharedNoiseModel &);
+
 template <>
 void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Symbol &,
                                                const gtsam::Pose3 &,
@@ -136,7 +157,7 @@ class StructureFromMotion {
     ~StructureFromMotion(){};
 
     void set_initial_pose(gtsam::Pose3 initial_pose);
-    void add_image(const cv::Mat &img);
+    void add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam);
     void solve_structure() { backend_.solve_graph(); };
     const gtsam::Values &get_structure_result() { return backend_.get_result(); };
     using MatchFunction = std::function<void(std::vector<cv::DMatch> &)>;
@@ -151,6 +172,7 @@ class StructureFromMotion {
     const std::vector<std::vector<cv::DMatch>> get_matches() { return matches_; };
 
    private:
+    gtsam::Pose3 initial_pose_;
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
     std::vector<std::vector<cv::DMatch>> matches_;
     std::vector<std::vector<Backend::Landmark>> landmarks_;
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index a9494c5fb..3bbf1c9fa 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -5,6 +5,7 @@
 
 #include "Eigen/Geometry"
 #include "common/geometry/opencv_viz.hh"
+#include "common/geometry/translate_types.hh"
 #include "experimental/learn_descriptors/symphony_lake_parser.hh"
 #include "gtest/gtest.h"
 
@@ -129,38 +130,43 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
 // }
 
 TEST(SFM_TEST, sfm_snippet_small) {
-    const std::vector<int> indices {120, 190}; // 0-199
+    const std::vector<int> indices {120, 130}; // 0-199
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(indices.front());
-    std::vector<cv::Mat> images;
+    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
 
-    for (const int &idx : indices) {
-        images.push_back(survey.loadImageByImageIndex(idx));
-    }
-    
-    // = {survey.loadImageByImageIndex(idx_1),
-    //                                      survey.loadImageByImageIndex(idx_2)};
-
-    // const size_t img_width = image_point.width, img_height = image_point.height;
-    const double fx = image_point.fx, fy = image_point.fy;
-    const double cx = image_point.cx, cy = image_point.cy;
+    // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
+    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
     Eigen::Matrix<double, 5, 1> D =
         (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
          SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
             .finished();
+    
+    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
+    // T_earth_boat0 = 
+    Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first);
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix()));  
 
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D);
-
-    for (const cv::Mat &image : images) {
-        sfm.add_image(image);
+    for (const int &idx : indices) {    
+        const cv::Mat img = survey.loadImageByImageIndex(idx);
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);        
+        Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
+        Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        
+        sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
+    // for (const cv::Mat &image : images) {
+    //     sfm.add_image(image);
+    // }
 
     const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
     std::vector<Eigen::Isometry3d> poses_world;
-    for (size_t i = 0; i < images.size(); i++) {
+    for (size_t i = 0; i < indices.size(); i++) {
         gtsam::Pose3 pose =
             initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
         poses_world.emplace_back(pose.matrix());
@@ -179,14 +185,18 @@ TEST(SFM_TEST, sfm_snippet_small) {
     }
     // std::cout << poses_world.front() << std::endl;
 
-    geometry::viz_scene(poses_world, points_world, true, false);
+    geometry::viz_scene(poses_world, points_world, true, true);
+
+    std::cout << "Solving for structure!" << std::endl;
 
     sfm.solve_structure();
 
+    std::cout << "Solution complete." << std::endl;
+
     const gtsam::Values result_values = sfm.get_structure_result();
     std::vector<Eigen::Isometry3d> final_poses;
     std::vector<Eigen::Vector3d> final_lmks;
-    for (size_t i = 0; i < images.size(); i++) {
+    for (size_t i = 0; i < indices.size(); i++) {
         final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
     }
     for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
@@ -200,7 +210,7 @@ TEST(SFM_TEST, sfm_snippet_small) {
             }
         }
     }
-    geometry::viz_scene(final_poses, final_lmks, true, false);
+    geometry::viz_scene(final_poses, final_lmks, true, true);
 }
 
 // TEST(SFM_TEST, sfm_building) {
@@ -208,11 +218,11 @@ TEST(SFM_TEST, sfm_snippet_small) {
 //     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
 //     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
 //     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-//     const symphony_lake_dataset::ImagePoint image_point = survey.getImagePoint(idx_start);
+//     const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(idx_start);
 
-//     // const size_t img_width = image_point.width, img_height = image_point.height;
-//     const double fx = image_point.fx, fy = image_point.fy;
-//     const double cx = image_point.cx, cy = image_point.cy;
+//     // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
+//     const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+//     const double cx = img_pt_first.cx, cy = img_pt_first.cy;
 //     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
 //     Eigen::Matrix<double, 5, 1> D =
 //         (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 377e367cb..8146c6a36 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -5,6 +5,22 @@
 #include "common/check.hh"
 
 namespace robot::experimental::learn_descriptors {
+const Eigen::Vector3d DataParser::t_boat_cam(-0.420, 0, 0.255);
+
+const Eigen::Isometry3d DataParser::T_boat_gps((Eigen::Matrix4d() <<
+    1, 0, 0, 0.080,
+    0, 1, 0, -0.118,
+    0, 0, 1, 0.010,
+    0, 0, 0, 1
+).finished());
+
+const Eigen::Isometry3d DataParser::T_boat_imu = []() {
+    Eigen::Isometry3d T_boat_imu;
+    T_boat_imu.translation() = Eigen::Vector3d(-0.494, 0, -0.089);
+    T_boat_imu.linear() = Eigen::AngleAxisd(M_PI, Eigen::Vector3d::UnitX()).matrix();
+    return T_boat_imu;
+}();
+
 DataParser::DataParser(const std::filesystem::path &image_root_dir,
                        const std::vector<std::string> &survey_list)
     : image_root_dir_(image_root_dir), survey_list_(survey_list) {
@@ -35,4 +51,40 @@ Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx
     T_world_camera.rotate(R);
     return T_world_camera;
 }
+
+const Eigen::Isometry3d DataParser::get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt) {
+    return get_T_boat_camera(img_pt.pan, img_pt.tilt);
+}
+const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double theta_tilt) {
+    Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan
+    Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt
+    Eigen::Matrix3d R_boat_gimbal(R_z * R_y); 
+        
+    Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX())
+        * Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY())).matrix();
+    Eigen::Matrix3d R_boat_camera(R_boat_gimbal * R_gimbal_camera);
+    
+    Eigen::Isometry3d T_boat_cam;
+    T_boat_cam.translation() = t_boat_cam;
+    T_boat_cam.linear() = R_boat_camera;
+    return T_boat_cam;
+}
+
+const Eigen::Matrix3d DataParser::get_R_world_boat(double theta_compass) {
+    Eigen::Matrix3d R_world_imu = Eigen::AngleAxisd(theta_compass, Eigen::Vector3d::UnitZ()).matrix();
+    Eigen::Matrix3d R_imu_boat = T_boat_imu.inverse().linear();
+    Eigen::Matrix3d R_world_boat = R_world_imu * R_imu_boat;
+    return R_world_boat;
+}
+
+const Eigen::Isometry3d DataParser::get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt) {
+    Eigen::Matrix3d R_world_boat = get_R_world_boat(img_pt.theta);
+    
+    Eigen::Isometry3d T_world_gps;
+    T_world_gps.linear() =  R_world_boat * T_boat_gps.linear();
+    T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+
+    Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse();
+    return T_world_boat;
+}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index fd361013b..664f5ccdb 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -68,12 +68,26 @@ class DataParser {
         }
     }
 
+    static const Eigen::Vector3d t_boat_cam;
+    static const Eigen::Isometry3d T_boat_gps;    
+    static const Eigen::Isometry3d T_boat_imu;
+
     DataParser(const std::filesystem::path &image_root_dir,
                const std::vector<std::string> &survey_list);
     ~DataParser();
 
     Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
                                        bool use_compass = false);
+    static const Eigen::Isometry3d get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt);
+    static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt);
+    /// @brief get_R_world_boat assuming z_axis_boat dot z_axis_world ~ -1
+    /// @param theta_compass in radians
+    /// @return R_world_boat
+    static const Eigen::Matrix3d get_R_world_boat(double theta_compass);
+    /// @brief get_T_world_boat assuming z_axis_boat dot z_axis_world ~ -1
+    /// @param img_pt 
+    /// @return T_world_boat
+    static const Eigen::Isometry3d get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt);
 
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
     Generator<cv::Mat> create_img_generator() { return image_generator(surveys_); };
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index 0b22cdd3c..c7c2329ef 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -1,4 +1,5 @@
 #include "experimental/learn_descriptors/symphony_lake_parser.hh"
+#include "common/geometry/opencv_viz.hh"
 
 #include <cstdlib>
 #include <iostream>
@@ -55,20 +56,37 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
         }
     }
 }
-TEST(SymphonyLakeParserTest, snippet_140106_generator_test) {
+// TEST(SymphonyLakeParserTest, snippet_140106_generator_test) {
+//     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+//     DataParser::Generator<cv::Mat> generator =
+//         (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator();
+//     DataParser::Generator<cv::Mat>::iterator img_iter = generator.begin();
+//     while (img_iter != generator.end()) {
+//         cv::Mat img = *img_iter;
+//         // if (!is_test()) {
+//         //     cv::imshow("Symphony Dataset Image", img);
+//         //     cv::waitKey(200);
+//         // }
+//         // cv::imshow("Symphony Dataset Image", img);
+//         // cv::waitKey(200);
+//         ++img_iter;
+//     }
+// }
+
+TEST(SymphonyLakeParserTest, test_frames) {
+    const std::vector<int> indices {120, 190}; // 0-199
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    DataParser::Generator<cv::Mat> generator =
-        (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator();
-    DataParser::Generator<cv::Mat>::iterator img_iter = generator.begin();
-    while (img_iter != generator.end()) {
-        cv::Mat img = *img_iter;
-        // if (!is_test()) {
-        //     cv::imshow("Symphony Dataset Image", img);
-        //     cv::waitKey(200);
-        // }
-        // cv::imshow("Symphony Dataset Image", img);
-        // cv::waitKey(200);
-        ++img_iter;
-    }
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint image_point_first = survey.getImagePoint(indices.front());
+
+    std::vector<Eigen::Isometry3d> poses_viz;
+    poses_viz.push_back(DataParser::get_T_boat_camera(image_point_first));
+    poses_viz.push_back(DataParser::T_boat_gps);
+    poses_viz.push_back(DataParser::T_boat_imu);    
+
+    std::cout << "pan: " << image_point_first.pan << "\ntilt: " << image_point_first.tilt << std::endl;
+
+    geometry::viz_scene(poses_viz, std::vector<Eigen::Vector3d>(), true, true);
 }
 }  // namespace robot::experimental::learn_descriptors

From 3919f8af60d571c0e1d52af8707cd5c06a0f8f69 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Mon, 17 Mar 2025 23:56:06 -0400
Subject: [PATCH 11/45] Can now add N cameras and landmarks should correspond
 correctly in gtsam graph. TODO: Transforms seem off.

---
 .../structure_from_motion.cc                  |  70 ++++----
 .../structure_from_motion.hh                  |  25 ++-
 .../structure_from_motion_test.cc             | 159 ++++++++++--------
 3 files changed, 155 insertions(+), 99 deletions(-)

diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 1c3d2bd43..b0ef8ae75 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -67,8 +67,9 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
         frontend_.get_keypoints_and_descriptors(img);
-    landmarks_.push_back(std::vector<Backend::Landmark>());
-    if (get_num_images_added() > 0) {
+    keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
+    const size_t idx_img_current = get_num_images_added();
+    if (idx_img_current > 0) {
         std::vector<cv::DMatch> matches =
             get_matches(img_keypoints_and_descriptors_.back().second,
                         keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
@@ -81,8 +82,8 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
         //         img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first,
         //         matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear());
 
-        const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, get_num_images_added() - 1);
-        const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, get_num_images_added());
+        const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1);
+        const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current);
         // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gps_prior, backend_.get_translation_noise());
         // gtsam::Pose3 T_w_c0 = backend_.get_current_initial_values().at<gtsam::Pose3>(sym_T_w_c0);
         // gtsam::Pose3 T_c0_c1(
@@ -96,36 +97,43 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
         backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation());
 
         for (const cv::DMatch match : matches) {
-            // indexing each landmark symbol with landmark_count_ could create issues later on.
-            // lmks indices will be duplicated with greater than 2 images
-            // make map from lmk to idx?
-            Backend::Landmark landmark_cam_0(
-                gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                sym_T_w_c0,
-                gtsam::Point2(
-                    static_cast<double>(
-                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
-                    static_cast<double>(
-                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
-                backend_.get_K(), 3.0);
-            Backend::Landmark landmark_cam_1(
-                gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                sym_T_w_c1,
-                gtsam::Point2(
-                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
-                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
-                backend_.get_K(), 3.0);
-            landmarks_[get_num_images_added() - 1].push_back(landmark_cam_0);
-            landmarks_[get_num_images_added()].push_back(landmark_cam_1);
+            const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx];
+            const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx];
+
+            if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) {
+                const Backend::Landmark landmark_cam_0(
+                    gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
+                    sym_T_w_c0,
+                    gtsam::Point2(
+                        static_cast<double>(
+                            img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
+                        static_cast<double>(
+                            img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
+                    backend_.get_K(), 3.0);
+                keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0);
+                landmark_count_++;
+            } 
+            // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " << 
+            //     (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) 
+            //     << std::endl;                        
+            const Backend::Landmark landmark_cam_0 = keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0);
+
+            // technically don't need this conditional when enforcing bijectivity
+            if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == keypoint_to_landmarks_[idx_img_current].end()) {
+                const Backend::Landmark landmark_cam_1(
+                    landmark_cam_0.lmk_factor_symbol,
+                    sym_T_w_c1,
+                    gtsam::Point2(
+                        static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
+                        static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
+                    backend_.get_K(), 3.0);
+                keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1);                
+            }             
+            const Backend::Landmark landmark_cam_1 = keypoint_to_landmarks_[idx_img_current].at(kpt_cam1);
+
             backend_.add_landmark(landmark_cam_0);
             backend_.add_landmark(landmark_cam_1);
-            landmark_count_++;
         }
-        // std::cout << "number of landmarks: " << count << std::endl;
-        // std::cout << "number of landmarks added to graph: " << [this](){int count = 0; for (const
-        // auto &landmark : landmarks_){count += landmark.size();} return count; }() << std::endl;
-        // backend_.get_current_initial_values().print("Current initial values: ");
-        // std::cout << "landmark_count_: " << landmark_count_ << std::endl;
     }
     img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
 }
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 86c9ce42c..2f7112293 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -16,6 +16,28 @@
 #include "gtsam/navigation/GPSFactor.h"
 #include "opencv2/opencv.hpp"
 
+namespace std {
+    template <>
+    struct hash<cv::KeyPoint> {
+        size_t operator()(const cv::KeyPoint& kp) const {
+            size_t h1 = hash<float>()(kp.pt.x);
+            size_t h2 = hash<float>()(kp.pt.y);
+            size_t h3 = hash<float>()(kp.size);
+            size_t h4 = hash<float>()(kp.angle);
+            size_t h5 = hash<float>()(kp.response);
+            size_t h6 = hash<int>()(kp.octave);
+            size_t h7 = hash<int>()(kp.class_id);
+ 
+            return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
+        }
+    };
+}
+namespace cv{
+    inline bool operator==(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) {
+        return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
+                kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id;
+    }
+}
 namespace robot::experimental::learn_descriptors {
 class Frontend {
    public:
@@ -146,7 +168,7 @@ void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam
 class StructureFromMotion {
    public:
     static const Eigen::Isometry3d T_symlake_boat_cam;    
-    static const gtsam::Pose3 default_initial_pose;
+    static const gtsam::Pose3 default_initial_pose;    
     /**
      * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3)
      */
@@ -176,6 +198,7 @@ class StructureFromMotion {
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
     std::vector<std::vector<cv::DMatch>> matches_;
     std::vector<std::vector<Backend::Landmark>> landmarks_;
+    std::vector<std::unordered_map<cv::KeyPoint, Backend::Landmark>> keypoint_to_landmarks_;
     size_t landmark_count_ = 0;
 
     cv::Mat K_;
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 3bbf1c9fa..546827e9d 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -151,8 +151,10 @@ TEST(SFM_TEST, sfm_snippet_small) {
     Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first);
     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix()));  
 
+    std::vector<cv::Mat> img_vector;
     for (const int &idx : indices) {    
         const cv::Mat img = survey.loadImageByImageIndex(idx);
+        img_vector.push_back(img);
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);        
         Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
         Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
@@ -213,71 +215,94 @@ TEST(SFM_TEST, sfm_snippet_small) {
     geometry::viz_scene(final_poses, final_lmks, true, true);
 }
 
-// TEST(SFM_TEST, sfm_building) {
-//     const int idx_start = 50, idx_end = 120, idx_skip = 2;
-//     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-//     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-//     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-//     const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(idx_start);
-
-//     // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
-//     const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-//     const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-//     gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-//     Eigen::Matrix<double, 5, 1> D =
-//         (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-//             SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-//             .finished();
-
-//     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D);
-//     cv::Mat image;    
-//     for (int j = idx_start; j < idx_end; j+=idx_skip) {
-//         image = survey.loadImageByImageIndex(j);
-//         sfm.add_image(image);
-//     }
-
-//     const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
-//     std::vector<Eigen::Isometry3d> poses_world;
-//     for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) {
-//         gtsam::Pose3 pose =
-//             initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
-//         poses_world.emplace_back(pose.matrix());
-//     }
-//     std::vector<Eigen::Vector3d> points_world;
-//     for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
-//         // NOTE: this j for lmk_symbol is wrong.
-//         for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
-//             gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-//             if (initial_values.exists(lmk_symbol)) {
-//                 points_world.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
-//             } else {
-//                 std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-//             }
-//         }
-//     }
-//     // std::cout << poses_world.front() << std::endl;
-
-//     geometry::viz_scene(poses_world, points_world, true, false);
-
-//     sfm.solve_structure();
-
-//     const gtsam::Values result_values = sfm.get_structure_result();
-//     std::vector<Eigen::Isometry3d> final_poses;
-//     std::vector<Eigen::Vector3d> final_lmks;
-//     for (size_t i = 0; i < (idx_end - idx_start)/idx_skip; i++) {
-//         final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
-//     }
-//     for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
-//         // NOTE: this j for lmk_symbol is wrong.
-//         for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
-//             gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-//             if (initial_values.exists(lmk_symbol)) {
-//                 final_lmks.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
-//             } else {
-//                 std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-//             }
-//         }
-//     }
-//     geometry::viz_scene(final_poses, final_lmks, true, false);
-// }
+TEST(SFM_TEST, sfm_building) {
+    // indices 0-199
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 0; i < 200; i += 10) {
+            tmp.push_back(i);
+        }
+        return tmp;
+    }();
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
+
+    // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
+    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
+    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+    Eigen::Matrix<double, 5, 1> D =
+        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
+         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+            .finished();
+    
+    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
+    Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first);
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix()));  
+
+    for (const int &idx : indices) {    
+        const cv::Mat img = survey.loadImageByImageIndex(idx);
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);  
+
+        Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
+        Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        
+        sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
+    }
+    // for (const cv::Mat &image : images) {
+    //     sfm.add_image(image);
+    // }
+
+    const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
+    std::vector<Eigen::Isometry3d> poses_world;
+    for (size_t i = 0; i < indices.size(); i++) {
+        gtsam::Pose3 pose =
+            initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
+        poses_world.emplace_back(pose.matrix());
+    }
+    std::vector<Eigen::Vector3d> points_world;
+    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
+        // NOTE: this j for lmk_symbol is wrong.
+        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
+            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+            if (initial_values.exists(lmk_symbol)) {
+                points_world.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
+            } else {
+                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
+            }
+        }
+    }
+    // std::cout << poses_world.front() << std::endl;
+
+    geometry::viz_scene(poses_world, points_world, true, true);
+
+    std::cout << "Solving for structure!" << std::endl;
+
+    sfm.solve_structure();
+
+    std::cout << "Solution complete." << std::endl;
+
+    const gtsam::Values result_values = sfm.get_structure_result();
+    std::vector<Eigen::Isometry3d> final_poses;
+    std::vector<Eigen::Vector3d> final_lmks;
+    for (size_t i = 0; i < indices.size(); i++) {
+        final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
+    }
+    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
+        // NOTE: this j for lmk_symbol is wrong.
+        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
+            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+            if (initial_values.exists(lmk_symbol)) {
+                final_lmks.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
+            } else {
+                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
+            }
+        }
+    }
+    geometry::viz_scene(final_poses, final_lmks, true, true);
+}
 }  // namespace robot::experimental::learn_descriptors

From f05522a5b3e7892845a0305572d084408e4a7204 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 18 Mar 2025 00:54:46 -0400
Subject: [PATCH 12/45] commented out bad function T_world_camera

---
 .../structure_from_motion.cc                  |  1 +
 .../structure_from_motion_test.cc             |  2 +
 .../learn_descriptors/symphony_lake_parser.cc | 42 +++++++++----------
 .../learn_descriptors/symphony_lake_parser.hh |  4 +-
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index b0ef8ae75..fd7d0e4ef 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -95,6 +95,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
         //     T_c0_c1,
         //     backend_.get_pose_noise());        
         backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation());
+        // backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise());
 
         for (const cv::DMatch match : matches) {
             const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx];
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 546827e9d..61c816444 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -159,6 +159,8 @@ TEST(SFM_TEST, sfm_snippet_small) {
         Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
         Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
         Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+
+        // T_world_cam.linear() = T_world_camera0.linear().matrix();
         
         sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 8146c6a36..467c57160 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -30,27 +30,27 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir,
 }
 DataParser::~DataParser() {}
 
-Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps,
-                                               bool use_compass) {
-    const symphony_lake_dataset::ImagePoint img_point =
-        surveys_.get(survey_idx).getImagePoint(img_idx);
-    Eigen::Vector3d t;
-    Eigen::Matrix3d R;
-    Eigen::Affine3d T_world_camera;
-    if (use_gps) {
-        t[0] = img_point.x;
-        t[1] = img_point.y;
-    }
-    // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core)
-    double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan;
-    Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY()));
-    Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity();
-    Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()));
-    R = R_x * R_y * R_z;
-    T_world_camera.translate(t);
-    T_world_camera.rotate(R);
-    return T_world_camera;
-}
+// Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps,
+//                                                bool use_compass) {
+//     const symphony_lake_dataset::ImagePoint img_point =
+//         surveys_.get(survey_idx).getImagePoint(img_idx);
+//     Eigen::Vector3d t;
+//     Eigen::Matrix3d R;
+//     Eigen::Affine3d T_world_camera;
+//     if (use_gps) {
+//         t[0] = img_point.x;
+//         t[1] = img_point.y;
+//     }
+//     // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core)
+//     double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan;
+//     Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY()));
+//     Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity();
+//     Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()));
+//     R = R_x * R_y * R_z;
+//     T_world_camera.translate(t);
+//     T_world_camera.rotate(R);
+//     return T_world_camera;
+// }
 
 const Eigen::Isometry3d DataParser::get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt) {
     return get_T_boat_camera(img_pt.pan, img_pt.tilt);
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index 664f5ccdb..5311a29f5 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -76,8 +76,8 @@ class DataParser {
                const std::vector<std::string> &survey_list);
     ~DataParser();
 
-    Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
-                                       bool use_compass = false);
+    // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
+    //                                    bool use_compass = false);
     static const Eigen::Isometry3d get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt);
     static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt);
     /// @brief get_R_world_boat assuming z_axis_boat dot z_axis_world ~ -1

From 81c3b4562f66ef0285248f3c75599eef7546d47b Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 4 Apr 2025 12:51:48 -0400
Subject: [PATCH 13/45] Uploading progress

---
 common/geometry/opencv_viz.cc                 |   4 +-
 common/geometry/opencv_viz.hh                 |   3 +-
 experimental/learn_descriptors/BUILD          |   3 +-
 .../structure_from_motion.cc                  | 172 ++++++++++++------
 .../structure_from_motion.hh                  |  80 ++++----
 .../structure_from_motion_test.cc             | 111 +++++------
 .../learn_descriptors/symphony_lake_parser.cc |  82 ++++-----
 .../learn_descriptors/symphony_lake_parser.hh |  64 +------
 .../symphony_lake_parser_test.cc              |  84 ++++++---
 9 files changed, 317 insertions(+), 286 deletions(-)

diff --git a/common/geometry/opencv_viz.cc b/common/geometry/opencv_viz.cc
index 66df62a9a..6d11aca22 100644
--- a/common/geometry/opencv_viz.cc
+++ b/common/geometry/opencv_viz.cc
@@ -25,8 +25,8 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
 
 void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
                const std::vector<Eigen::Vector3d> &points_world, const bool show_grid,
-               const bool show_origin) {
-    cv::viz::Viz3d window("Viz Scene");
+               const bool show_origin, const std::string &window_name) {
+    cv::viz::Viz3d window(window_name);
 
     constexpr double pose_size = .5;
     for (unsigned int i = 0; i < poses_world.size(); i++) {
diff --git a/common/geometry/opencv_viz.hh b/common/geometry/opencv_viz.hh
index cdfcfe857..8d2b3a506 100644
--- a/common/geometry/opencv_viz.hh
+++ b/common/geometry/opencv_viz.hh
@@ -1,4 +1,5 @@
 #pragma once
+#include <string>
 #include <vector>
 
 #include "Eigen/Core"
@@ -8,5 +9,5 @@
 namespace robot::geometry {
 void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
                const std::vector<Eigen::Vector3d> &points_world, const bool show_grid = true,
-               const bool show_origin = true);
+               const bool show_origin = true, const std::string &window_name = "Viz Window");
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 977347d42..dab2c184e 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -29,7 +29,8 @@ cc_library(
     "@gtsam//:gtsam",
     "@eigen",
     "//common/geometry:camera",
-    "//common/geometry:translate_types"
+    "//common/geometry:translate_types",
+    "//common/geometry:opencv_viz"
   ]
 )
 
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index fd7d0e4ef..d9b145a18 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -5,6 +5,7 @@
 #include <stdexcept>
 
 #include "common/geometry/camera.hh"
+#include "common/geometry/opencv_viz.hh"
 #include "common/geometry/translate_types.hh"
 #include "gtsam/geometry/triangulation.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
@@ -19,9 +20,10 @@ namespace geom = robot::geometry;
 namespace robot::experimental::learn_descriptors {
 
 const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() {
-    Eigen::Isometry3d transform = Eigen::Isometry3d::Identity();    
+    Eigen::Isometry3d transform = Eigen::Isometry3d::Identity();
     transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) *
-                                     Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1))).toRotationMatrix();
+                          Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1)))
+                             .toRotationMatrix();
     return transform;
 }();
 
@@ -34,7 +36,8 @@ std::string pose_to_string(gtsam::Pose3 pose) {
 // const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() {
 //     Eigen::Isometry3d
 // }();
-const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion::T_symlake_boat_cam.matrix());
+const gtsam::Pose3 StructureFromMotion::default_initial_pose(
+    StructureFromMotion::T_symlake_boat_cam.matrix());
 // const gtsam::Pose3 StructureFromMotion::default_initial_pose =
 //     gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation());
 // gtsam::Pose3(
@@ -48,7 +51,8 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose(StructureFromMotion
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
                                          gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
                                          gtsam::Pose3 initial_pose,
-                                         Frontend::MatcherType frontend_matcher) : initial_pose_(initial_pose) {
+                                         Frontend::MatcherType frontend_matcher)
+    : initial_pose_(initial_pose) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
     backend_ = Backend(K);
 
@@ -59,7 +63,8 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac
 }
 
 void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
-    backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose, gtsam::noiseModel::Isotropic::Sigma(6, 0));
+    backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose,
+                              gtsam::noiseModel::Isotropic::Sigma(6, 0));
 }
 
 void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam) {
@@ -80,31 +85,35 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
         // gtsam::Rot3 R_c0_c1(
         //     geom::estimate_c0_c1(
         //         img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first,
-        //         matches, geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear());
+        //         matches,
+        //         geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear());
 
         const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1);
         const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current);
-        // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, get_num_images_added()), gps_prior, backend_.get_translation_noise());
-        // gtsam::Pose3 T_w_c0 = backend_.get_current_initial_values().at<gtsam::Pose3>(sym_T_w_c0);
-        // gtsam::Pose3 T_c0_c1(
-        //     R_c0_c1, 
+        // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char,
+        // get_num_images_added()), gps_prior, backend_.get_translation_noise()); gtsam::Pose3
+        // T_w_c0 = backend_.get_current_initial_values().at<gtsam::Pose3>(sym_T_w_c0); gtsam::Pose3
+        // T_c0_c1(
+        //     R_c0_c1,
         //     t_world_gps - T_w_c0.translation());
         // backend_.add_between_factor(
         //     sym_T_w_c0,
-        //     sym_T_w_c1, 
+        //     sym_T_w_c1,
         //     T_c0_c1,
-        //     backend_.get_pose_noise());        
-        backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(), T_world_cam.rotation());
+        //     backend_.get_pose_noise());
+        backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(),
+                                T_world_cam.rotation());
         // backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise());
 
         for (const cv::DMatch match : matches) {
-            const cv::KeyPoint kpt_cam0 = img_keypoints_and_descriptors_.back().first[match.queryIdx];
+            const cv::KeyPoint kpt_cam0 =
+                img_keypoints_and_descriptors_.back().first[match.queryIdx];
             const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx];
 
-            if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) {
+            if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) ==
+                keypoint_to_landmarks_[idx_img_current - 1].end()) {
                 const Backend::Landmark landmark_cam_0(
-                    gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_),
-                    sym_T_w_c0,
+                    gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0,
                     gtsam::Point2(
                         static_cast<double>(
                             img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
@@ -113,24 +122,27 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
                     backend_.get_K(), 3.0);
                 keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0);
                 landmark_count_++;
-            } 
-            // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " << 
-            //     (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) == keypoint_to_landmarks_[idx_img_current - 1].end()) 
-            //     << std::endl;                        
-            const Backend::Landmark landmark_cam_0 = keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0);
+            }
+            // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " <<
+            //     (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) ==
+            //     keypoint_to_landmarks_[idx_img_current - 1].end())
+            //     << std::endl;
+            const Backend::Landmark landmark_cam_0 =
+                keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0);
 
             // technically don't need this conditional when enforcing bijectivity
-            if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) == keypoint_to_landmarks_[idx_img_current].end()) {
+            if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) ==
+                keypoint_to_landmarks_[idx_img_current].end()) {
                 const Backend::Landmark landmark_cam_1(
-                    landmark_cam_0.lmk_factor_symbol,
-                    sym_T_w_c1,
+                    landmark_cam_0.lmk_factor_symbol, sym_T_w_c1,
                     gtsam::Point2(
                         static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
                         static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
                     backend_.get_K(), 3.0);
-                keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1);                
-            }             
-            const Backend::Landmark landmark_cam_1 = keypoint_to_landmarks_[idx_img_current].at(kpt_cam1);
+                keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1);
+            }
+            const Backend::Landmark landmark_cam_1 =
+                keypoint_to_landmarks_[idx_img_current].at(kpt_cam1);
 
             backend_.add_landmark(landmark_cam_0);
             backend_.add_landmark(landmark_cam_1);
@@ -141,7 +153,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
 
 std::vector<cv::DMatch> StructureFromMotion::get_matches(
     const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
-    std::optional<StructureFromMotion::MatchFunction> post_process_func) {
+    std::optional<StructureFromMotion::match_function> post_process_func) {
     std::vector<cv::DMatch> matches = frontend_.get_matches(descriptors_1, descriptors_2);
     if (post_process_func) {
         (*post_process_func)(matches);
@@ -318,7 +330,8 @@ Backend::Backend(gtsam::Cal3_S2 K) {
 }
 
 template <>
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value, const gtsam::SharedNoiseModel &noise) {
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value,
+                               const gtsam::SharedNoiseModel &noise) {
     graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, noise);
     initial_estimate_.insert_or_assign(symbol, value);
     // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
@@ -326,12 +339,13 @@ void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &
 }
 
 template <>
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value, const gtsam::SharedNoiseModel &noise) {
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value,
+                               const gtsam::SharedNoiseModel &noise) {
     graph_.emplace_shared<gtsam::PriorFactor<gtsam::Point3>>(symbol, value, noise);
     gtsam::Rot3 R;
-    if (initial_estimate_.exists(symbol)) {        
-        R = initial_estimate_.at<gtsam::Pose3>(symbol).rotation();                
-    }     
+    if (initial_estimate_.exists(symbol)) {
+        R = initial_estimate_.at<gtsam::Pose3>(symbol).rotation();
+    }
     initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value));
 }
 
@@ -352,27 +366,28 @@ void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
                                               const gtsam::Symbol &symbol_2,
                                               const gtsam::Rot3 &value,
                                               const gtsam::SharedNoiseModel &model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);    
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
     initial_estimate_.insert_or_assign(symbol_2,
-                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(gtsam::Pose3(value, gtsam::Point3::Zero())));
+                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(
+                                           gtsam::Pose3(value, gtsam::Point3::Zero())));
 }
 
-void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam, const gtsam::SharedNoiseModel &model,
-        const gtsam::Rot3 & R_world_cam) {
-    
+void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam,
+                             const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) {
     graph_.emplace_shared<gtsam::GPSFactor>(symbol, t_world_cam, model);
     initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam));
 }
 
-std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(const gtsam::Symbol &lmk_symbol) {
+std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(
+    const gtsam::Symbol &lmk_symbol) {
     std::vector<gtsam::Pose3> cam_poses;
     std::vector<gtsam::Point2> observations;
 
     // Iterate over all factors in the graph
-    for (const auto& factor : graph_) {
+    for (const auto &factor : graph_) {
         auto projFactor = boost::dynamic_pointer_cast<
             gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(factor);
-        
+
         if (projFactor && projFactor->keys().at(1) == lmk_symbol) {
             // Get the camera pose symbol
             gtsam::Symbol cameraSymbol = projFactor->keys().at(0);
@@ -389,10 +404,10 @@ std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_ob
             cam_poses.push_back(cameraPose);
         }
     }
-    return std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>>(cam_poses, observations);
+    return std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>>(cam_poses,
+                                                                            observations);
 }
 
-
 void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
     for (const Landmark &landmark : landmarks) {
         add_landmark(landmark);
@@ -400,7 +415,8 @@ void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
 }
 
 void Backend::add_landmark(const Landmark &landmark) {
-    graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
+    graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+        landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
         K_);
     if (!initial_estimate_.exists(landmark.cam_pose_symbol)) {
         throw std::runtime_error(
@@ -408,18 +424,18 @@ void Backend::add_landmark(const Landmark &landmark) {
             "add_landmark is called.");
     }
     gtsam::Point3 p_world_lmk_estimate =
-        initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol)
-            * landmark.p_cam_lmk_guess;
+        initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess;
     initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate);
 
-    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs = get_obs_for_lmk(landmark.lmk_factor_symbol);
+    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs =
+        get_obs_for_lmk(landmark.lmk_factor_symbol);
     if (lmk_obs.first.size() >= 2) {
         try {
             // Attempt triangulation using DLT (or the GTSAM provided method)
             p_world_lmk_estimate = gtsam::triangulatePoint3(
-                lmk_obs.first, K_, 
+                lmk_obs.first, K_,
                 gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
-            
+
             // Optional: perform an explicit cheirality check
             bool valid = true;
             for (const auto &pose : lmk_obs.first) {
@@ -431,15 +447,16 @@ void Backend::add_landmark(const Landmark &landmark) {
                     break;
                 }
             }
-            
+
             if (valid) {
                 initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
             } else {
-                std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess." << std::endl;
+                std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess."
+                          << std::endl;
             }
         } catch (const gtsam::TriangulationCheiralityException &e) {
             // Handle the exception gracefully by logging and retaining the previous estimate.
-            std::cerr << "Triangulation Cheirality Exception: " << e.what() 
+            std::cerr << "Triangulation Cheirality Exception: " << e.what()
                       << ". Keeping initial landmark estimate." << std::endl;
         }
     }
@@ -450,4 +467,53 @@ void Backend::solve_graph() {
     result_ = optimizer.optimize();
 }
 
+void Backend::solve_graph(const int num_steps,
+                          std::optional<Backend::graph_step_debug_func> inter_debug_func) {
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;          // We'll manually step it
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
+
+    double prev_error = optimizer.error();
+    for (int i = 0; i < num_steps; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+
+        if (inter_debug_func) {
+            (*inter_debug_func)(optimizer.values(), i);
+        }
+        if (std::abs(prev_error - curr_error) < 1e-6) {
+            std::cout << "Converged at iteration " << i << "\n";
+            break;
+        }
+    }
+    result_ = optimizer.values();
+}
+
+void StructureFromMotion::graph_values(const gtsam::Values &values,
+                                       const std::string &window_name) {
+    std::vector<Eigen::Isometry3d> final_poses;
+    std::vector<Eigen::Vector3d> final_lmks;
+    for (size_t i = 0; i < img_keypoints_and_descriptors_.size(); i++) {
+        final_poses.emplace_back(
+            values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix());
+    }
+    for (int i = 0; i < static_cast<int>(get_matches().size()); i++) {
+        // NOTE: this j for lmk_symbol is wrong.
+        for (int j = 0; j < static_cast<int>(get_matches()[i].size()); j++) {
+            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
+            if (values.exists(lmk_symbol)) {
+                final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
+            } else {
+                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
+            }
+        }
+    }
+    geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
+}
+
+void StructureFromMotion::solve_structure(
+    const int num_steps, std::optional<Backend::graph_step_debug_func> inter_debug_func) {
+    backend_.solve_graph(num_steps, inter_debug_func);
+}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 2f7112293..7cc454558 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <functional>
 #include <map>
 #include <set>
 #include <unordered_map>
@@ -11,33 +12,33 @@
 #include "gtsam/geometry/Pose3.h"
 #include "gtsam/inference/Symbol.h"
 #include "gtsam/linear/NoiseModel.h"
+#include "gtsam/navigation/GPSFactor.h"
 #include "gtsam/nonlinear/NonlinearFactorGraph.h"
 #include "gtsam/nonlinear/Values.h"
-#include "gtsam/navigation/GPSFactor.h"
 #include "opencv2/opencv.hpp"
 
 namespace std {
-    template <>
-    struct hash<cv::KeyPoint> {
-        size_t operator()(const cv::KeyPoint& kp) const {
-            size_t h1 = hash<float>()(kp.pt.x);
-            size_t h2 = hash<float>()(kp.pt.y);
-            size_t h3 = hash<float>()(kp.size);
-            size_t h4 = hash<float>()(kp.angle);
-            size_t h5 = hash<float>()(kp.response);
-            size_t h6 = hash<int>()(kp.octave);
-            size_t h7 = hash<int>()(kp.class_id);
- 
-            return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
-        }
-    };
-}
-namespace cv{
-    inline bool operator==(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) {
-        return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
-                kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id;
+template <>
+struct hash<cv::KeyPoint> {
+    size_t operator()(const cv::KeyPoint &kp) const {
+        size_t h1 = hash<float>()(kp.pt.x);
+        size_t h2 = hash<float>()(kp.pt.y);
+        size_t h3 = hash<float>()(kp.size);
+        size_t h4 = hash<float>()(kp.angle);
+        size_t h5 = hash<float>()(kp.response);
+        size_t h6 = hash<int>()(kp.octave);
+        size_t h7 = hash<int>()(kp.class_id);
+
+        return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
     }
+};
+}  // namespace std
+namespace cv {
+inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) {
+    return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
+           kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id;
 }
+}  // namespace cv
 namespace robot::experimental::learn_descriptors {
 class Frontend {
    public:
@@ -110,21 +111,27 @@ class Backend {
     ~Backend(){};
 
     template <typename T>
-    void add_prior_factor(const gtsam::Symbol &symbol, const T &value, const gtsam::SharedNoiseModel & model);
+    void add_prior_factor(const gtsam::Symbol &symbol, const T &value,
+                          const gtsam::SharedNoiseModel &model);
 
     template <typename T>
     void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
                             const T &value, const gtsam::SharedNoiseModel &model);
-    
 
-    void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps, const gtsam::SharedNoiseModel& model, 
-        const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity());
+    void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps,
+                        const gtsam::SharedNoiseModel &model,
+                        const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity());
 
-    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(const gtsam::Symbol &lmk_symbol);
+    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(
+        const gtsam::Symbol &lmk_symbol);
     void add_landmarks(const std::vector<Landmark> &landmarks);
     void add_landmark(const Landmark &landmark);
 
     void solve_graph();
+    typedef int epoch;
+    using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
+    void solve_graph(const int num_steps,
+                     std::optional<graph_step_debug_func> inter_debug_func = std::nullopt);
 
     const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
     const gtsam::Values &get_result() const { return result_; };
@@ -148,14 +155,17 @@ class Backend {
         gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
     gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ =
         gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
-    gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ = gtsam::noiseModel::Isotropic::Sigma(3, 2.);
+    gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(3, 2.);
 };
 
-template<>
-void Backend::add_prior_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Pose3 &, const gtsam::SharedNoiseModel &);
+template <>
+void Backend::add_prior_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Pose3 &,
+                                             const gtsam::SharedNoiseModel &);
 
 template <>
-void Backend::add_prior_factor<gtsam::Point3>(const gtsam::Symbol &, const gtsam::Point3 &, const gtsam::SharedNoiseModel &);
+void Backend::add_prior_factor<gtsam::Point3>(const gtsam::Symbol &, const gtsam::Point3 &,
+                                              const gtsam::SharedNoiseModel &);
 
 template <>
 void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Symbol &,
@@ -167,8 +177,8 @@ void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam
 
 class StructureFromMotion {
    public:
-    static const Eigen::Isometry3d T_symlake_boat_cam;    
-    static const gtsam::Pose3 default_initial_pose;    
+    static const Eigen::Isometry3d T_symlake_boat_cam;
+    static const gtsam::Pose3 default_initial_pose;
     /**
      * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3)
      */
@@ -181,11 +191,15 @@ class StructureFromMotion {
     void set_initial_pose(gtsam::Pose3 initial_pose);
     void add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam);
     void solve_structure() { backend_.solve_graph(); };
+    void solve_structure(
+        const int num_steps,
+        std::optional<Backend::graph_step_debug_func> inter_debug_func = std::nullopt);
     const gtsam::Values &get_structure_result() { return backend_.get_result(); };
-    using MatchFunction = std::function<void(std::vector<cv::DMatch> &)>;
+    using match_function = std::function<void(std::vector<cv::DMatch> &)>;
     std::vector<cv::DMatch> get_matches(
         const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
-        std::optional<MatchFunction> post_process_func = std::nullopt);
+        std::optional<match_function> post_process_func = std::nullopt);
+    void graph_values(const gtsam::Values &values, const std::string &window_name = "graph values");
 
     Frontend get_frontend() { return frontend_; };
     Backend get_backend() { return backend_; }
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 61c816444..c856abd71 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -2,6 +2,7 @@
 
 #include <iostream>
 #include <sstream>
+#include <thread>
 
 #include "Eigen/Geometry"
 #include "common/geometry/opencv_viz.hh"
@@ -122,15 +123,16 @@ TEST(SFM_TEST, frontend_pipeline_sweep) {
     }
 }
 
-// TEST(SFM_TEST, boat_and_cam_frames) {    
-//     // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the water/hull
-//     Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam;
+// TEST(SFM_TEST, boat_and_cam_frames) {
+//     // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the
+//     water/hull Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam;
 //     T_boat_cam.translation() = Eigen::Vector3d::Ones();
-//     geometry::viz_scene(std::vector<Eigen::Isometry3d>{T_boat_cam}, std::vector<Eigen::Vector3d>());
+//     geometry::viz_scene(std::vector<Eigen::Isometry3d>{T_boat_cam},
+//     std::vector<Eigen::Vector3d>());
 // }
 
 TEST(SFM_TEST, sfm_snippet_small) {
-    const std::vector<int> indices {120, 130}; // 0-199
+    const std::vector<int> indices{120, 130};  // 0-199
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
@@ -144,24 +146,25 @@ TEST(SFM_TEST, sfm_snippet_small) {
         (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
          SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
             .finished();
-    
+
     // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
-    // T_earth_boat0 = 
+    // T_earth_boat0 =
     Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first);
     Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first);
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix()));  
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+                            gtsam::Pose3(T_world_camera0.matrix()));
 
     std::vector<cv::Mat> img_vector;
-    for (const int &idx : indices) {    
+    for (const int &idx : indices) {
         const cv::Mat img = survey.loadImageByImageIndex(idx);
         img_vector.push_back(img);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);        
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
         Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
         Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
         Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
 
         // T_world_cam.linear() = T_world_camera0.linear().matrix();
-        
+
         sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
     // for (const cv::Mat &image : images) {
@@ -201,7 +204,9 @@ TEST(SFM_TEST, sfm_snippet_small) {
     std::vector<Eigen::Isometry3d> final_poses;
     std::vector<Eigen::Vector3d> final_lmks;
     for (size_t i = 0; i < indices.size(); i++) {
-        final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
+        final_poses.emplace_back(
+            result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i))
+                .matrix());
     }
     for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
         // NOTE: this j for lmk_symbol is wrong.
@@ -239,72 +244,50 @@ TEST(SFM_TEST, sfm_building) {
         (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
          SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
             .finished();
-    
-    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
-    Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first);
-    Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first);
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D, gtsam::Pose3(T_world_camera0.matrix()));  
 
-    for (const int &idx : indices) {    
+    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
+    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_world_boat0;
+    T_world_boat0.linear() = T_earth_boat0.linear();
+    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+                            gtsam::Pose3(T_world_camera0.matrix()));
+
+    for (const int &idx : indices) {
         const cv::Mat img = survey.loadImageByImageIndex(idx);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);  
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
+
+        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
+        T_world_boat.translation() -= T_earth_boat0.translation();
 
-        Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
-        Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
         Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
-        
+
         sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
-    // for (const cv::Mat &image : images) {
-    //     sfm.add_image(image);
-    // }
 
     const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
-    std::vector<Eigen::Isometry3d> poses_world;
-    for (size_t i = 0; i < indices.size(); i++) {
-        gtsam::Pose3 pose =
-            initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
-        poses_world.emplace_back(pose.matrix());
-    }
-    std::vector<Eigen::Vector3d> points_world;
-    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
-        // NOTE: this j for lmk_symbol is wrong.
-        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
-            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-            if (initial_values.exists(lmk_symbol)) {
-                points_world.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
-            } else {
-                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-            }
-        }
-    }
-    // std::cout << poses_world.front() << std::endl;
-
-    geometry::viz_scene(poses_world, points_world, true, true);
+    sfm.graph_values(initial_values, "initial values");
 
     std::cout << "Solving for structure!" << std::endl;
 
-    sfm.solve_structure();
+    Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals,
+                                                                  const Backend::epoch iter) {
+        std::cout << "iteration " << iter << " complete!";
+        std::string window_name = "Iteration_" + std::to_string(iter);
+        sfm.graph_values(vals, window_name);
+    };
+    sfm.solve_structure(5, solve_iter_debug_func);
 
     std::cout << "Solution complete." << std::endl;
 
     const gtsam::Values result_values = sfm.get_structure_result();
-    std::vector<Eigen::Isometry3d> final_poses;
-    std::vector<Eigen::Vector3d> final_lmks;
-    for (size_t i = 0; i < indices.size(); i++) {
-        final_poses.emplace_back(result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i)).matrix());
-    }
-    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
-        // NOTE: this j for lmk_symbol is wrong.
-        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
-            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-            if (initial_values.exists(lmk_symbol)) {
-                final_lmks.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
-            } else {
-                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-            }
-        }
-    }
-    geometry::viz_scene(final_poses, final_lmks, true, true);
+    sfm.graph_values(result_values, "optimized values");
+}
+
+TEST(SFM_TEST, random_test) {
+    std::cout << DataParser::T_boat_gps.matrix() << std::endl;
+    geometry::viz_scene(
+        std::vector<Eigen::Isometry3d>{DataParser::T_boat_imu, DataParser::T_boat_gps},
+        std::vector<Eigen::Vector3d>{DataParser::t_boat_cam});
 }
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 467c57160..4740deb1e 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -5,14 +5,10 @@
 #include "common/check.hh"
 
 namespace robot::experimental::learn_descriptors {
-const Eigen::Vector3d DataParser::t_boat_cam(-0.420, 0, 0.255);
+const Eigen::Vector3d DataParser::t_boat_cam(-0.430, 0, 0.255);
 
-const Eigen::Isometry3d DataParser::T_boat_gps((Eigen::Matrix4d() <<
-    1, 0, 0, 0.080,
-    0, 1, 0, -0.118,
-    0, 0, 1, 0.010,
-    0, 0, 0, 1
-).finished());
+const Eigen::Isometry3d DataParser::T_boat_gps(
+    (Eigen::Matrix4d() << 1, 0, 0, 0.080, 0, 1, 0, -0.1183, 0, 0, 1, 0.010, 0, 0, 0, 1).finished());
 
 const Eigen::Isometry3d DataParser::T_boat_imu = []() {
     Eigen::Isometry3d T_boat_imu;
@@ -30,60 +26,50 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir,
 }
 DataParser::~DataParser() {}
 
-// Eigen::Affine3d DataParser::get_T_world_camera(size_t survey_idx, size_t img_idx, bool use_gps,
-//                                                bool use_compass) {
-//     const symphony_lake_dataset::ImagePoint img_point =
-//         surveys_.get(survey_idx).getImagePoint(img_idx);
-//     Eigen::Vector3d t;
-//     Eigen::Matrix3d R;
-//     Eigen::Affine3d T_world_camera;
-//     if (use_gps) {
-//         t[0] = img_point.x;
-//         t[1] = img_point.y;
-//     }
-//     // coordinate frame is x-axis north, y-axis east, z-axis down (to Earth's core)
-//     double yaw = use_compass ? img_point.theta - img_point.pan : img_point.pan;
-//     Eigen::Matrix3d R_y(Eigen::AngleAxisd(img_point.tilt, Eigen::Vector3d::UnitY()));
-//     Eigen::Matrix3d R_x = Eigen::Matrix3d::Identity();
-//     Eigen::Matrix3d R_z(Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()));
-//     R = R_x * R_y * R_z;
-//     T_world_camera.translate(t);
-//     T_world_camera.rotate(R);
-//     return T_world_camera;
-// }
-
-const Eigen::Isometry3d DataParser::get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt) {
+const Eigen::Isometry3d DataParser::get_T_boat_camera(
+    const symphony_lake_dataset::ImagePoint &img_pt) {
     return get_T_boat_camera(img_pt.pan, img_pt.tilt);
 }
 const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double theta_tilt) {
-    Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ()); // pan
-    Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY()); // tilt
-    Eigen::Matrix3d R_boat_gimbal(R_z * R_y); 
-        
-    Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX())
-        * Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY())).matrix();
+    Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ());   // pan
+    Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY());  // tilt
+    Eigen::Matrix3d R_boat_gimbal(R_z * R_y);
+
+    Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX()) *
+                                       Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY()))
+                                          .matrix();
     Eigen::Matrix3d R_boat_camera(R_boat_gimbal * R_gimbal_camera);
-    
+
     Eigen::Isometry3d T_boat_cam;
     T_boat_cam.translation() = t_boat_cam;
     T_boat_cam.linear() = R_boat_camera;
     return T_boat_cam;
 }
 
-const Eigen::Matrix3d DataParser::get_R_world_boat(double theta_compass) {
-    Eigen::Matrix3d R_world_imu = Eigen::AngleAxisd(theta_compass, Eigen::Vector3d::UnitZ()).matrix();
-    Eigen::Matrix3d R_imu_boat = T_boat_imu.inverse().linear();
-    Eigen::Matrix3d R_world_boat = R_world_imu * R_imu_boat;
-    return R_world_boat;
-}
-
-const Eigen::Isometry3d DataParser::get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt) {
-    Eigen::Matrix3d R_world_boat = get_R_world_boat(img_pt.theta);
-    
+const Eigen::Isometry3d DataParser::get_T_world_gps(
+    const symphony_lake_dataset::ImagePoint &img_pt) {
     Eigen::Isometry3d T_world_gps;
-    T_world_gps.linear() =  R_world_boat * T_boat_gps.linear();
+    // img_pt.theta is in the North East Down frame, not lattitude longitude coords. However,
+    // because of the alignment of the imu and gps, this rotation math holds.
+    T_world_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix();
     T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+    return T_world_gps;
+}
+
+const Eigen::Isometry3d DataParser::get_T_world_boat(
+    const symphony_lake_dataset::ImagePoint &img_pt) {
+    // Eigen::Isometry3d T_world_gps;
+
+    // Eigen::Matrix3d R_world_imu = get_R_world_imu(img_pt.theta);
+    // Eigen::Isometry3d T_imu_gps = T_boat_imu.inverse() * T_boat_gps;
+    // Eigen::Matrix3d R_world_gps = R_world_imu * T_imu_gps.linear();
+
+    // T_world_gps.linear() = R_world_gps;
+    // T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+
+    // Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse();
 
+    Eigen::Isometry3d T_world_gps = get_T_world_gps(img_pt);
     Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse();
     return T_world_boat;
 }
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index 5311a29f5..5c6e91b54 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -20,56 +20,8 @@ class SymphonyLakeCamParams {
 };
 class DataParser {
    public:
-    template <typename T>
-    struct Generator {
-        struct promise_type {
-            T value;
-            std::suspend_always yield_value(T v) {
-                value = v;
-                return {};
-            }
-            std::suspend_always initial_suspend() { return {}; }
-            std::suspend_always final_suspend() noexcept { return {}; }
-            Generator get_return_object() { return Generator{handle_type::from_promise(*this)}; }
-            void return_void() {}
-            void unhandled_exception() { std::terminate(); }
-        };
-
-        using handle_type = std::coroutine_handle<promise_type>;
-
-        Generator(handle_type h) : handle(h) {}
-        ~Generator() {
-            if (handle) handle.destroy();
-        }
-
-        struct iterator {
-            handle_type handle;
-            bool operator!=(std::default_sentinel_t) const { return !handle.done(); }
-            iterator &operator++() {
-                handle.resume();
-                return *this;
-            }
-            T operator*() const { return handle.promise().value; }
-        };
-
-        iterator begin() { return iterator{handle}; }
-        std::default_sentinel_t end() { return {}; }
-
-       private:
-        handle_type handle;
-    };
-
-    Generator<cv::Mat> image_generator(const symphony_lake_dataset::SurveyVector &survey_vector) {
-        for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
-            const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
-            for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
-                co_yield survey.loadImageByImageIndex(j);
-            }
-        }
-    }
-
     static const Eigen::Vector3d t_boat_cam;
-    static const Eigen::Isometry3d T_boat_gps;    
+    static const Eigen::Isometry3d T_boat_gps;
     static const Eigen::Isometry3d T_boat_imu;
 
     DataParser(const std::filesystem::path &image_root_dir,
@@ -78,19 +30,17 @@ class DataParser {
 
     // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
     //                                    bool use_compass = false);
-    static const Eigen::Isometry3d get_T_boat_camera(const symphony_lake_dataset::ImagePoint &img_pt);
+    static const Eigen::Isometry3d get_T_boat_camera(
+        const symphony_lake_dataset::ImagePoint &img_pt);
     static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt);
-    /// @brief get_R_world_boat assuming z_axis_boat dot z_axis_world ~ -1
-    /// @param theta_compass in radians
-    /// @return R_world_boat
-    static const Eigen::Matrix3d get_R_world_boat(double theta_compass);
+    static const Eigen::Isometry3d get_T_world_gps(const symphony_lake_dataset::ImagePoint &img_pt);
     /// @brief get_T_world_boat assuming z_axis_boat dot z_axis_world ~ -1
-    /// @param img_pt 
+    /// @param img_pt
     /// @return T_world_boat
-    static const Eigen::Isometry3d get_T_world_boat(const symphony_lake_dataset::ImagePoint &img_pt);
+    static const Eigen::Isometry3d get_T_world_boat(
+        const symphony_lake_dataset::ImagePoint &img_pt);
 
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
-    Generator<cv::Mat> create_img_generator() { return image_generator(surveys_); };
 
    private:
     std::filesystem::path image_root_dir_;
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index c7c2329ef..05093dedb 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -1,5 +1,4 @@
 #include "experimental/learn_descriptors/symphony_lake_parser.hh"
-#include "common/geometry/opencv_viz.hh"
 
 #include <cstdlib>
 #include <iostream>
@@ -8,6 +7,7 @@
 #include <vector>
 
 #include "common/check.hh"
+#include "common/geometry/opencv_viz.hh"
 #include "gtest/gtest.h"
 #include "opencv2/opencv.hpp"
 
@@ -56,37 +56,67 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
         }
     }
 }
-// TEST(SymphonyLakeParserTest, snippet_140106_generator_test) {
-//     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-//     DataParser::Generator<cv::Mat> generator =
-//         (SymphonyLakeDatasetTestHelper::get_test_parser()).create_img_generator();
-//     DataParser::Generator<cv::Mat>::iterator img_iter = generator.begin();
-//     while (img_iter != generator.end()) {
-//         cv::Mat img = *img_iter;
-//         // if (!is_test()) {
-//         //     cv::imshow("Symphony Dataset Image", img);
-//         //     cv::waitKey(200);
-//         // }
-//         // cv::imshow("Symphony Dataset Image", img);
-//         // cv::waitKey(200);
-//         ++img_iter;
-//     }
-// }
-
-TEST(SymphonyLakeParserTest, test_frames) {
-    const std::vector<int> indices {120, 190}; // 0-199
+
+TEST(SymphonyLakeParserTest, test_cam_frames) {
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 0; i < 200; i += 10) {
+            tmp.push_back(i);
+        }
+        return tmp;
+    }();
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint image_point_first = survey.getImagePoint(indices.front());
+    const symphony_lake_dataset::ImagePoint image_point_first =
+        survey.getImagePoint(indices.front());
+
+    std::vector<Eigen::Isometry3d> cam_frames;
 
-    std::vector<Eigen::Isometry3d> poses_viz;
-    poses_viz.push_back(DataParser::get_T_boat_camera(image_point_first));
-    poses_viz.push_back(DataParser::T_boat_gps);
-    poses_viz.push_back(DataParser::T_boat_imu);    
+    // NOTE: the world in these images is east, north, up centered at boat0 translation
+    Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation();
 
-    std::cout << "pan: " << image_point_first.pan << "\ntilt: " << image_point_first.tilt << std::endl;
+    for (size_t i = 0; i < indices.size(); i++) {
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
+        Eigen::Isometry3d T_world_boatidx = DataParser::get_T_world_boat(img_pt);
+        Eigen::Isometry3d T_boatidx_camidx =
+            DataParser::get_T_boat_camera(img_pt);  // current boat to current camera
+
+        Eigen::Isometry3d T_world_camidx = T_world_boatidx * T_boatidx_camidx;
+        T_world_camidx.translation() -= t_world_boat0;
+
+        cam_frames.push_back(T_world_camidx);
+    }
+
+    geometry::viz_scene(cam_frames, std::vector<Eigen::Vector3d>(), true, true, "test_cam_frames");
+}
+
+TEST(SymphonyLakeParserTest, test_gps_frames) {
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 0; i < 200; i += 10) {
+            tmp.push_back(i);
+        }
+        return tmp;
+    }();
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint image_point_first =
+        survey.getImagePoint(indices.front());
+
+    std::vector<Eigen::Isometry3d> gps_frames;
+
+    // NOTE: the world in these images is east, north, up centered at boat0 translation
+    Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0);
+
+    for (size_t i = 0; i < indices.size(); i++) {
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
+        Eigen::Isometry3d T_world_gpsidx = DataParser::get_T_world_gps(img_pt);
+        T_world_gpsidx.translation() -= t_world_gps0;
+        gps_frames.push_back(T_world_gpsidx);
+    }
 
-    geometry::viz_scene(poses_viz, std::vector<Eigen::Vector3d>(), true, true);
+    geometry::viz_scene(gps_frames, std::vector<Eigen::Vector3d>(), true, true, "test_gps_frames");
 }
 }  // namespace robot::experimental::learn_descriptors

From f65345f2e39e0184e48b1a7834360745497b44ed Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 4 Apr 2025 13:11:14 -0400
Subject: [PATCH 14/45] made world fram for
 SymphonyLakeParserTest.test_gps_frames the North, East, Up frame with
 translation at boat0

---
 experimental/learn_descriptors/symphony_lake_parser_test.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index 05093dedb..2a800db9b 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -108,12 +108,13 @@ TEST(SymphonyLakeParserTest, test_gps_frames) {
     std::vector<Eigen::Isometry3d> gps_frames;
 
     // NOTE: the world in these images is east, north, up centered at boat0 translation
-    Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0);
+    Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation();
+    // Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0);
 
     for (size_t i = 0; i < indices.size(); i++) {
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
         Eigen::Isometry3d T_world_gpsidx = DataParser::get_T_world_gps(img_pt);
-        T_world_gpsidx.translation() -= t_world_gps0;
+        T_world_gpsidx.translation() -= t_world_boat0;
         gps_frames.push_back(T_world_gpsidx);
     }
 

From 3ef706c77b66df983b7fa92ed5ceb6c2f672ec9e Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 8 Apr 2025 10:59:30 -0400
Subject: [PATCH 15/45] created a spatial scene helper to create test scenes. 
 did first pass at refactoring feature handling in SFM, SFM is currently
 broken.

---
 experimental/learn_descriptors/BUILD          |  60 +++++++-
 .../learn_descriptors/feature_manager.hh      |  30 ++++
 .../learn_descriptors/feature_manager_test.cc |  65 +++++++++
 experimental/learn_descriptors/feature_set.hh |  68 +++++++++
 experimental/learn_descriptors/gtsam_test.cc  |   4 +
 .../learn_descriptors/spatial_scene_test.hh   |  53 +++++++
 .../spatial_scene_test_cube.hh                |  21 +++
 .../structure_from_motion.cc                  | 136 +++++++++---------
 .../structure_from_motion.hh                  |  48 ++++---
 9 files changed, 395 insertions(+), 90 deletions(-)
 create mode 100644 experimental/learn_descriptors/feature_manager.hh
 create mode 100644 experimental/learn_descriptors/feature_manager_test.cc
 create mode 100644 experimental/learn_descriptors/feature_set.hh
 create mode 100644 experimental/learn_descriptors/spatial_scene_test.hh
 create mode 100644 experimental/learn_descriptors/spatial_scene_test_cube.hh

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index dab2c184e..87936dbdb 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -30,7 +30,8 @@ cc_library(
     "@eigen",
     "//common/geometry:camera",
     "//common/geometry:translate_types",
-    "//common/geometry:opencv_viz"
+    "//common/geometry:opencv_viz",
+    ":feature_manager"
   ]
 )
 
@@ -54,7 +55,6 @@ cc_library(
   hdrs = ["symphony_lake_parser.hh"],
   copts = [
     "-Wno-unused-parameter",
-    "-std=c++20",
   ],
   data = ["@symphony_lake_snippet"],
   visibility = ["//visibility:public"],
@@ -95,3 +95,59 @@ cc_binary(
     "@opencv//:opencv"
   ]
 )
+
+cc_library(
+  name = "feature_set",
+  hdrs = ["feature_set.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",
+    "@opencv//:opencv"
+  ]
+)
+
+cc_library(
+  name = "feature_manager",
+  hdrs = ["feature_manager.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    ":feature_set",
+  ]
+)
+
+cc_test(
+  name = "feature_manager_test",
+  srcs = ["feature_manager_test.cc"],
+  deps = [
+    ":feature_manager",
+    "@com_google_googletest//:gtest_main",
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    "@eigen",
+    ":spatial_scene_test_cube"
+  ]
+)
+
+
+cc_library(
+  name = "spatial_scene_test",
+  hdrs = ["spatial_scene_test.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",    
+    "@eigen"
+  ]
+)
+
+cc_library(
+  name = "spatial_scene_test_cube",
+  hdrs = ["spatial_scene_test_cube.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@eigen",
+    ":spatial_scene_test"
+  ]
+)
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_manager.hh b/experimental/learn_descriptors/feature_manager.hh
new file mode 100644
index 000000000..0c66d514d
--- /dev/null
+++ b/experimental/learn_descriptors/feature_manager.hh
@@ -0,0 +1,30 @@
+#pragma once
+#include <optional>
+#include <sstream>
+#include <stdexcept>
+#include <vector>
+
+#include "experimental/learn_descriptors/feature_set.hh"
+#include "gtsam/inference/Symbol.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class FeatureManager {
+   public:
+    void append_img_data(const std::vector<cv::KeyPoint> &kpts, const cv::Mat &descriptors) {
+        feature_sets_.push_back(FeatureSet(kpts, descriptors));
+    };
+    void insert_symbol(const size_t idx_img, const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) {
+        feature_sets_[idx_img].insert_symbol(kpt, symbol);
+    };
+
+    std::optional<gtsam::Symbol> get_symbol(const size_t idx_img, const cv::KeyPoint &kpt) {
+        return feature_sets_[idx_img].get_symbol(kpt);
+    };
+
+    size_t get_num_images_added() { return feature_sets_.size(); };
+
+   private:
+    std::vector<FeatureSet> feature_sets_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_manager_test.cc b/experimental/learn_descriptors/feature_manager_test.cc
new file mode 100644
index 000000000..73f198011
--- /dev/null
+++ b/experimental/learn_descriptors/feature_manager_test.cc
@@ -0,0 +1,65 @@
+#include "experimental/learn_descriptors/feature_manager.hh"
+
+#include "Eigen/Dense"
+#include "experimental/learn_descriptors/spatial_scene_test_cube.hh"
+#include "gtest/gtest.h"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+TEST(feature_manager_test, test) {
+    FeatureManager feature_manager;
+    SpatialSceneTestCube test_cube(1.f);
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+
+    Eigen::Matrix3d rotation0(
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera0(pose0, *K);
+
+    gtsam::Pose3 pose1(
+        gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+                    rotation0),
+        gtsam::Point3(0, 4, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(pose1, *K);
+
+    gtsam::Pose3 pose2(
+        gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+                    rotation0),
+        gtsam::Point3(0, 4, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera2(pose1, *K);
+
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras{camera0, camera1};
+
+    std::vector<Eigen::Vector2d> projected_pixels =
+        test_cube.get_projected_pixels(camera0, cv::Size(img_width, img_height));
+    cv::Mat image(480, 640, CV_8UC3, cv::Scalar(255, 255, 255));
+    for (const Eigen::Vector2d &pt : projected_pixels) {
+        cv::circle(image, cv::Point2i(static_cast<int>(pt[0]), static_cast<int>(pt[1])), 4,
+                   cv::Scalar(0, 0, 0));
+    }
+    cv::imshow("projected cubes", image);
+    cv::waitKey(0);
+
+    std::vector<std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>> cams{
+        std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>(camera0,
+                                                                  cv::Size(img_width, img_height)),
+        std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>(camera1,
+                                                                  cv::Size(img_width, img_height))};
+    std::vector<std::pair<Eigen::Vector2d, Eigen::Vector2d>> correspondences =
+        test_cube.get_corresponding_pixels(cams);
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_set.hh b/experimental/learn_descriptors/feature_set.hh
new file mode 100644
index 000000000..65099cffe
--- /dev/null
+++ b/experimental/learn_descriptors/feature_set.hh
@@ -0,0 +1,68 @@
+#pragma once
+#include <optional>
+#include <sstream>
+#include <stdexcept>
+#include <unordered_map>
+
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/inference/Symbol.h"
+#include "opencv2/opencv.hpp"
+
+namespace std {
+template <>
+struct hash<cv::KeyPoint> {
+    size_t operator()(const cv::KeyPoint &kp) const {
+        size_t h1 = hash<float>()(kp.pt.x);
+        size_t h2 = hash<float>()(kp.pt.y);
+        size_t h3 = hash<float>()(kp.size);
+        size_t h4 = hash<float>()(kp.angle);
+        size_t h5 = hash<float>()(kp.response);
+        size_t h6 = hash<int>()(kp.octave);
+        size_t h7 = hash<int>()(kp.class_id);
+
+        return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
+    }
+};
+}  // namespace std
+namespace cv {
+inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) {
+    return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
+           kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id;
+}
+}  // namespace cv
+namespace robot::experimental::learn_descriptors {
+class FeatureSet {
+   public:
+    struct keypoints_descriptors {
+        keypoints_descriptors(const std::vector<cv::KeyPoint> &keypoints,
+                              const cv::Mat &descriptors)
+            : kpts(keypoints), descriptors(descriptors) {}
+        std::vector<cv::KeyPoint> kpts;
+        cv::Mat descriptors;
+    };
+
+    FeatureSet(const std::vector<cv::KeyPoint> &kpts, const cv::Mat &descriptors)
+        : kpts_descriptors_(keypoints_descriptors(kpts, descriptors)){};
+    ~FeatureSet() = default;
+
+    void insert_symbol(const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) {
+        if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) {
+            std::stringstream error_msg;
+            error_msg << "Keypoint already has symbol! Keypoint has symbol: "
+                      << kpt_to_symbol_.at(kpt);
+            throw std::runtime_error(error_msg.str());
+        }
+        kpt_to_symbol_.emplace(kpt, symbol);
+    };
+    std::optional<gtsam::Symbol> get_symbol(const cv::KeyPoint &kpt) {
+        if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) {
+            return kpt_to_symbol_.at(kpt);
+        }
+        return std::nullopt;
+    };
+
+   private:
+    keypoints_descriptors kpts_descriptors_;
+    std::unordered_map<cv::KeyPoint, gtsam::Symbol> kpt_to_symbol_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc
index 96b643d03..019d0c7c4 100644
--- a/experimental/learn_descriptors/gtsam_test.cc
+++ b/experimental/learn_descriptors/gtsam_test.cc
@@ -1,10 +1,14 @@
+
 #include <iostream>
 #include <vector>
 
 #include "Eigen/Core"
 #include "gtest/gtest.h"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
 #include "gtsam/geometry/Point3.h"
 #include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
 #include "gtsam/inference/Symbol.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/nonlinear/Values.h"
diff --git a/experimental/learn_descriptors/spatial_scene_test.hh b/experimental/learn_descriptors/spatial_scene_test.hh
new file mode 100644
index 000000000..60daafb28
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_scene_test.hh
@@ -0,0 +1,53 @@
+#pragma once
+#include <utility>
+#include <vector>
+
+#include "Eigen/Core"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "opencv2/opencv.hpp"
+
+class ProjectionHelper {
+   public:
+    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
+        return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height;
+    }
+};
+namespace robot::experimental::learn_descriptors {
+class SpatialSceneTest {
+   public:
+    SpatialSceneTest() = default;
+    SpatialSceneTest(std::vector<Eigen::Vector3d> points) : points_(points){};
+    const std::vector<Eigen::Vector3d> &get_points() { return points_; };
+    template <typename T>
+    std::vector<Eigen::Vector2d> get_projected_pixels(const gtsam::PinholeCamera<T> &camera,
+                                                      const cv::Size &img_size) {
+        std::vector<Eigen::Vector2d> pixels;
+        for (const Eigen::Vector3d &pt : points_) {
+            Eigen::Vector2d pixel = camera.project(pt);
+            if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) {
+                pixels.push_back(pixel);
+            }
+        }
+        return pixels;
+    };
+    template <typename T>
+    const std::vector<std::pair<Eigen::Vector2d, Eigen::Vector2d>> get_corresponding_pixels(
+        const std::vector<std::pair<gtsam::PinholeCamera<T>, cv::Size>> &cam_and_img_size) {
+        std::vector<std::pair<Eigen::Vector2d, Eigen::Vector2d>> corresponding_pixels;
+        for (const Eigen::Vector3d &pt : points_) {
+            Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt);
+            Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt);
+            if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width,
+                                                 cam_and_img_size[0].second.height) &&
+                ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width,
+                                                 cam_and_img_size[1].second.height)) {
+                corresponding_pixels.emplace_back(pixel_cam0, pixel_cam1);
+            }
+        }
+        return corresponding_pixels;
+    };
+
+   protected:
+    std::vector<Eigen::Vector3d> points_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_scene_test_cube.hh b/experimental/learn_descriptors/spatial_scene_test_cube.hh
new file mode 100644
index 000000000..b8ff6aea6
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_scene_test_cube.hh
@@ -0,0 +1,21 @@
+#pragma once
+#include <iostream>
+
+#include "Eigen/Core"
+#include "experimental/learn_descriptors/spatial_scene_test.hh"
+
+namespace robot::experimental::learn_descriptors {
+class SpatialSceneTestCube : public SpatialSceneTest {
+   public:
+    SpatialSceneTestCube(const float cube_size) : SpatialSceneTest() {
+        points_.push_back(Eigen::Vector3d(0, 0, 0));
+        points_.push_back(Eigen::Vector3d(cube_size, 0, 0));
+        points_.push_back(Eigen::Vector3d(cube_size, cube_size, 0));
+        points_.push_back(Eigen::Vector3d(0, cube_size, 0));
+        points_.push_back(Eigen::Vector3d(0, 0, cube_size));
+        points_.push_back(Eigen::Vector3d(cube_size, 0, cube_size));
+        points_.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
+        points_.push_back(Eigen::Vector3d(0, cube_size, cube_size));
+    };
+};
+}  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index d9b145a18..cad7869d5 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -7,6 +7,7 @@
 #include "common/geometry/camera.hh"
 #include "common/geometry/opencv_viz.hh"
 #include "common/geometry/translate_types.hh"
+#include "gtsam/geometry/Point2.h"
 #include "gtsam/geometry/triangulation.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/slam/BetweenFactor.h"
@@ -33,20 +34,8 @@ std::string pose_to_string(gtsam::Pose3 pose) {
     return ss.str();
 };
 
-// const gtsam::Pose3 StructureFromMotion::default_initial_pose = []() {
-//     Eigen::Isometry3d
-// }();
 const gtsam::Pose3 StructureFromMotion::default_initial_pose(
     StructureFromMotion::T_symlake_boat_cam.matrix());
-// const gtsam::Pose3 StructureFromMotion::default_initial_pose =
-//     gtsam::Pose3(gtsam::Rot3(T_symlake_boat_cam.rotation()), T_symlake_boat_cam.translation());
-// gtsam::Pose3(
-//     gtsam::Rot3(Eigen::Matrix3d(
-//         Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix() *
-//         Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 1, 0)).toRotationMatrix()
-//     )),
-//     gtsam::Point3::Identity()
-// );
 
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
                                          gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
@@ -72,77 +61,92 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
         frontend_.get_keypoints_and_descriptors(img);
-    keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
-    const size_t idx_img_current = get_num_images_added();
+    feature_manager_.append_img_data(keypoints_and_descriptors.first,
+                                     keypoints_and_descriptors.second);
+    // keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
+    // const size_t idx_img_current = get_num_images_added();
+    const size_t idx_img_current = feature_manager_.get_num_images_added() - 1;
+    std::cout << "current index " << idx_img_current << std::endl;
     if (idx_img_current > 0) {
         std::vector<cv::DMatch> matches =
             get_matches(img_keypoints_and_descriptors_.back().second,
                         keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
-        // std::vector<cv::DMatch> matches = frontend_.get_matches(
-        //     img_keypoints_and_descriptors_.back().second, keypoints_and_descriptors.second);
-        // Frontend::enforce_bijective_matches(matches);
-        // matches_.push_back(matches);
-        // gtsam::Rot3 R_c0_c1(
-        //     geom::estimate_c0_c1(
-        //         img_keypoints_and_descriptors_.back().first, keypoints_and_descriptors.first,
-        //         matches,
-        //         geom::eigen_mat_to_cv(geom::get_intrinsic_matrix(get_backend().get_K()))).linear());
 
         const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1);
         const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current);
-        // backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char,
-        // get_num_images_added()), gps_prior, backend_.get_translation_noise()); gtsam::Pose3
-        // T_w_c0 = backend_.get_current_initial_values().at<gtsam::Pose3>(sym_T_w_c0); gtsam::Pose3
-        // T_c0_c1(
-        //     R_c0_c1,
-        //     t_world_gps - T_w_c0.translation());
-        // backend_.add_between_factor(
-        //     sym_T_w_c0,
-        //     sym_T_w_c1,
-        //     T_c0_c1,
-        //     backend_.get_pose_noise());
+
         backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(),
                                 T_world_cam.rotation());
-        // backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise());
 
         for (const cv::DMatch match : matches) {
             const cv::KeyPoint kpt_cam0 =
                 img_keypoints_and_descriptors_.back().first[match.queryIdx];
             const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx];
 
-            if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) ==
-                keypoint_to_landmarks_[idx_img_current - 1].end()) {
-                const Backend::Landmark landmark_cam_0(
-                    gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0,
-                    gtsam::Point2(
-                        static_cast<double>(
-                            img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
-                        static_cast<double>(
-                            img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
-                    backend_.get_K(), 3.0);
-                keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0);
+            size_t key;
+            unsigned char chr;
+
+            auto maybe_symbol0 = feature_manager_.get_symbol(idx_img_current - 1, kpt_cam0);
+            if (maybe_symbol0) {
+                key = (*maybe_symbol0).key();
+                chr = (*maybe_symbol0).chr();
+            } else {
+                gtsam::Symbol symbol_temp =
+                    gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_);
+                key = symbol_temp.key();
+                chr = symbol_temp.chr();
                 landmark_count_++;
+                feature_manager_.insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp);
             }
-            // std::cout << "kpt_cam0 in keypoint_to_landmarks_: " <<
-            //     (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) ==
-            //     keypoint_to_landmarks_[idx_img_current - 1].end())
-            //     << std::endl;
-            const Backend::Landmark landmark_cam_0 =
-                keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0);
-
-            // technically don't need this conditional when enforcing bijectivity
-            if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) ==
-                keypoint_to_landmarks_[idx_img_current].end()) {
-                const Backend::Landmark landmark_cam_1(
-                    landmark_cam_0.lmk_factor_symbol, sym_T_w_c1,
-                    gtsam::Point2(
-                        static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
-                        static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
-                    backend_.get_K(), 3.0);
-                keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1);
-            }
-            const Backend::Landmark landmark_cam_1 =
-                keypoint_to_landmarks_[idx_img_current].at(kpt_cam1);
+            gtsam::Symbol symbol = gtsam::Symbol(key, chr);
+            feature_manager_.insert_symbol(idx_img_current, kpt_cam1, symbol);
+
+            const Backend::Landmark landmark_cam_0(
+                symbol, sym_T_w_c0,
+                gtsam::Point2(
+                    static_cast<double>(
+                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
+                    static_cast<double>(
+                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
+                backend_.get_K(), 3.0);
+
+            const Backend::Landmark landmark_cam_1(
+                symbol, sym_T_w_c1,
+                gtsam::Point2(
+                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
+                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
+                backend_.get_K(), 3.0);
+
+            // if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) ==
+            //     keypoint_to_landmarks_[idx_img_current - 1].end()) {
+            //     const Backend::Landmark landmark_cam_0(
+            // gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0,
+            // gtsam::Point2(
+            //     static_cast<double>(
+            //         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
+            //     static_cast<double>(
+            //         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
+            // backend_.get_K(), 3.0);
+            //     keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0);
+            //     landmark_count_++;
+            // }
+
+            // const Backend::Landmark landmark_cam_0 =
+            //     keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0);
+
+            // // technically don't need this conditional when enforcing bijectivity
+            // if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) ==
+            //     keypoint_to_landmarks_[idx_img_current].end()) {
+            //     const Backend::Landmark landmark_cam_1(
+            //         landmark_cam_0.lmk_factor_symbol, sym_T_w_c1,
+            //         gtsam::Point2(
+            //             static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
+            //             static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
+            //         backend_.get_K(), 3.0);
+            //     keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1);
+            // }
+            // const Backend::Landmark landmark_cam_1 =
+            //     keypoint_to_landmarks_[idx_img_current].at(kpt_cam1);
 
             backend_.add_landmark(landmark_cam_0);
             backend_.add_landmark(landmark_cam_1);
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 7cc454558..ada233108 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -8,6 +8,7 @@
 
 #include "Eigen/Core"
 #include "common/geometry/camera.hh"
+#include "experimental/learn_descriptors/feature_manager.hh"
 #include "gtsam/geometry/Point3.h"
 #include "gtsam/geometry/Pose3.h"
 #include "gtsam/inference/Symbol.h"
@@ -17,28 +18,29 @@
 #include "gtsam/nonlinear/Values.h"
 #include "opencv2/opencv.hpp"
 
-namespace std {
-template <>
-struct hash<cv::KeyPoint> {
-    size_t operator()(const cv::KeyPoint &kp) const {
-        size_t h1 = hash<float>()(kp.pt.x);
-        size_t h2 = hash<float>()(kp.pt.y);
-        size_t h3 = hash<float>()(kp.size);
-        size_t h4 = hash<float>()(kp.angle);
-        size_t h5 = hash<float>()(kp.response);
-        size_t h6 = hash<int>()(kp.octave);
-        size_t h7 = hash<int>()(kp.class_id);
-
-        return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
-    }
-};
-}  // namespace std
-namespace cv {
-inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) {
-    return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
-           kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id == kp2.class_id;
-}
-}  // namespace cv
+// namespace std {
+// template <>
+// struct hash<cv::KeyPoint> {
+//     size_t operator()(const cv::KeyPoint &kp) const {
+//         size_t h1 = hash<float>()(kp.pt.x);
+//         size_t h2 = hash<float>()(kp.pt.y);
+//         size_t h3 = hash<float>()(kp.size);
+//         size_t h4 = hash<float>()(kp.angle);
+//         size_t h5 = hash<float>()(kp.response);
+//         size_t h6 = hash<int>()(kp.octave);
+//         size_t h7 = hash<int>()(kp.class_id);
+
+//         return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
+//     }
+// };
+// }  // namespace std
+// namespace cv {
+// inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) {
+//     return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
+//            kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id ==
+//            kp2.class_id;
+// }
+// }  // namespace cv
 namespace robot::experimental::learn_descriptors {
 class Frontend {
    public:
@@ -208,6 +210,8 @@ class StructureFromMotion {
     const std::vector<std::vector<cv::DMatch>> get_matches() { return matches_; };
 
    private:
+    FeatureManager feature_manager_;
+
     gtsam::Pose3 initial_pose_;
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
     std::vector<std::vector<cv::DMatch>> matches_;

From ef3ec99abb74a2a9675a3c91fb65a82d17f63a4a Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 11 Apr 2025 13:32:05 -0400
Subject: [PATCH 16/45] WIP. refactoring StructureFromMotion. Added some
 synthetic scenes for testing spatial pipeline components (gtsam stuff).
 Feature manager implementation and incorporation in StructureFromMotion still
 needs work.

---
 experimental/learn_descriptors/BUILD          |  84 +++-
 experimental/learn_descriptors/backend.cc     | 192 ++++++++
 experimental/learn_descriptors/backend.hh     | 114 +++++
 .../learn_descriptors/backend_test.cc         |  49 +++
 .../learn_descriptors/feature_manager.hh      |  19 +
 experimental/learn_descriptors/feature_set.hh |   1 +
 experimental/learn_descriptors/frontend.cc    | 155 +++++++
 experimental/learn_descriptors/frontend.hh    |  51 +++
 .../learn_descriptors/frontend_test.cc        | 115 +++++
 .../learn_descriptors/spatial_scene_test.hh   |  53 ---
 .../spatial_scene_test_cube.hh                |  21 -
 .../learn_descriptors/spatial_test_scene.cc   |  82 ++++
 .../learn_descriptors/spatial_test_scene.hh   |  50 +++
 .../spatial_test_scene_cube.hh                |  21 +
 .../spatial_test_scene_cube_test.cc           |  37 ++
 .../structure_from_motion.cc                  | 409 +-----------------
 .../structure_from_motion.hh                  | 170 +-------
 .../structure_from_motion_test.cc             | 165 +------
 18 files changed, 992 insertions(+), 796 deletions(-)
 create mode 100644 experimental/learn_descriptors/backend.cc
 create mode 100644 experimental/learn_descriptors/backend.hh
 create mode 100644 experimental/learn_descriptors/backend_test.cc
 create mode 100644 experimental/learn_descriptors/frontend.cc
 create mode 100644 experimental/learn_descriptors/frontend.hh
 create mode 100644 experimental/learn_descriptors/frontend_test.cc
 delete mode 100644 experimental/learn_descriptors/spatial_scene_test.hh
 delete mode 100644 experimental/learn_descriptors/spatial_scene_test_cube.hh
 create mode 100644 experimental/learn_descriptors/spatial_test_scene.cc
 create mode 100644 experimental/learn_descriptors/spatial_test_scene.hh
 create mode 100644 experimental/learn_descriptors/spatial_test_scene_cube.hh
 create mode 100644 experimental/learn_descriptors/spatial_test_scene_cube_test.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 87936dbdb..e6701405d 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -28,10 +28,10 @@ cc_library(
     "@opencv//:opencv",
     "@gtsam//:gtsam",
     "@eigen",
-    "//common/geometry:camera",
-    "//common/geometry:translate_types",
     "//common/geometry:opencv_viz",
-    ":feature_manager"
+    ":feature_manager",
+    ":frontend",
+    ":backend"
   ]
 )
 
@@ -126,15 +126,16 @@ cc_test(
     "@gtsam//:gtsam",
     "@opencv//:opencv",
     "@eigen",
-    ":spatial_scene_test_cube"
+    ":spatial_test_scene_cube"
   ]
 )
 
 
 cc_library(
-  name = "spatial_scene_test",
-  hdrs = ["spatial_scene_test.hh"],
+  name = "spatial_test_scene",
+  hdrs = ["spatial_test_scene.hh"],
   visibility = ["//visibility:public"],
+  srcs = ["spatial_test_scene.cc"],
   deps = [        
     "@gtsam//:gtsam",
     "@opencv//:opencv",    
@@ -143,11 +144,74 @@ cc_library(
 )
 
 cc_library(
-  name = "spatial_scene_test_cube",
-  hdrs = ["spatial_scene_test_cube.hh"],
+  name = "spatial_test_scene_cube",
+  hdrs = ["spatial_test_scene_cube.hh"],
   visibility = ["//visibility:public"],
   deps = [        
     "@eigen",
-    ":spatial_scene_test"
+    ":spatial_test_scene"
+  ]
+)
+
+cc_test(
+  name = "spatial_test_scene_cube_test",
+  srcs = ["spatial_test_scene_cube_test.cc"],
+  deps = [    
+    "@com_google_googletest//:gtest_main",    
+    ":spatial_test_scene_cube",
+    "//common/geometry:opencv_viz",
+    "@eigen"
   ]
-)
\ No newline at end of file
+)
+
+cc_library(
+  name = "frontend",
+  hdrs = ["frontend.hh"],
+  visibility = ["//visibility:public"],
+  srcs = ["frontend.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    "@opencv//:opencv",    
+  ]
+)
+
+
+cc_test(
+  name = "frontend_test",
+  srcs = ["frontend_test.cc"],
+  deps = [    
+    "@com_google_googletest//:gtest_main",    
+    ":frontend"
+  ]
+)
+
+cc_library(
+  name = "backend",
+  hdrs = ["backend.hh"],
+  visibility = ["//visibility:public"],
+  srcs = ["backend.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    "@opencv//:opencv",    
+    "@gtsam//:gtsam",
+    "//common/geometry:camera",
+    ":feature_manager"
+  ]
+)
+
+
+cc_test(
+  name = "backend_test",
+  srcs = ["backend_test.cc"],
+  deps = [    
+    "@com_google_googletest//:gtest_main",    
+    "@opencv//:opencv",
+    ":backend",
+    ":spatial_scene_test_cube",
+    ":feature_manager"
+  ]
+)
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
new file mode 100644
index 000000000..bb601780c
--- /dev/null
+++ b/experimental/learn_descriptors/backend.cc
@@ -0,0 +1,192 @@
+#include "experimental/learn_descriptors/backend.hh"
+
+#include "gtsam/geometry/triangulation.h"
+#include "gtsam/navigation/GPSFactor.h"
+#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/slam/BetweenFactor.h"
+#include "gtsam/slam/PriorFactor.h"
+#include "gtsam/slam/ProjectionFactor.h"
+
+namespace robot::experimental::learn_descriptors {
+
+Backend::Backend(std::shared_ptr<FeatureManager> feature_manager)
+    : feature_manager_(feature_manager) {
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
+    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
+    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
+}
+
+Backend::Backend(std::shared_ptr<FeatureManager> feature_manager, gtsam::Cal3_S2 K)
+    : feature_manager_(feature_manager) {
+    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
+    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
+}
+
+template <>
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value,
+                               const gtsam::SharedNoiseModel &noise) {
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, noise);
+    initial_estimate_.insert_or_assign(symbol, value);
+    // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
+    // initial_estimate_.print("values after adding prior: ");
+}
+
+template <>
+void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value,
+                               const gtsam::SharedNoiseModel &noise) {
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Point3>>(symbol, value, noise);
+    gtsam::Rot3 R;
+    if (initial_estimate_.exists(symbol)) {
+        R = initial_estimate_.at<gtsam::Pose3>(symbol).rotation();
+    }
+    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value));
+}
+
+template <>
+void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &symbol_1,
+                                               const gtsam::Symbol &symbol_2,
+                                               const gtsam::Pose3 &value,
+                                               const gtsam::SharedNoiseModel &model) {
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, model);
+    // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 <<
+    // std::endl; initial_estimate_.print("values when adding between factor: ");
+    initial_estimate_.insert_or_assign(symbol_2,
+                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
+}
+
+template <>
+void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
+                                              const gtsam::Symbol &symbol_2,
+                                              const gtsam::Rot3 &value,
+                                              const gtsam::SharedNoiseModel &model) {
+    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
+    initial_estimate_.insert_or_assign(symbol_2,
+                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(
+                                           gtsam::Pose3(value, gtsam::Point3::Zero())));
+}
+
+void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam,
+                             const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) {
+    graph_.emplace_shared<gtsam::GPSFactor>(symbol, t_world_cam, model);
+    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam));
+}
+
+std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(
+    const gtsam::Symbol &lmk_symbol) {
+    std::vector<gtsam::Pose3> cam_poses;
+    std::vector<gtsam::Point2> observations;
+
+    // Iterate over all factors in the graph
+    for (const auto &factor : graph_) {
+        auto projFactor = boost::dynamic_pointer_cast<
+            gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(factor);
+
+        if (projFactor && projFactor->keys().at(1) == lmk_symbol) {
+            // Get the camera pose symbol
+            gtsam::Symbol cameraSymbol = projFactor->keys().at(0);
+
+            // Retrieve the camera pose from values
+            if (!initial_estimate_.exists(cameraSymbol)) continue;
+            gtsam::Pose3 cameraPose = initial_estimate_.at<gtsam::Pose3>(cameraSymbol);
+
+            // Get the 2D observation (keypoint measurement)
+            gtsam::Point2 observation = projFactor->measured();
+
+            // Store the pose and corresponding observation
+            observations.push_back(observation);
+            cam_poses.push_back(cameraPose);
+        }
+    }
+    return std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>>(cam_poses,
+                                                                            observations);
+}
+
+void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
+    for (const Landmark &landmark : landmarks) {
+        add_landmark(landmark);
+    }
+}
+
+void Backend::add_landmark(const Landmark &landmark) {
+    graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+        landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
+        K_);
+    if (!initial_estimate_.exists(landmark.cam_pose_symbol)) {
+        throw std::runtime_error(
+            "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before "
+            "add_landmark is called.");
+    }
+    gtsam::Point3 p_world_lmk_estimate =
+        initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess;
+    initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+
+    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs =
+        get_obs_for_lmk(landmark.lmk_factor_symbol);
+    if (lmk_obs.first.size() >= 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_world_lmk_estimate = gtsam::triangulatePoint3(
+                lmk_obs.first, K_,
+                gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
+
+            // Optional: perform an explicit cheirality check
+            bool valid = true;
+            for (const auto &pose : lmk_obs.first) {
+                // Transform point to the camera coordinate system.
+                // transformTo() converts a world point to the camera frame.
+                gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate);
+                if (p_cam.z() <= 0) {  // Check that the depth is positive
+                    valid = false;
+                    break;
+                }
+            }
+
+            if (valid) {
+                initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+            } else {
+                std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess."
+                          << std::endl;
+            }
+        } catch (const gtsam::TriangulationCheiralityException &e) {
+            // Handle the exception gracefully by logging and retaining the previous estimate.
+            std::cerr << "Triangulation Cheirality Exception: " << e.what()
+                      << ". Keeping initial landmark estimate." << std::endl;
+        }
+    }
+}
+
+void Backend::solve_graph() {
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
+    result_ = optimizer.optimize();
+}
+
+void Backend::solve_graph(const int num_steps,
+                          std::optional<Backend::graph_step_debug_func> inter_debug_func) {
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;          // We'll manually step it
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
+
+    double prev_error = optimizer.error();
+    for (int i = 0; i < num_steps; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+
+        if (inter_debug_func) {
+            (*inter_debug_func)(optimizer.values(), i);
+        }
+        if (std::abs(prev_error - curr_error) < 1e-6) {
+            std::cout << "Converged at iteration " << i << "\n";
+            break;
+        }
+    }
+    result_ = optimizer.values();
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
new file mode 100644
index 000000000..fa988b905
--- /dev/null
+++ b/experimental/learn_descriptors/backend.hh
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include "common/geometry/camera.hh"
+#include "experimental/learn_descriptors/feature_manager.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Point2.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/linear/NoiseModel.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
+#include "gtsam/nonlinear/Values.h"
+
+namespace robot::experimental::learn_descriptors {
+class Backend {
+   public:
+    static constexpr char pose_symbol_char = 'x';
+    static constexpr char pose_rot_symbol_char = 'r';
+    static constexpr char pose_translation_symbol_char = 't';
+    static constexpr char pose_bearing_symbol_char = 'b';
+    static constexpr char landmark_symbol_char = 'l';
+    static constexpr char camera_symbol_char = 'k';
+
+    struct Landmark {
+        Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol,
+                 const gtsam::Point2 &projection, const gtsam::Cal3_S2 K,
+                 float initial_depth_guess = 5.0)
+            : lmk_factor_symbol(lmk_factor_symbol),
+              cam_pose_symbol(cam_pose_symbol),
+              projection(projection),
+              p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K),
+                                                         projection, initial_depth_guess)){};
+        const gtsam::Symbol lmk_factor_symbol;
+        const gtsam::Symbol cam_pose_symbol;
+        const gtsam::Point2 projection;
+        const gtsam::Point3 p_cam_lmk_guess;
+    };
+
+    Backend(std::shared_ptr<FeatureManager> feature_manager = nullptr);
+    Backend(std::shared_ptr<FeatureManager> feature_manager, gtsam::Cal3_S2 K);
+    ~Backend(){};
+
+    template <typename T>
+    void add_prior_factor(const gtsam::Symbol &symbol, const T &value,
+                          const gtsam::SharedNoiseModel &model);
+
+    template <typename T>
+    void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
+                            const T &value, const gtsam::SharedNoiseModel &model);
+
+    void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps,
+                        const gtsam::SharedNoiseModel &model,
+                        const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity());
+
+    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(
+        const gtsam::Symbol &lmk_symbol);
+    void add_landmarks(const std::vector<Landmark> &landmarks);
+    void add_landmark(const Landmark &landmark);
+
+    void solve_graph();
+    typedef int epoch;
+    using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
+    void solve_graph(const int num_steps,
+                     std::optional<graph_step_debug_func> inter_debug_func = std::nullopt);
+
+    const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
+    const gtsam::Values &get_result() const { return result_; };
+    const gtsam::Cal3_S2 &get_K() const { return *K_; };
+
+    const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; };
+    const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; };
+    const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; };
+    const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; };
+
+   private:
+    std::shared_ptr<FeatureManager> feature_manager_;
+    gtsam::Cal3_S2::shared_ptr K_;
+
+    gtsam::Values initial_estimate_;
+    gtsam::Values result_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
+        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
+    gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
+    gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(3, 2.);
+};
+
+template <>
+void Backend::add_prior_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Pose3 &,
+                                             const gtsam::SharedNoiseModel &);
+
+template <>
+void Backend::add_prior_factor<gtsam::Point3>(const gtsam::Symbol &, const gtsam::Point3 &,
+                                              const gtsam::SharedNoiseModel &);
+
+template <>
+void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Symbol &,
+                                               const gtsam::Pose3 &,
+                                               const gtsam::SharedNoiseModel &);
+template <>
+void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam::Symbol &,
+                                              const gtsam::Rot3 &, const gtsam::SharedNoiseModel &);
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc
new file mode 100644
index 000000000..94d7b5067
--- /dev/null
+++ b/experimental/learn_descriptors/backend_test.cc
@@ -0,0 +1,49 @@
+#include "experimental/learn_descriptors/backend.hh"
+
+#include "experimental/learn_descriptors/feature_manager.hh"
+#include "gtest/gtest.h"
+#include "spatial_scene_test_cube.hh"
+
+namespace robot::experimental::learn_descriptors {
+TEST(BackendTest, cube) {
+    SpatialTestSceneCube test_scene(1.0f);
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const cv::Size img_size(640, 480);
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+
+    Backend backend(std::make_shared<FeatureManager>(), *K);
+    test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K);
+
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.get_cameras();
+    std::vector<Eigen::Isometry3d> camera_poses;
+    camera_poses.reserve(cameras.size());
+    for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &T_world_cam : cameras) {
+        camera_poses.emplace_back(T_world_cam.pose().matrix());
+    }
+
+    backend.add_prior_factor(gtsam::Symbol(backend.camera_symbol_char, 0), camera_poses.front(),
+                             backend.get_pose_noise());
+    for (int i = 1; i < camera_poses.size(); i++) {
+        backend.add_factor_GPS(gtsam::Symbol(backend.camera_symbol_char, i), camera_poses[i],
+                               backend.get_gps_noise());
+
+        std::pair<gtsam::PinholeCamera<T>, cv::Size> cam_and_img_sizes[2] = {
+            std::pair<gtsam::PinholeCamera<T>, cv::Size>(cameras[i - 1], img_size),
+            std::pair<gtsam::PinholeCamera<T>, cv::Size>(cameras[i], img_size)};
+        std::vector<std::pair<SpatialTestScene::ProjectedPoint, SpatialTestScene::ProjectedPoint>>
+            lmks = test_scene.get_corresponding_pixels(cam_and_img_sizes);
+        for (const &std::pair < SpatialTestScene::ProjectedPoint,
+             SpatialTestScene::ProjectedPoint >> lmk : lmks) {
+            // Backend::Landmark()
+        }
+    }
+
+    geometry::viz_scene(camera_poses, test_scene.get_points());
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_manager.hh b/experimental/learn_descriptors/feature_manager.hh
index 0c66d514d..d11fb8132 100644
--- a/experimental/learn_descriptors/feature_manager.hh
+++ b/experimental/learn_descriptors/feature_manager.hh
@@ -2,6 +2,7 @@
 #include <optional>
 #include <sstream>
 #include <stdexcept>
+#include <unordered_map>
 #include <vector>
 
 #include "experimental/learn_descriptors/feature_set.hh"
@@ -16,6 +17,11 @@ class FeatureManager {
     };
     void insert_symbol(const size_t idx_img, const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) {
         feature_sets_[idx_img].insert_symbol(kpt, symbol);
+        if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) {
+            symbols_to_idx_img_.at(symbol).push_back(idx_img);
+        } else {
+            symbols_to_idx_img_.emplace(symbol, std::vector<size_t>{idx_img});
+        }
     };
 
     std::optional<gtsam::Symbol> get_symbol(const size_t idx_img, const cv::KeyPoint &kpt) {
@@ -24,7 +30,20 @@ class FeatureManager {
 
     size_t get_num_images_added() { return feature_sets_.size(); };
 
+    std::unordered_map<gtsam::Symbol, std::vector<size_t>> get_added_symbols() {
+        return symbols_to_idx_img_;
+    };
+
+    bool get_idxs_for_symbol(const gtsam::Symbol &symbol, std::vector<size_t> &out_cam_idxs) {
+        if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) {
+            out_cam_idxs = symbols_to_idx_img_.at(symbol);
+            return true;
+        }
+        return false;
+    }
+
    private:
     std::vector<FeatureSet> feature_sets_;
+    std::unordered_map<gtsam::Symbol, std::vector<size_t>> symbols_to_idx_img_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_set.hh b/experimental/learn_descriptors/feature_set.hh
index 65099cffe..5aa35260c 100644
--- a/experimental/learn_descriptors/feature_set.hh
+++ b/experimental/learn_descriptors/feature_set.hh
@@ -64,5 +64,6 @@ class FeatureSet {
    private:
     keypoints_descriptors kpts_descriptors_;
     std::unordered_map<cv::KeyPoint, gtsam::Symbol> kpt_to_symbol_;
+    std::unordered_map<gtsam::Symbol, cv::KeyPoint> symbol_to_kpt_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
new file mode 100644
index 000000000..46368a31e
--- /dev/null
+++ b/experimental/learn_descriptors/frontend.cc
@@ -0,0 +1,155 @@
+#include "experimental/learn_descriptors/frontend.hh"
+
+#include <unordered_map>
+
+namespace robot::experimental::learn_descriptors {
+
+Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
+    extractor_type_ = frontend_algorithm;
+    matcher_type_ = frontend_matcher;
+
+    switch (extractor_type_) {
+        case ExtractorType::SIFT:
+            feature_extractor_ = cv::SIFT::create();
+            break;
+        case ExtractorType::ORB:
+            feature_extractor_ = cv::ORB::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+    switch (matcher_type_) {
+        case MatcherType::BRUTE_FORCE:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case MatcherType::KNN:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case MatcherType::FLANN:
+            if (frontend_algorithm == ExtractorType::ORB) {
+                throw std::invalid_argument("FLANN can not be used with ORB.");
+            }
+            descriptor_matcher_ = cv::FlannBasedMatcher::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+}
+
+std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
+    const cv::Mat &img) const {
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+    switch (extractor_type_) {
+        default:  // the opencv extractors have the same function signature
+            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
+            break;
+    }
+    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
+}
+
+std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
+                                              const cv::Mat &descriptors2) const {
+    std::vector<cv::DMatch> matches;
+    switch (matcher_type_) {
+        case MatcherType::BRUTE_FORCE:
+            get_brute_matches(descriptors1, descriptors2, matches);
+            break;
+        case MatcherType::KNN:
+            get_KNN_matches(descriptors1, descriptors2, matches);
+            break;
+        case MatcherType::FLANN:
+            get_FLANN_matches(descriptors1, descriptors2, matches);
+            break;
+        default:
+            break;
+    }
+    std::sort(matches.begin(), matches.end());
+    return matches;
+}
+
+void Frontend::threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold) {
+    matches.erase(std::remove_if(matches.begin(), matches.end(),
+                                 [dist_threshhold](const cv::DMatch &match) {
+                                     return match.distance > dist_threshhold;
+                                 }),
+                  matches.end());
+}
+
+void Frontend::enforce_bijective_matches(std::vector<cv::DMatch> &matches) {
+    std::unordered_map<int, cv::DMatch> bestQueryMatch;
+    std::unordered_map<int, cv::DMatch> bestTrainMatch;
+
+    for (const auto &match : matches) {
+        int queryIdx = match.queryIdx;
+        int trainIdx = match.trainIdx;
+
+        if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() ||
+            match.distance < bestQueryMatch[queryIdx].distance) {
+            bestQueryMatch[queryIdx] = match;
+        }
+
+        if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() ||
+            match.distance < bestTrainMatch[trainIdx].distance) {
+            bestTrainMatch[trainIdx] = match;
+        }
+    }
+
+    matches.erase(std::remove_if(matches.begin(), matches.end(),
+                                 [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) {
+                                     int queryIdx = match.queryIdx;
+                                     int trainIdx = match.trainIdx;
+
+                                     return bestQueryMatch[queryIdx].trainIdx != trainIdx ||
+                                            bestTrainMatch[trainIdx].queryIdx != queryIdx;
+                                 }),
+                  matches.end());
+}
+
+bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                 std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
+        return false;
+    }
+    matches_out.clear();
+    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
+    return true;
+}
+
+bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::KNN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    // Lowe's Ratio Test
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+
+bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                 std::vector<cv::DMatch> &matches_out) const {
+    if (matcher_type_ != MatcherType::FLANN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
new file mode 100644
index 000000000..9b720cc06
--- /dev/null
+++ b/experimental/learn_descriptors/frontend.hh
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class Frontend {
+   public:
+    enum class ExtractorType { SIFT, ORB };
+    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
+
+    Frontend(){};
+    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
+    ~Frontend(){};
+
+    const ExtractorType &get_extractor_type() const { return extractor_type_; };
+    const MatcherType &get_matcher_type() const { return matcher_type_; };
+
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
+        const cv::Mat &img) const;
+    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
+                                        const cv::Mat &descriptors2) const;
+    static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
+    static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
+    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
+                               cv::Mat img_keypoints_out) {
+        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
+                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+    }
+    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
+                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
+                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
+        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
+    }
+
+   private:
+    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                           std::vector<cv::DMatch> &matches_out) const;
+    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                         std::vector<cv::DMatch> &matches_out) const;
+    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                           std::vector<cv::DMatch> &matches_out) const;
+    ExtractorType extractor_type_;
+    MatcherType matcher_type_;
+
+    cv::Ptr<cv::Feature2D> feature_extractor_;
+    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
new file mode 100644
index 000000000..8ab092d3e
--- /dev/null
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -0,0 +1,115 @@
+#include "experimental/learn_descriptors/frontend.hh"
+
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+TEST(FrontendTest, pipeline_sweep) {
+    const size_t width = 640;
+    const size_t height = 480;
+
+    const size_t pixel_shift_x = 20;
+    const size_t PIXEL_COMP_TOL = 20;
+
+    cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3);
+    cv::Mat image_2;
+
+    cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
+    cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
+    float com_x = 0.0f, com_y = 0.0f;
+    for (const cv::Point &rect_point : rect_points) {
+        com_x += rect_point.x;
+        com_y += rect_point.y;
+    }
+    com_x *= 0.25;
+    com_y *= 0.25;
+    cv::Point rotation_center(com_x, com_y);
+    cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0);
+
+    const size_t line_spacing = 100;
+    for (size_t i = 0; i <= width / line_spacing; i++) {
+        size_t x = i * line_spacing + (width % line_spacing) / 2;
+        size_t b = i * 255.0 / (width / line_spacing);
+        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
+        cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2);
+    }
+    for (size_t i = 0; i <= height / line_spacing; i++) {
+        size_t y = i * line_spacing + (height % line_spacing) / 2;
+        size_t b = i * 255.0 / (width / line_spacing);
+        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
+        cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2);
+    }
+
+    cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
+    cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
+
+    // cv::Mat img_test_disp;
+    // cv::hconcat(image_1, image_2, img_test_disp);
+    // cv::imshow("Test", img_test_disp);
+    // cv::waitKey(1000);
+
+    Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
+                                                  Frontend::ExtractorType::ORB};
+    Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE,
+                                              Frontend::MatcherType::FLANN,
+                                              Frontend::MatcherType::KNN};
+
+    Frontend frontend;
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
+    std::vector<cv::DMatch> matches;
+    cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
+        img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
+    // cv::Mat img_display_test;
+    for (Frontend::ExtractorType extractor_type : extractor_types) {
+        for (Frontend::MatcherType matcher_type : matcher_types) {
+            // printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+            //        static_cast<int>(matcher_type));
+            try {
+                frontend = Frontend(extractor_type, matcher_type);
+            } catch (const std::invalid_argument &e) {
+                assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
+                continue;
+            }
+            keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1);
+            keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2);
+            matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
+                                           keypoints_descriptors_pair_2.second);
+            frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
+                                    img_keypoints_out_1);
+            frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
+                                    img_keypoints_out_2);
+            frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
+                                  keypoints_descriptors_pair_2.first, matches, img_matches_out);
+            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
+            // std::stringstream text;
+            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
+            //      << static_cast<int>(matcher_type);
+            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
+            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
+            // cv::imshow("Keypoints and Matches Output.", img_display_test);
+            // std::cout << "Hold spacebar to pause." << std::endl;
+            // while (cv::waitKey(1000) == 32) {
+            // }
+            // printf("completed frontend combination: (%d, %d)\n",
+            // static_cast<int>(extractor_type),
+            //        static_cast<int>(matcher_type));
+            if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
+                for (const cv::DMatch match : matches) {
+                    EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
+                                    keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
+                                pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL);
+                    EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y -
+                                    keypoints_descriptors_pair_1.first[match.queryIdx].pt.y,
+                                0, PIXEL_COMP_TOL);
+                }
+            }
+        }
+    }
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_scene_test.hh b/experimental/learn_descriptors/spatial_scene_test.hh
deleted file mode 100644
index 60daafb28..000000000
--- a/experimental/learn_descriptors/spatial_scene_test.hh
+++ /dev/null
@@ -1,53 +0,0 @@
-#pragma once
-#include <utility>
-#include <vector>
-
-#include "Eigen/Core"
-#include "gtsam/geometry/PinholeCamera.h"
-#include "opencv2/opencv.hpp"
-
-class ProjectionHelper {
-   public:
-    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
-        return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height;
-    }
-};
-namespace robot::experimental::learn_descriptors {
-class SpatialSceneTest {
-   public:
-    SpatialSceneTest() = default;
-    SpatialSceneTest(std::vector<Eigen::Vector3d> points) : points_(points){};
-    const std::vector<Eigen::Vector3d> &get_points() { return points_; };
-    template <typename T>
-    std::vector<Eigen::Vector2d> get_projected_pixels(const gtsam::PinholeCamera<T> &camera,
-                                                      const cv::Size &img_size) {
-        std::vector<Eigen::Vector2d> pixels;
-        for (const Eigen::Vector3d &pt : points_) {
-            Eigen::Vector2d pixel = camera.project(pt);
-            if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) {
-                pixels.push_back(pixel);
-            }
-        }
-        return pixels;
-    };
-    template <typename T>
-    const std::vector<std::pair<Eigen::Vector2d, Eigen::Vector2d>> get_corresponding_pixels(
-        const std::vector<std::pair<gtsam::PinholeCamera<T>, cv::Size>> &cam_and_img_size) {
-        std::vector<std::pair<Eigen::Vector2d, Eigen::Vector2d>> corresponding_pixels;
-        for (const Eigen::Vector3d &pt : points_) {
-            Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt);
-            Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt);
-            if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width,
-                                                 cam_and_img_size[0].second.height) &&
-                ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width,
-                                                 cam_and_img_size[1].second.height)) {
-                corresponding_pixels.emplace_back(pixel_cam0, pixel_cam1);
-            }
-        }
-        return corresponding_pixels;
-    };
-
-   protected:
-    std::vector<Eigen::Vector3d> points_;
-};
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_scene_test_cube.hh b/experimental/learn_descriptors/spatial_scene_test_cube.hh
deleted file mode 100644
index b8ff6aea6..000000000
--- a/experimental/learn_descriptors/spatial_scene_test_cube.hh
+++ /dev/null
@@ -1,21 +0,0 @@
-#pragma once
-#include <iostream>
-
-#include "Eigen/Core"
-#include "experimental/learn_descriptors/spatial_scene_test.hh"
-
-namespace robot::experimental::learn_descriptors {
-class SpatialSceneTestCube : public SpatialSceneTest {
-   public:
-    SpatialSceneTestCube(const float cube_size) : SpatialSceneTest() {
-        points_.push_back(Eigen::Vector3d(0, 0, 0));
-        points_.push_back(Eigen::Vector3d(cube_size, 0, 0));
-        points_.push_back(Eigen::Vector3d(cube_size, cube_size, 0));
-        points_.push_back(Eigen::Vector3d(0, cube_size, 0));
-        points_.push_back(Eigen::Vector3d(0, 0, cube_size));
-        points_.push_back(Eigen::Vector3d(cube_size, 0, cube_size));
-        points_.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
-        points_.push_back(Eigen::Vector3d(0, cube_size, cube_size));
-    };
-};
-}  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/spatial_test_scene.cc b/experimental/learn_descriptors/spatial_test_scene.cc
new file mode 100644
index 000000000..6be58e661
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene.cc
@@ -0,0 +1,82 @@
+#include "experimental/learn_descriptors/spatial_test_scene.hh"
+
+#include <random>
+
+namespace robot::experimental::learn_descriptors {
+template <typename T>
+std::vector<SpatialTestScene::ProjectedPoint> SpatialTestScene::get_projected_pixels(
+    const gtsam::PinholeCamera<T> &camera, const cv::Size &img_size) {
+    std::vector<ProjectedPoint> pixels;
+    for (size_t i = 0; i < points_.size(); i++) {
+        const Eigen::Vector3d pt = points_[i];
+        Eigen::Vector2d pixel = camera.project(pt);
+        if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) {
+            ProjectedPoint projected_pt(i, pixel);
+            pixels.push_back(projected_pt);
+        }
+    }
+    return pixels;
+};
+
+template <typename T>
+std::vector<std::pair<SpatialTestScene::ProjectedPoint, SpatialTestScene::ProjectedPoint>>
+SpatialTestScene::get_corresponding_pixels(
+    const std::pair<gtsam::PinholeCamera<T>, cv::Size> (&cam_and_img_size)[2]) {
+    std::vector<std::pair<ProjectedPoint, ProjectedPoint>> corresponding_pixels;
+    for (size_t i = 0; i < points_.size(); i++) {
+        const Eigen::Vector3d pt = points_[i];
+        Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt);
+        Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt);
+        if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width,
+                                             cam_and_img_size[0].second.height) &&
+            ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width,
+                                             cam_and_img_size[1].second.height)) {
+            corresponding_pixels.emplace_back(ProjectedPoint(i, pixel_cam0),
+                                              ProjectedPoint(i, pixel_cam1));
+        }
+    }
+    return corresponding_pixels;
+};
+
+void SpatialTestScene::add_point(Eigen::Vector3d point) { points_.push_back(point); };
+
+void SpatialTestScene::add_points(std::vector<Eigen::Vector3d> points) {
+    points_.insert(points_.end(), points.begin(), points.end());
+}
+
+void SpatialTestScene::add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera) {
+    cameras_.push_back(camera);
+}
+
+void SpatialTestScene::add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras) {
+    cameras_.insert(cameras_.end(), cameras.begin(), cameras.end());
+}
+
+void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_radius_origin,
+                                                    double max_radius_origin,
+                                                    const gtsam::Cal3_S2 &K) {
+    std::random_device rd;
+    std::mt19937 gen(rd());
+
+    std::uniform_real_distribution<double> dist_radius(min_radius_origin, max_radius_origin);
+    std::uniform_real_distribution<double> dist_omega(0.0, 2 * M_PI);
+
+    for (int i = 0; i < num_cameras; i++) {
+        double angle_z = dist_radius(gen);
+        Eigen::Isometry3d R_z;
+        R_z.linear() = Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix();
+        double angle_x = dist_radius(gen);
+        Eigen::Isometry3d R_y;
+        R_y.linear() = Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix();
+
+        double radius = dist_radius(gen);
+        Eigen::Isometry3d T_world_cam;
+        T_world_cam.translation() = Eigen::Vector3d(radius, 0, 0);
+        T_world_cam.linear() = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) *
+                                Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ()))
+                                   .matrix();
+        T_world_cam = R_z * R_y * T_world_cam;
+        cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(T_world_cam.matrix()), K));
+    }
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_test_scene.hh b/experimental/learn_descriptors/spatial_test_scene.hh
new file mode 100644
index 000000000..bb7b714c4
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene.hh
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "Eigen/Dense"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "opencv2/opencv.hpp"
+
+class ProjectionHelper {
+   public:
+    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
+        return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height;
+    }
+};
+namespace robot::experimental::learn_descriptors {
+class SpatialTestScene {
+   public:
+    struct ProjectedPoint {
+        ProjectedPoint(const size_t pt_idx, const Eigen::Vector2d &pixel)
+            : pt_idx(pt_idx), pixel(pixel){};
+        size_t pt_idx;
+        Eigen::Vector2d pixel;
+    };
+    SpatialTestScene() = default;
+    SpatialTestScene(std::vector<Eigen::Vector3d> points) : points_(points){};
+    template <typename T>
+    std::vector<ProjectedPoint> get_projected_pixels(const gtsam::PinholeCamera<T> &camera,
+                                                     const cv::Size &img_size);
+    template <typename T>
+    std::vector<std::pair<ProjectedPoint, ProjectedPoint>> get_corresponding_pixels(
+        const std::pair<gtsam::PinholeCamera<T>, cv::Size> (&cam_and_img_size)[2]);
+
+    void add_point(Eigen::Vector3d point);
+    void add_points(std::vector<Eigen::Vector3d> points);
+    void add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera);
+    void add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras);
+    void add_rand_cameras_face_origin(int num_cameras, double min_radius_origin,
+                                      double max_radius_origin, const gtsam::Cal3_S2 &K);
+
+    std::vector<Eigen::Vector3d> &get_points() { return points_; };
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> &get_cameras() { return cameras_; };
+
+   protected:
+    std::vector<Eigen::Vector3d> points_;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh
new file mode 100644
index 000000000..306afae4b
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh
@@ -0,0 +1,21 @@
+#pragma once
+#include <iostream>
+
+#include "Eigen/Core"
+#include "experimental/learn_descriptors/spatial_test_scene.hh"
+
+namespace robot::experimental::learn_descriptors {
+class SpatialTestSceneCube : public SpatialTestScene {
+   public:
+    SpatialTestSceneCube(const float cube_size) : SpatialTestScene() {
+        points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2));
+        points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2));
+        points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2));
+        points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2));
+        points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2));
+        points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2));
+        points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2));
+        points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2));
+    };
+};
+}  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_test.cc b/experimental/learn_descriptors/spatial_test_scene_cube_test.cc
new file mode 100644
index 000000000..0a244081e
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene_cube_test.cc
@@ -0,0 +1,37 @@
+#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
+
+#include <vector>
+
+#include "Eigen/Dense"
+#include "common/geometry/opencv_viz.hh"
+#include "gtest/gtest.h"
+
+namespace robot::experimental::learn_descriptors {
+TEST(SpatialTestSceneCubeTest, viz_cube) {
+    SpatialTestSceneCube test_scene(1.0f);
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), test_scene.get_points());
+}
+
+TEST(SpatialTestSceneCubeTest, viz_cube_with_cameras) {
+    SpatialTestSceneCube test_scene(1.0f);
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+
+    test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K);
+
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.get_cameras();
+    std::vector<Eigen::Isometry3d> camera_poses;
+    camera_poses.reserve(cameras.size());
+    for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &T_world_cam : cameras) {
+        camera_poses.emplace_back(T_world_cam.pose().matrix());
+    }
+
+    geometry::viz_scene(camera_poses, test_scene.get_points());
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index cad7869d5..b28d8f009 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -8,11 +8,6 @@
 #include "common/geometry/opencv_viz.hh"
 #include "common/geometry/translate_types.hh"
 #include "gtsam/geometry/Point2.h"
-#include "gtsam/geometry/triangulation.h"
-#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
-#include "gtsam/slam/BetweenFactor.h"
-#include "gtsam/slam/PriorFactor.h"
-#include "gtsam/slam/ProjectionFactor.h"
 
 namespace fs = std::filesystem;
 
@@ -41,9 +36,9 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac
                                          gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
                                          gtsam::Pose3 initial_pose,
                                          Frontend::MatcherType frontend_matcher)
-    : initial_pose_(initial_pose) {
+    : feature_manager_(std::make_shared<FeatureManager>()), initial_pose_(initial_pose) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
-    backend_ = Backend(K);
+    backend_ = Backend(feature_manager_, K);
 
     K_ = (cv::Mat_<double>(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1);
     D_ = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
@@ -61,11 +56,11 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
         frontend_.get_keypoints_and_descriptors(img);
-    feature_manager_.append_img_data(keypoints_and_descriptors.first,
-                                     keypoints_and_descriptors.second);
+    feature_manager_->append_img_data(keypoints_and_descriptors.first,
+                                      keypoints_and_descriptors.second);
     // keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
     // const size_t idx_img_current = get_num_images_added();
-    const size_t idx_img_current = feature_manager_.get_num_images_added() - 1;
+    const size_t idx_img_current = feature_manager_->get_num_images_added() - 1;
     std::cout << "current index " << idx_img_current << std::endl;
     if (idx_img_current > 0) {
         std::vector<cv::DMatch> matches =
@@ -83,26 +78,28 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
                 img_keypoints_and_descriptors_.back().first[match.queryIdx];
             const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx];
 
-            size_t key;
+            size_t idx;
             unsigned char chr;
 
-            auto maybe_symbol0 = feature_manager_.get_symbol(idx_img_current - 1, kpt_cam0);
+            auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0);
             if (maybe_symbol0) {
-                key = (*maybe_symbol0).key();
+                idx = (*maybe_symbol0).index();
                 chr = (*maybe_symbol0).chr();
             } else {
                 gtsam::Symbol symbol_temp =
                     gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_);
-                key = symbol_temp.key();
+                idx = symbol_temp.index();
                 chr = symbol_temp.chr();
                 landmark_count_++;
-                feature_manager_.insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp);
+                feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp);
             }
-            gtsam::Symbol symbol = gtsam::Symbol(key, chr);
-            feature_manager_.insert_symbol(idx_img_current, kpt_cam1, symbol);
+            gtsam::Symbol symbol_lmk = gtsam::Symbol(chr, idx);
+            std::cout << "value: char: " << chr << " , idx: " << idx << ". " << symbol_lmk
+                      << std::endl;
+            feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk);
 
             const Backend::Landmark landmark_cam_0(
-                symbol, sym_T_w_c0,
+                symbol_lmk, sym_T_w_c0,
                 gtsam::Point2(
                     static_cast<double>(
                         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
@@ -111,43 +108,12 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
                 backend_.get_K(), 3.0);
 
             const Backend::Landmark landmark_cam_1(
-                symbol, sym_T_w_c1,
+                symbol_lmk, sym_T_w_c1,
                 gtsam::Point2(
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
                     static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
                 backend_.get_K(), 3.0);
 
-            // if (keypoint_to_landmarks_[idx_img_current - 1].find(kpt_cam0) ==
-            //     keypoint_to_landmarks_[idx_img_current - 1].end()) {
-            //     const Backend::Landmark landmark_cam_0(
-            // gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_), sym_T_w_c0,
-            // gtsam::Point2(
-            //     static_cast<double>(
-            //         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
-            //     static_cast<double>(
-            //         img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
-            // backend_.get_K(), 3.0);
-            //     keypoint_to_landmarks_[idx_img_current - 1].emplace(kpt_cam0, landmark_cam_0);
-            //     landmark_count_++;
-            // }
-
-            // const Backend::Landmark landmark_cam_0 =
-            //     keypoint_to_landmarks_[idx_img_current - 1].at(kpt_cam0);
-
-            // // technically don't need this conditional when enforcing bijectivity
-            // if (keypoint_to_landmarks_[idx_img_current].find(kpt_cam1) ==
-            //     keypoint_to_landmarks_[idx_img_current].end()) {
-            //     const Backend::Landmark landmark_cam_1(
-            //         landmark_cam_0.lmk_factor_symbol, sym_T_w_c1,
-            //         gtsam::Point2(
-            //             static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
-            //             static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
-            //         backend_.get_K(), 3.0);
-            //     keypoint_to_landmarks_[idx_img_current].emplace(kpt_cam1, landmark_cam_1);
-            // }
-            // const Backend::Landmark landmark_cam_1 =
-            //     keypoint_to_landmarks_[idx_img_current].at(kpt_cam1);
-
             backend_.add_landmark(landmark_cam_0);
             backend_.add_landmark(landmark_cam_1);
         }
@@ -166,352 +132,19 @@ std::vector<cv::DMatch> StructureFromMotion::get_matches(
     return matches;
 }
 
-Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
-    extractor_type_ = frontend_algorithm;
-    matcher_type_ = frontend_matcher;
-
-    switch (extractor_type_) {
-        case ExtractorType::SIFT:
-            feature_extractor_ = cv::SIFT::create();
-            break;
-        case ExtractorType::ORB:
-            feature_extractor_ = cv::ORB::create();
-            break;
-        default:
-            // Error handling needed?
-            break;
-    }
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
-            break;
-        case MatcherType::KNN:
-            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
-            break;
-        case MatcherType::FLANN:
-            if (frontend_algorithm == ExtractorType::ORB) {
-                throw std::invalid_argument("FLANN can not be used with ORB.");
-            }
-            descriptor_matcher_ = cv::FlannBasedMatcher::create();
-            break;
-        default:
-            // Error handling needed?
-            break;
-    }
-}
-
-std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
-    const cv::Mat &img) const {
-    std::vector<cv::KeyPoint> keypoints;
-    cv::Mat descriptors;
-    switch (extractor_type_) {
-        default:  // the opencv extractors have the same function signature
-            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
-            break;
-    }
-    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
-}
-
-std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
-                                              const cv::Mat &descriptors2) const {
-    std::vector<cv::DMatch> matches;
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            get_brute_matches(descriptors1, descriptors2, matches);
-            break;
-        case MatcherType::KNN:
-            get_KNN_matches(descriptors1, descriptors2, matches);
-            break;
-        case MatcherType::FLANN:
-            get_FLANN_matches(descriptors1, descriptors2, matches);
-            break;
-        default:
-            break;
-    }
-    std::sort(matches.begin(), matches.end());
-    return matches;
-}
-
-void Frontend::threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold) {
-    matches.erase(std::remove_if(matches.begin(), matches.end(),
-                                 [dist_threshhold](const cv::DMatch &match) {
-                                     return match.distance > dist_threshhold;
-                                 }),
-                  matches.end());
-}
-
-void Frontend::enforce_bijective_matches(std::vector<cv::DMatch> &matches) {
-    std::unordered_map<int, cv::DMatch> bestQueryMatch;
-    std::unordered_map<int, cv::DMatch> bestTrainMatch;
-
-    for (const auto &match : matches) {
-        int queryIdx = match.queryIdx;
-        int trainIdx = match.trainIdx;
-
-        if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() ||
-            match.distance < bestQueryMatch[queryIdx].distance) {
-            bestQueryMatch[queryIdx] = match;
-        }
-
-        if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() ||
-            match.distance < bestTrainMatch[trainIdx].distance) {
-            bestTrainMatch[trainIdx] = match;
-        }
-    }
-
-    matches.erase(std::remove_if(matches.begin(), matches.end(),
-                                 [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) {
-                                     int queryIdx = match.queryIdx;
-                                     int trainIdx = match.trainIdx;
-
-                                     return bestQueryMatch[queryIdx].trainIdx != trainIdx ||
-                                            bestTrainMatch[trainIdx].queryIdx != queryIdx;
-                                 }),
-                  matches.end());
-}
-
-bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
-        return false;
-    }
-    matches_out.clear();
-    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
-    return true;
-}
-
-bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                               std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::KNN) {
-        return false;
-    }
-    std::vector<std::vector<cv::DMatch>> knn_matches;
-    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
-    const float ratio_thresh = 0.7f;
-    matches_out.clear();
-    // Lowe's Ratio Test
-    for (size_t i = 0; i < knn_matches.size(); i++) {
-        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
-            matches_out.push_back(knn_matches[i][0]);
-        }
-    }
-    return true;
-}
-
-bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::FLANN) {
-        return false;
-    }
-    std::vector<std::vector<cv::DMatch>> knn_matches;
-    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
-    const float ratio_thresh = 0.7f;
-    matches_out.clear();
-    for (size_t i = 0; i < knn_matches.size(); i++) {
-        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
-            matches_out.push_back(knn_matches[i][0]);
-        }
-    }
-    return true;
-}
-
-Backend::Backend() {
-    const size_t img_width = 640;
-    const size_t img_height = 480;
-    const double fx = 500.0;
-    const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
-
-    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
-    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
-}
-
-Backend::Backend(gtsam::Cal3_S2 K) {
-    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
-    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
-}
-
-template <>
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value,
-                               const gtsam::SharedNoiseModel &noise) {
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, noise);
-    initial_estimate_.insert_or_assign(symbol, value);
-    // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
-    // initial_estimate_.print("values after adding prior: ");
-}
-
-template <>
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value,
-                               const gtsam::SharedNoiseModel &noise) {
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Point3>>(symbol, value, noise);
-    gtsam::Rot3 R;
-    if (initial_estimate_.exists(symbol)) {
-        R = initial_estimate_.at<gtsam::Pose3>(symbol).rotation();
-    }
-    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value));
-}
-
-template <>
-void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &symbol_1,
-                                               const gtsam::Symbol &symbol_2,
-                                               const gtsam::Pose3 &value,
-                                               const gtsam::SharedNoiseModel &model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, model);
-    // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 <<
-    // std::endl; initial_estimate_.print("values when adding between factor: ");
-    initial_estimate_.insert_or_assign(symbol_2,
-                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
-}
-
-template <>
-void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
-                                              const gtsam::Symbol &symbol_2,
-                                              const gtsam::Rot3 &value,
-                                              const gtsam::SharedNoiseModel &model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
-    initial_estimate_.insert_or_assign(symbol_2,
-                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(
-                                           gtsam::Pose3(value, gtsam::Point3::Zero())));
-}
-
-void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam,
-                             const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) {
-    graph_.emplace_shared<gtsam::GPSFactor>(symbol, t_world_cam, model);
-    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam));
-}
-
-std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(
-    const gtsam::Symbol &lmk_symbol) {
-    std::vector<gtsam::Pose3> cam_poses;
-    std::vector<gtsam::Point2> observations;
-
-    // Iterate over all factors in the graph
-    for (const auto &factor : graph_) {
-        auto projFactor = boost::dynamic_pointer_cast<
-            gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(factor);
-
-        if (projFactor && projFactor->keys().at(1) == lmk_symbol) {
-            // Get the camera pose symbol
-            gtsam::Symbol cameraSymbol = projFactor->keys().at(0);
-
-            // Retrieve the camera pose from values
-            if (!initial_estimate_.exists(cameraSymbol)) continue;
-            gtsam::Pose3 cameraPose = initial_estimate_.at<gtsam::Pose3>(cameraSymbol);
-
-            // Get the 2D observation (keypoint measurement)
-            gtsam::Point2 observation = projFactor->measured();
-
-            // Store the pose and corresponding observation
-            observations.push_back(observation);
-            cam_poses.push_back(cameraPose);
-        }
-    }
-    return std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>>(cam_poses,
-                                                                            observations);
-}
-
-void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
-    for (const Landmark &landmark : landmarks) {
-        add_landmark(landmark);
-    }
-}
-
-void Backend::add_landmark(const Landmark &landmark) {
-    graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-        landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
-        K_);
-    if (!initial_estimate_.exists(landmark.cam_pose_symbol)) {
-        throw std::runtime_error(
-            "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before "
-            "add_landmark is called.");
-    }
-    gtsam::Point3 p_world_lmk_estimate =
-        initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess;
-    initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-
-    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs =
-        get_obs_for_lmk(landmark.lmk_factor_symbol);
-    if (lmk_obs.first.size() >= 2) {
-        try {
-            // Attempt triangulation using DLT (or the GTSAM provided method)
-            p_world_lmk_estimate = gtsam::triangulatePoint3(
-                lmk_obs.first, K_,
-                gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
-
-            // Optional: perform an explicit cheirality check
-            bool valid = true;
-            for (const auto &pose : lmk_obs.first) {
-                // Transform point to the camera coordinate system.
-                // transformTo() converts a world point to the camera frame.
-                gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate);
-                if (p_cam.z() <= 0) {  // Check that the depth is positive
-                    valid = false;
-                    break;
-                }
-            }
-
-            if (valid) {
-                initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-            } else {
-                std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess."
-                          << std::endl;
-            }
-        } catch (const gtsam::TriangulationCheiralityException &e) {
-            // Handle the exception gracefully by logging and retaining the previous estimate.
-            std::cerr << "Triangulation Cheirality Exception: " << e.what()
-                      << ". Keeping initial landmark estimate." << std::endl;
-        }
-    }
-}
-
-void Backend::solve_graph() {
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
-    result_ = optimizer.optimize();
-}
-
-void Backend::solve_graph(const int num_steps,
-                          std::optional<Backend::graph_step_debug_func> inter_debug_func) {
-    gtsam::LevenbergMarquardtParams params;
-    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-    params.maxIterations = 1;          // We'll manually step it
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
-
-    double prev_error = optimizer.error();
-    for (int i = 0; i < num_steps; i++) {
-        optimizer.iterate();
-        double curr_error = optimizer.error();
-
-        if (inter_debug_func) {
-            (*inter_debug_func)(optimizer.values(), i);
-        }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
-        }
-    }
-    result_ = optimizer.values();
-}
-
 void StructureFromMotion::graph_values(const gtsam::Values &values,
                                        const std::string &window_name) {
     std::vector<Eigen::Isometry3d> final_poses;
     std::vector<Eigen::Vector3d> final_lmks;
-    for (size_t i = 0; i < img_keypoints_and_descriptors_.size(); i++) {
+    for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) {
         final_poses.emplace_back(
             values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix());
     }
-    for (int i = 0; i < static_cast<int>(get_matches().size()); i++) {
-        // NOTE: this j for lmk_symbol is wrong.
-        for (int j = 0; j < static_cast<int>(get_matches()[i].size()); j++) {
-            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-            if (values.exists(lmk_symbol)) {
-                final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
-            } else {
-                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-            }
+    for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) {
+        if (!values.exists(lmk_symbol)) {
+            std::cout << "WTF " << lmk_symbol << std::endl;
         }
+        final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
     }
     geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
 }
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index ada233108..a4e3850d2 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -2,181 +2,23 @@
 
 #include <functional>
 #include <map>
+#include <memory>
+#include <optional>
 #include <set>
 #include <unordered_map>
 #include <utility>
 
-#include "Eigen/Core"
+#include "Eigen/Dense"
 #include "common/geometry/camera.hh"
+#include "experimental/learn_descriptors/backend.hh"
 #include "experimental/learn_descriptors/feature_manager.hh"
-#include "gtsam/geometry/Point3.h"
+#include "experimental/learn_descriptors/frontend.hh"
 #include "gtsam/geometry/Pose3.h"
 #include "gtsam/inference/Symbol.h"
-#include "gtsam/linear/NoiseModel.h"
-#include "gtsam/navigation/GPSFactor.h"
-#include "gtsam/nonlinear/NonlinearFactorGraph.h"
 #include "gtsam/nonlinear/Values.h"
 #include "opencv2/opencv.hpp"
 
-// namespace std {
-// template <>
-// struct hash<cv::KeyPoint> {
-//     size_t operator()(const cv::KeyPoint &kp) const {
-//         size_t h1 = hash<float>()(kp.pt.x);
-//         size_t h2 = hash<float>()(kp.pt.y);
-//         size_t h3 = hash<float>()(kp.size);
-//         size_t h4 = hash<float>()(kp.angle);
-//         size_t h5 = hash<float>()(kp.response);
-//         size_t h6 = hash<int>()(kp.octave);
-//         size_t h7 = hash<int>()(kp.class_id);
-
-//         return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
-//     }
-// };
-// }  // namespace std
-// namespace cv {
-// inline bool operator==(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) {
-//     return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
-//            kp1.response == kp2.response && kp1.octave == kp2.octave && kp1.class_id ==
-//            kp2.class_id;
-// }
-// }  // namespace cv
 namespace robot::experimental::learn_descriptors {
-class Frontend {
-   public:
-    enum class ExtractorType { SIFT, ORB };
-    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
-
-    Frontend(){};
-    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
-    ~Frontend(){};
-
-    const ExtractorType &get_extractor_type() const { return extractor_type_; };
-    const MatcherType &get_matcher_type() const { return matcher_type_; };
-
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
-        const cv::Mat &img) const;
-    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
-                                        const cv::Mat &descriptors2) const;
-    static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
-    static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
-    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
-                               cv::Mat img_keypoints_out) {
-        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
-                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
-    }
-    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
-                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
-                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
-        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
-    }
-
-   private:
-    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                         std::vector<cv::DMatch> &matches_out) const;
-    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    ExtractorType extractor_type_;
-    MatcherType matcher_type_;
-
-    cv::Ptr<cv::Feature2D> feature_extractor_;
-    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
-};
-class Backend {
-   public:
-    static constexpr char pose_symbol_char = 'x';
-    static constexpr char pose_rot_symbol_char = 'r';
-    static constexpr char pose_translation_symbol_char = 't';
-    static constexpr char pose_bearing_symbol_char = 'b';
-    static constexpr char landmark_symbol_char = 'l';
-    static constexpr char camera_symbol_char = 'k';
-
-    struct Landmark {
-        Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol,
-                 const gtsam::Point2 &projection, const gtsam::Cal3_S2 K,
-                 float initial_depth_guess = 5.0)
-            : lmk_factor_symbol(lmk_factor_symbol),
-              cam_pose_symbol(cam_pose_symbol),
-              projection(projection),
-              p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K),
-                                                         projection, initial_depth_guess)){};
-        const gtsam::Symbol lmk_factor_symbol;
-        const gtsam::Symbol cam_pose_symbol;
-        const gtsam::Point2 projection;
-        const gtsam::Point3 p_cam_lmk_guess;
-    };
-
-    Backend();
-    Backend(gtsam::Cal3_S2 K);
-    ~Backend(){};
-
-    template <typename T>
-    void add_prior_factor(const gtsam::Symbol &symbol, const T &value,
-                          const gtsam::SharedNoiseModel &model);
-
-    template <typename T>
-    void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
-                            const T &value, const gtsam::SharedNoiseModel &model);
-
-    void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps,
-                        const gtsam::SharedNoiseModel &model,
-                        const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity());
-
-    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(
-        const gtsam::Symbol &lmk_symbol);
-    void add_landmarks(const std::vector<Landmark> &landmarks);
-    void add_landmark(const Landmark &landmark);
-
-    void solve_graph();
-    typedef int epoch;
-    using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
-    void solve_graph(const int num_steps,
-                     std::optional<graph_step_debug_func> inter_debug_func = std::nullopt);
-
-    const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
-    const gtsam::Values &get_result() const { return result_; };
-    const gtsam::Cal3_S2 &get_K() const { return *K_; };
-
-    const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; };
-    const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; };
-    const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; };
-    const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; };
-
-   private:
-    gtsam::Cal3_S2::shared_ptr K_;
-
-    gtsam::Values initial_estimate_;
-    gtsam::Values result_;
-    gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
-        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
-    gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
-    gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(3, 2.);
-};
-
-template <>
-void Backend::add_prior_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Pose3 &,
-                                             const gtsam::SharedNoiseModel &);
-
-template <>
-void Backend::add_prior_factor<gtsam::Point3>(const gtsam::Symbol &, const gtsam::Point3 &,
-                                              const gtsam::SharedNoiseModel &);
-
-template <>
-void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Symbol &,
-                                               const gtsam::Pose3 &,
-                                               const gtsam::SharedNoiseModel &);
-template <>
-void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam::Symbol &,
-                                              const gtsam::Rot3 &, const gtsam::SharedNoiseModel &);
-
 class StructureFromMotion {
    public:
     static const Eigen::Isometry3d T_symlake_boat_cam;
@@ -210,7 +52,7 @@ class StructureFromMotion {
     const std::vector<std::vector<cv::DMatch>> get_matches() { return matches_; };
 
    private:
-    FeatureManager feature_manager_;
+    std::shared_ptr<FeatureManager> feature_manager_;
 
     gtsam::Pose3 initial_pose_;
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index c856abd71..3da914853 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -18,120 +18,7 @@ class GtsamTestHelper {
 };
 
 namespace robot::experimental::learn_descriptors {
-TEST(SFM_TEST, frontend_pipeline_sweep) {
-    const size_t width = 640;
-    const size_t height = 480;
-
-    const size_t pixel_shift_x = 20;
-    const size_t PIXEL_COMP_TOL = 20;
-
-    cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3);
-    cv::Mat image_2;
-
-    cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
-    cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
-    float com_x = 0.0f, com_y = 0.0f;
-    for (const cv::Point &rect_point : rect_points) {
-        com_x += rect_point.x;
-        com_y += rect_point.y;
-    }
-    com_x *= 0.25;
-    com_y *= 0.25;
-    cv::Point rotation_center(com_x, com_y);
-    cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0);
-
-    const size_t line_spacing = 100;
-    for (size_t i = 0; i <= width / line_spacing; i++) {
-        size_t x = i * line_spacing + (width % line_spacing) / 2;
-        size_t b = i * 255.0 / (width / line_spacing);
-        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
-        cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2);
-    }
-    for (size_t i = 0; i <= height / line_spacing; i++) {
-        size_t y = i * line_spacing + (height % line_spacing) / 2;
-        size_t b = i * 255.0 / (width / line_spacing);
-        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
-        cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2);
-    }
-
-    cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
-    cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
-
-    // cv::Mat img_test_disp;
-    // cv::hconcat(image_1, image_2, img_test_disp);
-    // cv::imshow("Test", img_test_disp);
-    // cv::waitKey(1000);
-
-    Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
-                                                  Frontend::ExtractorType::ORB};
-    Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE,
-                                              Frontend::MatcherType::FLANN,
-                                              Frontend::MatcherType::KNN};
-
-    Frontend frontend;
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
-    std::vector<cv::DMatch> matches;
-    cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
-        img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
-    // cv::Mat img_display_test;
-    for (Frontend::ExtractorType extractor_type : extractor_types) {
-        for (Frontend::MatcherType matcher_type : matcher_types) {
-            // printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
-            //        static_cast<int>(matcher_type));
-            try {
-                frontend = Frontend(extractor_type, matcher_type);
-            } catch (const std::invalid_argument &e) {
-                assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
-                continue;
-            }
-            keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1);
-            keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2);
-            matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
-                                           keypoints_descriptors_pair_2.second);
-            frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
-                                    img_keypoints_out_1);
-            frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
-                                    img_keypoints_out_2);
-            frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
-                                  keypoints_descriptors_pair_2.first, matches, img_matches_out);
-            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
-            // std::stringstream text;
-            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
-            //      << static_cast<int>(matcher_type);
-            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
-            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
-            // cv::imshow("Keypoints and Matches Output.", img_display_test);
-            // std::cout << "Press spacebar to pause." << std::endl;
-            // while (cv::waitKey(1000) == 32) {
-            // }
-            // printf("completed frontend combination: (%d, %d)\n",
-            // static_cast<int>(extractor_type),
-            //        static_cast<int>(matcher_type));
-            if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
-                for (const cv::DMatch match : matches) {
-                    EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
-                                    keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
-                                pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL);
-                    EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y -
-                                    keypoints_descriptors_pair_1.first[match.queryIdx].pt.y,
-                                0, PIXEL_COMP_TOL);
-                }
-            }
-        }
-    }
-}
-
-// TEST(SFM_TEST, boat_and_cam_frames) {
-//     // world is boat frame. +x is boat "forward", +y is the right side, +z is "down" into the
-//     water/hull Eigen::Isometry3d T_boat_cam = StructureFromMotion::T_symlake_boat_cam;
-//     T_boat_cam.translation() = Eigen::Vector3d::Ones();
-//     geometry::viz_scene(std::vector<Eigen::Isometry3d>{T_boat_cam},
-//     std::vector<Eigen::Vector3d>());
-// }
-
-TEST(SFM_TEST, sfm_snippet_small) {
+TEST(StructureFromMotiontest, sfm_snippet_small) {
     const std::vector<int> indices{120, 130};  // 0-199
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
@@ -163,36 +50,12 @@ TEST(SFM_TEST, sfm_snippet_small) {
         Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
         Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
 
-        // T_world_cam.linear() = T_world_camera0.linear().matrix();
-
         sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
-    // for (const cv::Mat &image : images) {
-    //     sfm.add_image(image);
-    // }
 
     const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
-    std::vector<Eigen::Isometry3d> poses_world;
-    for (size_t i = 0; i < indices.size(); i++) {
-        gtsam::Pose3 pose =
-            initial_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i));
-        poses_world.emplace_back(pose.matrix());
-    }
-    std::vector<Eigen::Vector3d> points_world;
-    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
-        // NOTE: this j for lmk_symbol is wrong.
-        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
-            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-            if (initial_values.exists(lmk_symbol)) {
-                points_world.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
-            } else {
-                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-            }
-        }
-    }
-    // std::cout << poses_world.front() << std::endl;
 
-    geometry::viz_scene(poses_world, points_world, true, true);
+    sfm.graph_values(initial_values, "initial_values");
 
     std::cout << "Solving for structure!" << std::endl;
 
@@ -201,28 +64,10 @@ TEST(SFM_TEST, sfm_snippet_small) {
     std::cout << "Solution complete." << std::endl;
 
     const gtsam::Values result_values = sfm.get_structure_result();
-    std::vector<Eigen::Isometry3d> final_poses;
-    std::vector<Eigen::Vector3d> final_lmks;
-    for (size_t i = 0; i < indices.size(); i++) {
-        final_poses.emplace_back(
-            result_values.at<gtsam::Pose3>(gtsam::Symbol(sfm.get_backend().pose_symbol_char, i))
-                .matrix());
-    }
-    for (int i = 0; i < static_cast<int>(sfm.get_matches().size()); i++) {
-        // NOTE: this j for lmk_symbol is wrong.
-        for (int j = 0; j < static_cast<int>(sfm.get_matches()[i].size()); j++) {
-            gtsam::Symbol lmk_symbol = gtsam::Symbol(Backend::landmark_symbol_char, j);
-            if (initial_values.exists(lmk_symbol)) {
-                final_lmks.emplace_back(initial_values.at<gtsam::Point3>(lmk_symbol));
-            } else {
-                std::cout << "lmk symbol doesn't exist in initial_values!" << std::endl;
-            }
-        }
-    }
-    geometry::viz_scene(final_poses, final_lmks, true, true);
+    sfm.graph_values(result_values, "result_values");
 }
 
-TEST(SFM_TEST, sfm_building) {
+TEST(StructureFromMotiontest, sfm_building) {
     // indices 0-199
     const std::vector<int> indices = []() {
         std::vector<int> tmp;
@@ -284,7 +129,7 @@ TEST(SFM_TEST, sfm_building) {
     sfm.graph_values(result_values, "optimized values");
 }
 
-TEST(SFM_TEST, random_test) {
+TEST(StructureFromMotiontest, random_test) {
     std::cout << DataParser::T_boat_gps.matrix() << std::endl;
     geometry::viz_scene(
         std::vector<Eigen::Isometry3d>{DataParser::T_boat_imu, DataParser::T_boat_gps},

From d586265c7d2d211cc16cde25c4e3f6fb7c7c83f7 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 18 Apr 2025 13:52:49 -0400
Subject: [PATCH 17/45] WIP. Focus on sfm_mvp.cc first, then fill out
 object-oriented structure for pipeline

---
 experimental/learn_descriptors/BUILD          |  60 +++++-
 experimental/learn_descriptors/backend.hh     |   9 +-
 experimental/learn_descriptors/frame.cc       |   9 +
 experimental/learn_descriptors/frame.hh       |  27 +++
 .../learn_descriptors/frontend_definitions.hh |  42 ++++
 experimental/learn_descriptors/sfm_mvp.cc     | 197 ++++++++++++++++++
 .../structure_from_motion.cc                  |  39 ++--
 .../structure_from_motion.hh                  |   4 +-
 .../structure_from_motion_test.cc             |   1 +
 .../structure_from_motion_types.hh            |  19 ++
 10 files changed, 379 insertions(+), 28 deletions(-)
 create mode 100644 experimental/learn_descriptors/frame.cc
 create mode 100644 experimental/learn_descriptors/frame.hh
 create mode 100644 experimental/learn_descriptors/frontend_definitions.hh
 create mode 100644 experimental/learn_descriptors/sfm_mvp.cc
 create mode 100644 experimental/learn_descriptors/structure_from_motion_types.hh

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index e6701405d..9ebb247ec 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -29,7 +29,7 @@ cc_library(
     "@gtsam//:gtsam",
     "@eigen",
     "//common/geometry:opencv_viz",
-    ":feature_manager",
+    # ":feature_manager",
     ":frontend",
     ":backend"
   ]
@@ -46,7 +46,8 @@ cc_test(
     ":symphony_lake_parser",
     ":structure_from_motion",
     "//common/geometry:opencv_viz",
-    "//common/geometry:translate_types"
+    "//common/geometry:translate_types",
+    ":frame"
   ]
 )
 
@@ -96,6 +97,59 @@ cc_binary(
   ]
 )
 
+cc_library(
+  name = "frontend_definitions",
+  hdrs = ["frontend_definitions.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",    
+    ":structure_from_motion_types"
+  ]
+)
+
+cc_library(
+  name = "structure_from_motion_types",
+  hdrs = ["structure_from_motion_types.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",    
+    "@opencv//:opencv"
+  ]
+)
+
+cc_library(
+  name = "frame",
+  hdrs = ["frame.hh"],
+  srcs = ["frame.cc"],
+  visibility = ["//visibility:public"],
+  deps = [             
+    "@opencv//:opencv",
+    ":frontend_definitions",
+    "@gtsam//:gtsam"
+  ]
+)
+
+cc_test(
+  name = "sfm_mvp",
+  srcs = ["sfm_mvp.cc"],
+    copts = [
+    "-Wno-error=unused-parameter",
+    "-Wno-error=unused-variable",
+  ],
+  deps = [
+    ":frontend",
+    "@com_google_googletest//:gtest_main",
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    "@eigen",
+    ":spatial_test_scene_cube",
+    ":symphony_lake_parser",
+    ":frame", 
+    ":structure_from_motion_types"
+  ]
+)
+
+
 cc_library(
   name = "feature_set",
   hdrs = ["feature_set.hh"],
@@ -199,7 +253,7 @@ cc_library(
     "@opencv//:opencv",    
     "@gtsam//:gtsam",
     "//common/geometry:camera",
-    ":feature_manager"
+    # ":feature_manager"
   ]
 )
 
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
index fa988b905..968bb26aa 100644
--- a/experimental/learn_descriptors/backend.hh
+++ b/experimental/learn_descriptors/backend.hh
@@ -6,7 +6,7 @@
 #include <vector>
 
 #include "common/geometry/camera.hh"
-#include "experimental/learn_descriptors/feature_manager.hh"
+// #include "experimental/learn_descriptors/feature_manager.hh"
 #include "gtsam/geometry/Cal3_S2.h"
 #include "gtsam/geometry/Point2.h"
 #include "gtsam/geometry/Point3.h"
@@ -42,8 +42,9 @@ class Backend {
         const gtsam::Point3 p_cam_lmk_guess;
     };
 
-    Backend(std::shared_ptr<FeatureManager> feature_manager = nullptr);
-    Backend(std::shared_ptr<FeatureManager> feature_manager, gtsam::Cal3_S2 K);
+    // Backend(std::shared_ptr<FeatureManager> feature_manager = nullptr);
+    // Backend(std::shared_ptr<FeatureManager> feature_manager, gtsam::Cal3_S2 K);
+    Backend(gtsam::Cal3_S2::shared_ptr K) : K_(K){};
     ~Backend(){};
 
     template <typename T>
@@ -79,7 +80,7 @@ class Backend {
     const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; };
 
    private:
-    std::shared_ptr<FeatureManager> feature_manager_;
+    // std::shared_ptr<FeatureManager> feature_manager_;
     gtsam::Cal3_S2::shared_ptr K_;
 
     gtsam::Values initial_estimate_;
diff --git a/experimental/learn_descriptors/frame.cc b/experimental/learn_descriptors/frame.cc
new file mode 100644
index 000000000..139c27858
--- /dev/null
+++ b/experimental/learn_descriptors/frame.cc
@@ -0,0 +1,9 @@
+#include "experimental/learn_descriptors/frame.hh"
+
+namespace robot::experimental::learn_descriptors {
+void Frame::add_keypoints(const KeypointsCV& kpts) {
+    kpts_.insert(kpts_.end(), kpts.begin(), kpts.end());
+};
+
+void Frame::assign_descriptors(const cv::Mat& descriptors) { descriptors_ = descriptors; };
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
new file mode 100644
index 000000000..720aa1f59
--- /dev/null
+++ b/experimental/learn_descriptors/frame.hh
@@ -0,0 +1,27 @@
+#pragma once
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Pose3.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class Frame {
+   public:
+    Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K,
+          const gtsam::Pose3& T_wrld_grnd_truth)
+        : id_(id), img_(img_undistorted), K_(K), groundtruth_(T_wrld_grnd_truth) {}
+
+    void add_keypoints(const KeypointsCV& kpts);
+    void assign_descriptors(const cv::Mat& descriptors);
+
+    const KeypointsCV get_keypoints() { return kpts_; };
+    const cv::Mat get_descriptors() { return descriptors_; };
+
+    const FrameId id_;
+    const cv::Mat img_;
+    gtsam::Cal3_S2::shared_ptr K_;
+    KeypointsCV kpts_;
+    cv::Mat descriptors_;
+    gtsam::Pose3 groundtruth_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
new file mode 100644
index 000000000..9883230b3
--- /dev/null
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <ostream>
+#include <unordered_map>
+#include <vector>
+
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "opencv2/opencv.hpp"
+
+namespace std {
+template <>
+struct hash<std::pair<FrameId, KeypointCV>> {
+    size_t operator()(const std::pair<FrameId, KeypointCV>& p) const {
+        size_t h1 = std::hash<FrameId>()(p.first);
+        size_t h2 = std::hash<float>()(p.second.x) ^ (std::hash<float>()(p.second.y) << 1);
+        return h1 ^ (h2 << 1);  // combine hashes
+    }
+};
+}  // namespace std
+
+namespace robot::experimental::learn_descriptors {
+class FeatureTrack {
+   public:
+    std::vector<std::pair<FrameId, KeypointCV>> obs_;
+
+    bool in_ba_graph_;
+
+    FeatureTrack(FrameId frame_id, const KeypointCV& px) { obs_.emplace_back(frame_id, px); }
+
+    void print() const {
+        std::cout << "Feature track with cameras: ";
+        for (size_t i = 0u; i < obs_.size(); i++) {
+            std::cout << " " << obs_[i].first << " ";
+        }
+        std::cout << std::endl;
+    }
+};
+
+using FeatureTracks = std::unordered_map<LandmarkId, FeatureTrack>;
+using LandmarkIdMap = std::unordered_map<std::pair<FrameId, KeypointCV>, LandmarkId>;
+
+}  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
new file mode 100644
index 000000000..3df433238
--- /dev/null
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -0,0 +1,197 @@
+#include <utility>
+
+#include "Eigen/Dense"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "experimental/learn_descriptors/symphony_lake_parser.hh"
+#include "gtest/gtest.h"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Point2.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "gtsam/geometry/triangulation.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/linear/NoiseModel.h"
+#include "gtsam/navigation/GPSFactor.h"
+#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
+#include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/BetweenFactor.h"
+#include "gtsam/slam/PriorFactor.h"
+#include "gtsam/slam/ProjectionFactor.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+TEST(SFMMvp, sfm_building_manual) {
+    // indices 0-199
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 0; i < 200; i += 10) {
+            tmp.push_back(i);
+        }
+        return tmp;
+    }();
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
+
+    // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
+    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
+    gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
+    Eigen::Matrix<double, 5, 1> D =
+        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
+         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+            .finished();
+    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
+    cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
+
+    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
+    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_world_boat0;
+    T_world_boat0.linear() = T_earth_boat0.linear();
+    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+    //                         gtsam::Pose3(T_world_camera0.matrix()));
+    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
+
+    gtsam::Values initial_estimate_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::noiseModel::Diagonal::shared_ptr pose_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
+    gtsam::noiseModel::Isotropic::shared_ptr translation_noise =
+        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
+    gtsam::noiseModel::Isotropic::shared_ptr gps_noise = gtsam::noiseModel::Isotropic::Sigma(3, 2.);
+
+    std::vector<Frame> frames;
+    FrameId id = 0;
+    for (const int &idx : indices) {
+        const cv::Mat img = survey.loadImageByImageIndex(idx);
+        cv::Mat img_undistorted;
+        cv::undistort(img, img_undistorted, K_mat, D_mat);
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
+
+        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
+        T_world_boat.translation() -= T_earth_boat0.translation();
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
+
+        graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
+                                                                T_world_cam_gtsam, pose_noise);
+
+        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
+
+        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
+            frontend.get_keypoints_and_descriptors(img_undistorted);
+        KeypointsCV kpts;
+        for (auto kpt : kpts_descs.first) {
+            kpts.push_back(kpt.pt);
+        }
+        frame.add_keypoints(kpts);
+        frame.assign_descriptors(kpts_descs.second);
+
+        frames.push_back(frame);
+
+        id++;
+    }
+
+    FeatureTracks feature_tracks;
+    LandmarkIdMap lmk_id_map;
+    LandmarkId lmk_id;
+    for (size_t i = 0; i < indices.size() - 1; i++) {
+        std::vector<cv::DMatch> matches =
+            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+        frontend.enforce_bijective_matches(matches);
+        for (const cv::DMatch match : matches) {
+            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+            auto key = std::make_pair(i, kpt_cam0);
+            if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                auto id = lmk_id_map.at(key);
+                feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+                feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+            } else {
+                FeatureTrack feature_track(i, kpt_cam0);
+                feature_track.obs_.emplace_back(i + 1, kpt_cam1);
+                feature_tracks.emplace(lmk_id, feature_track);
+                lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id);
+                lmk_id++;
+            }
+        }
+    }
+
+    std::vector<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>> projection_factors;
+    for (const std::pair<LandmarkId, FeatureTrack> &lmk_feat : feature_tracks) {
+        // std::vector<FrameId> cam_poses;
+        // std::vector<
+        LandmarkId lmk_id = lmk_feat.first;
+        FeatureTrack feature_track = lmk_feat.second;
+        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
+            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                feat_track.second, landmark_noise, gtsam::Symbol('x', feat_track.first),
+                gtsam::Symbol('l', lmk_id), K);
+        }
+    }
+
+    if (feature_track.obs_.size() >= 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            gtsam::Point3 p_ = gtsam::triangulatePoint3(
+                lmk_obs.first, K,
+                gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
+
+            // Optional: perform an explicit cheirality check
+            bool valid = true;
+            for (const auto &pose : lmk_obs.first) {
+                // Transform point to the camera coordinate system.
+                // transformTo() converts a world point to the camera frame.
+                gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate);
+                if (p_cam.z() <= 0) {  // Check that the depth is positive
+                    valid = false;
+                    break;
+                }
+            }
+
+            if (valid) {
+                initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
+            } else {
+                std::cerr << "Triangulated landmark failed cheirality check; keeping "
+                             "initial guess."
+                          << std::endl;
+            }
+        } catch (const gtsam::TriangulationCheiralityException &e) {
+            // Handle the exception gracefully by logging and retaining the previous
+            // estimate.
+            std::cerr << "Triangulation Cheirality Exception: " << e.what()
+                      << ". Keeping initial landmark estimate." << std::endl;
+        }
+    }
+}
+
+// const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
+// sfm.graph_values(initial_values, "initial values");
+
+// std::cout << "Solving for structure!" << std::endl;
+
+// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals,
+//                                                               const Backend::epoch iter) {
+//     std::cout << "iteration " << iter << " complete!";
+//     std::string window_name = "Iteration_" + std::to_string(iter);
+//     sfm.graph_values(vals, window_name);
+// };
+// sfm.solve_structure(5, solve_iter_debug_func);
+
+// std::cout << "Solution complete." << std::endl;
+
+// const gtsam::Values result_values = sfm.get_structure_result();
+// sfm.graph_values(result_values, "optimized values");
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index b28d8f009..5db771119 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -35,10 +35,11 @@ const gtsam::Pose3 StructureFromMotion::default_initial_pose(
 StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
                                          gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
                                          gtsam::Pose3 initial_pose,
-                                         Frontend::MatcherType frontend_matcher)
-    : feature_manager_(std::make_shared<FeatureManager>()), initial_pose_(initial_pose) {
+                                         Frontend::MatcherType frontend_matcher) {
+    // : feature_manager_(std::make_shared<FeatureManager>()), initial_pose_(initial_pose) {
     frontend_ = Frontend(frontend_extractor, frontend_matcher);
-    backend_ = Backend(feature_manager_, K);
+    backend_ = Backend(K);
+    // backend_ = Backend(feature_manager_, K);
 
     K_ = (cv::Mat_<double>(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1);
     D_ = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
@@ -56,11 +57,11 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
         frontend_.get_keypoints_and_descriptors(img);
-    feature_manager_->append_img_data(keypoints_and_descriptors.first,
-                                      keypoints_and_descriptors.second);
+    // feature_manager_->append_img_data(keypoints_and_descriptors.first,
+    //                                   keypoints_and_descriptors.second);
     // keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
     // const size_t idx_img_current = get_num_images_added();
-    const size_t idx_img_current = feature_manager_->get_num_images_added() - 1;
+    // const size_t idx_img_current = feature_manager_->get_num_images_added() - 1;
     std::cout << "current index " << idx_img_current << std::endl;
     if (idx_img_current > 0) {
         std::vector<cv::DMatch> matches =
@@ -81,7 +82,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
             size_t idx;
             unsigned char chr;
 
-            auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0);
+            // auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0);
             if (maybe_symbol0) {
                 idx = (*maybe_symbol0).index();
                 chr = (*maybe_symbol0).chr();
@@ -91,12 +92,12 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
                 idx = symbol_temp.index();
                 chr = symbol_temp.chr();
                 landmark_count_++;
-                feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp);
+                // feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp);
             }
             gtsam::Symbol symbol_lmk = gtsam::Symbol(chr, idx);
             std::cout << "value: char: " << chr << " , idx: " << idx << ". " << symbol_lmk
                       << std::endl;
-            feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk);
+            // feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk);
 
             const Backend::Landmark landmark_cam_0(
                 symbol_lmk, sym_T_w_c0,
@@ -136,16 +137,16 @@ void StructureFromMotion::graph_values(const gtsam::Values &values,
                                        const std::string &window_name) {
     std::vector<Eigen::Isometry3d> final_poses;
     std::vector<Eigen::Vector3d> final_lmks;
-    for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) {
-        final_poses.emplace_back(
-            values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix());
-    }
-    for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) {
-        if (!values.exists(lmk_symbol)) {
-            std::cout << "WTF " << lmk_symbol << std::endl;
-        }
-        final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
-    }
+    // for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) {
+    //     final_poses.emplace_back(
+    //         values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix());
+    // }
+    // for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) {
+    //     if (!values.exists(lmk_symbol)) {
+    //         std::cout << "WTF " << lmk_symbol << std::endl;
+    //     }
+    //     final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
+    // }
     geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
 }
 
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index a4e3850d2..bed65c15f 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -11,7 +11,7 @@
 #include "Eigen/Dense"
 #include "common/geometry/camera.hh"
 #include "experimental/learn_descriptors/backend.hh"
-#include "experimental/learn_descriptors/feature_manager.hh"
+// #include "experimental/learn_descriptors/feature_manager.hh"
 #include "experimental/learn_descriptors/frontend.hh"
 #include "gtsam/geometry/Pose3.h"
 #include "gtsam/inference/Symbol.h"
@@ -52,7 +52,7 @@ class StructureFromMotion {
     const std::vector<std::vector<cv::DMatch>> get_matches() { return matches_; };
 
    private:
-    std::shared_ptr<FeatureManager> feature_manager_;
+    // std::shared_ptr<FeatureManager> feature_manager_;
 
     gtsam::Pose3 initial_pose_;
     std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 3da914853..752daa6a3 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -7,6 +7,7 @@
 #include "Eigen/Geometry"
 #include "common/geometry/opencv_viz.hh"
 #include "common/geometry/translate_types.hh"
+#include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/symphony_lake_parser.hh"
 #include "gtest/gtest.h"
 
diff --git a/experimental/learn_descriptors/structure_from_motion_types.hh b/experimental/learn_descriptors/structure_from_motion_types.hh
new file mode 100644
index 000000000..582a3c70a
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion_types.hh
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "gtsam/geometry/Point3.h"
+#include "opencv2/opencv.hpp"
+
+using Timestamp = std::int64_t;
+
+using FrameId = std::uint64_t;  // Frame id is used as the index of gtsam symbol
+                                // (not as a gtsam key).
+using LandmarkId = long;        // -1 for invalid landmarks
+using LandmarkIds = std::vector<LandmarkId>;
+using Landmark = gtsam::Point3;
+using Landmarks = std::vector<Landmark, Eigen::aligned_allocator<Landmark>>;
+
+using KeypointCV = cv::Point2f;
+using KeypointsCV = std::vector<cv::Point2f>;

From a8cc405503c312199cf33e7fe71967ce2a663b42 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 18 Apr 2025 21:27:00 -0400
Subject: [PATCH 18/45] Debug time

---
 experimental/learn_descriptors/BUILD      |   3 +-
 experimental/learn_descriptors/sfm_mvp.cc | 401 ++++++++++++++--------
 2 files changed, 260 insertions(+), 144 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 9ebb247ec..681ebde27 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -145,7 +145,8 @@ cc_test(
     ":spatial_test_scene_cube",
     ":symphony_lake_parser",
     ":frame", 
-    ":structure_from_motion_types"
+    ":structure_from_motion_types",
+    "//common/geometry:opencv_viz"
   ]
 )
 
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index 3df433238..b2686f7b5 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -1,6 +1,8 @@
+#include <memory>
 #include <utility>
 
 #include "Eigen/Dense"
+#include "common/geometry/opencv_viz.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend.hh"
 #include "experimental/learn_descriptors/frontend_definitions.hh"
@@ -25,164 +27,278 @@
 #include "opencv2/opencv.hpp"
 
 namespace robot::experimental::learn_descriptors {
-TEST(SFMMvp, sfm_building_manual) {
-    // indices 0-199
-    const std::vector<int> indices = []() {
-        std::vector<int> tmp;
-        for (int i = 0; i < 200; i += 10) {
-            tmp.push_back(i);
-        }
-        return tmp;
-    }();
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
-
-    // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
-    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-    gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
-    Eigen::Matrix<double, 5, 1> D =
-        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-            .finished();
-    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
-    cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
-
-    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
-    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
-    Eigen::Isometry3d T_world_boat0;
-    T_world_boat0.linear() = T_earth_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
-    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-    //                         gtsam::Pose3(T_world_camera0.matrix()));
-    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
-
-    gtsam::Values initial_estimate_;
-    gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    gtsam::noiseModel::Diagonal::shared_ptr pose_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
-    gtsam::noiseModel::Isotropic::shared_ptr translation_noise =
-        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
-    gtsam::noiseModel::Isotropic::shared_ptr gps_noise = gtsam::noiseModel::Isotropic::Sigma(3, 2.);
-
-    std::vector<Frame> frames;
-    FrameId id = 0;
-    for (const int &idx : indices) {
-        const cv::Mat img = survey.loadImageByImageIndex(idx);
-        cv::Mat img_undistorted;
-        cv::undistort(img, img_undistorted, K_mat, D_mat);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-
-        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
-        T_world_boat.translation() -= T_earth_boat0.translation();
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
-        gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
-
-        graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
-                                                                T_world_cam_gtsam, pose_noise);
-
-        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
-
-        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.get_keypoints_and_descriptors(img_undistorted);
-        KeypointsCV kpts;
-        for (auto kpt : kpts_descs.first) {
-            kpts.push_back(kpt.pt);
+class SFMMvpHelper {
+ public:
+  static bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+                                  const std::vector<gtsam::Point2> &cam_obs,
+                                  gtsam::Cal3_S2::shared_ptr K,
+                                  gtsam::Point3 &out_p_world_landmark) {
+    bool valid = true;
+    if (cam_poses.size() >= 2) {
+      try {
+        // Attempt triangulation using DLT (or the GTSAM provided method)
+        gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
+            cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+        out_p_world_landmark = p_world_lmk;
+
+        // Optional: perform an explicit cheirality check
+        for (const auto &pose : cam_poses) {
+          // Transform point to the camera coordinate system.
+          // transformTo() converts a world point to the camera frame.
+          gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
+          if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            valid = false;
+            break;
+          }
         }
-        frame.add_keypoints(kpts);
-        frame.assign_descriptors(kpts_descs.second);
+      } catch (const gtsam::TriangulationCheiralityException &e) {
+        // Handle the exception gracefully by logging and retaining the previous
+        // estimate.
+        std::cerr << "Triangulation Cheirality Exception: " << e.what()
+                  << ". Keeping initial landmark estimate." << std::endl;
+      }
+    } else {
+      valid = false;
+    }
+    return valid;
+  }
 
-        frames.push_back(frame);
+  static void graph_values(
+      const gtsam::Values &values, const std::string &window_name,
+      const std::vector<gtsam::Symbol> &symbols_pose,
+      const std::vector<gtsam::Symbol> &symbols_landmarks) {
+    std::vector<Eigen::Isometry3d> final_poses;
+    std::vector<Eigen::Vector3d> final_lmks;
+    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
+      final_poses.emplace_back(values.at<gtsam::Pose3>(symbol_pose).matrix());
+    }
+    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
+      if (!values.exists(symbol_lmk)) {
+        std::cout << "WTF " << symbol_lmk << std::endl;
+      }
+      final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
+    }
+    geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
+  }
+};
 
-        id++;
+TEST(SFMMvp, sfm_building_manual) {
+  // indices 0-199
+  const std::vector<int> indices = []() {
+    std::vector<int> tmp;
+    for (int i = 0; i < 200; i += 10) {
+      tmp.push_back(i);
     }
+    return tmp;
+  }();
+  DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+  const symphony_lake_dataset::SurveyVector &survey_vector =
+      data_parser.get_surveys();
+  const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+  const symphony_lake_dataset::ImagePoint img_pt_first =
+      survey.getImagePoint(indices.front());
 
-    FeatureTracks feature_tracks;
-    LandmarkIdMap lmk_id_map;
-    LandmarkId lmk_id;
-    for (size_t i = 0; i < indices.size() - 1; i++) {
-        std::vector<cv::DMatch> matches =
-            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-        frontend.enforce_bijective_matches(matches);
-        for (const cv::DMatch match : matches) {
-            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-
-            auto key = std::make_pair(i, kpt_cam0);
-            if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                auto id = lmk_id_map.at(key);
-                feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-                feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-            } else {
-                FeatureTrack feature_track(i, kpt_cam0);
-                feature_track.obs_.emplace_back(i + 1, kpt_cam1);
-                feature_tracks.emplace(lmk_id, feature_track);
-                lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id);
-                lmk_id++;
-            }
-        }
+  // const size_t img_width = img_pt_first.width, img_height =
+  // img_pt_first.height;
+  const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+  const double cx = img_pt_first.cx, cy = img_pt_first.cy;
+  gtsam::Cal3_S2::shared_ptr K =
+      boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
+  Eigen::Matrix<double, 5, 1> D =
+      (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1,
+       SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1,
+       SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+          .finished();
+  cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(),
+                   K->py(), 0, 0, 1);
+  cv::Mat D_mat =
+      (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
+
+  // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0
+  // * T_boat_camera
+  Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
+  Eigen::Isometry3d T_world_boat0;
+  T_world_boat0.linear() = T_earth_boat0.linear();
+  Eigen::Isometry3d T_world_camera0 =
+      T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+  // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+  //                         gtsam::Pose3(T_world_camera0.matrix()));
+  Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
+
+  gtsam::Values initial_estimate_;
+  gtsam::NonlinearFactorGraph graph_;
+
+  gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
+      gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+  gtsam::noiseModel::Diagonal::shared_ptr pose_noise =
+      gtsam::noiseModel::Diagonal::Sigmas(
+          gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
+  gtsam::noiseModel::Isotropic::shared_ptr translation_noise =
+      gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
+  gtsam::noiseModel::Isotropic::shared_ptr gps_noise =
+      gtsam::noiseModel::Isotropic::Sigma(3, 2.);
+
+  // populate frames and cam_poses
+  std::vector<Frame> frames;
+  FrameId id = 0;
+  std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
+  for (const int &idx : indices) {
+    const cv::Mat img = survey.loadImageByImageIndex(idx);
+    cv::Mat img_undistorted;
+    cv::undistort(img, img_undistorted, K_mat, D_mat);
+    const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
+
+    Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
+    T_world_boat.translation() -= T_earth_boat0.translation();
+    Eigen::Isometry3d T_world_cam =
+        T_world_boat * DataParser::get_T_boat_camera(img_pt);
+    gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
+    cam_pose.emplace(id, T_world_cam_gtsam);
+
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+        gtsam::Symbol('x', id), T_world_cam_gtsam, pose_noise);
+
+    Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
+
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
+        frontend.get_keypoints_and_descriptors(img_undistorted);
+    KeypointsCV kpts;
+    for (const cv::KeyPoint &kpt : kpts_descs.first) {
+      kpts.push_back(kpt.pt);
     }
+    frame.add_keypoints(kpts);
+    frame.assign_descriptors(kpts_descs.second);
 
-    std::vector<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>> projection_factors;
-    for (const std::pair<LandmarkId, FeatureTrack> &lmk_feat : feature_tracks) {
-        // std::vector<FrameId> cam_poses;
-        // std::vector<
-        LandmarkId lmk_id = lmk_feat.first;
-        FeatureTrack feature_track = lmk_feat.second;
-        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                feat_track.second, landmark_noise, gtsam::Symbol('x', feat_track.first),
-                gtsam::Symbol('l', lmk_id), K);
-        }
+    frames.push_back(frame);
+
+    id++;
+  }
+
+  // populate feature_tracks and lmk_id_map
+  FeatureTracks feature_tracks;
+  LandmarkIdMap lmk_id_map;
+  LandmarkId lmk_id;
+  for (size_t i = 0; i < indices.size() - 1; i++) {
+    std::vector<cv::DMatch> matches = frontend.get_matches(
+        frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+    frontend.enforce_bijective_matches(matches);
+    for (const cv::DMatch match : matches) {
+      const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+      const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+      auto key = std::make_pair(i, kpt_cam0);
+      if (lmk_id_map.find(key) != lmk_id_map.end()) {
+        auto id = lmk_id_map.at(key);
+        feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+        feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+      } else {
+        FeatureTrack feature_track(i, kpt_cam0);
+        feature_track.obs_.emplace_back(i + 1, kpt_cam1);
+        feature_tracks.emplace(lmk_id, feature_track);
+        lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id);
+        lmk_id++;
+      }
     }
+  }
 
-    if (feature_track.obs_.size() >= 2) {
-        try {
-            // Attempt triangulation using DLT (or the GTSAM provided method)
-            gtsam::Point3 p_ = gtsam::triangulatePoint3(
-                lmk_obs.first, K,
-                gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
-
-            // Optional: perform an explicit cheirality check
-            bool valid = true;
-            for (const auto &pose : lmk_obs.first) {
-                // Transform point to the camera coordinate system.
-                // transformTo() converts a world point to the camera frame.
-                gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate);
-                if (p_cam.z() <= 0) {  // Check that the depth is positive
-                    valid = false;
-                    break;
-                }
-            }
-
-            if (valid) {
-                initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-            } else {
-                std::cerr << "Triangulated landmark failed cheirality check; keeping "
-                             "initial guess."
-                          << std::endl;
-            }
-        } catch (const gtsam::TriangulationCheiralityException &e) {
-            // Handle the exception gracefully by logging and retaining the previous
-            // estimate.
-            std::cerr << "Triangulation Cheirality Exception: " << e.what()
-                      << ". Keeping initial landmark estimate." << std::endl;
-        }
+  // populate graph
+  std::vector<gtsam::Symbol> symbols_pose;
+  std::vector<gtsam::Symbol> symbols_landmarks;
+  for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
+    std::vector<gtsam::Pose3> feat_cam_poses;
+    std::vector<gtsam::Point2> feat_kpts;
+    LandmarkId lmk_id = lmk_feat.first;
+    FeatureTrack feature_track = lmk_feat.second;
+    const gtsam::Symbol symbol_lmk('l', lmk_id);
+    symbols_landmarks.push_back(symbol_lmk);
+    for (const std::pair<FrameId, KeypointCV> &feat_track :
+         feature_track.obs_) {
+      initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0));
+      graph_.emplace_shared<
+          gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+          gtsam::Point2(feat_track.second.x, feat_track.second.y),
+          landmark_noise, gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
+
+      feat_cam_poses.push_back(cam_pose[feat_track.first]);
+      feat_kpts.push_back(
+          gtsam::Point2(feat_track.second.x, feat_track.second.y));
+    }
+    gtsam::Point3 p_wrld_kpt;
+    bool triangulate_success = SFMMvpHelper::attempt_triangulate(
+        feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+    if (triangulate_success) {
+      initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
     }
+  }
+  graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+      gtsam::Symbol('x', 0), cam_pose.at(0), pose_noise);
+  initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
+  symbols_pose.push_back(gtsam::Symbol('x', 0));
+  for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
+    if (cam_id_pose.first == 0) {
+      continue;
+    }
+    const gtsam::Symbol key_cam('x', cam_id_pose.first);
+    symbols_pose.push_back(key_cam);
+    gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
+    graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
+    initial_estimate_.insert_or_assign(key_cam,
+                                       gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
+  }
+
+  gtsam::LevenbergMarquardtParams params;
+  params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+  params.maxIterations = 1;          // We'll manually step it
+  gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_,
+                                               params);
+
+  double prev_error = optimizer.error();
+  const int num_steps = 5;
+  const bool viz_itr = true;
+  typedef int epoch;
+  std::function<void(const gtsam::Values &, const epoch,
+                     const std::vector<gtsam::Symbol> &,
+                     const std::vector<gtsam::Symbol> &)>
+      graph_itr_debug_func =
+          [&](const gtsam::Values &vals, const epoch iter,
+              const std::vector<gtsam::Symbol> &symbols_pose,
+              const std::vector<gtsam::Symbol> &symbols_landmarks) {
+            std::cout << "iteration " << iter << " complete!";
+            std::string window_name = "Iteration_" + std::to_string(iter);
+            SFMMvpHelper::graph_values(vals, window_name, symbols_pose,
+                                       symbols_landmarks);
+          };
+
+  SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose,
+                             symbols_landmarks);
+  for (int i = 0; i < num_steps; i++) {
+    optimizer.iterate();
+    double curr_error = optimizer.error();
+
+    if (viz_itr) {
+      graph_itr_debug_func(optimizer.values(), i, symbols_pose,
+                           symbols_landmarks);
+    }
+    if (std::abs(prev_error - curr_error) < 1e-6) {
+      std::cout << "Converged at iteration " << i << "\n";
+      break;
+    }
+  }
+  const gtsam::Values result = optimizer.values();
 }
 
-// const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
+// const gtsam::Values initial_values =
+// sfm.get_backend().get_current_initial_values();
 // sfm.graph_values(initial_values, "initial values");
 
 // std::cout << "Solving for structure!" << std::endl;
 
-// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals,
-//                                                               const Backend::epoch iter) {
+// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const
+// gtsam::Values &vals,
+//                                                               const
+//                                                               Backend::epoch
+//                                                               iter) {
 //     std::cout << "iteration " << iter << " complete!";
 //     std::string window_name = "Iteration_" + std::to_string(iter);
 //     sfm.graph_values(vals, window_name);
@@ -193,5 +309,4 @@ TEST(SFMMvp, sfm_building_manual) {
 
 // const gtsam::Values result_values = sfm.get_structure_result();
 // sfm.graph_values(result_values, "optimized values");
-}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file

From 981a0f2967221c08636d7656680fda208f1050a2 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 22 Apr 2025 12:45:18 -0400
Subject: [PATCH 19/45] SFM MVP working. Tried out an iterative + global
 optimization

---
 experimental/learn_descriptors/BUILD          |  11 +
 .../learn_descriptors/frontend_definitions.hh |   3 +-
 .../get_colmap_groundtruth.cc                 |  82 ++
 experimental/learn_descriptors/sfm_mvp.cc     | 845 ++++++++++++------
 4 files changed, 680 insertions(+), 261 deletions(-)
 create mode 100644 experimental/learn_descriptors/get_colmap_groundtruth.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 681ebde27..666668ad0 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -270,3 +270,14 @@ cc_test(
     ":feature_manager"
   ]
 )
+
+cc_binary(
+  name = "get_colmap_groundtruth",
+  srcs = ["get_colmap_groundtruth.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    "symphony_lake_parser"
+  ]
+)
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
index 9883230b3..188b91e53 100644
--- a/experimental/learn_descriptors/frontend_definitions.hh
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -37,6 +37,7 @@ class FeatureTrack {
 };
 
 using FeatureTracks = std::unordered_map<LandmarkId, FeatureTrack>;
-using LandmarkIdMap = std::unordered_map<std::pair<FrameId, KeypointCV>, LandmarkId>;
+using FrameLandmarkIdMap = std::unordered_map<std::pair<FrameId, KeypointCV>, LandmarkId>;
+using LandmarkFrameIdMap = std::unordered_map<LandmarkId, std::pair<FrameId, KeypointCV>>;
 
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/get_colmap_groundtruth.cc b/experimental/learn_descriptors/get_colmap_groundtruth.cc
new file mode 100644
index 000000000..6afc9fff1
--- /dev/null
+++ b/experimental/learn_descriptors/get_colmap_groundtruth.cc
@@ -0,0 +1,82 @@
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "experimental/learn_descriptors/symphony_lake_parser.hh"
+
+namespace ld = robot::experimental::learn_descriptors;
+
+struct ImagePose {
+    std::string filename;
+    double X;
+    double Y;
+    double Z;
+    double heading;
+};
+
+void writeCsv(const std::string& path, const std::vector<ImagePose>& rows) {
+    std::ofstream out(path);
+    if (!out) {
+        throw std::runtime_error("Failed to open " + path);
+    }
+
+    out << "filename,X,Y,Z,heading\n";
+
+    out << std::fixed << std::setprecision(6);
+    for (const auto& r : rows) {
+        out << r.filename << ',' << r.X << ',' << r.Y << ',' << r.Z << ',' << r.heading << '\n';
+    }
+}
+
+void writeTxt(const std::string& path, const std::vector<ImagePose>& rows) {
+    std::ofstream out(path);
+    if (!out) {
+        throw std::runtime_error("Failed to open " + path);
+    }
+
+    out << std::fixed << std::setprecision(6);
+    for (const auto& r : rows) {
+        out << r.filename << ' ' << r.X << ' ' << r.Y << ' ' << r.Z << ' ' << r.heading << '\n';
+    }
+}
+
+int main() {
+    ld::DataParser data_parser("/home/pizzaroll04/Documents/datasets",
+                               std::vector<std::string>{"symphony_lake_building_test"});
+
+    const symphony_lake_dataset::SurveyVector& survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey& survey = survey_vector.get(0);
+
+    std::vector<ImagePose> poses;
+    for (int i = 0; i < 718; i++) {
+        symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(i);
+        const Eigen::Isometry3d T_wrld_cam =
+            data_parser.get_T_world_boat(img_pt) * data_parser.get_T_boat_camera(img_pt);
+        ImagePose img_pose;
+
+        std::ostringstream oss;
+        oss << std::setw(4) << std::setfill('0') << i;
+        img_pose.filename = oss.str() + ".jpg";
+        img_pose.X = T_wrld_cam.translation().x();
+        img_pose.Y = T_wrld_cam.translation().y();
+        img_pose.Z = T_wrld_cam.translation().z();
+        Eigen::Matrix3d R = T_wrld_cam.linear();
+        img_pose.heading = std::atan2(R(1, 0), R(0, 0));
+
+        poses.push_back(img_pose);
+    }
+
+    try {
+        writeTxt("/home/pizzaroll04/Documents/datasets/symphony_lake_building_test/ref_images.txt",
+                 poses);
+        std::cout << "Wrote " << poses.size() << " rows to ref_images.csv\n";
+    } catch (const std::exception& e) {
+        std::cerr << "Error: " << e.what() << "\n";
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index b2686f7b5..f27c1ede7 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -26,287 +26,612 @@
 #include "gtsam/slam/ProjectionFactor.h"
 #include "opencv2/opencv.hpp"
 
+namespace std {
+template <>
+struct hash<gtsam::Symbol> {
+    size_t operator()(const gtsam::Symbol &s) const { return hash<uint64_t>()(s.key()); }
+};
+}  // namespace std
+
 namespace robot::experimental::learn_descriptors {
 class SFMMvpHelper {
- public:
-  static bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-                                  const std::vector<gtsam::Point2> &cam_obs,
-                                  gtsam::Cal3_S2::shared_ptr K,
-                                  gtsam::Point3 &out_p_world_landmark) {
-    bool valid = true;
-    if (cam_poses.size() >= 2) {
-      try {
-        // Attempt triangulation using DLT (or the GTSAM provided method)
-        gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
-            cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
-
-        out_p_world_landmark = p_world_lmk;
-
-        // Optional: perform an explicit cheirality check
-        for (const auto &pose : cam_poses) {
-          // Transform point to the camera coordinate system.
-          // transformTo() converts a world point to the camera frame.
-          gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
-          if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+   public:
+    // struct SymbolHasher {
+    //     size_t operator()(const gtsam::Symbol &s) const { return std::hash<uint64_t>()(s.key());
+    //     }
+    // };
+
+    static bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+                                    const std::vector<gtsam::Point2> &cam_obs,
+                                    gtsam::Cal3_S2::shared_ptr K,
+                                    gtsam::Point3 &out_p_world_landmark) {
+        bool valid = true;
+        if (cam_poses.size() >= 2) {
+            try {
+                // Attempt triangulation using DLT (or the GTSAM provided method)
+                gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
+                    cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+                out_p_world_landmark = p_world_lmk;
+
+                // Optional: perform an explicit cheirality check
+                for (const auto &pose : cam_poses) {
+                    // Transform point to the camera coordinate system.
+                    // transformTo() converts a world point to the camera frame.
+                    gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
+                    if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+                        valid = false;
+                        break;
+                    }
+                }
+            } catch (const gtsam::TriangulationCheiralityException &e) {
+                // Handle the exception gracefully by logging and retaining the previous
+                // estimate.
+                std::cerr << "Triangulation Cheirality Exception: " << e.what()
+                          << ". Keeping initial landmark estimate." << std::endl;
+            }
+        } else {
             valid = false;
-            break;
-          }
         }
-      } catch (const gtsam::TriangulationCheiralityException &e) {
-        // Handle the exception gracefully by logging and retaining the previous
-        // estimate.
-        std::cerr << "Triangulation Cheirality Exception: " << e.what()
-                  << ". Keeping initial landmark estimate." << std::endl;
-      }
-    } else {
-      valid = false;
+        return valid;
     }
-    return valid;
-  }
-
-  static void graph_values(
-      const gtsam::Values &values, const std::string &window_name,
-      const std::vector<gtsam::Symbol> &symbols_pose,
-      const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<Eigen::Isometry3d> final_poses;
-    std::vector<Eigen::Vector3d> final_lmks;
-    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
-      final_poses.emplace_back(values.at<gtsam::Pose3>(symbol_pose).matrix());
+
+    static void graph_values(const gtsam::Values &values, const std::string &window_name,
+                             const std::vector<gtsam::Symbol> &symbols_pose,
+                             const std::vector<gtsam::Symbol> &symbols_landmarks) {
+        std::vector<Eigen::Isometry3d> final_poses;
+        std::vector<Eigen::Vector3d> final_lmks;
+        for (const gtsam::Symbol &symbol_pose : symbols_pose) {
+            final_poses.emplace_back(values.at<gtsam::Pose3>(symbol_pose).matrix());
+        }
+        for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
+            if (!values.exists(symbol_lmk)) {
+                std::cout << "WTF " << symbol_lmk << std::endl;
+            }
+            final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
+        }
+        geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
     }
-    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
-      if (!values.exists(symbol_lmk)) {
-        std::cout << "WTF " << symbol_lmk << std::endl;
-      }
-      final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
+
+    static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
+                                        const gtsam::Values &values,
+                                        const std::vector<gtsam::Symbol> &symbols_pose,
+                                        const std::vector<gtsam::Symbol> &symbols_landmarks,
+                                        bool viz_itr = false) {
+        gtsam::LevenbergMarquardtParams params;
+        params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+        params.maxIterations = 1;
+        gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
+
+        double prev_error = optimizer.error();
+        const int num_steps = 5;
+        typedef int epoch;
+        std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
+                           const std::vector<gtsam::Symbol> &)>
+            graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
+                                       const std::vector<gtsam::Symbol> &symbols_pose,
+                                       const std::vector<gtsam::Symbol> &symbols_landmarks) {
+                std::cout << "iteration " << iter << " complete!";
+                std::string window_name = "Iteration_" + std::to_string(iter);
+                SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks);
+            };
+
+        for (int i = 0; i < num_steps; i++) {
+            optimizer.iterate();
+            double curr_error = optimizer.error();
+
+            if (viz_itr) {
+                graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
+            }
+            if (std::abs(prev_error - curr_error) < 1e-6) {
+                std::cout << "Converged at iteration " << i << "\n";
+                break;
+            }
+        }
+        return optimizer.values();
     }
-    geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
-  }
-};
 
-TEST(SFMMvp, sfm_building_manual) {
-  // indices 0-199
-  const std::vector<int> indices = []() {
-    std::vector<int> tmp;
-    for (int i = 0; i < 200; i += 10) {
-      tmp.push_back(i);
+    static gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
+        if (poses.empty()) throw std::runtime_error("Empty pose vector");
+
+        gtsam::Pose3 mean = poses[0];
+
+        for (int iter = 0; iter < maxIter; ++iter) {
+            gtsam::Vector6 total = gtsam::Vector6::Zero();
+
+            for (const auto &pose : poses) {
+                gtsam::Pose3 delta = mean.between(pose);
+                total += gtsam::Pose3::Logmap(delta);
+            }
+
+            total /= poses.size();
+            mean = mean.compose(gtsam::Pose3::Expmap(total));
+        }
+
+        return mean;
     }
-    return tmp;
-  }();
-  DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-  const symphony_lake_dataset::SurveyVector &survey_vector =
-      data_parser.get_surveys();
-  const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-  const symphony_lake_dataset::ImagePoint img_pt_first =
-      survey.getImagePoint(indices.front());
-
-  // const size_t img_width = img_pt_first.width, img_height =
-  // img_pt_first.height;
-  const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-  const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-  gtsam::Cal3_S2::shared_ptr K =
-      boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
-  Eigen::Matrix<double, 5, 1> D =
-      (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1,
-       SymphonyLakeCamParams::k2, SymphonyLakeCamParams::p1,
-       SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-          .finished();
-  cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(),
-                   K->py(), 0, 0, 1);
-  cv::Mat D_mat =
-      (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
-
-  // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0
-  // * T_boat_camera
-  Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
-  Eigen::Isometry3d T_world_boat0;
-  T_world_boat0.linear() = T_earth_boat0.linear();
-  Eigen::Isometry3d T_world_camera0 =
-      T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
-  // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-  //                         gtsam::Pose3(T_world_camera0.matrix()));
-  Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
-
-  gtsam::Values initial_estimate_;
-  gtsam::NonlinearFactorGraph graph_;
-
-  gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-      gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-  gtsam::noiseModel::Diagonal::shared_ptr pose_noise =
-      gtsam::noiseModel::Diagonal::Sigmas(
-          gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
-  gtsam::noiseModel::Isotropic::shared_ptr translation_noise =
-      gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
-  gtsam::noiseModel::Isotropic::shared_ptr gps_noise =
-      gtsam::noiseModel::Isotropic::Sigma(3, 2.);
-
-  // populate frames and cam_poses
-  std::vector<Frame> frames;
-  FrameId id = 0;
-  std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
-  for (const int &idx : indices) {
-    const cv::Mat img = survey.loadImageByImageIndex(idx);
-    cv::Mat img_undistorted;
-    cv::undistort(img, img_undistorted, K_mat, D_mat);
-    const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-
-    Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
-    T_world_boat.translation() -= T_earth_boat0.translation();
-    Eigen::Isometry3d T_world_cam =
-        T_world_boat * DataParser::get_T_boat_camera(img_pt);
-    gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
-    cam_pose.emplace(id, T_world_cam_gtsam);
-
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-        gtsam::Symbol('x', id), T_world_cam_gtsam, pose_noise);
-
-    Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
-
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-        frontend.get_keypoints_and_descriptors(img_undistorted);
-    KeypointsCV kpts;
-    for (const cv::KeyPoint &kpt : kpts_descs.first) {
-      kpts.push_back(kpt.pt);
+
+    static gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
+        if (points.empty()) throw std::runtime_error("Empty point vector");
+        gtsam::Point3 sum(0, 0, 0);
+        for (const auto &pt : points) sum += pt;
+        return sum / points.size();
     }
-    frame.add_keypoints(kpts);
-    frame.assign_descriptors(kpts_descs.second);
-
-    frames.push_back(frame);
-
-    id++;
-  }
-
-  // populate feature_tracks and lmk_id_map
-  FeatureTracks feature_tracks;
-  LandmarkIdMap lmk_id_map;
-  LandmarkId lmk_id;
-  for (size_t i = 0; i < indices.size() - 1; i++) {
-    std::vector<cv::DMatch> matches = frontend.get_matches(
-        frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-    frontend.enforce_bijective_matches(matches);
-    for (const cv::DMatch match : matches) {
-      const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-      const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-
-      auto key = std::make_pair(i, kpt_cam0);
-      if (lmk_id_map.find(key) != lmk_id_map.end()) {
-        auto id = lmk_id_map.at(key);
-        feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-        feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-      } else {
-        FeatureTrack feature_track(i, kpt_cam0);
-        feature_track.obs_.emplace_back(i + 1, kpt_cam1);
-        feature_tracks.emplace(lmk_id, feature_track);
-        lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id);
-        lmk_id++;
-      }
+};
+
+TEST(SFMMvp, sfm_building_manual_global) {
+    const std::vector<int> indices{60, 80, 100, 120, 140};
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
+
+    // const size_t img_width = img_pt_first.width, img_height =
+    // img_pt_first.height;
+    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
+    gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
+    Eigen::Matrix<double, 5, 1> D =
+        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
+         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+            .finished();
+    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
+    cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
+
+    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0
+    // * T_boat_camera
+    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_world_boat0;
+    T_world_boat0.linear() = T_earth_boat0.linear();
+    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+    //                         gtsam::Pose3(T_world_camera0.matrix()));
+    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
+
+    gtsam::Values initial_estimate_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::Vector6 prior_sigmas;
+    prior_sigmas << 0.04, 0.04, 0.09, 0.01, 0.01, 0.01;
+    gtsam::Vector3 gps_sigmas;
+    gps_sigmas << 2.1, 2.1, 0.1;
+    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
+    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
+
+    // populate frames and cam_poses
+    std::vector<Frame> frames;
+    FrameId id = 0;
+    std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
+    std::vector<Eigen::Isometry3d> cam_isometries;
+    for (const int &idx : indices) {
+        const cv::Mat img = survey.loadImageByImageIndex(idx);
+        cv::Mat img_undistorted;
+        cv::undistort(img, img_undistorted, K_mat, D_mat);
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
+
+        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
+        T_world_boat.translation() -= T_earth_boat0.translation();
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
+        cam_pose.emplace(id, T_world_cam_gtsam);
+        cam_isometries.push_back(T_world_cam);
+
+        // graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
+        //                                                         T_world_cam_gtsam, pose_noise);
+
+        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
+
+        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
+            frontend.get_keypoints_and_descriptors(img_undistorted);
+        KeypointsCV kpts;
+        for (const cv::KeyPoint &kpt : kpts_descs.first) {
+            kpts.push_back(kpt.pt);
+        }
+        frame.add_keypoints(kpts);
+        frame.assign_descriptors(kpts_descs.second);
+
+        frames.push_back(frame);
+
+        id++;
     }
-  }
-
-  // populate graph
-  std::vector<gtsam::Symbol> symbols_pose;
-  std::vector<gtsam::Symbol> symbols_landmarks;
-  for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
-    std::vector<gtsam::Pose3> feat_cam_poses;
-    std::vector<gtsam::Point2> feat_kpts;
-    LandmarkId lmk_id = lmk_feat.first;
-    FeatureTrack feature_track = lmk_feat.second;
-    const gtsam::Symbol symbol_lmk('l', lmk_id);
-    symbols_landmarks.push_back(symbol_lmk);
-    for (const std::pair<FrameId, KeypointCV> &feat_track :
-         feature_track.obs_) {
-      initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0));
-      graph_.emplace_shared<
-          gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-          gtsam::Point2(feat_track.second.x, feat_track.second.y),
-          landmark_noise, gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
-
-      feat_cam_poses.push_back(cam_pose[feat_track.first]);
-      feat_kpts.push_back(
-          gtsam::Point2(feat_track.second.x, feat_track.second.y));
+
+    geometry::viz_scene(cam_isometries, std::vector<Eigen::Vector3d>());
+
+    // populate feature_tracks and lmk_id_map
+    FeatureTracks feature_tracks;
+    FrameLandmarkIdMap lmk_id_map;
+    LandmarkId lmk_id = 0;
+    for (size_t i = 0; i < indices.size() - 1; i++) {
+        std::vector<cv::DMatch> matches =
+            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+        frontend.enforce_bijective_matches(matches);
+        for (const cv::DMatch match : matches) {
+            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+            auto key = std::make_pair(i, kpt_cam0);
+            if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                auto id = lmk_id_map.at(key);
+                feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+                feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+            } else {
+                FeatureTrack feature_track(i, kpt_cam0);
+                feature_track.obs_.emplace_back(i + 1, kpt_cam1);
+                feature_tracks.emplace(lmk_id, feature_track);
+                lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id);
+                lmk_id++;
+            }
+        }
     }
-    gtsam::Point3 p_wrld_kpt;
-    bool triangulate_success = SFMMvpHelper::attempt_triangulate(
-        feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-    if (triangulate_success) {
-      initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+
+    // populate graph
+    std::vector<gtsam::Symbol> symbols_pose;
+    std::vector<gtsam::Symbol> symbols_landmarks;
+    for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
+        std::vector<gtsam::Pose3> feat_cam_poses;
+        std::vector<gtsam::Point2> feat_kpts;
+        LandmarkId lmk_id = lmk_feat.first;
+        FeatureTrack feature_track = lmk_feat.second;
+        const gtsam::Symbol symbol_lmk('l', lmk_id);
+        symbols_landmarks.push_back(symbol_lmk);
+        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
+            initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0));
+            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
+                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
+
+            feat_cam_poses.push_back(cam_pose[feat_track.first]);
+            feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
+        }
+        gtsam::Point3 p_wrld_kpt;
+        bool triangulate_success =
+            SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+        if (triangulate_success) {
+            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+        }
     }
-  }
-  graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-      gtsam::Symbol('x', 0), cam_pose.at(0), pose_noise);
-  initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
-  symbols_pose.push_back(gtsam::Symbol('x', 0));
-  for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
-    if (cam_id_pose.first == 0) {
-      continue;
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), cam_pose.at(0),
+                                                            prior_pose_noise);
+    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
+    symbols_pose.push_back(gtsam::Symbol('x', 0));
+    for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
+        if (cam_id_pose.first == 0) {
+            continue;
+        }
+        const gtsam::Symbol key_cam('x', cam_id_pose.first);
+        symbols_pose.push_back(key_cam);
+        gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
+        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
+        initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
+        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
     }
-    const gtsam::Symbol key_cam('x', cam_id_pose.first);
-    symbols_pose.push_back(key_cam);
-    gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
-    graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
-    initial_estimate_.insert_or_assign(key_cam,
-                                       gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
-  }
-
-  gtsam::LevenbergMarquardtParams params;
-  params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-  params.maxIterations = 1;          // We'll manually step it
-  gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_,
-                                               params);
-
-  double prev_error = optimizer.error();
-  const int num_steps = 5;
-  const bool viz_itr = true;
-  typedef int epoch;
-  std::function<void(const gtsam::Values &, const epoch,
-                     const std::vector<gtsam::Symbol> &,
-                     const std::vector<gtsam::Symbol> &)>
-      graph_itr_debug_func =
-          [&](const gtsam::Values &vals, const epoch iter,
-              const std::vector<gtsam::Symbol> &symbols_pose,
-              const std::vector<gtsam::Symbol> &symbols_landmarks) {
+
+    // initial_estimate_.print("huh");
+    SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
+
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;          // We'll manually step it
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
+
+    double prev_error = optimizer.error();
+    const int num_steps = 5;
+    const bool viz_itr = true;
+    typedef int epoch;
+    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
+                       const std::vector<gtsam::Symbol> &)>
+        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
+                                   const std::vector<gtsam::Symbol> &symbols_pose,
+                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
             std::cout << "iteration " << iter << " complete!";
             std::string window_name = "Iteration_" + std::to_string(iter);
-            SFMMvpHelper::graph_values(vals, window_name, symbols_pose,
-                                       symbols_landmarks);
-          };
-
-  SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose,
-                             symbols_landmarks);
-  for (int i = 0; i < num_steps; i++) {
-    optimizer.iterate();
-    double curr_error = optimizer.error();
-
-    if (viz_itr) {
-      graph_itr_debug_func(optimizer.values(), i, symbols_pose,
-                           symbols_landmarks);
-    }
-    if (std::abs(prev_error - curr_error) < 1e-6) {
-      std::cout << "Converged at iteration " << i << "\n";
-      break;
+            SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks);
+        };
+
+    SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose, symbols_landmarks);
+    for (int i = 0; i < num_steps; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+
+        if (viz_itr) {
+            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
+        }
+        if (std::abs(prev_error - curr_error) < 1e-6) {
+            std::cout << "Converged at iteration " << i << "\n";
+            break;
+        }
     }
-  }
-  const gtsam::Values result = optimizer.values();
+    const gtsam::Values result = optimizer.values();
 }
 
-// const gtsam::Values initial_values =
-// sfm.get_backend().get_current_initial_values();
-// sfm.graph_values(initial_values, "initial values");
+TEST(SFMMvp, sfm_building_manual_incremental) {
+    const std::vector<int> indices{60, 80, 100, 120, 140};
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
+
+    // const size_t img_width = img_pt_first.width, img_height =
+    // img_pt_first.height;
+    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
+    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
+    gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
+    Eigen::Matrix<double, 5, 1> D =
+        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
+         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
+            .finished();
+    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
+    cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
+
+    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0
+    // * T_boat_camera
+    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_world_boat0;
+    T_world_boat0.linear() = T_earth_boat0.linear();
+    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+    //                         gtsam::Pose3(T_world_camera0.matrix()));
+    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
 
-// std::cout << "Solving for structure!" << std::endl;
+    gtsam::Values initial_estimate_;
+    gtsam::NonlinearFactorGraph graph_;
 
-// Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const
-// gtsam::Values &vals,
-//                                                               const
-//                                                               Backend::epoch
-//                                                               iter) {
-//     std::cout << "iteration " << iter << " complete!";
-//     std::string window_name = "Iteration_" + std::to_string(iter);
-//     sfm.graph_values(vals, window_name);
-// };
-// sfm.solve_structure(5, solve_iter_debug_func);
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::Vector6 prior_sigmas;
+    prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
+    gtsam::Vector3 gps_sigmas;
+    gps_sigmas << 2.1, 2.1, 0.1;
+    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
+    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
+
+    // populate frames and cam_poses
+    std::vector<Frame> frames;
+    FrameId id = 0;
+    std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
+    std::vector<Eigen::Isometry3d> cam_isometries;
+    for (const int &idx : indices) {
+        const cv::Mat img = survey.loadImageByImageIndex(idx);
+        cv::Mat img_undistorted;
+        cv::undistort(img, img_undistorted, K_mat, D_mat);
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
+
+        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
+        T_world_boat.translation() -= T_earth_boat0.translation();
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
+        cam_pose.emplace(id, T_world_cam_gtsam);
+        cam_isometries.push_back(T_world_cam);
+
+        // graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
+        //                                                         T_world_cam_gtsam, pose_noise);
+
+        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
+
+        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
+            frontend.get_keypoints_and_descriptors(img_undistorted);
+        KeypointsCV kpts;
+        for (const cv::KeyPoint &kpt : kpts_descs.first) {
+            kpts.push_back(kpt.pt);
+        }
+        frame.add_keypoints(kpts);
+        frame.assign_descriptors(kpts_descs.second);
 
-// std::cout << "Solution complete." << std::endl;
+        frames.push_back(frame);
+
+        id++;
+    }
+
+    geometry::viz_scene(cam_isometries, std::vector<Eigen::Vector3d>());
+
+    // populate feature_tracks and lmk_id_map
+    FeatureTracks feature_tracks;
+    FrameLandmarkIdMap lmk_id_map;
+    LandmarkId lmk_id = 0;
+    for (size_t i = 0; i < indices.size() - 1; i++) {
+        std::vector<cv::DMatch> matches =
+            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+        frontend.enforce_bijective_matches(matches);
+        for (const cv::DMatch match : matches) {
+            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+            auto key = std::make_pair(frames[i].id_, kpt_cam0);
+            if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                auto id = lmk_id_map.at(key);
+                feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
+                feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+            } else {
+                FeatureTrack feature_track(i, kpt_cam0);
+                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                feature_tracks.emplace(lmk_id, feature_track);
+                lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
+                lmk_id++;
+            }
+        }
+    }
+
+    // populate graph
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
+    std::vector<gtsam::Symbol> symbols_pose;
+    std::vector<gtsam::Symbol> symbols_landmarks;
+    for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
+        std::vector<gtsam::Pose3> feat_cam_poses;
+        std::vector<gtsam::Point2> feat_kpts;
+        LandmarkId lmk_id = lmk_feat.first;
+        FeatureTrack feature_track = lmk_feat.second;
+        const gtsam::Symbol symbol_lmk('l', lmk_id);
+        symbols_landmarks.push_back(symbol_lmk);
+        gtsam::Point3 p_wrld_kpt(0, 0, 0);
+        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
+            initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0));
+            symbols_landmarks_values_iter.emplace(
+                symbol_lmk, std::vector<gtsam::Point3>{gtsam::Point3(0, 0, 0)});
+            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
+                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
+
+            feat_cam_poses.push_back(cam_pose[feat_track.first]);
+            feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
+        }
+        bool triangulate_success =
+            SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+        if (triangulate_success) {
+            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+        }
+        if (symbols_landmarks_values_iter.find(symbol_lmk) != symbols_landmarks_values_iter.end()) {
+            symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
+        } else {
+            symbols_landmarks_values_iter.emplace(symbol_lmk,
+                                                  std::vector<gtsam::Point3>{p_wrld_kpt});
+        }
+    }
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), cam_pose.at(0),
+                                                            prior_pose_noise);
+    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
+    symbols_pose.push_back(gtsam::Symbol('x', 0));
+    symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0),
+                                      std::vector<gtsam::Pose3>{cam_pose.at(0)});
+    for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
+        if (cam_id_pose.first == 0) {
+            continue;
+        }
+        const gtsam::Symbol key_cam('x', cam_id_pose.first);
+        symbols_pose.push_back(key_cam);
+        gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
+        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
+        initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
+        symbols_poses_values_iter.emplace(key_cam, std::vector<gtsam::Pose3>{cam_id_pose.second});
+        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
+    }
+
+    SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
+
+    // do local optimizations and add to iter cache
+    // const int window = 2;
+    std::cout << "length of cam_poses: " << cam_pose.size() << std::endl;
+    for (const auto &id_pose : cam_pose) {
+        std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl;
+    }
+    for (size_t i = 0; i < indices.size() - 1; i++) {
+        std::cout << "Local optimization " << i << std::endl;
+        gtsam::Values local_estimate_;
+        gtsam::NonlinearFactorGraph local_graph_;
+
+        std::vector<gtsam::Symbol> poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)};
+        std::vector<gtsam::Symbol> lmks;
+
+        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+            gtsam::PriorFactor<gtsam::Pose3>(poses[0], cam_pose.at(i), prior_pose_noise));
+        std::cout << "fuck" << std::endl;
+        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+            gtsam::PriorFactor<gtsam::Pose3>(poses[1], cam_pose.at(i + 1), prior_pose_noise));
+        std::cout << "mega fuck" << std::endl;
+        local_estimate_.insert_or_assign(poses[0], cam_pose.at(i));
+        local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1));
+
+        std::vector<cv::DMatch> matches =
+            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+        frontend.enforce_bijective_matches(matches);
+        std::vector<gtsam::Pose3> feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)};
+
+        std::vector<Eigen::Isometry3d> viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()),
+                                                 Eigen::Isometry3d(feat_cam_poses[1].matrix())};
+        std::vector<Eigen::Vector3d> viz_lmks;
+        for (const cv::DMatch match : matches) {
+            std::vector<gtsam::Point2> feat_kpts;
+            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+            feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
+            feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
+
+            auto key = std::make_pair(frames[i].id_, kpt_cam0);
+            if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                auto id = lmk_id_map.at(key);
+                std::cout << "good" << std::endl;
+                // do nothing
+                // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+                // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+            } else {
+                std::cerr << "this shouldn't happen right?" << std::endl;
+                FeatureTrack feature_track(frames[i].id_, kpt_cam0);
+                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                feature_tracks.emplace(lmk_id, feature_track);
+                lmk_id_map.emplace(key, lmk_id);
+                lmk_id++;
+            }
+            std::cout << "oog" << std::endl;
+            const gtsam::Symbol symbol_lmk =
+                gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0)));
+            // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
+            //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
+            //     std::cerr << "UH OH" << std::endl;
+            // } else {
+            //     std::cout << "cool" << std::endl;
+            // }
+            lmks.push_back(symbol_lmk);
+            local_graph_
+                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                    feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K);
+            local_graph_
+                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                    feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K);
+
+            gtsam::Point3 p_wrld_kpt;
+            bool triangulate_success =
+                SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+            if (triangulate_success) {
+                local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+                viz_lmks.push_back(p_wrld_kpt);
+            }
+            if (symbols_landmarks_values_iter.find(symbol_lmk) !=
+                symbols_landmarks_values_iter.end()) {
+                symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
+            } else {
+                symbols_landmarks_values_iter.emplace(symbol_lmk,
+                                                      std::vector<gtsam::Point3>{p_wrld_kpt});
+            }
+        }
+        std::cout << "setup complete!" << std::endl;
+        geometry::viz_scene(viz_poses, viz_lmks, true, true,
+                            "Local Optimization " + std::to_string(i));
+
+        const gtsam::Values symbols_result_local = SFMMvpHelper::optimize_graph(
+            local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
+
+        for (const gtsam::Symbol &symbol_pose : poses) {
+            const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
+            symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
+        }
+        for (const gtsam::Symbol &symbol_lmk : lmks) {
+            const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
+            symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
+        }
+    }
+
+    std::cout << "\nLocal Optimizations Complete!" << std::endl;
+
+    for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose : symbols_poses_values_iter) {
+        initial_estimate_.insert_or_assign(sym_pose.first,
+                                           SFMMvpHelper::averagePoses(sym_pose.second));
+    }
+    for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
+         symbols_landmarks_values_iter) {
+        initial_estimate_.insert_or_assign(sym_lmk.first,
+                                           SFMMvpHelper::averagePoints(sym_lmk.second));
+    }
+
+    const gtsam::Values result =
+        SFMMvpHelper::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
+    std::cout << "about to visualize result" << std::endl;
+    SFMMvpHelper::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+}
 
-// const gtsam::Values result_values = sfm.get_structure_result();
-// sfm.graph_values(result_values, "optimized values");
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file

From e482d5216ad3c81aa9ec81854e6a00382603d8d0 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 25 Apr 2025 13:48:55 -0400
Subject: [PATCH 20/45] added matching that chooses best bijective that are
 twice as good as second match. working on filtering spatial points, pipeline
 currently broken

---
 experimental/learn_descriptors/BUILD          |  4 +-
 .../learn_descriptors/backend_test.cc         |  2 +-
 experimental/learn_descriptors/frontend.cc    | 45 +++++++++
 experimental/learn_descriptors/frontend.hh    |  1 +
 .../learn_descriptors/frontend_definitions.hh |  4 +-
 experimental/learn_descriptors/sfm_mvp.cc     | 98 ++++++++++++++-----
 6 files changed, 122 insertions(+), 32 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 666668ad0..254f6c36b 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -266,8 +266,8 @@ cc_test(
     "@com_google_googletest//:gtest_main",    
     "@opencv//:opencv",
     ":backend",
-    ":spatial_scene_test_cube",
-    ":feature_manager"
+    ":feature_manager",
+    ":spatial_test_scene_cube"
   ]
 )
 
diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc
index 94d7b5067..dfe413b03 100644
--- a/experimental/learn_descriptors/backend_test.cc
+++ b/experimental/learn_descriptors/backend_test.cc
@@ -2,7 +2,7 @@
 
 #include "experimental/learn_descriptors/feature_manager.hh"
 #include "gtest/gtest.h"
-#include "spatial_scene_test_cube.hh"
+#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
 
 namespace robot::experimental::learn_descriptors {
 TEST(BackendTest, cube) {
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 46368a31e..3ef2c408e 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -108,6 +108,51 @@ void Frontend::enforce_bijective_matches(std::vector<cv::DMatch> &matches) {
                   matches.end());
 }
 
+void Frontend::enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches) {
+    // Store best and second-best matches per query_idx
+    std::unordered_map<int, std::pair<cv::DMatch, float>> best_two_query_matches;
+    for (const auto &match : matches) {
+        int query_idx = match.queryIdx;
+        float dist = match.distance;
+
+        if (!best_two_query_matches.count(query_idx)) {
+            best_two_query_matches[query_idx] = {match, std::numeric_limits<float>::max()};
+        } else {
+            auto &[best_match, second_best_dist] = best_two_query_matches[query_idx];
+            if (dist < best_match.distance) {
+                second_best_dist = best_match.distance;
+                best_match = match;
+            } else if (dist < second_best_dist) {
+                second_best_dist = dist;
+            }
+        }
+    }
+
+    // Keep matches where best < 0.5 * second_best
+    std::vector<cv::DMatch> filtered;
+    for (const auto &[query_idx, pair] : best_two_query_matches) {
+        const auto &[best_match, second_best_dist] = pair;
+        if (best_match.distance < 0.5f * second_best_dist) {
+            filtered.push_back(best_match);
+        }
+    }
+
+    // Enforce bijection: each train_idx should also be best for only one query_idx
+    std::unordered_map<int, cv::DMatch> best_train_match;
+    for (const auto &match : filtered) {
+        int train_idx = match.trainIdx;
+        if (!best_train_match.count(train_idx) ||
+            match.distance < best_train_match[train_idx].distance) {
+            best_train_match[train_idx] = match;
+        }
+    }
+
+    matches.clear();
+    for (const auto &[train_idx, match] : best_train_match) {
+        matches.push_back(match);
+    }
+}
+
 bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
                                  std::vector<cv::DMatch> &matches_out) const {
     if (matcher_type_ != MatcherType::BRUTE_FORCE) {
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index 9b720cc06..70baf55bd 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -24,6 +24,7 @@ class Frontend {
                                         const cv::Mat &descriptors2) const;
     static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
     static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
+    static void enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches);
     static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
                                cv::Mat img_keypoints_out) {
         cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
index 188b91e53..fac8b67b3 100644
--- a/experimental/learn_descriptors/frontend_definitions.hh
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -10,7 +10,7 @@
 namespace std {
 template <>
 struct hash<std::pair<FrameId, KeypointCV>> {
-    size_t operator()(const std::pair<FrameId, KeypointCV>& p) const {
+    size_t operator()(const std::pair<FrameId, KeypointCV> &p) const {
         size_t h1 = std::hash<FrameId>()(p.first);
         size_t h2 = std::hash<float>()(p.second.x) ^ (std::hash<float>()(p.second.y) << 1);
         return h1 ^ (h2 << 1);  // combine hashes
@@ -25,7 +25,7 @@ class FeatureTrack {
 
     bool in_ba_graph_;
 
-    FeatureTrack(FrameId frame_id, const KeypointCV& px) { obs_.emplace_back(frame_id, px); }
+    FeatureTrack(FrameId frame_id, const KeypointCV &px) { obs_.emplace_back(frame_id, px); }
 
     void print() const {
         std::cout << "Feature track with cameras: ";
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index f27c1ede7..85ef7d3d1 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -157,6 +157,15 @@ class SFMMvpHelper {
         for (const auto &pt : points) sum += pt;
         return sum / points.size();
     }
+
+    static gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
+        const gtsam::Point3 mean = SFMMvpHelper::averagePoints(points);
+        gtsam::Point3 var(0, 0, 0);
+        for (const gtsam::Point3 &pt : points) {
+            var += (mean - pt).array().square().matrix();
+        }
+        return var / points.size();
+    }
 };
 
 TEST(SFMMvp, sfm_building_manual_global) {
@@ -248,7 +257,8 @@ TEST(SFMMvp, sfm_building_manual_global) {
     for (size_t i = 0; i < indices.size() - 1; i++) {
         std::vector<cv::DMatch> matches =
             frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-        frontend.enforce_bijective_matches(matches);
+        // frontend.enforce_bijective_matches(matches);
+        frontend.enforce_bijective_buffer_matches(matches);
         for (const cv::DMatch match : matches) {
             const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
             const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
@@ -350,6 +360,13 @@ TEST(SFMMvp, sfm_building_manual_global) {
 
 TEST(SFMMvp, sfm_building_manual_incremental) {
     const std::vector<int> indices{60, 80, 100, 120, 140};
+    // const std::vector<int> indices = []() {
+    //     std::vector<int> tmp;
+    //     for (int i = 0; i < 200; i += 10) {
+    //         tmp.push_back(i);
+    //     }
+    //     return tmp;
+    // }();
     DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
     const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
@@ -409,9 +426,6 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         cam_pose.emplace(id, T_world_cam_gtsam);
         cam_isometries.push_back(T_world_cam);
 
-        // graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
-        //                                                         T_world_cam_gtsam, pose_noise);
-
         Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
 
         std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
@@ -437,7 +451,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
     for (size_t i = 0; i < indices.size() - 1; i++) {
         std::vector<cv::DMatch> matches =
             frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-        frontend.enforce_bijective_matches(matches);
+        frontend.enforce_bijective_buffer_matches(matches);
         for (const cv::DMatch match : matches) {
             const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
             const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
@@ -457,40 +471,66 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         }
     }
 
-    // populate graph
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
-    std::vector<gtsam::Symbol> symbols_pose;
-    std::vector<gtsam::Symbol> symbols_landmarks;
+    // triangulate all of the points
+
+    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;
+    std::vector<gtsam::Point3> triangulated_lmks;
     for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
         std::vector<gtsam::Pose3> feat_cam_poses;
         std::vector<gtsam::Point2> feat_kpts;
         LandmarkId lmk_id = lmk_feat.first;
         FeatureTrack feature_track = lmk_feat.second;
-        const gtsam::Symbol symbol_lmk('l', lmk_id);
-        symbols_landmarks.push_back(symbol_lmk);
         gtsam::Point3 p_wrld_kpt(0, 0, 0);
         for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0));
-            symbols_landmarks_values_iter.emplace(
-                symbol_lmk, std::vector<gtsam::Point3>{gtsam::Point3(0, 0, 0)});
-            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
-                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
-
             feat_cam_poses.push_back(cam_pose[feat_track.first]);
             feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
         }
         bool triangulate_success =
             SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
         if (triangulate_success) {
-            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-        }
-        if (symbols_landmarks_values_iter.find(symbol_lmk) != symbols_landmarks_values_iter.end()) {
-            symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
+            lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt);
+            triangulated_lmks.push_back(p_wrld_kpt);
         } else {
+            continue;
+        }
+    }
+    // filter points
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
+                        "Unfiltered points");
+    const gtsam::Point3 variance_pts = SFMMvpHelper::get_variance(triangulated_lmks);
+    const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
+    const gtsam::Point3 mean_pts = SFMMvpHelper::averagePoints(triangulated_lmks);
+    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map_filtered;
+    std::vector<gtsam::Point3> filtered_points;
+    std::cout << "original var " << variance_pts << std::endl;
+    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map) {
+        const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix();
+        if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) {
+            lmk_triangulated_map_filtered.emplace(lmk_id_pt);
+            filtered_points.push_back(lmk_id_pt.second);
+        }
+    }
+    std::cout << "filtered variance " << SFMMvpHelper::get_variance(filtered_points) << std::endl;
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
+                        "Unfiltered points");
+
+    // add filtered points to graph
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
+    std::vector<gtsam::Symbol> symbols_pose;
+    std::vector<gtsam::Symbol> symbols_landmarks;
+    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map_filtered) {
+        LandmarkId lmk_id = lmk_id_pt.first;
+        const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second;
+        FeatureTrack feature_track = feature_tracks.at(lmk_id);
+        const gtsam::Symbol symbol_lmk('l', lmk_id);
+        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
+            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
             symbols_landmarks_values_iter.emplace(symbol_lmk,
                                                   std::vector<gtsam::Point3>{p_wrld_kpt});
+            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
+                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
         }
     }
     graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), cam_pose.at(0),
@@ -512,7 +552,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
     }
 
-    SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
+    // SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose,
+    // symbols_landmarks);
 
     // do local optimizations and add to iter cache
     // const int window = 2;
@@ -555,6 +596,9 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
             auto key = std::make_pair(frames[i].id_, kpt_cam0);
             if (lmk_id_map.find(key) != lmk_id_map.end()) {
                 auto id = lmk_id_map.at(key);
+                if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) {
+                    continue;
+                }
                 std::cout << "good" << std::endl;
                 // do nothing
                 // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
@@ -600,8 +644,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
             }
         }
         std::cout << "setup complete!" << std::endl;
-        geometry::viz_scene(viz_poses, viz_lmks, true, true,
-                            "Local Optimization " + std::to_string(i));
+        // geometry::viz_scene(viz_poses, viz_lmks, true, true,
+        //                     "Local Optimization " + std::to_string(i));
 
         const gtsam::Values symbols_result_local = SFMMvpHelper::optimize_graph(
             local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
@@ -616,7 +660,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         }
     }
 
-    std::cout << "\nLocal Optimizations Complete!" << std::endl;
+    std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
 
     for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose : symbols_poses_values_iter) {
         initial_estimate_.insert_or_assign(sym_pose.first,

From 1d03689218c923384c05667199d608ed44d3b24e Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 24 Jun 2025 17:45:16 -0400
Subject: [PATCH 21/45] make transform names more clear

---
 .../get_colmap_groundtruth.cc                 |  2 +-
 experimental/learn_descriptors/sfm_mvp.cc     | 38 ++++++++++---------
 .../structure_from_motion_test.cc             | 17 +++++----
 .../learn_descriptors/symphony_lake_parser.cc | 32 ++++++++--------
 .../learn_descriptors/symphony_lake_parser.hh | 13 ++++---
 .../symphony_lake_parser_test.cc              | 12 +++---
 6 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/experimental/learn_descriptors/get_colmap_groundtruth.cc b/experimental/learn_descriptors/get_colmap_groundtruth.cc
index 6afc9fff1..7115daf0d 100644
--- a/experimental/learn_descriptors/get_colmap_groundtruth.cc
+++ b/experimental/learn_descriptors/get_colmap_groundtruth.cc
@@ -54,7 +54,7 @@ int main() {
     for (int i = 0; i < 718; i++) {
         symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(i);
         const Eigen::Isometry3d T_wrld_cam =
-            data_parser.get_T_world_boat(img_pt) * data_parser.get_T_boat_camera(img_pt);
+            data_parser.get_world_from_boat(img_pt) * data_parser.get_boat_from_camera(img_pt);
         ImagePose img_pose;
 
         std::ostringstream oss;
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index 85ef7d3d1..68820b02e 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -187,12 +187,13 @@ TEST(SFMMvp, sfm_building_manual_global) {
     cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
     cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
 
-    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0
-    // * T_boat_camera
-    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
-    Eigen::Isometry3d T_world_boat0;
-    T_world_boat0.linear() = T_earth_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0
+    // * boat_from_camera
+    Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first);
+    Eigen::Isometry3d world_from_boat0;
+    world_from_boat0.linear() = earth_from_boat0.linear();
+    Eigen::Isometry3d T_world_camera0 =
+        world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first);
     // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
     //                         gtsam::Pose3(T_world_camera0.matrix()));
     Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
@@ -222,9 +223,9 @@ TEST(SFMMvp, sfm_building_manual_global) {
         cv::undistort(img, img_undistorted, K_mat, D_mat);
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
 
-        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
-        T_world_boat.translation() -= T_earth_boat0.translation();
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt);
+        world_from_boat.translation() -= earth_from_boat0.translation();
+        Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt);
         gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
         cam_pose.emplace(id, T_world_cam_gtsam);
         cam_isometries.push_back(T_world_cam);
@@ -384,12 +385,12 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
     cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
     cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
 
-    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0
-    // * T_boat_camera
-    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
-    Eigen::Isometry3d T_world_boat0;
-    T_world_boat0.linear() = T_earth_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0
+    Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first);
+    Eigen::Isometry3d world_from_boat0;
+    world_from_boat0.linear() = earth_from_boat0.linear();
+    Eigen::Isometry3d T_world_camera0 =
+        world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first);
     // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
     //                         gtsam::Pose3(T_world_camera0.matrix()));
     Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
@@ -419,9 +420,9 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         cv::undistort(img, img_undistorted, K_mat, D_mat);
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
 
-        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
-        T_world_boat.translation() -= T_earth_boat0.translation();
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt);
+        world_from_boat.translation() -= earth_from_boat0.translation();
+        Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt);
         gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
         cam_pose.emplace(id, T_world_cam_gtsam);
         cam_isometries.push_back(T_world_cam);
@@ -672,6 +673,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
                                            SFMMvpHelper::averagePoints(sym_lmk.second));
     }
 
+    // do global optimization
     const gtsam::Values result =
         SFMMvpHelper::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
     std::cout << "about to visualize result" << std::endl;
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
index 752daa6a3..cfbad2f4a 100644
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ b/experimental/learn_descriptors/structure_from_motion_test.cc
@@ -37,8 +37,8 @@ TEST(StructureFromMotiontest, sfm_snippet_small) {
 
     // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
     // T_earth_boat0 =
-    Eigen::Isometry3d T_earth_world = DataParser::get_T_world_boat(img_pt_first);
-    Eigen::Isometry3d T_world_camera0 = DataParser::get_T_boat_camera(img_pt_first);
+    Eigen::Isometry3d T_earth_world = DataParser::get_world_from_boat(img_pt_first);
+    Eigen::Isometry3d T_world_camera0 = DataParser::get_boat_from_camera(img_pt_first);
     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
                             gtsam::Pose3(T_world_camera0.matrix()));
 
@@ -47,9 +47,9 @@ TEST(StructureFromMotiontest, sfm_snippet_small) {
         const cv::Mat img = survey.loadImageByImageIndex(idx);
         img_vector.push_back(img);
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-        Eigen::Isometry3d T_earth_boat = DataParser::get_T_world_boat(img_pt);
+        Eigen::Isometry3d T_earth_boat = DataParser::get_world_from_boat(img_pt);
         Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt);
 
         sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
@@ -92,10 +92,11 @@ TEST(StructureFromMotiontest, sfm_building) {
             .finished();
 
     // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
-    Eigen::Isometry3d T_earth_boat0 = DataParser::get_T_world_boat(img_pt_first);
+    Eigen::Isometry3d T_earth_boat0 = DataParser::get_world_from_boat(img_pt_first);
     Eigen::Isometry3d T_world_boat0;
     T_world_boat0.linear() = T_earth_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 = T_world_boat0 * DataParser::get_T_boat_camera(img_pt_first);
+    Eigen::Isometry3d T_world_camera0 =
+        T_world_boat0 * DataParser::get_boat_from_camera(img_pt_first);
     StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
                             gtsam::Pose3(T_world_camera0.matrix()));
 
@@ -103,10 +104,10 @@ TEST(StructureFromMotiontest, sfm_building) {
         const cv::Mat img = survey.loadImageByImageIndex(idx);
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
 
-        Eigen::Isometry3d T_world_boat = DataParser::get_T_world_boat(img_pt);
+        Eigen::Isometry3d T_world_boat = DataParser::get_world_from_boat(img_pt);
         T_world_boat.translation() -= T_earth_boat0.translation();
 
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_T_boat_camera(img_pt);
+        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt);
 
         sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
     }
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 4740deb1e..f06ebd116 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -26,11 +26,11 @@ DataParser::DataParser(const std::filesystem::path &image_root_dir,
 }
 DataParser::~DataParser() {}
 
-const Eigen::Isometry3d DataParser::get_T_boat_camera(
+const Eigen::Isometry3d DataParser::get_boat_from_camera(
     const symphony_lake_dataset::ImagePoint &img_pt) {
-    return get_T_boat_camera(img_pt.pan, img_pt.tilt);
+    return get_boat_from_camera(img_pt.pan, img_pt.tilt);
 }
-const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double theta_tilt) {
+const Eigen::Isometry3d DataParser::get_boat_from_camera(double theta_pan, double theta_tilt) {
     Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ());   // pan
     Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY());  // tilt
     Eigen::Matrix3d R_boat_gimbal(R_z * R_y);
@@ -46,31 +46,31 @@ const Eigen::Isometry3d DataParser::get_T_boat_camera(double theta_pan, double t
     return T_boat_cam;
 }
 
-const Eigen::Isometry3d DataParser::get_T_world_gps(
+const Eigen::Isometry3d DataParser::get_world_from_gps(
     const symphony_lake_dataset::ImagePoint &img_pt) {
-    Eigen::Isometry3d T_world_gps;
+    Eigen::Isometry3d world_from_gps;
     // img_pt.theta is in the North East Down frame, not lattitude longitude coords. However,
     // because of the alignment of the imu and gps, this rotation math holds.
-    T_world_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix();
-    T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
-    return T_world_gps;
+    world_from_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix();
+    world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+    return world_from_gps;
 }
 
-const Eigen::Isometry3d DataParser::get_T_world_boat(
+const Eigen::Isometry3d DataParser::get_world_from_boat(
     const symphony_lake_dataset::ImagePoint &img_pt) {
-    // Eigen::Isometry3d T_world_gps;
+    // Eigen::Isometry3d world_from_gps;
 
     // Eigen::Matrix3d R_world_imu = get_R_world_imu(img_pt.theta);
     // Eigen::Isometry3d T_imu_gps = T_boat_imu.inverse() * T_boat_gps;
     // Eigen::Matrix3d R_world_gps = R_world_imu * T_imu_gps.linear();
 
-    // T_world_gps.linear() = R_world_gps;
-    // T_world_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+    // world_from_gps.linear() = R_world_gps;
+    // world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
 
-    // Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse();
+    // Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse();
 
-    Eigen::Isometry3d T_world_gps = get_T_world_gps(img_pt);
-    Eigen::Isometry3d T_world_boat = T_world_gps * T_boat_gps.inverse();
-    return T_world_boat;
+    Eigen::Isometry3d world_from_gps = get_world_from_gps(img_pt);
+    Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse();
+    return world_from_boat;
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index 5c6e91b54..536771d06 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -30,14 +30,15 @@ class DataParser {
 
     // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
     //                                    bool use_compass = false);
-    static const Eigen::Isometry3d get_T_boat_camera(
+    static const Eigen::Isometry3d get_boat_from_camera(
         const symphony_lake_dataset::ImagePoint &img_pt);
-    static const Eigen::Isometry3d get_T_boat_camera(double theta_pan, double theta_tilt);
-    static const Eigen::Isometry3d get_T_world_gps(const symphony_lake_dataset::ImagePoint &img_pt);
-    /// @brief get_T_world_boat assuming z_axis_boat dot z_axis_world ~ -1
+    static const Eigen::Isometry3d get_boat_from_camera(double theta_pan, double theta_tilt);
+    static const Eigen::Isometry3d get_world_from_gps(
+        const symphony_lake_dataset::ImagePoint &img_pt);
+    /// @brief get_world_from_boat assuming z_axis_boat dot z_axis_world ~ -1
     /// @param img_pt
-    /// @return T_world_boat
-    static const Eigen::Isometry3d get_T_world_boat(
+    /// @return world_from_boat
+    static const Eigen::Isometry3d get_world_from_boat(
         const symphony_lake_dataset::ImagePoint &img_pt);
 
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index 2a800db9b..eb81ad37f 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -74,13 +74,14 @@ TEST(SymphonyLakeParserTest, test_cam_frames) {
     std::vector<Eigen::Isometry3d> cam_frames;
 
     // NOTE: the world in these images is east, north, up centered at boat0 translation
-    Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation();
+    Eigen::Vector3d t_world_boat0 =
+        DataParser::get_world_from_boat(image_point_first).translation();
 
     for (size_t i = 0; i < indices.size(); i++) {
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
-        Eigen::Isometry3d T_world_boatidx = DataParser::get_T_world_boat(img_pt);
+        Eigen::Isometry3d T_world_boatidx = DataParser::get_world_from_boat(img_pt);
         Eigen::Isometry3d T_boatidx_camidx =
-            DataParser::get_T_boat_camera(img_pt);  // current boat to current camera
+            DataParser::get_boat_from_camera(img_pt);  // current boat to current camera
 
         Eigen::Isometry3d T_world_camidx = T_world_boatidx * T_boatidx_camidx;
         T_world_camidx.translation() -= t_world_boat0;
@@ -108,12 +109,13 @@ TEST(SymphonyLakeParserTest, test_gps_frames) {
     std::vector<Eigen::Isometry3d> gps_frames;
 
     // NOTE: the world in these images is east, north, up centered at boat0 translation
-    Eigen::Vector3d t_world_boat0 = DataParser::get_T_world_boat(image_point_first).translation();
+    Eigen::Vector3d t_world_boat0 =
+        DataParser::get_world_from_boat(image_point_first).translation();
     // Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0);
 
     for (size_t i = 0; i < indices.size(); i++) {
         const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
-        Eigen::Isometry3d T_world_gpsidx = DataParser::get_T_world_gps(img_pt);
+        Eigen::Isometry3d T_world_gpsidx = DataParser::get_world_from_gps(img_pt);
         T_world_gpsidx.translation() -= t_world_boat0;
         gps_frames.push_back(T_world_gpsidx);
     }

From 94d5e5e5746d2b538a42ce50b319f4a8c0117804 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Wed, 25 Jun 2025 17:18:21 -0400
Subject: [PATCH 22/45] WIP

---
 common/geometry/camera.cc                     |   6 +-
 common/geometry/camera.hh                     |   6 +-
 common/geometry/camera_test.cc                |  51 +-
 experimental/learn_descriptors/BUILD          |  21 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  | 676 +++++++++---------
 5 files changed, 408 insertions(+), 352 deletions(-)

diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
index b13718570..4994914c9 100644
--- a/common/geometry/camera.cc
+++ b/common/geometry/camera.cc
@@ -24,9 +24,9 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel
     return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.);
 }
 
-Eigen::Isometry3d estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts0,
-                                 const std::vector<cv::KeyPoint> &kpts1,
-                                 const std::vector<cv::DMatch> &matches, const cv::Mat &K) {
+Eigen::Isometry3d estimate_c0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                        const std::vector<cv::KeyPoint> &kpts1,
+                                        const std::vector<cv::DMatch> &matches, const cv::Mat &K) {
     Eigen::Isometry3d result;
     std::vector<cv::Point2f> pts1;
     std::vector<cv::Point2f> pts2;
diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh
index 82b74c818..2feea1954 100644
--- a/common/geometry/camera.hh
+++ b/common/geometry/camera.hh
@@ -21,7 +21,7 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel
 Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
                           const double depth);
 
-Eigen::Isometry3d estimate_c0_c1(const std::vector<cv::KeyPoint> &kpts0,
-                                 const std::vector<cv::KeyPoint> &kpts1,
-                                 const std::vector<cv::DMatch> &matches, const cv::Mat &K);
+Eigen::Isometry3d estimate_c0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                        const std::vector<cv::KeyPoint> &kpts1,
+                                        const std::vector<cv::DMatch> &matches, const cv::Mat &K);
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc
index 392f876f6..ca2594abf 100644
--- a/common/geometry/camera_test.cc
+++ b/common/geometry/camera_test.cc
@@ -57,7 +57,7 @@ TEST(CameraTest, test_estimate_pose) {
         Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
         Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
 
-    const int initial_size = p_W_cube.size();
+    // const int initial_size = p_W_cube.size();
     const Eigen::Vector3d p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2);
     for (const Eigen::Vector3d &point_W_cube : p_W_cube) {
         p_W_cube.emplace_back(R_new_points * (point_W_cube - p_world_cube_center) +
@@ -76,38 +76,37 @@ TEST(CameraTest, test_estimate_pose) {
     Eigen::Matrix3d K_eig = cv_to_eigen_mat(K);
     std::vector<Eigen::Isometry3d> poses;
 
-    Eigen::Matrix3d R_world_cam0(
+    Eigen::Matrix3d R_world_from_cam0(
         Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
         Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
-    Eigen::Isometry3d T_world_cam0 = Eigen::Isometry3d::Identity();
-    T_world_cam0.linear() = R_world_cam0;
-    T_world_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2);
-    poses.push_back(T_world_cam0);
+    Eigen::Isometry3d world_from_cam0 = Eigen::Isometry3d::Identity();
+    world_from_cam0.linear() = R_world_from_cam0;
+    world_from_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2);
+    poses.push_back(world_from_cam0);
 
-    Eigen::Matrix3d R_world_cam0_to_cam1(
+    Eigen::Matrix3d R_world_45deg(
         Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix());
-    Eigen::Isometry3d T_world_cam1;
-    T_world_cam1.linear() = R_world_cam0_to_cam1 * T_world_cam0.linear();
-    T_world_cam1.translation() =
-        R_world_cam0_to_cam1 * (T_world_cam0.translation() - p_world_cube_center) +
-        p_world_cube_center;
-    poses.push_back(T_world_cam1);
+    Eigen::Isometry3d world_from_cam1;
+    world_from_cam1.linear() = R_world_45deg * world_from_cam0.linear();
+    world_from_cam1.translation() =
+        R_world_45deg * (world_from_cam0.translation() - p_world_cube_center) + p_world_cube_center;
+    poses.push_back(world_from_cam1);
 
     std::vector<cv::KeyPoint> kpts0;
     std::vector<cv::KeyPoint> kpts1;
     std::vector<cv::DMatch> matches;
 
     for (const Eigen::Vector3d &point_W_cube : p_W_cube) {
-        Eigen::Vector3d p_cam0_cubep =
-            (T_world_cam0.inverse() *
+        Eigen::Vector3d p_cube_points_in_cam0 =
+            (world_from_cam0.inverse() *
              Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.))
                 .head<3>();
-        Eigen::Vector3d p_cam1_cubep =
-            (T_world_cam1.inverse() *
+        Eigen::Vector3d p_cube_points_in_cam1 =
+            (world_from_cam1.inverse() *
              Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.))
                 .head<3>();
-        Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cam0_cubep;
-        Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cam1_cubep;
+        Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cube_points_in_cam0;
+        Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cube_points_in_cam1;
         Eigen::Vector2d pxl_c0_pcube = pxl_c0_pcube_homog.head<2>() / pxl_c0_pcube_homog(2);
         Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2);
         if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) &&
@@ -118,14 +117,14 @@ TEST(CameraTest, test_estimate_pose) {
         }
     }
 
-    Eigen::Isometry3d T_cam0_cam1_estimate = estimate_c0_c1(kpts0, kpts1, matches, K);
-    Eigen::Isometry3d T_cam0_cam1_scale = T_world_cam0.inverse() * T_world_cam1;
-    T_cam0_cam1_scale.translation() /= T_cam0_cam1_scale.translation().norm();
+    Eigen::Isometry3d cam0_from_cam1_estimate = estimate_c0_from_cam1(kpts0, kpts1, matches, K);
+    Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1;
+    cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm();
     EXPECT_TRUE(
-        T_cam0_cam1_estimate.translation().isApprox(T_cam0_cam1_scale.translation(), 0.000001));
-    EXPECT_TRUE(T_cam0_cam1_estimate.linear().isApprox(T_cam0_cam1_scale.linear(), 0.001));
+        cam0_from_cam1_estimate.translation().isApprox(cam0_from_cam1.translation(), 0.000001));
+    EXPECT_TRUE(cam0_from_cam1_estimate.linear().isApprox(cam0_from_cam1.linear(), 0.001));
 
-    poses.emplace_back(T_world_cam0 * T_cam0_cam1_estimate);
-    // viz_scene(poses, p_W_cube);
+    poses.emplace_back(world_from_cam0 * cam0_from_cam1_estimate);
+    viz_scene(poses, p_W_cube);
 }
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index a5b1a9e22..d0a748b1b 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -360,6 +360,25 @@ cc_binary(
     ":frame", 
     ":structure_from_motion_types",
     ":frontend",
-    "@boost//:smart_ptr"
+    "@boost//:smart_ptr",
+  ]
+)
+
+cc_binary(
+  name = "incrememtal_sfm_mvp",
+  srcs = ["incrememtal_sfm_mvp.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    "@opencv//:opencv",    
+    "@gtsam//:gtsam",  
+    ":frontend_definitions",
+    ":frontend",
+    ":four_seasons_parser",
+    "@eigen",
+    "//visualization/opencv:opencv_viz",
+    ":frame",
+    ":structure_from_motion_types"
   ]
 )
\ No newline at end of file
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index d50d71208..021770c62 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -2,6 +2,7 @@
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "experimental/learn_descriptors/four_seasons_parser.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend.hh"
 #include "experimental/learn_descriptors/frontend_definitions.hh"
@@ -156,327 +157,364 @@ static gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
 }
 };  // namespace detail
 
-int main() {
+int main(int argc, const char **argv) {
+    using namespace robot::experimental::learn_descriptors;
+    // clang-format off
+    cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser");
+    options.add_options()
+    ("data_dir", "Path to dataset root directory", cxxopts::value<std::string>())
+    ("calibration_dir", "Path to dataset calibration directory", cxxopts::value<std::string>())
+    ("help", "Print usage");
+    // clang-format on
+
+    auto args = options.parse(argc, argv);
+
+    const auto check_required = [&](const std::string &opt) {
+        if (args.count(opt) == 0) {
+            std::cout << "Missing " << opt << " argument" << std::endl;
+            std::cout << options.help() << std::endl;
+            std::exit(1);
+        }
+    };
+
+    if (args.count("help")) {
+        std::cout << options.help() << std::endl;
+        return 0;
+    }
+
+    check_required("data_dir");
+    check_required("calibration_dir");
+
+    const std::filesystem::path path_data = args["data_dir"].as<std::string>();
+    const std::filesystem::path path_calibration = args["calibration_dir"].as<std::string>();
+    FourSeasonsParser parser(path_data, path_calibration);
+
     std::cout << "incremental_sfm_mvp!" << std::endl;
-    // const std::vector<int> indices{60, 80, 100, 120, 140};
-    // // const std::vector<int> indices = []() {
-    // //     std::vector<int> tmp;
-    // //     for (int i = 0; i < 200; i += 10) {
-    // //         tmp.push_back(i);
-    // //     }
-    // //     return tmp;
-    // // }();
-    // DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    // const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    // const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    // const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
-
-    // // const size_t img_width = img_pt_first.width, img_height =
-    // // img_pt_first.height;
-    // const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-    // const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-    // gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
-    // Eigen::Matrix<double, 5, 1> D =
-    //     (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-    //      SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-    //         .finished();
-    // cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0,
-    // 1); cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
-
-    // // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0
-    // Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first);
-    // Eigen::Isometry3d world_from_boat0;
-    // world_from_boat0.linear() = earth_from_boat0.linear();
-    // Eigen::Isometry3d T_world_camera0 =
-    //     world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first);
-    // // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-    // //                         gtsam::Pose3(T_world_camera0.matrix()));
-    // Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
-
-    // gtsam::Values initial_estimate_;
-    // gtsam::NonlinearFactorGraph graph_;
-
-    // gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-    //     gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    // gtsam::Vector6 prior_sigmas;
-    // prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
-    // gtsam::Vector3 gps_sigmas;
-    // gps_sigmas << 2.1, 2.1, 0.1;
-    // gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
-    //     gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
-    // gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
-    //     gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
-
-    // // populate frames and cam_poses
-    // std::vector<Frame> frames;
-    // FrameId id = 0;
-    // std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
-    // std::vector<Eigen::Isometry3d> cam_isometries;
-    // for (const int &idx : indices) {
-    //     const cv::Mat img = survey.loadImageByImageIndex(idx);
-    //     cv::Mat img_undistorted;
-    //     cv::undistort(img, img_undistorted, K_mat, D_mat);
-    //     const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-
-    //     Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt);
-    //     world_from_boat.translation() -= earth_from_boat0.translation();
-    //     Eigen::Isometry3d T_world_cam = world_from_boat *
-    //     DataParser::get_boat_from_camera(img_pt); gtsam::Pose3
-    //     T_world_cam_gtsam(T_world_cam.matrix()); cam_pose.emplace(id, T_world_cam_gtsam);
-    //     cam_isometries.push_back(T_world_cam);
-
-    //     Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
-
-    //     std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-    //         frontend.get_keypoints_and_descriptors(img_undistorted);
-    //     KeypointsCV kpts;
-    //     for (const cv::KeyPoint &kpt : kpts_descs.first) {
-    //         kpts.push_back(kpt.pt);
-    //     }
-    //     frame.add_keypoints(kpts);
-    //     frame.assign_descriptors(kpts_descs.second);
-
-    //     frames.push_back(frame);
-
-    //     id++;
-    // }
-
-    // geometry::viz_scene(cam_isometries, std::vector<Eigen::Vector3d>());
-
-    // // populate feature_tracks and lmk_id_map
-    // FeatureTracks feature_tracks;
-    // FrameLandmarkIdMap lmk_id_map;
-    // LandmarkId lmk_id = 0;
-    // for (size_t i = 0; i < indices.size() - 1; i++) {
-    //     std::vector<cv::DMatch> matches =
-    //         frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-    //     frontend.enforce_bijective_buffer_matches(matches);
-    //     for (const cv::DMatch match : matches) {
-    //         const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.`queryIdx];
-    //         const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-
-    //         auto key = std::make_pair(frames[i].id_, kpt_cam0);
-    //         if (lmk_id_map.find(key) != lmk_id_map.end()) {
-    //             auto id = lmk_id_map.at(key);
-    //             feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-    //             feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-    //         } else {
-    //             FeatureTrack feature_track(i, kpt_cam0);
-    //             feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-    //             feature_tracks.emplace(lmk_id, feature_track);
-    //             lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-    //             lmk_id++;
-    //         }
-    //     }
-    // }
-
-    // // triangulate all of the points
-
-    // std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;
-    // std::vector<gtsam::Point3> triangulated_lmks;
-    // for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
-    //     std::vector<gtsam::Pose3> feat_cam_poses;
-    //     std::vector<gtsam::Point2> feat_kpts;
-    //     LandmarkId lmk_id = lmk_feat.first;
-    //     FeatureTrack feature_track = lmk_feat.second;
-    //     gtsam::Point3 p_wrld_kpt(0, 0, 0);
-    //     for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-    //         feat_cam_poses.push_back(cam_pose[feat_track.first]);
-    //         feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
-    //     }
-    //     bool triangulate_success =
-    //         detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-    //     if (triangulate_success) {
-    //         lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt);
-    //         triangulated_lmks.push_back(p_wrld_kpt);
-    //     } else {
-    //         continue;
-    //     }
-    // }
-    // // filter points
-    // geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
-    //                     "Unfiltered points");
-    // const gtsam::Point3 variance_pts = detail::get_variance(triangulated_lmks);
-    // const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
-    // const gtsam::Point3 mean_pts = detail::averagePoints(triangulated_lmks);
-    // std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map_filtered;
-    // std::vector<gtsam::Point3> filtered_points;
-    // std::cout << "original var " << variance_pts << std::endl;
-    // for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map) {
-    //     const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix();
-    //     if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) {
-    //         lmk_triangulated_map_filtered.emplace(lmk_id_pt);
-    //         filtered_points.push_back(lmk_id_pt.second);
-    //     }
-    // }
-    // std::cout << "filtered variance " << detail::get_variance(filtered_points) << std::endl;
-    // geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
-    //                     "Unfiltered points");
-
-    // // add filtered points to graph
-    // std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
-    // std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
-    // std::vector<gtsam::Symbol> symbols_pose;
-    // std::vector<gtsam::Symbol> symbols_landmarks;
-    // for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map_filtered) {
-    //     LandmarkId lmk_id = lmk_id_pt.first;
-    //     const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second;
-    //     FeatureTrack feature_track = feature_tracks.at(lmk_id);
-    //     const gtsam::Symbol symbol_lmk('l', lmk_id);
-    //     for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-    //         initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-    //         symbols_landmarks_values_iter.emplace(symbol_lmk,
-    //                                               std::vector<gtsam::Point3>{p_wrld_kpt});
-    //         graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-    //             gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
-    //             gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
-    //     }
-    // }
-    // graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0),
-    // cam_pose.at(0),
-    //                                                         prior_pose_noise);
-    // initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
-    // symbols_pose.push_back(gtsam::Symbol('x', 0));
-    // symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0),
-    //                                   std::vector<gtsam::Pose3>{cam_pose.at(0)});
-    // for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
-    //     if (cam_id_pose.first == 0) {
-    //         continue;
-    //     }
-    //     const gtsam::Symbol key_cam('x', cam_id_pose.first);
-    //     symbols_pose.push_back(key_cam);
-    //     gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
-    //     graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
-    //     initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
-    //     symbols_poses_values_iter.emplace(key_cam,
-    //     std::vector<gtsam::Pose3>{cam_id_pose.second});
-    //     // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
-    // }
-
-    // // detail::graph_values(initial_estimate_, "Confirmation", symbols_pose,
-    // // symbols_landmarks);
-
-    // // do local optimizations and add to iter cache
-    // // const int window = 2;
-    // std::cout << "length of cam_poses: " << cam_pose.size() << std::endl;
-    // for (const auto &id_pose : cam_pose) {
-    //     std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl;
-    // }
-    // for (size_t i = 0; i < indices.size() - 1; i++) {
-    //     std::cout << "Local optimization " << i << std::endl;
-    //     gtsam::Values local_estimate_;
-    //     gtsam::NonlinearFactorGraph local_graph_;
-
-    //     std::vector<gtsam::Symbol> poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)};
-    //     std::vector<gtsam::Symbol> lmks;
-
-    //     local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-    //         gtsam::PriorFactor<gtsam::Pose3>(poses[0], cam_pose.at(i), prior_pose_noise));
-    //     std::cout << "fuck" << std::endl;
-    //     local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-    //         gtsam::PriorFactor<gtsam::Pose3>(poses[1], cam_pose.at(i + 1), prior_pose_noise));
-    //     std::cout << "mega fuck" << std::endl;
-    //     local_estimate_.insert_or_assign(poses[0], cam_pose.at(i));
-    //     local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1));
-
-    //     std::vector<cv::DMatch> matches =
-    //         frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-    //     frontend.enforce_bijective_matches(matches);
-    //     std::vector<gtsam::Pose3> feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)};
-
-    //     std::vector<Eigen::Isometry3d> viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()),
-    //                                              Eigen::Isometry3d(feat_cam_poses[1].matrix())};
-    //     std::vector<Eigen::Vector3d> viz_lmks;
-    //     for (const cv::DMatch match : matches) {
-    //         std::vector<gtsam::Point2> feat_kpts;
-    //         const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-    //         const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-    //         feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
-    //         feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
-
-    //         auto key = std::make_pair(frames[i].id_, kpt_cam0);
-    //         if (lmk_id_map.find(key) != lmk_id_map.end()) {
-    //             auto id = lmk_id_map.at(key);
-    //             if (lmk_triangulated_map_filtered.find(id) ==
-    //             lmk_triangulated_map_filtered.end()) {
-    //                 continue;
-    //             }
-    //             std::cout << "good" << std::endl;
-    //             // do nothing
-    //             // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-    //             // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-    //         } else {
-    //             std::cerr << "this shouldn't happen right?" << std::endl;
-    //             FeatureTrack feature_track(frames[i].id_, kpt_cam0);
-    //             feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-    //             feature_tracks.emplace(lmk_id, feature_track);
-    //             lmk_id_map.emplace(key, lmk_id);
-    //             lmk_id++;
-    //         }
-    //         std::cout << "oog" << std::endl;
-    //         const gtsam::Symbol symbol_lmk =
-    //             gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0)));
-    //         // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
-    //         //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
-    //         //     std::cerr << "UH OH" << std::endl;
-    //         // } else {
-    //         //     std::cout << "cool" << std::endl;
-    //         // }
-    //         lmks.push_back(symbol_lmk);
-    //         local_graph_
-    //             .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-    //                 feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K);
-    //         local_graph_
-    //             .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-    //                 feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K);
-
-    //         gtsam::Point3 p_wrld_kpt;
-    //         bool triangulate_success =
-    //             detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-    //         if (triangulate_success) {
-    //             local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-    //             viz_lmks.push_back(p_wrld_kpt);
-    //         }
-    //         if (symbols_landmarks_values_iter.find(symbol_lmk) !=
-    //             symbols_landmarks_values_iter.end()) {
-    //             symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
-    //         } else {
-    //             symbols_landmarks_values_iter.emplace(symbol_lmk,
-    //                                                   std::vector<gtsam::Point3>{p_wrld_kpt});
-    //         }
+    const std::vector<int> indices{100, 120, 140, 160, 180};
+    // const std::vector<int> indices = []() {
+    //     std::vector<int> tmp;
+    //     for (int i = 0; i < 200; i += 10) {
+    //         tmp.push_back(i);
     //     }
-    //     std::cout << "setup complete!" << std::endl;
-    //     // geometry::viz_scene(viz_poses, viz_lmks, true, true,
-    //     //                     "Local Optimization " + std::to_string(i));
+    //     return tmp;
+    // }();
+    // FourSeasonsParser parser()
+    // const size_t img_width = img_pt_first.width, img_height =
+    // img_pt_first.height;
+    const FourSeasonsParser::CameraCalibrationFisheye cal_parser_left_cam =
+        parser.camera_calibration();
+    gtsam::Cal3_S2::shared_ptr K =
+        boost::make_shared<gtsam::Cal3_S2>(cal_parser_left_cam.fx, cal_parser_left_cam.fy, 0,
+                                           cal_parser_left_cam.cx, cal_parser_left_cam.cy);
+    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
+    cv::Mat D_mat = (cv::Mat_<double>(4, 1) << cal_parser_left_cam.k1, cal_parser_left_cam.k2,
+                     cal_parser_left_cam.k3, cal_parser_left_cam.k4);
+
+    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+    //                         gtsam::Pose3(T_world_camera0.matrix()));
+    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
+
+    gtsam::Values initial_estimate_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::Vector6 prior_sigmas;
+    prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
+    gtsam::Vector3 gps_sigmas;
+    gps_sigmas << 2.1, 2.1, 0.1;
+    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
+    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
+
+    // populate frames and cam_poses
+    std::vector<Frame> frames;
+    FrameId id = 0;
+    std::unordered_map<FrameId, gtsam::Pose3> poses_cam_init;
+    std::vector<Eigen::Isometry3d> cam_references;
+    Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
+    scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
+        parser.gnss_scale();
+    for (const int &idx : indices) {
+        const cv::Mat img = survey.loadImageByImageIndex(idx);
+        cv::Mat img_undistorted;
+        cv::fisheye::undistort(img, img_undistorted, K_mat, D_mat);
+        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
+
+        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
+            frontend.get_keypoints_and_descriptors(img_undistorted);
+        KeypointsCV kpts;
+        for (const cv::KeyPoint &kpt : kpts_descs.first) {
+            kpts.push_back(kpt.pt);
+        }
+        frame.add_keypoints(kpts);
+        frame.assign_descriptors(kpts_descs.second);
+
+        frames.push_back(frame);
+
+        const ImagePoint img_pt = parser.get_image_point(idx);
+
+        Eigen::Isometry3d world_from_cam;
+        if (id == 0) {
+            // for now we will use an informative prior based on reference
+            world_from_cam = Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference *
+                                                img_pt.AS_w_from_gnss_cam->matrix()));
+        } else if (img_pt.gps_gcs) {
+            const Eigen::Vector3d gps_gcs(
+                img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
+                img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
+            const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs);
+            world_from_cam.translation() =
+                (parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * p_gps_in_ECEF;
+
+            Eigen::
+        }
+        poses_cam_init.emplace(id, world_from_cam.matrix());
+
+        if (img_pt.AS_w_from_gnss_cam) {
+            cam_references.push_back(
+                Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference *
+                                   img_pt.AS_w_from_gnss_cam->matrix())));
+        } else {
+            std::clog << "Warning: no reference data at img_pt " << idx << std::endl;
+        }
 
-    //     const gtsam::Values symbols_result_local = detail::optimize_graph(
-    //         local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
+        id++;
+    }
 
-    //     for (const gtsam::Symbol &symbol_pose : poses) {
-    //         const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
-    //         symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
-    //     }
-    //     for (const gtsam::Symbol &symbol_lmk : lmks) {
-    //         const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
-    //         symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
-    //     }
-    // }
-
-    // std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
-
-    // for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose :
-    // symbols_poses_values_iter) {
-    //     initial_estimate_.insert_or_assign(sym_pose.first,
-    //     detail::averagePoses(sym_pose.second));
-    // }
-    // for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
-    //      symbols_landmarks_values_iter) {
-    //     initial_estimate_.insert_or_assign(sym_lmk.first, detail::averagePoints(sym_lmk.second));
-    // }
-
-    // // do global optimization
-    // const gtsam::Values result =
-    //     detail::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
-    // std::cout << "about to visualize result" << std::endl;
-    // detail::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+    geometry::viz_scene(cam_references, std::vector<Eigen::Vector3d>(), true, true, "References");
+
+    // populate feature_tracks and lmk_id_map
+    // TODO: exhaustively get matches
+    FeatureTracks feature_tracks;
+    FrameLandmarkIdMap lmk_id_map;
+    LandmarkId lmk_id = 0;
+    for (size_t i = 0; i < indices.size() - 1; i++) {
+        std::vector<cv::DMatch> matches =
+            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+        frontend.enforce_bijective_buffer_matches(matches);
+        for (const cv::DMatch match : matches) {
+            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.`queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+            auto key = std::make_pair(frames[i].id_, kpt_cam0);
+            if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                auto id = lmk_id_map.at(key);
+                feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
+                feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+            } else {
+                FeatureTrack feature_track(i, kpt_cam0);
+                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                feature_tracks.emplace(lmk_id, feature_track);
+                lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
+                lmk_id++;
+            }
+        }
+    }
+
+    // triangulate all of the points
+
+    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;
+    std::vector<gtsam::Point3> triangulated_lmks;
+    for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
+        std::vector<gtsam::Pose3> feat_cam_poses;
+        std::vector<gtsam::Point2> feat_kpts;
+        LandmarkId lmk_id = lmk_feat.first;
+        FeatureTrack feature_track = lmk_feat.second;
+        gtsam::Point3 p_wrld_kpt(0, 0, 0);
+        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
+            feat_cam_poses.push_back(poses_cam_init[feat_track.first]);
+            feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
+        }
+        bool triangulate_success =
+            detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+        if (triangulate_success) {
+            lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt);
+            triangulated_lmks.push_back(p_wrld_kpt);
+        } else {
+            continue;
+        }
+    }
+    // filter points
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
+                        "Unfiltered points");
+    const gtsam::Point3 variance_pts = detail::get_variance(triangulated_lmks);
+    const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
+    const gtsam::Point3 mean_pts = detail::averagePoints(triangulated_lmks);
+    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map_filtered;
+    std::vector<gtsam::Point3> filtered_points;
+    std::cout << "original var " << variance_pts << std::endl;
+    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map) {
+        const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix();
+        if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) {
+            lmk_triangulated_map_filtered.emplace(lmk_id_pt);
+            filtered_points.push_back(lmk_id_pt.second);
+        }
+    }
+    std::cout << "filtered variance " << detail::get_variance(filtered_points) << std::endl;
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
+                        "Unfiltered points");
+
+    // add filtered points to graph
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
+    std::vector<gtsam::Symbol> symbols_pose;
+    std::vector<gtsam::Symbol> symbols_landmarks;
+    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map_filtered) {
+        LandmarkId lmk_id = lmk_id_pt.first;
+        const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second;
+        FeatureTrack feature_track = feature_tracks.at(lmk_id);
+        const gtsam::Symbol symbol_lmk('l', lmk_id);
+        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
+            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+            symbols_landmarks_values_iter.emplace(symbol_lmk,
+                                                  std::vector<gtsam::Point3>{p_wrld_kpt});
+            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
+                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
+        }
+    }
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0),
+                                                            poses_cam_init.at(0), prior_pose_noise);
+    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), poses_cam_init.at(0));
+    symbols_pose.push_back(gtsam::Symbol('x', 0));
+    symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0),
+                                      std::vector<gtsam::Pose3>{poses_cam_init.at(0)});
+    for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : poses_cam_init) {
+        if (cam_id_pose.first == 0) {
+            continue;
+        }
+        const gtsam::Symbol key_cam('x', cam_id_pose.first);
+        symbols_pose.push_back(key_cam);
+        gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
+        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
+        initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
+        symbols_poses_values_iter.emplace(key_cam, std::vector<gtsam::Pose3>{cam_id_pose.second});
+        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
+    }
+
+    // detail::graph_values(initial_estimate_, "Confirmation", symbols_pose,
+    // symbols_landmarks);
+
+    // do local optimizations and add to iter cache
+    // const int window = 2;
+    std::cout << "length of cam_poses: " << poses_cam_init.size() << std::endl;
+    for (const auto &id_pose : poses_cam_init) {
+        std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl;
+    }
+    for (size_t i = 0; i < indices.size() - 1; i++) {
+        std::cout << "Local optimization " << i << std::endl;
+        gtsam::Values local_estimate_;
+        gtsam::NonlinearFactorGraph local_graph_;
+
+        std::vector<gtsam::Symbol> poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)};
+        std::vector<gtsam::Symbol> lmks;
+
+        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+            gtsam::PriorFactor<gtsam::Pose3>(poses[0], poses_cam_init.at(i), prior_pose_noise));
+        std::cout << "fuck" << std::endl;
+        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+            gtsam::PriorFactor<gtsam::Pose3>(poses[1], poses_cam_init.at(i + 1), prior_pose_noise));
+        std::cout << "mega fuck" << std::endl;
+        local_estimate_.insert_or_assign(poses[0], poses_cam_init.at(i));
+        local_estimate_.insert_or_assign(poses[1], poses_cam_init.at(i + 1));
+
+        std::vector<cv::DMatch> matches =
+            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+        frontend.enforce_bijective_matches(matches);
+        std::vector<gtsam::Pose3> feat_cam_poses{poses_cam_init.at(i), poses_cam_init.at(i + 1)};
+
+        std::vector<Eigen::Isometry3d> viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()),
+                                                 Eigen::Isometry3d(feat_cam_poses[1].matrix())};
+        std::vector<Eigen::Vector3d> viz_lmks;
+        for (const cv::DMatch match : matches) {
+            std::vector<gtsam::Point2> feat_kpts;
+            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+            feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
+            feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
+
+            auto key = std::make_pair(frames[i].id_, kpt_cam0);
+            if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                auto id = lmk_id_map.at(key);
+                if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) {
+                    continue;
+                }
+                std::cout << "good" << std::endl;
+                // do nothing
+                // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+                // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+            } else {
+                std::cerr << "this shouldn't happen right?" << std::endl;
+                FeatureTrack feature_track(frames[i].id_, kpt_cam0);
+                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                feature_tracks.emplace(lmk_id, feature_track);
+                lmk_id_map.emplace(key, lmk_id);
+                lmk_id++;
+            }
+            std::cout << "oog" << std::endl;
+            const gtsam::Symbol symbol_lmk =
+                gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0)));
+            // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
+            //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
+            //     std::cerr << "UH OH" << std::endl;
+            // } else {
+            //     std::cout << "cool" << std::endl;
+            // }
+            lmks.push_back(symbol_lmk);
+            local_graph_
+                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                    feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K);
+            local_graph_
+                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                    feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K);
+
+            gtsam::Point3 p_wrld_kpt;
+            bool triangulate_success =
+                detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+            if (triangulate_success) {
+                local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+                viz_lmks.push_back(p_wrld_kpt);
+            }
+            if (symbols_landmarks_values_iter.find(symbol_lmk) !=
+                symbols_landmarks_values_iter.end()) {
+                symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
+            } else {
+                symbols_landmarks_values_iter.emplace(symbol_lmk,
+                                                      std::vector<gtsam::Point3>{p_wrld_kpt});
+            }
+        }
+        std::cout << "setup complete!" << std::endl;
+        // geometry::viz_scene(viz_poses, viz_lmks, true, true,
+        //                     "Local Optimization " + std::to_string(i));
+
+        const gtsam::Values symbols_result_local = detail::optimize_graph(
+            local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
+
+        for (const gtsam::Symbol &symbol_pose : poses) {
+            const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
+            symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
+        }
+        for (const gtsam::Symbol &symbol_lmk : lmks) {
+            const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
+            symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
+        }
+    }
+
+    std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
+
+    for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose : symbols_poses_values_iter) {
+        initial_estimate_.insert_or_assign(sym_pose.first, detail::averagePoses(sym_pose.second));
+    }
+    for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
+         symbols_landmarks_values_iter) {
+        initial_estimate_.insert_or_assign(sym_lmk.first, detail::averagePoints(sym_lmk.second));
+    }
+
+    // do global optimization
+    const gtsam::Values result =
+        detail::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
+    std::cout << "about to visualize result" << std::endl;
+    detail::graph_values(result, "Result", symbols_pose, symbols_landmarks);
 }
\ No newline at end of file

From 32052d2d5accc03c23389242e1335a1d17ccab3a Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 26 Jun 2025 17:20:44 -0400
Subject: [PATCH 23/45] WIP. pipeline code first pass, optimization is failing

---
 experimental/learn_descriptors/BUILD          |  26 +-
 .../learn_descriptors/four_seasons_parser.cc  |  28 +-
 .../four_seasons_parser_example.cc            |   2 +-
 .../four_seasons_parser_example_viz.cc        |   2 +-
 experimental/learn_descriptors/frame.hh       |   6 +-
 .../learn_descriptors/frontend_test.cc        |  41 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  | 508 ++++++++++--------
 7 files changed, 350 insertions(+), 263 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index d0a748b1b..0c22053f8 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -353,32 +353,14 @@ cc_binary(
   deps = [
     ":four_seasons_parser",
     "@eigen",
-    "@opencv//:opencv",    
-    "@gtsam//:gtsam",
+    "@opencv",    
+    "@gtsam",
     "//visualization/opencv:opencv_viz",
     ":spatial_test_scene_cube",
     ":frame", 
     ":structure_from_motion_types",
     ":frontend",
-    "@boost//:smart_ptr",
+    "@boost//:smart_ptr",    
+    "@cxxopts",
   ]
 )
-
-cc_binary(
-  name = "incrememtal_sfm_mvp",
-  srcs = ["incrememtal_sfm_mvp.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@opencv//:opencv",    
-    "@gtsam//:gtsam",  
-    ":frontend_definitions",
-    ":frontend",
-    ":four_seasons_parser",
-    "@eigen",
-    "//visualization/opencv:opencv_viz",
-    ":frame",
-    ":structure_from_motion_types"
-  ]
-)
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index 4d7d91b21..75886a552 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -6,6 +6,7 @@
 #include <exception>
 #include <fstream>
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <utility>
@@ -22,6 +23,8 @@
 #include "nmea/message/rmc.hpp"
 #include "nmea/sentence.hpp"
 
+#define IMU_HZ 30.0
+
 namespace robot::experimental::learn_descriptors {
 using namespace detail;
 FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
@@ -44,7 +47,8 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
     gps_parser_help::TimeGPSList gps_time_list =
         gps_parser_help::create_gps_time_data_map(path_gps);
 
-    const double imu_hz = 30.0;
+    // std::cout << "imu dt in nanoseconds: " << std::to_string((1.0 / IMU_HZ) * 1e9) << std::endl;
+
     size_t id = 0;
     size_t gps_idx = 0;
     for (const std::pair<size_t, std::vector<std::string>>& pair_time_data : img_time_list) {
@@ -97,12 +101,25 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                       << std::endl;
         }
         while (gps_idx < gps_time_list.size() && time_key > gps_time_list[gps_idx].first &&
-               time_key - gps_time_list[gps_idx].first > (1.0 / imu_hz) * 1e9) {
+               time_key - gps_time_list[gps_idx].first > (1.0 / IMU_HZ) * 1e9) {
             gps_idx++;
+            // if (gps_idx < 200) {
+            //     std::stringstream ss;
+            //     ss << "uh oh: " << gps_idx;
+            //     if (time_key > gps_time_list[gps_idx].first) {
+            //         ss << "\ttime_key - gps_time: " << (time_key - gps_time_list[gps_idx].first);
+            //     } else {
+            //         ss << "\tgps_time - time_key: " << (gps_time_list[gps_idx].first - time_key);
+            //     }
+            //     std::cout << ss.str() << std::endl;
+            // }
         }  // find the closest gps point whose time is before the current image capture time
         if (gps_idx < gps_time_list.size() &&
-            time_key - gps_time_list[gps_idx].first <= (1.0 / imu_hz) * 1e9) {
+            time_key - gps_time_list[gps_idx].first <= (1.0 / IMU_HZ) * 1e9) {
             img_pt.gps_gcs = gps_time_list[gps_idx].second;
+            // std::cout << "adding gps data at img_pt: " << img_pt.id << " with gps_idx: " <<
+            // gps_idx
+            //           << std::endl;
             gps_idx++;
         }
         img_pt_vector_.push_back(img_pt);
@@ -313,7 +330,10 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) {
                 gps_data.longitude = gga.longitude.get();
                 if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
                 if (gga.utc.get() == time_of_day_last) {  // GGA messages for this dataset come
-                                                          // after RMC messages
+                    // after RMC messages
+                    // std::cout << "adding gps data with time of day: " << gga.utc.get()
+                    //           << " and unix time: "
+                    //           << gps_utc_to_unix_time(date_last, gga.utc.get()) << std::endl;
                     time_map_gps.push_back(
                         std::make_pair(gps_utc_to_unix_time(date_last, gga.utc.get()), gps_data));
                 }
diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc
index 0b5cadf7f..48c496579 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example.cc
@@ -59,7 +59,7 @@ int main(int argc, const char** argv) {
                 img_first_and_last);
     cv::imshow("First + Last Images", img_first_and_last);
 
-    for (size_t i = 0; i < parser.num_images(); i += 99) {
+    for (size_t i = 0; i < parser.num_images(); i += 1) {
         cv::Mat img = parser.load_image(i);
         const std::string img_str = "img " + std::to_string(i);
         cv::putText(img, img_str, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5,
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index 6b6954170..f0e5c5e5c 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -2,7 +2,6 @@
 #include <cstddef>
 #include <filesystem>
 #include <iostream>
-#include <opencv2/opencv.hpp>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -10,6 +9,7 @@
 #include "common/check.hh"
 #include "cxxopts.hpp"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
+#include "opencv2/opencv.hpp"
 #include "visualization/opencv/opencv_viz.hh"
 
 namespace lrn_desc = robot::experimental::learn_descriptors;
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index 720aa1f59..5668da713 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -1,4 +1,6 @@
 #pragma once
+#include <optional>
+
 #include "experimental/learn_descriptors/frontend_definitions.hh"
 #include "gtsam/geometry/Cal3_S2.h"
 #include "gtsam/geometry/Pose3.h"
@@ -10,6 +12,8 @@ class Frame {
     Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K,
           const gtsam::Pose3& T_wrld_grnd_truth)
         : id_(id), img_(img_undistorted), K_(K), groundtruth_(T_wrld_grnd_truth) {}
+    Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K)
+        : id_(id), img_(img_undistorted), K_(K) {}
 
     void add_keypoints(const KeypointsCV& kpts);
     void assign_descriptors(const cv::Mat& descriptors);
@@ -22,6 +26,6 @@ class Frame {
     gtsam::Cal3_S2::shared_ptr K_;
     KeypointsCV kpts_;
     cv::Mat descriptors_;
-    gtsam::Pose3 groundtruth_;
+    std::optional<gtsam::Pose3> groundtruth_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
index 8ab092d3e..d90142e3c 100644
--- a/experimental/learn_descriptors/frontend_test.cc
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -47,10 +47,10 @@ TEST(FrontendTest, pipeline_sweep) {
     cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
     cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
 
-    // cv::Mat img_test_disp;
-    // cv::hconcat(image_1, image_2, img_test_disp);
-    // cv::imshow("Test", img_test_disp);
-    // cv::waitKey(1000);
+    cv::Mat img_test_disp;
+    cv::hconcat(image_1, image_2, img_test_disp);
+    cv::imshow("Test", img_test_disp);
+    cv::waitKey(1000);
 
     Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
                                                   Frontend::ExtractorType::ORB};
@@ -64,11 +64,11 @@ TEST(FrontendTest, pipeline_sweep) {
     std::vector<cv::DMatch> matches;
     cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
         img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
-    // cv::Mat img_display_test;
+    cv::Mat img_display_test;
     for (Frontend::ExtractorType extractor_type : extractor_types) {
         for (Frontend::MatcherType matcher_type : matcher_types) {
-            // printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
-            //        static_cast<int>(matcher_type));
+            printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+                   static_cast<int>(matcher_type));
             try {
                 frontend = Frontend(extractor_type, matcher_type);
             } catch (const std::invalid_argument &e) {
@@ -85,20 +85,19 @@ TEST(FrontendTest, pipeline_sweep) {
                                     img_keypoints_out_2);
             frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
                                   keypoints_descriptors_pair_2.first, matches, img_matches_out);
-            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
-            // std::stringstream text;
-            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
-            //      << static_cast<int>(matcher_type);
-            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
-            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
-            // cv::imshow("Keypoints and Matches Output.", img_display_test);
-            // std::cout << "Hold spacebar to pause." << std::endl;
-            // while (cv::waitKey(1000) == 32) {
-            // }
-            // printf("completed frontend combination: (%d, %d)\n",
-            // static_cast<int>(extractor_type),
-            //        static_cast<int>(matcher_type));
+            cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+            cv::vconcat(img_display_test, img_matches_out, img_display_test);
+            std::stringstream text;
+            text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
+                 << static_cast<int>(matcher_type);
+            cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
+                        cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
+            cv::imshow("Keypoints and Matches Output.", img_display_test);
+            std::cout << "Hold spacebar to pause." << std::endl;
+            while (cv::waitKey(1000) == 32) {
+            }
+            printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+                   static_cast<int>(matcher_type));
             if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
                 for (const cv::DMatch match : matches) {
                     EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index 021770c62..649d30479 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -1,7 +1,11 @@
+#include <functional>
+#include <opencv2/calib3d.hpp>
+#include <sstream>
 #include <vector>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "cxxopts.hpp"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend.hh"
@@ -22,14 +26,16 @@
 #include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/PriorFactor.h"
 #include "gtsam/slam/ProjectionFactor.h"
-#include "opencv2/opencv.hpp"
 #include "visualization/opencv/opencv_viz.hh"
 
-namespace detail {
-// struct SymbolHasher {
-//     size_t operator()(const gtsam::Symbol &s) const { return std::hash<uint64_t>()(s.key());
-//     }
-// };
+namespace std {
+template <>
+struct hash<gtsam::Symbol> {
+    size_t operator()(const gtsam::Symbol &s) const { return hash<uint64_t>()(s.key()); }
+};
+}  // namespace std
+
+namespace detail_sfm {
 
 bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
                          const std::vector<gtsam::Point2> &cam_obs, gtsam::Cal3_S2::shared_ptr K,
@@ -56,7 +62,7 @@ bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
         } catch (const gtsam::TriangulationCheiralityException &e) {
             // Handle the exception gracefully by logging and retaining the previous
             // estimate.
-            std::cerr << "Triangulation Cheirality Exception: " << e.what()
+            std::cerr << "attamp_triangulation failed. Likely cheirality exception: " << e.what()
                       << ". Keeping initial landmark estimate." << std::endl;
         }
     } else {
@@ -102,7 +108,7 @@ static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
                                    const std::vector<gtsam::Symbol> &symbols_landmarks) {
             std::cout << "iteration " << iter << " complete!";
             std::string window_name = "Iteration_" + std::to_string(iter);
-            detail::graph_values(vals, window_name, symbols_pose, symbols_landmarks);
+            graph_values(vals, window_name, symbols_pose, symbols_landmarks);
         };
 
     for (int i = 0; i < num_steps; i++) {
@@ -148,14 +154,14 @@ static gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
 }
 
 static gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
-    const gtsam::Point3 mean = detail::averagePoints(points);
+    const gtsam::Point3 mean = averagePoints(points);
     gtsam::Point3 var(0, 0, 0);
     for (const gtsam::Point3 &pt : points) {
         var += (mean - pt).array().square().matrix();
     }
     return var / points.size();
 }
-};  // namespace detail
+};  // namespace detail_sfm
 
 int main(int argc, const char **argv) {
     using namespace robot::experimental::learn_descriptors;
@@ -190,14 +196,14 @@ int main(int argc, const char **argv) {
     FourSeasonsParser parser(path_data, path_calibration);
 
     std::cout << "incremental_sfm_mvp!" << std::endl;
-    const std::vector<int> indices{100, 120, 140, 160, 180};
-    // const std::vector<int> indices = []() {
-    //     std::vector<int> tmp;
-    //     for (int i = 0; i < 200; i += 10) {
-    //         tmp.push_back(i);
-    //     }
-    //     return tmp;
-    // }();
+    // const std::vector<int> indices{650, 675, 140, 160, 180};
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 660; i < 700; i += 3) {
+            tmp.push_back(i);
+        }
+        return tmp;
+    }();
     // FourSeasonsParser parser()
     // const size_t img_width = img_pt_first.width, img_height =
     // img_pt_first.height;
@@ -231,117 +237,171 @@ int main(int argc, const char **argv) {
     // populate frames and cam_poses
     std::vector<Frame> frames;
     FrameId id = 0;
-    std::unordered_map<FrameId, gtsam::Pose3> poses_cam_init;
-    std::vector<Eigen::Isometry3d> cam_references;
+    std::unordered_map<FrameId, gtsam::Pose3>
+        id_to_initial_world_from_cam;  // these are for initial guesses
+    std::vector<Eigen::Isometry3d> references_world_from_cam;
     Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
     scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
         parser.gnss_scale();
     for (const int &idx : indices) {
-        const cv::Mat img = survey.loadImageByImageIndex(idx);
-        cv::Mat img_undistorted;
-        cv::fisheye::undistort(img, img_undistorted, K_mat, D_mat);
-        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
-
-        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.get_keypoints_and_descriptors(img_undistorted);
-        KeypointsCV kpts;
-        for (const cv::KeyPoint &kpt : kpts_descs.first) {
-            kpts.push_back(kpt.pt);
-        }
-        frame.add_keypoints(kpts);
-        frame.assign_descriptors(kpts_descs.second);
-
-        frames.push_back(frame);
-
         const ImagePoint img_pt = parser.get_image_point(idx);
-
+        //
         Eigen::Isometry3d world_from_cam;
         if (id == 0) {
             // for now we will use an informative prior based on reference
-            world_from_cam = Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference *
-                                                img_pt.AS_w_from_gnss_cam->matrix()));
+            world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
+                                                img_pt.AS_w_from_gnss_cam->matrix())
+                                                   .matrix());
         } else if (img_pt.gps_gcs) {
             const Eigen::Vector3d gps_gcs(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
                 img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
             const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs);
+            const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(),
+                                                    p_gps_in_ECEF.z(), 1.0);
             world_from_cam.translation() =
-                (parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() * p_gps_in_ECEF;
+                ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() *
+                 p_gps_in_ECEF_hom)
+                    .head<3>();
 
-            Eigen::
+            // Eigen::
         }
-        poses_cam_init.emplace(id, world_from_cam.matrix());
+        id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix());
+
+        // populate frame
+        cv::Mat img_undistorted = parser.load_image(idx);
+        // cv::Mat img = parser.load_image(idx);
+        // cv::Mat img_undistorted;
+        // cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat);
+
+        // std::stringstream ss;
+        // ss << "image " << idx;
+        // cv::imshow(ss.str(), img);
+        // cv::waitKey(0);
+        // cv::destroyWindow(ss.str());
 
+        std::optional<Frame> frame;
         if (img_pt.AS_w_from_gnss_cam) {
-            cam_references.push_back(
-                Eigen::Isometry3d((parser.S_from_AS()->matrix() * scale_mat_reference *
-                                   img_pt.AS_w_from_gnss_cam->matrix())));
+            const Eigen::Isometry3d world_from_cam_reference =
+                Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
+                                   img_pt.AS_w_from_gnss_cam->matrix()));
+            references_world_from_cam.push_back(world_from_cam_reference);
+            frame.emplace(id, img_undistorted, K, gtsam::Pose3(world_from_cam_reference.matrix()));
         } else {
+            frame.emplace(id, img_undistorted, K);
             std::clog << "Warning: no reference data at img_pt " << idx << std::endl;
         }
 
+        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
+            frontend.get_keypoints_and_descriptors(img_undistorted);
+        KeypointsCV kpts;
+        for (const cv::KeyPoint &kpt : kpts_descs.first) {
+            kpts.push_back(kpt.pt);
+        }
+        std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl;
+        frame->add_keypoints(kpts);
+        frame->assign_descriptors(kpts_descs.second);
+
+        frames.push_back(*frame);
+
         id++;
     }
 
-    geometry::viz_scene(cam_references, std::vector<Eigen::Vector3d>(), true, true, "References");
+    robot::geometry::viz_scene(references_world_from_cam, std::vector<Eigen::Vector3d>(), true,
+                               true, "References");
 
     // populate feature_tracks and lmk_id_map
-    // TODO: exhaustively get matches
+    // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute
+    // matches
     FeatureTracks feature_tracks;
     FrameLandmarkIdMap lmk_id_map;
     LandmarkId lmk_id = 0;
-    for (size_t i = 0; i < indices.size() - 1; i++) {
-        std::vector<cv::DMatch> matches =
-            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-        frontend.enforce_bijective_buffer_matches(matches);
-        for (const cv::DMatch match : matches) {
-            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.`queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-
-            auto key = std::make_pair(frames[i].id_, kpt_cam0);
-            if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                auto id = lmk_id_map.at(key);
-                feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-                feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-            } else {
-                FeatureTrack feature_track(i, kpt_cam0);
-                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                feature_tracks.emplace(lmk_id, feature_track);
-                lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-                lmk_id++;
+    bool exhaustive = false;
+    if (exhaustive) {
+        for (size_t i = 0; i < indices.size() - 1; i++) {
+            for (size_t j = i + 1; i < indices.size(); j++) {
+                std::vector<cv::DMatch> matches =
+                    frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors());
+                // DIAL TO MESS WITH
+                frontend.enforce_bijective_buffer_matches(matches);
+                for (const cv::DMatch match : matches) {
+                    const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx];
+
+                    auto key = std::make_pair(frames[i].id_, kpt_cam0);
+                    if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                        auto id = lmk_id_map.at(key);
+                        feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
+                        feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1);
+                    } else {
+                        FeatureTrack feature_track(i, kpt_cam0);
+                        feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1);
+                        feature_tracks.emplace(lmk_id, feature_track);
+                        lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
+                        lmk_id++;
+                    }
+                }
+            }
+        }
+    } else {  // successive only
+        for (size_t i = 0; i < indices.size() - 1; i++) {
+            std::vector<cv::DMatch> matches =
+                frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.enforce_bijective_buffer_matches(matches);
+            for (const cv::DMatch match : matches) {
+                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+                auto key = std::make_pair(frames[i].id_, kpt_cam0);
+                if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                    auto id = lmk_id_map.at(key);
+                    feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
+                    feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                } else {
+                    FeatureTrack feature_track(i, kpt_cam0);
+                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                    feature_tracks.emplace(lmk_id, feature_track);
+                    lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
+                    lmk_id++;
+                }
             }
         }
     }
 
     // triangulate all of the points
-
-    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;
+    std::unordered_map<LandmarkId, gtsam::Point3>
+        lmk_triangulated_map;  // points are p_kpt_in_world
     std::vector<gtsam::Point3> triangulated_lmks;
     for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
-        std::vector<gtsam::Pose3> feat_cam_poses;
+        std::vector<gtsam::Pose3> world_from_cams;
         std::vector<gtsam::Point2> feat_kpts;
-        LandmarkId lmk_id = lmk_feat.first;
-        FeatureTrack feature_track = lmk_feat.second;
-        gtsam::Point3 p_wrld_kpt(0, 0, 0);
+        const LandmarkId lmk_id = lmk_feat.first;
+        const FeatureTrack feature_track = lmk_feat.second;
+        gtsam::Point3 p_kpt_in_world(0, 0, 0);
         for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            feat_cam_poses.push_back(poses_cam_init[feat_track.first]);
+            world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]);
             feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
         }
         bool triangulate_success =
-            detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
+            detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world);
         if (triangulate_success) {
-            lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt);
-            triangulated_lmks.push_back(p_wrld_kpt);
+            lmk_triangulated_map.emplace(lmk_id, p_kpt_in_world);
+            triangulated_lmks.push_back(p_kpt_in_world);
         } else {
             continue;
         }
     }
-    // filter points
-    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
-                        "Unfiltered points");
-    const gtsam::Point3 variance_pts = detail::get_variance(triangulated_lmks);
+    // robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
+    //                            "Unfiltered, triangulated points");
+    std::cout << "heart beat 1" << std::endl;
+
+    // filter points via variance
+    // TODO: filter based on quality and/or quantity of matches
+    std::cout << "triangulated_lmks.size(): " << triangulated_lmks.size() << std::endl;
+    const gtsam::Point3 variance_pts = detail_sfm::get_variance(triangulated_lmks);
+    std::cout << "heart beat 2" << std::endl;
     const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
-    const gtsam::Point3 mean_pts = detail::averagePoints(triangulated_lmks);
+    const gtsam::Point3 mean_pts = detail_sfm::averagePoints(triangulated_lmks);
     std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map_filtered;
     std::vector<gtsam::Point3> filtered_points;
     std::cout << "original var " << variance_pts << std::endl;
@@ -352,169 +412,191 @@ int main(int argc, const char **argv) {
             filtered_points.push_back(lmk_id_pt.second);
         }
     }
-    std::cout << "filtered variance " << detail::get_variance(filtered_points) << std::endl;
-    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
-                        "Unfiltered points");
+    std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl;
+    // robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
+    //                            "Unfiltered points");
+
+    // ############# BACKEND ###############
 
     // add filtered points to graph
     std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
     std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
     std::vector<gtsam::Symbol> symbols_pose;
     std::vector<gtsam::Symbol> symbols_landmarks;
-    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map_filtered) {
-        LandmarkId lmk_id = lmk_id_pt.first;
-        const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second;
+    for (const auto &[lmk_id, p_kpt_in_world] : lmk_triangulated_map_filtered) {
+        // LandmarkId lmk_id = lmk_id_pt.first;
+        // const gtsam::Point3 p_kpt_in_world = lmk_id_pt.second;
         FeatureTrack feature_track = feature_tracks.at(lmk_id);
         const gtsam::Symbol symbol_lmk('l', lmk_id);
-        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
+        for (const auto &[frame_id, obs] : feature_track.obs_) {
+            initial_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world);
             symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                  std::vector<gtsam::Point3>{p_wrld_kpt});
+                                                  std::vector<gtsam::Point3>{p_kpt_in_world});
             graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
-                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
+                gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id),
+                symbol_lmk, K);
         }
     }
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0),
-                                                            poses_cam_init.at(0), prior_pose_noise);
-    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), poses_cam_init.at(0));
+
+    // add gps factors
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+        gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise);
+    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0));
     symbols_pose.push_back(gtsam::Symbol('x', 0));
-    symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0),
-                                      std::vector<gtsam::Pose3>{poses_cam_init.at(0)});
-    for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : poses_cam_init) {
-        if (cam_id_pose.first == 0) {
+    symbols_poses_values_iter.emplace(
+        gtsam::Symbol('x', 0), std::vector<gtsam::Pose3>{id_to_initial_world_from_cam.at(0)});
+    for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) {
+        if (frame_id == 0) {
             continue;
         }
-        const gtsam::Symbol key_cam('x', cam_id_pose.first);
+        const gtsam::Symbol key_cam('x', frame_id);
         symbols_pose.push_back(key_cam);
-        gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
-        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
-        initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
-        symbols_poses_values_iter.emplace(key_cam, std::vector<gtsam::Pose3>{cam_id_pose.second});
-        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
+        gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation();
+        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_cam_in_world, gps_noise);
+        initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam);
+        symbols_poses_values_iter.emplace(key_cam,
+                                          std::vector<gtsam::Pose3>{estimate_world_from_cam});
+        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world));
     }
 
-    // detail::graph_values(initial_estimate_, "Confirmation", symbols_pose,
+    // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose,
     // symbols_landmarks);
-
-    // do local optimizations and add to iter cache
-    // const int window = 2;
-    std::cout << "length of cam_poses: " << poses_cam_init.size() << std::endl;
-    for (const auto &id_pose : poses_cam_init) {
-        std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl;
-    }
-    for (size_t i = 0; i < indices.size() - 1; i++) {
-        std::cout << "Local optimization " << i << std::endl;
-        gtsam::Values local_estimate_;
-        gtsam::NonlinearFactorGraph local_graph_;
-
-        std::vector<gtsam::Symbol> poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)};
-        std::vector<gtsam::Symbol> lmks;
-
-        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-            gtsam::PriorFactor<gtsam::Pose3>(poses[0], poses_cam_init.at(i), prior_pose_noise));
-        std::cout << "fuck" << std::endl;
-        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-            gtsam::PriorFactor<gtsam::Pose3>(poses[1], poses_cam_init.at(i + 1), prior_pose_noise));
-        std::cout << "mega fuck" << std::endl;
-        local_estimate_.insert_or_assign(poses[0], poses_cam_init.at(i));
-        local_estimate_.insert_or_assign(poses[1], poses_cam_init.at(i + 1));
-
-        std::vector<cv::DMatch> matches =
-            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-        frontend.enforce_bijective_matches(matches);
-        std::vector<gtsam::Pose3> feat_cam_poses{poses_cam_init.at(i), poses_cam_init.at(i + 1)};
-
-        std::vector<Eigen::Isometry3d> viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()),
-                                                 Eigen::Isometry3d(feat_cam_poses[1].matrix())};
-        std::vector<Eigen::Vector3d> viz_lmks;
-        for (const cv::DMatch match : matches) {
-            std::vector<gtsam::Point2> feat_kpts;
-            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-            feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
-            feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
-
-            auto key = std::make_pair(frames[i].id_, kpt_cam0);
-            if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                auto id = lmk_id_map.at(key);
-                if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) {
-                    continue;
+    std::cout << "heart beat 3" << std::endl;
+
+    bool local_optimizations = true;
+    if (local_optimizations) {
+        // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number
+        // of matches
+        // do local optimizations and add to iter cache
+        // const int window = 2;
+        std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl;
+        for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) {
+            std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl;
+        }
+        for (size_t i = 0; i < indices.size() - 1; i++) {
+            std::cout << "Local optimization " << i << std::endl;
+            gtsam::Values local_estimate_;
+            gtsam::NonlinearFactorGraph local_graph_;
+
+            std::vector<gtsam::Symbol> symbols_poses{gtsam::Symbol('x', i),
+                                                     gtsam::Symbol('x', i + 1)};
+            std::vector<gtsam::Symbol> symbols_lmks;
+
+            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+                gtsam::PriorFactor<gtsam::Pose3>(
+                    symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise));
+            std::cout << "fuck" << std::endl;
+            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+                gtsam::PriorFactor<gtsam::Pose3>(
+                    symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise));
+            std::cout << "mega fuck" << std::endl;
+            local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i));
+            local_estimate_.insert_or_assign(symbols_poses[1],
+                                             id_to_initial_world_from_cam.at(i + 1));
+
+            std::vector<cv::DMatch> matches =
+                frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            // DIAL TO MESS WITH
+            frontend.enforce_bijective_matches(matches);
+            std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
+                                                      id_to_initial_world_from_cam.at(i + 1)};
+
+            std::vector<Eigen::Isometry3d> viz_world_from_cams{
+                Eigen::Isometry3d(world_from_cams[0].matrix()),
+                Eigen::Isometry3d(world_from_cams[1].matrix())};
+            std::vector<Eigen::Vector3d> viz_lmks;
+            for (const cv::DMatch match : matches) {
+                std::vector<gtsam::Point2> feat_kpts;
+                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+                feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
+                feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
+
+                const std::pair<FrameId, KeypointCV> key_lmk_id =
+                    std::make_pair(frames[i].id_, kpt_cam0);
+                if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) {
+                    LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id);
+                    if (lmk_triangulated_map_filtered.find(match_lmk_id) ==
+                        lmk_triangulated_map_filtered.end()) {
+                        continue;
+                    }
+                    std::cout << "good" << std::endl;
+                    // do nothing
+                    // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+                    // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+                } else {
+                    std::cerr << "ERROR: this shouldn't happen right?" << std::endl;
+                    FeatureTrack feature_track(frames[i].id_, kpt_cam0);
+                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                    feature_tracks.emplace(lmk_id, feature_track);
+                    lmk_id_map.emplace(key_lmk_id, lmk_id);
+                    lmk_id++;
+                }
+                std::cout << "oog" << std::endl;
+                const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id));
+                // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
+                //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
+                //     std::cerr << "UH OH" << std::endl;
+                // } else {
+                //     std::cout << "cool" << std::endl;
+                // }
+                symbols_lmks.push_back(symbol_lmk);
+                local_graph_
+                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                        feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K);
+                local_graph_
+                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                        feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K);
+
+                gtsam::Point3 p_kpt_in_world;
+                bool triangulate_success =
+                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world);
+                if (triangulate_success) {
+                    local_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world);
+                    viz_lmks.push_back(p_kpt_in_world);
+                }
+                if (symbols_landmarks_values_iter.find(symbol_lmk) !=
+                    symbols_landmarks_values_iter.end()) {
+                    symbols_landmarks_values_iter[symbol_lmk].push_back(p_kpt_in_world);
+                } else {
+                    symbols_landmarks_values_iter.emplace(
+                        symbol_lmk, std::vector<gtsam::Point3>{p_kpt_in_world});
                 }
-                std::cout << "good" << std::endl;
-                // do nothing
-                // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-                // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-            } else {
-                std::cerr << "this shouldn't happen right?" << std::endl;
-                FeatureTrack feature_track(frames[i].id_, kpt_cam0);
-                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                feature_tracks.emplace(lmk_id, feature_track);
-                lmk_id_map.emplace(key, lmk_id);
-                lmk_id++;
             }
-            std::cout << "oog" << std::endl;
-            const gtsam::Symbol symbol_lmk =
-                gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0)));
-            // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
-            //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
-            //     std::cerr << "UH OH" << std::endl;
-            // } else {
-            //     std::cout << "cool" << std::endl;
-            // }
-            lmks.push_back(symbol_lmk);
-            local_graph_
-                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                    feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K);
-            local_graph_
-                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                    feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K);
-
-            gtsam::Point3 p_wrld_kpt;
-            bool triangulate_success =
-                detail::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-            if (triangulate_success) {
-                local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-                viz_lmks.push_back(p_wrld_kpt);
+            std::cout << "setup complete!" << std::endl;
+            // robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, true, true,
+            //                     "Local Optimization " + std::to_string(i));
+
+            const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
+                local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
+
+            for (const gtsam::Symbol &symbol_pose : symbols_poses) {
+                const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
+                symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
             }
-            if (symbols_landmarks_values_iter.find(symbol_lmk) !=
-                symbols_landmarks_values_iter.end()) {
-                symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
-            } else {
-                symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                      std::vector<gtsam::Point3>{p_wrld_kpt});
+            for (const gtsam::Symbol &symbol_lmk : symbols_lmks) {
+                const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
+                symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
             }
         }
-        std::cout << "setup complete!" << std::endl;
-        // geometry::viz_scene(viz_poses, viz_lmks, true, true,
-        //                     "Local Optimization " + std::to_string(i));
 
-        const gtsam::Values symbols_result_local = detail::optimize_graph(
-            local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
+        std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
 
-        for (const gtsam::Symbol &symbol_pose : poses) {
-            const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
-            symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
+        for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose :
+             symbols_poses_values_iter) {
+            initial_estimate_.insert_or_assign(sym_pose.first,
+                                               detail_sfm::averagePoses(sym_pose.second));
         }
-        for (const gtsam::Symbol &symbol_lmk : lmks) {
-            const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
-            symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
+        for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
+             symbols_landmarks_values_iter) {
+            initial_estimate_.insert_or_assign(sym_lmk.first,
+                                               detail_sfm::averagePoints(sym_lmk.second));
         }
     }
 
-    std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
-
-    for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose : symbols_poses_values_iter) {
-        initial_estimate_.insert_or_assign(sym_pose.first, detail::averagePoses(sym_pose.second));
-    }
-    for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
-         symbols_landmarks_values_iter) {
-        initial_estimate_.insert_or_assign(sym_lmk.first, detail::averagePoints(sym_lmk.second));
-    }
-
     // do global optimization
     const gtsam::Values result =
-        detail::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
+        detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
     std::cout << "about to visualize result" << std::endl;
-    detail::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+    detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
 }
\ No newline at end of file

From ecbba4131619e36d1f2f775f6cda3e15b24ff88d Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 27 Jun 2025 16:21:56 -0400
Subject: [PATCH 24/45] pipeline working. unsure about quality of results.
 unoptimized point cloud seemed decent

---
 experimental/learn_descriptors/frontend.hh    |   4 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  | 129 +++++++++++-------
 experimental/learn_descriptors/sfm_mvp.cc     |  11 +-
 .../structure_from_motion.cc                  |   2 +-
 .../symphony_lake_parser_test.cc              |   8 +-
 visualization/opencv/opencv_viz.cc            |  63 ++++++++-
 visualization/opencv/opencv_viz.hh            |  26 +++-
 visualization/opencv/opencv_viz_test.cc       |  76 ++++++++---
 8 files changed, 226 insertions(+), 93 deletions(-)

diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index 70baf55bd..2b8c7b9f7 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -26,13 +26,13 @@ class Frontend {
     static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
     static void enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches);
     static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
-                               cv::Mat img_keypoints_out) {
+                               cv::Mat &img_keypoints_out) {
         cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
                           cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
     }
     static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
                              const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
-                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
+                             std::vector<cv::DMatch> matches, cv::Mat &img_matches_out) {
         cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
     }
 
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index 649d30479..6c5ceb2a3 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -39,7 +39,7 @@ namespace detail_sfm {
 
 bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
                          const std::vector<gtsam::Point2> &cam_obs, gtsam::Cal3_S2::shared_ptr K,
-                         gtsam::Point3 &out_p_world_landmark) {
+                         gtsam::Point3 &out_p_world_landmark, const bool verbose = true) {
     bool valid = true;
     if (cam_poses.size() >= 2) {
         try {
@@ -62,8 +62,9 @@ bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
         } catch (const gtsam::TriangulationCheiralityException &e) {
             // Handle the exception gracefully by logging and retaining the previous
             // estimate.
-            std::cerr << "attamp_triangulation failed. Likely cheirality exception: " << e.what()
-                      << ". Keeping initial landmark estimate." << std::endl;
+            if (verbose)
+                std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
+                          << e.what() << ". Keeping initial landmark estimate." << std::endl;
         }
     } else {
         valid = false;
@@ -85,7 +86,8 @@ static void graph_values(const gtsam::Values &values, const std::string &window_
         }
         final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
     }
-    robot::geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
+    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true,
+                               window_name);
 }
 
 static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
@@ -196,14 +198,14 @@ int main(int argc, const char **argv) {
     FourSeasonsParser parser(path_data, path_calibration);
 
     std::cout << "incremental_sfm_mvp!" << std::endl;
-    // const std::vector<int> indices{650, 675, 140, 160, 180};
-    const std::vector<int> indices = []() {
-        std::vector<int> tmp;
-        for (int i = 660; i < 700; i += 3) {
-            tmp.push_back(i);
-        }
-        return tmp;
-    }();
+    const std::vector<int> indices{380, 387, 393};
+    // const std::vector<int> indices = []() {
+    //     std::vector<int> tmp;
+    //     for (int i = 660; i < 681; i += 10) {
+    //         tmp.push_back(i);
+    //     }
+    //     return tmp;
+    // }();
     // FourSeasonsParser parser()
     // const size_t img_width = img_pt_first.width, img_height =
     // img_pt_first.height;
@@ -238,13 +240,17 @@ int main(int argc, const char **argv) {
     std::vector<Frame> frames;
     FrameId id = 0;
     std::unordered_map<FrameId, gtsam::Pose3>
-        id_to_initial_world_from_cam;  // these are for initial guesses
+        id_to_initial_world_from_cam;      // these are for initial guesses
+    Eigen::Vector3d t_first_cam_in_world;  // to create our own world
     std::vector<Eigen::Isometry3d> references_world_from_cam;
+    std::vector<robot::geometry::VizPose> viz_references_world_from_cam;
     Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
     scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
         parser.gnss_scale();
+    std::vector<std::vector<cv::KeyPoint>> kpt_list;
     for (const int &idx : indices) {
         const ImagePoint img_pt = parser.get_image_point(idx);
+        std::cout << img_pt.to_string() << std::endl;
         //
         Eigen::Isometry3d world_from_cam;
         if (id == 0) {
@@ -252,6 +258,7 @@ int main(int argc, const char **argv) {
             world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
                                                 img_pt.AS_w_from_gnss_cam->matrix())
                                                    .matrix());
+            t_first_cam_in_world = world_from_cam.translation();
         } else if (img_pt.gps_gcs) {
             const Eigen::Vector3d gps_gcs(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
@@ -266,6 +273,7 @@ int main(int argc, const char **argv) {
 
             // Eigen::
         }
+        world_from_cam.translation() = world_from_cam.translation() - t_first_cam_in_world;
         id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix());
 
         // populate frame
@@ -276,15 +284,19 @@ int main(int argc, const char **argv) {
 
         // std::stringstream ss;
         // ss << "image " << idx;
-        // cv::imshow(ss.str(), img);
+        // cv::imshow(ss.str(), img_undistorted);
         // cv::waitKey(0);
         // cv::destroyWindow(ss.str());
 
         std::optional<Frame> frame;
         if (img_pt.AS_w_from_gnss_cam) {
-            const Eigen::Isometry3d world_from_cam_reference =
+            Eigen::Isometry3d world_from_cam_reference =
                 Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
                                    img_pt.AS_w_from_gnss_cam->matrix()));
+            world_from_cam_reference.translation() =
+                world_from_cam_reference.translation() - t_first_cam_in_world;
+            viz_references_world_from_cam.emplace_back(world_from_cam_reference,
+                                                       "img " + std::to_string(idx));
             references_world_from_cam.push_back(world_from_cam_reference);
             frame.emplace(id, img_undistorted, K, gtsam::Pose3(world_from_cam_reference.matrix()));
         } else {
@@ -294,11 +306,12 @@ int main(int argc, const char **argv) {
 
         std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
             frontend.get_keypoints_and_descriptors(img_undistorted);
+        kpt_list.push_back(kpts_descs.first);
         KeypointsCV kpts;
         for (const cv::KeyPoint &kpt : kpts_descs.first) {
             kpts.push_back(kpt.pt);
         }
-        std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl;
+        // std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl;
         frame->add_keypoints(kpts);
         frame->assign_descriptors(kpts_descs.second);
 
@@ -307,8 +320,9 @@ int main(int argc, const char **argv) {
         id++;
     }
 
-    robot::geometry::viz_scene(references_world_from_cam, std::vector<Eigen::Vector3d>(), true,
-                               true, "References");
+    // robot::geometry::viz_scene(viz_references_world_from_cam,
+    //                            std::vector<robot::geometry::VizPoint>(), true, true,
+    //                            "References");
 
     // populate feature_tracks and lmk_id_map
     // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute
@@ -316,14 +330,26 @@ int main(int argc, const char **argv) {
     FeatureTracks feature_tracks;
     FrameLandmarkIdMap lmk_id_map;
     LandmarkId lmk_id = 0;
-    bool exhaustive = false;
+    bool exhaustive = true;
     if (exhaustive) {
         for (size_t i = 0; i < indices.size() - 1; i++) {
-            for (size_t j = i + 1; i < indices.size(); j++) {
+            std::cout << "i: " << i << std::endl;
+            for (size_t j = i + 1; j < indices.size(); j++) {
+                std::cout << "j: " << j << std::endl;
                 std::vector<cv::DMatch> matches =
                     frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors());
                 // DIAL TO MESS WITH
                 frontend.enforce_bijective_buffer_matches(matches);
+                // std::cout << "matching heartbeat" << std::endl;
+                // cv::Mat viz_matches;
+                // Frontend::draw_matches(parser.load_image(i), kpt_list[i], parser.load_image(j),
+                //                        kpt_list[j], matches, viz_matches);
+                // std::cout << "image height: " << viz_matches.rows << " width: " <<
+                // viz_matches.cols
+                //           << std::endl;
+                // cv::imshow("viz_matches", viz_matches);
+                // cv::waitKey(0);
+                std::cout << "still alive with: " << matches.size() << " matches" << std::endl;
                 for (const cv::DMatch match : matches) {
                     const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
                     const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx];
@@ -343,6 +369,7 @@ int main(int argc, const char **argv) {
                 }
             }
         }
+        std::cout << "done processing matches" << std::endl;
     } else {  // successive only
         for (size_t i = 0; i < indices.size() - 1; i++) {
             std::vector<cv::DMatch> matches =
@@ -367,33 +394,36 @@ int main(int argc, const char **argv) {
             }
         }
     }
+    std::cout << "heartbeat" << std::endl;
 
     // triangulate all of the points
-    std::unordered_map<LandmarkId, gtsam::Point3>
-        lmk_triangulated_map;  // points are p_kpt_in_world
+    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;  // points are kpt_in_world
+    std::vector<robot::geometry::VizPoint> triangulated_lmks_viz;
     std::vector<gtsam::Point3> triangulated_lmks;
     for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
         std::vector<gtsam::Pose3> world_from_cams;
         std::vector<gtsam::Point2> feat_kpts;
         const LandmarkId lmk_id = lmk_feat.first;
         const FeatureTrack feature_track = lmk_feat.second;
-        gtsam::Point3 p_kpt_in_world(0, 0, 0);
+        gtsam::Point3 kpt_in_world(0, 0, 0);
         for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
             world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]);
             feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
         }
         bool triangulate_success =
-            detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world);
+            detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false);
         if (triangulate_success) {
-            lmk_triangulated_map.emplace(lmk_id, p_kpt_in_world);
-            triangulated_lmks.push_back(p_kpt_in_world);
+            lmk_triangulated_map.emplace(lmk_id, kpt_in_world);
+            triangulated_lmks.push_back(kpt_in_world);
+            triangulated_lmks_viz.emplace_back(kpt_in_world, "lmk " + std::to_string(lmk_id));
         } else {
             continue;
         }
     }
-    // robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
-    //                            "Unfiltered, triangulated points");
-    std::cout << "heart beat 1" << std::endl;
+    std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl;
+    robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
+                               cv::viz::Color::black(), true, true,
+                               "Unfiltered, triangulated points");
 
     // filter points via variance
     // TODO: filter based on quality and/or quantity of matches
@@ -413,8 +443,9 @@ int main(int argc, const char **argv) {
         }
     }
     std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl;
-    // robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
-    //                            "Unfiltered points");
+    robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
+                               cv::viz::Color::black(), true, true,
+                               "Filtered, triangulated points");
 
     // ############# BACKEND ###############
 
@@ -423,15 +454,15 @@ int main(int argc, const char **argv) {
     std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
     std::vector<gtsam::Symbol> symbols_pose;
     std::vector<gtsam::Symbol> symbols_landmarks;
-    for (const auto &[lmk_id, p_kpt_in_world] : lmk_triangulated_map_filtered) {
+    for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) {
         // LandmarkId lmk_id = lmk_id_pt.first;
-        // const gtsam::Point3 p_kpt_in_world = lmk_id_pt.second;
+        // const gtsam::Point3 kpt_in_world = lmk_id_pt.second;
         FeatureTrack feature_track = feature_tracks.at(lmk_id);
         const gtsam::Symbol symbol_lmk('l', lmk_id);
         for (const auto &[frame_id, obs] : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world);
+            initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
             symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                  std::vector<gtsam::Point3>{p_kpt_in_world});
+                                                  std::vector<gtsam::Point3>{kpt_in_world});
             graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
                 gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id),
                 symbol_lmk, K);
@@ -463,7 +494,7 @@ int main(int argc, const char **argv) {
     // symbols_landmarks);
     std::cout << "heart beat 3" << std::endl;
 
-    bool local_optimizations = true;
+    bool local_optimizations = false;
     if (local_optimizations) {
         // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number
         // of matches
@@ -501,10 +532,10 @@ int main(int argc, const char **argv) {
             std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
                                                       id_to_initial_world_from_cam.at(i + 1)};
 
-            std::vector<Eigen::Isometry3d> viz_world_from_cams{
-                Eigen::Isometry3d(world_from_cams[0].matrix()),
-                Eigen::Isometry3d(world_from_cams[1].matrix())};
-            std::vector<Eigen::Vector3d> viz_lmks;
+            std::vector<robot::geometry::VizPose> viz_world_from_cams{
+                {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"},
+                {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}};
+            std::vector<robot::geometry::VizPoint> viz_lmks;
             for (const cv::DMatch match : matches) {
                 std::vector<gtsam::Point2> feat_kpts;
                 const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
@@ -548,24 +579,24 @@ int main(int argc, const char **argv) {
                     .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
                         feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K);
 
-                gtsam::Point3 p_kpt_in_world;
+                gtsam::Point3 kpt_in_world;
                 bool triangulate_success =
-                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, p_kpt_in_world);
+                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world);
                 if (triangulate_success) {
-                    local_estimate_.insert_or_assign(symbol_lmk, p_kpt_in_world);
-                    viz_lmks.push_back(p_kpt_in_world);
+                    local_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
+                    viz_lmks.emplace_back(kpt_in_world);
                 }
                 if (symbols_landmarks_values_iter.find(symbol_lmk) !=
                     symbols_landmarks_values_iter.end()) {
-                    symbols_landmarks_values_iter[symbol_lmk].push_back(p_kpt_in_world);
+                    symbols_landmarks_values_iter[symbol_lmk].push_back(kpt_in_world);
                 } else {
-                    symbols_landmarks_values_iter.emplace(
-                        symbol_lmk, std::vector<gtsam::Point3>{p_kpt_in_world});
+                    symbols_landmarks_values_iter.emplace(symbol_lmk,
+                                                          std::vector<gtsam::Point3>{kpt_in_world});
                 }
             }
             std::cout << "setup complete!" << std::endl;
-            // robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, true, true,
-            //                     "Local Optimization " + std::to_string(i));
+            robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::black(), true,
+                                       true, "Local Optimization " + std::to_string(i));
 
             const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
                 local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index 597941c7e..21966cf4a 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -91,7 +91,8 @@ class SFMMvpHelper {
             }
             final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
         }
-        geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
+        geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true,
+                            window_name);
     }
 
     static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
@@ -497,8 +498,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         }
     }
     // filter points
-    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks, true, true,
-                        "Unfiltered points");
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks,
+                        cv::viz::Color::black(), true, true, "Unfiltered points");
     const gtsam::Point3 variance_pts = SFMMvpHelper::get_variance(triangulated_lmks);
     const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
     const gtsam::Point3 mean_pts = SFMMvpHelper::averagePoints(triangulated_lmks);
@@ -513,8 +514,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         }
     }
     std::cout << "filtered variance " << SFMMvpHelper::get_variance(filtered_points) << std::endl;
-    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, true, true,
-                        "Unfiltered points");
+    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, cv::viz::Color::black(),
+                        true, true, "Unfiltered points");
 
     // add filtered points to graph
     std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index db5cedd86..02dc28a2d 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -150,7 +150,7 @@ void StructureFromMotion::graph_values(const gtsam::Values &values,
     //     }
     //     final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
     // }
-    geometry::viz_scene(final_poses, final_lmks, true, true, window_name);
+    geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, window_name);
 }
 
 void StructureFromMotion::solve_structure(
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index 91a275b6f..a8c309c4e 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -89,8 +89,8 @@ TEST(SymphonyLakeParserTest, test_cam_frames) {
         cam_frames.push_back(T_world_camidx);
     }
 
-    // geometry::viz_scene(cam_frames, std::vector<Eigen::Vector3d>(), true, true,
-    // "test_cam_frames");
+    // geometry::viz_scene(cam_frames, std::vector<Eigen::Vector3d>(), cv::viz::Color::black(),
+    // true, true, "test_cam_frames");
 }
 
 TEST(SymphonyLakeParserTest, test_gps_frames) {
@@ -121,7 +121,7 @@ TEST(SymphonyLakeParserTest, test_gps_frames) {
         gps_frames.push_back(T_world_gpsidx);
     }
 
-    // geometry::viz_scene(gps_frames, std::vector<Eigen::Vector3d>(), true, true,
-    // "test_gps_frames");
+    // geometry::viz_scene(gps_frames, std::vector<Eigen::Vector3d>(), cv::viz::Color::black(),
+    // true, true, "test_gps_frames");
 }
 }  // namespace robot::experimental::learn_descriptors
diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc
index 1e1bc914b..9af1f14b9 100644
--- a/visualization/opencv/opencv_viz.cc
+++ b/visualization/opencv/opencv_viz.cc
@@ -24,25 +24,68 @@ cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     return axis * theta;
 }
 
-void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
-               const std::vector<Eigen::Vector3d> &points_world, const bool show_grid,
-               const bool show_origin, const std::string &window_name) {
+void viz_scene(const std::vector<Eigen::Isometry3d> &world_from_poses,
+               const std::vector<Eigen::Vector3d> &points_in_world,
+               const cv::viz ::Color color_background, const bool show_grid, const bool show_origin,
+               const std::string &window_name, const double text_scale) {
+    std::vector<VizPose> viz_poses;
+    std::vector<VizPoint> viz_points;
+    for (const Eigen::Isometry3d &world_from_pose : world_from_poses) {
+        viz_poses.emplace_back(world_from_pose);
+    }
+    for (const Eigen::Vector3d &t_point_in_world : points_in_world) {
+        viz_points.emplace_back(t_point_in_world);
+    }
+    viz_scene(viz_poses, viz_points, color_background, show_grid, show_origin, window_name,
+              text_scale);
+}
+
+void viz_scene(const std::vector<VizPose> &world_from_poses,
+               const std::vector<VizPoint> &points_in_world, const cv::viz ::Color color_background,
+               const bool show_grid, const bool show_origin, const std::string &window_name,
+               const double text_scale) {
     cv::viz::Viz3d window(window_name);
 
+    window.setBackgroundColor(color_background);
+
+    constexpr bool ALWAYS_FACE_CAMERA = true;
     constexpr double pose_size = .5;
-    for (unsigned int i = 0; i < poses_world.size(); i++) {
-        cv::Affine3d cv_pose(eigen_mat_to_cv(Eigen::Matrix4d(poses_world[i].matrix())));
+    for (unsigned int i = 0; i < world_from_poses.size(); i++) {
+        cv::Affine3d cv_pose(
+            eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[i].world_from_pose.matrix())));
         window.showWidget("rigid_transform_" + std::to_string(i),
                           cv::viz::WCoordinateSystem(pose_size), cv_pose);
+        if (world_from_poses[i].label) {
+            window.showWidget(
+                "text_pose_" + std::to_string(i),
+                cv::viz::WText3D(
+                    *(world_from_poses[i].label),
+                    cv::Point3d(eigen_vec_to_cv(
+                        world_from_poses[i].world_from_pose.translation() +
+                        Eigen::Vector3d(0, 0,
+                                        0.001))),  // small offset is for occasional rendering bug
+                    text_scale, ALWAYS_FACE_CAMERA));
+        }
     }
     constexpr double point_radius = 0.08;
     constexpr int sphere_res = 10;
     const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
-    for (unsigned int i = 0; i < points_world.size(); i++) {
-        const Eigen::Vector3d &point = points_world[i];
+    for (unsigned int i = 0; i < points_in_world.size(); i++) {
+        const Eigen::Vector3d &point = points_in_world[i].t_point_in_world;
         window.showWidget("point_" + std::to_string(i),
                           cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius,
                                            sphere_res, point_color));
+        if (points_in_world[i].label) {
+            window.showWidget(
+                "text_point_" + std::to_string(i),
+                cv::viz::WText3D(
+                    *(points_in_world[i].label),
+                    cv::Point3d(eigen_vec_to_cv(
+                        points_in_world[i].t_point_in_world +
+                        Eigen::Vector3d(0, 0,
+                                        0.001))),  // small offset is for occasional rendering bug
+                    text_scale, ALWAYS_FACE_CAMERA));
+        }
     }
 
     if (show_origin) {
@@ -64,6 +107,12 @@ void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
                                          cv::Point3d(0, 0, 1), cells, cell_sizes, color));
     }
 
+    std::cout << "viz heartbeat" << std::endl;
+    // window.setViewerPose(cv::Affine3d(
+    //     eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[0].world_from_pose.matrix()))));
+    window.setViewerPose(cv::Affine3d::Identity());
+    std::cout << "success" << std::endl;
+
     window.spin();
 }
 }  // namespace robot::geometry
diff --git a/visualization/opencv/opencv_viz.hh b/visualization/opencv/opencv_viz.hh
index d6474f58f..28dbc197b 100644
--- a/visualization/opencv/opencv_viz.hh
+++ b/visualization/opencv/opencv_viz.hh
@@ -1,12 +1,30 @@
 #pragma once
+#include <optional>
 #include <string>
 #include <vector>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "opencv2/viz.hpp"
 
 namespace robot::geometry {
-void viz_scene(const std::vector<Eigen::Isometry3d> &poses_world,
-               const std::vector<Eigen::Vector3d> &points_world, const bool show_grid = true,
-               const bool show_origin = true, const std::string &window_name = "Viz Window");
-}
\ No newline at end of file
+struct VizPose {
+    const Eigen::Isometry3d world_from_pose;
+    const std::optional<std::string> label;
+};
+
+struct VizPoint {
+    const Eigen::Vector3d t_point_in_world;
+    const std::optional<std::string> label;
+};
+void viz_scene(const std::vector<Eigen::Isometry3d> &world_from_poses,
+               const std::vector<Eigen::Vector3d> &points_in_world,
+               const cv::viz::Color color_background = cv::viz::Color::black(),
+               const bool show_grid = true, const bool show_origin = true,
+               const std::string &window_name = "Viz Window", const double text_scale = 0.1);
+void viz_scene(const std::vector<VizPose> &world_from_poses,
+               const std::vector<VizPoint> &points_in_world,
+               const cv::viz::Color color_background = cv::viz::Color::black(),
+               const bool show_grid = true, const bool show_origin = true,
+               const std::string &window_name = "Viz Window", const double text_scale = 0.1);
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/visualization/opencv/opencv_viz_test.cc b/visualization/opencv/opencv_viz_test.cc
index 8d10decb3..502f8e4f4 100644
--- a/visualization/opencv/opencv_viz_test.cc
+++ b/visualization/opencv/opencv_viz_test.cc
@@ -26,23 +26,23 @@ TEST(OpencvVizTest, demo) {
                                                   TEXT_SCALE, ALWAYS_FACE_CAMERA));
 
     constexpr bool FIXED_TEXT = false;
-    cv::Affine3d world_to_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0},  // rotation
-                                     {0.2, 0.4, 0.6}                        // translation
+    cv::Affine3d world_from_fixed_text(cv::Affine3d::Vec3{M_PI_2, 0.0, 0.0},  // rotation
+                                       {0.2, 0.4, 0.6}                        // translation
     );
 
     window.showWidget(
         "text_3d_fixed",
         cv::viz::WText3D("hello world fixed!", cv::Point3d(0.0, 0.0, 0.0), TEXT_SCALE, FIXED_TEXT),
-        world_to_fixed_text);
+        world_from_fixed_text);
     constexpr double COORD_SCALE = 0.2;
     window.showWidget("text_3d_fixed_frame", cv::viz::WCoordinateSystem(COORD_SCALE),
-                      world_to_fixed_text);
+                      world_from_fixed_text);
 
     constexpr double CIRCLE_RADIUS_M = 0.5;
-    cv::Affine3d world_to_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0},  // rotation
-                                 {0.0, 0.0, 1.5}                     // translation
+    cv::Affine3d world_from_circle(cv::Affine3d::Vec3{0.0, 0.0, 0.0},  // rotation
+                                   {0.0, 0.0, 1.5}                     // translation
     );
-    window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_to_circle);
+    window.showWidget("circle", cv::viz::WCircle(CIRCLE_RADIUS_M), world_from_circle);
 
     window.spin();
 }
@@ -59,22 +59,56 @@ TEST(OpencvVizTest, cube_test) {
     cube_W.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
     cube_W.push_back(Eigen::Vector3d(0, cube_size, cube_size));
 
-    std::vector<Eigen::Isometry3d> poses;
+    std::vector<Eigen::Isometry3d> world_from_cams;
 
-    Eigen::Matrix3d R_W_cam0(
+    Eigen::Matrix3d R_world_from_cam0(
         Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
         Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
-    Eigen::Isometry3d T_W_cam0;
-    T_W_cam0.translation() = Eigen::Vector3d(4, 0, 0);
-    T_W_cam0.linear() = R_W_cam0;
-    poses.push_back(T_W_cam0);
-
-    Eigen::Isometry3d T_W_cam1;
-    T_W_cam1.linear() =
-        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() * R_W_cam0;
-    T_W_cam1.translation() = Eigen::Vector3d(0, 4, 0);
-    poses.push_back(T_W_cam1);
-
-    viz_scene(poses, cube_W);
+    Eigen::Isometry3d world_from_cam0;
+    world_from_cam0.translation() = Eigen::Vector3d(4, 0, 0);
+    world_from_cam0.linear() = R_world_from_cam0;
+    world_from_cams.push_back(world_from_cam0);
+
+    Eigen::Isometry3d world_from_cam1;
+    world_from_cam1.linear() =
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        R_world_from_cam0;
+    world_from_cam1.translation() = Eigen::Vector3d(0, 4, 0);
+    world_from_cams.push_back(world_from_cam1);
+
+    viz_scene(world_from_cams, cube_W);
+}
+
+TEST(OpencvVizTest, cube_test_labeled) {
+    std::vector<VizPoint> cube_points_in_world;
+    float cube_size = 1.0f;
+    cube_points_in_world.emplace_back(Eigen::Vector3d(0, 0, 0), "cube_point_1");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, 0, 0), "cube_point_2");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, cube_size, 0), "cube_point_3");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(0, cube_size, 0), "cube_point_4");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(0, 0, cube_size), "cube_point_5");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, 0, cube_size), "cube_point_6");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(cube_size, cube_size, cube_size),
+                                      "cube_point_7");
+    cube_points_in_world.emplace_back(Eigen::Vector3d(0, cube_size, cube_size), "cube_point_8");
+
+    std::vector<VizPose> world_from_cams;
+
+    Eigen::Matrix3d R_world_from_cam0(
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    Eigen::Isometry3d world_from_cam0;
+    world_from_cam0.translation() = Eigen::Vector3d(4, 0, 0);
+    world_from_cam0.linear() = R_world_from_cam0;
+    world_from_cams.emplace_back(world_from_cam0, "world_from_cam0");
+
+    Eigen::Isometry3d world_from_cam1;
+    world_from_cam1.linear() =
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        R_world_from_cam0;
+    world_from_cam1.translation() = Eigen::Vector3d(0, 4, 0);
+    world_from_cams.emplace_back(world_from_cam1, "world_from_cam1");
+
+    viz_scene(world_from_cams, cube_points_in_world);
 }
 }  // namespace robot::geometry
\ No newline at end of file

From bc8da627ab471871361a96def01da1f4991c547c Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Mon, 30 Jun 2025 17:02:59 -0400
Subject: [PATCH 25/45] WIP, sfm works with a few imgaes

---
 common/geometry/BUILD                         |  10 +
 common/geometry/camera.cc                     |   7 +-
 common/geometry/camera.hh                     |   6 +-
 common/geometry/camera_test.cc                |   2 +-
 common/geometry/trajectory_evaluation.hh      |  17 ++
 experimental/learn_descriptors/BUILD          |   7 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  | 204 ++++++++++++------
 experimental/learn_descriptors/map.hh         |   4 +
 experimental/learn_descriptors/map.proto      |  15 ++
 ....cc => spatial_test_scene_cube_example.cc} |   2 +-
 visualization/opencv/opencv_viz.cc            |   7 +-
 11 files changed, 204 insertions(+), 77 deletions(-)
 create mode 100644 common/geometry/trajectory_evaluation.hh
 create mode 100644 experimental/learn_descriptors/map.hh
 create mode 100644 experimental/learn_descriptors/map.proto
 rename experimental/learn_descriptors/{spatial_test_scene_cube_test.cc => spatial_test_scene_cube_example.cc} (94%)

diff --git a/common/geometry/BUILD b/common/geometry/BUILD
index 606304189..2ef17fb83 100644
--- a/common/geometry/BUILD
+++ b/common/geometry/BUILD
@@ -64,4 +64,14 @@ cc_test(
     "//visualization/opencv:opencv_viz",
     ":translate_types"
   ]
+)
+
+cc_library(
+  name = "trajectory_evaluation",
+  hdrs = ["trajectory_evaluation.hh"],
+  srcs = ["trajectory_evaluation.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@eigen",     
+  ]
 )
\ No newline at end of file
diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
index 4994914c9..7e5a5fb3a 100644
--- a/common/geometry/camera.cc
+++ b/common/geometry/camera.cc
@@ -24,9 +24,10 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel
     return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.);
 }
 
-Eigen::Isometry3d estimate_c0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
-                                        const std::vector<cv::KeyPoint> &kpts1,
-                                        const std::vector<cv::DMatch> &matches, const cv::Mat &K) {
+Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                          const std::vector<cv::KeyPoint> &kpts1,
+                                          const std::vector<cv::DMatch> &matches,
+                                          const cv::Mat &K) {
     Eigen::Isometry3d result;
     std::vector<cv::Point2f> pts1;
     std::vector<cv::Point2f> pts2;
diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh
index 2feea1954..9164e4129 100644
--- a/common/geometry/camera.hh
+++ b/common/geometry/camera.hh
@@ -21,7 +21,7 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel
 Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
                           const double depth);
 
-Eigen::Isometry3d estimate_c0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
-                                        const std::vector<cv::KeyPoint> &kpts1,
-                                        const std::vector<cv::DMatch> &matches, const cv::Mat &K);
+Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                          const std::vector<cv::KeyPoint> &kpts1,
+                                          const std::vector<cv::DMatch> &matches, const cv::Mat &K);
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc
index ca2594abf..19f5389cd 100644
--- a/common/geometry/camera_test.cc
+++ b/common/geometry/camera_test.cc
@@ -117,7 +117,7 @@ TEST(CameraTest, test_estimate_pose) {
         }
     }
 
-    Eigen::Isometry3d cam0_from_cam1_estimate = estimate_c0_from_cam1(kpts0, kpts1, matches, K);
+    Eigen::Isometry3d cam0_from_cam1_estimate = estimate_cam0_from_cam1(kpts0, kpts1, matches, K);
     Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1;
     cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm();
     EXPECT_TRUE(
diff --git a/common/geometry/trajectory_evaluation.hh b/common/geometry/trajectory_evaluation.hh
new file mode 100644
index 000000000..c3832359a
--- /dev/null
+++ b/common/geometry/trajectory_evaluation.hh
@@ -0,0 +1,17 @@
+#pragma once
+#include <cstddef>
+#include <vector>
+
+#include "Eigen/Core"
+
+namespace robot::geometry {
+const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &traj1,
+                                                        const std::vector<Eigen::Vector3d> &traj2) {
+    assert(traj1.size() == traj2.size());
+    std::vector<double> result;
+    for (size_t i = 0; i < traj2.size(); i++) {
+        result.push_back((traj2[i] - traj1[i]).norm());
+    }
+    return result;
+}
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 0c22053f8..bc7e84bd5 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -264,9 +264,9 @@ cc_library(
   ]
 )
 
-cc_test(
-  name = "spatial_test_scene_cube_test",
-  srcs = ["spatial_test_scene_cube_test.cc"],
+cc_binary(
+  name = "spatial_test_scene_cube_example",
+  srcs = ["spatial_test_scene_cube_example.cc"],
   deps = [    
     "@com_google_googletest//:gtest_main",    
     ":spatial_test_scene_cube",
@@ -362,5 +362,6 @@ cc_binary(
     ":frontend",
     "@boost//:smart_ptr",    
     "@cxxopts",
+    "//common/geometry:camera"
   ]
 )
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index 6c5ceb2a3..0d8da7623 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -5,6 +5,7 @@
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "common/geometry/camera.hh"
 #include "cxxopts.hpp"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 #include "experimental/learn_descriptors/frame.hh"
@@ -37,17 +38,18 @@ struct hash<gtsam::Symbol> {
 
 namespace detail_sfm {
 
-bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-                         const std::vector<gtsam::Point2> &cam_obs, gtsam::Cal3_S2::shared_ptr K,
-                         gtsam::Point3 &out_p_world_landmark, const bool verbose = true) {
-    bool valid = true;
+std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+                                                 const std::vector<gtsam::Point2> &cam_obs,
+                                                 gtsam::Cal3_S2::shared_ptr K,
+                                                 const bool verbose = true) {
+    std::optional<gtsam::Point3> result;
     if (cam_poses.size() >= 2) {
         try {
             // Attempt triangulation using DLT (or the GTSAM provided method)
             gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
                 cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
 
-            out_p_world_landmark = p_world_lmk;
+            result = p_world_lmk;
 
             // Optional: perform an explicit cheirality check
             for (const auto &pose : cam_poses) {
@@ -55,7 +57,6 @@ bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
                 // transformTo() converts a world point to the camera frame.
                 gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
                 if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
-                    valid = false;
                     break;
                 }
             }
@@ -66,35 +67,69 @@ bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
                 std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
                           << e.what() << ". Keeping initial landmark estimate." << std::endl;
         }
-    } else {
-        valid = false;
     }
-    return valid;
+    return result;
 }
 
-static void graph_values(const gtsam::Values &values, const std::string &window_name,
-                         const std::vector<gtsam::Symbol> &symbols_pose,
-                         const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<Eigen::Isometry3d> final_poses;
-    std::vector<Eigen::Vector3d> final_lmks;
+// bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+//                          const std::vector<gtsam::Point2> &cam_obs, gtsam::Cal3_S2::shared_ptr K,
+//                          gtsam::Point3 &out_p_world_landmark, const bool verbose = true) {
+//     bool valid = true;
+//     if (cam_poses.size() >= 2) {
+//         try {
+//             // Attempt triangulation using DLT (or the GTSAM provided method)
+//             gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
+//                 cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+//             out_p_world_landmark = p_world_lmk;
+
+//             // Optional: perform an explicit cheirality check
+//             for (const auto &pose : cam_poses) {
+//                 // Transform point to the camera coordinate system.
+//                 // transformTo() converts a world point to the camera frame.
+//                 gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
+//                 if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+//                     valid = false;
+//                     break;
+//                 }
+//             }
+//         } catch (const gtsam::TriangulationCheiralityException &e) {
+//             // Handle the exception gracefully by logging and retaining the previous
+//             // estimate.
+//             if (verbose)
+//                 std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
+//                           << e.what() << ". Keeping initial landmark estimate." << std::endl;
+//         }
+//     } else {
+//         valid = false;
+//     }
+//     return valid;
+// }
+
+void graph_values(const gtsam::Values &values, const std::string &window_name,
+                  const std::vector<gtsam::Symbol> &symbols_pose,
+                  const std::vector<gtsam::Symbol> &symbols_landmarks) {
+    std::vector<robot::geometry::VizPose> final_poses;
+    std::vector<robot::geometry::VizPoint> final_lmks;
     for (const gtsam::Symbol &symbol_pose : symbols_pose) {
-        final_poses.emplace_back(values.at<gtsam::Pose3>(symbol_pose).matrix());
+        final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
+                                 symbol_pose.string());
     }
     for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
         if (!values.exists(symbol_lmk)) {
             std::cout << "WTF " << symbol_lmk << std::endl;
         }
-        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
+        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
     }
-    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true,
+    std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
+    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
                                window_name);
 }
 
-static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
-                                    const gtsam::Values &values,
-                                    const std::vector<gtsam::Symbol> &symbols_pose,
-                                    const std::vector<gtsam::Symbol> &symbols_landmarks,
-                                    bool viz_itr = false) {
+gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
+                             const std::vector<gtsam::Symbol> &symbols_pose,
+                             const std::vector<gtsam::Symbol> &symbols_landmarks,
+                             bool viz_itr = false) {
     gtsam::LevenbergMarquardtParams params;
     params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
     params.maxIterations = 1;
@@ -128,7 +163,7 @@ static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
     return optimizer.values();
 }
 
-static gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
+gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
     if (poses.empty()) throw std::runtime_error("Empty pose vector");
 
     gtsam::Pose3 mean = poses[0];
@@ -148,14 +183,14 @@ static gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int max
     return mean;
 }
 
-static gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
+gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
     if (points.empty()) throw std::runtime_error("Empty point vector");
     gtsam::Point3 sum(0, 0, 0);
     for (const auto &pt : points) sum += pt;
     return sum / points.size();
 }
 
-static gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
+gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
     const gtsam::Point3 mean = averagePoints(points);
     gtsam::Point3 var(0, 0, 0);
     for (const gtsam::Point3 &pt : points) {
@@ -163,6 +198,8 @@ static gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
     }
     return var / points.size();
 }
+
+// const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &) {}
 };  // namespace detail_sfm
 
 int main(int argc, const char **argv) {
@@ -198,8 +235,10 @@ int main(int argc, const char **argv) {
     FourSeasonsParser parser(path_data, path_calibration);
 
     std::cout << "incremental_sfm_mvp!" << std::endl;
-    const std::vector<int> indices{380, 387, 393};
-    // const std::vector<int> indices = []() {
+    const std::vector<int> indices{581, 609,
+                                   633};  // indices with all data fields on neighborhood_5_train
+    // const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};    // indices with
+    // all data fields on neighborhood_5_train const std::vector<int> indices = []() {
     //     std::vector<int> tmp;
     //     for (int i = 660; i < 681; i += 10) {
     //         tmp.push_back(i);
@@ -240,7 +279,7 @@ int main(int argc, const char **argv) {
     std::vector<Frame> frames;
     FrameId id = 0;
     std::unordered_map<FrameId, gtsam::Pose3>
-        id_to_initial_world_from_cam;      // these are for initial guesses
+        id_to_initial_world_from_cam;      // these are for initial guesses!!
     Eigen::Vector3d t_first_cam_in_world;  // to create our own world
     std::vector<Eigen::Isometry3d> references_world_from_cam;
     std::vector<robot::geometry::VizPose> viz_references_world_from_cam;
@@ -266,10 +305,14 @@ int main(int argc, const char **argv) {
             const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs);
             const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(),
                                                     p_gps_in_ECEF.z(), 1.0);
-            world_from_cam.translation() =
-                ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() *
-                 p_gps_in_ECEF_hom)
-                    .head<3>();
+            world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
+                                                img_pt.AS_w_from_gnss_cam->matrix())
+                                                   .matrix());
+            std::cout << "ok" << std::endl;
+            // world_from_cam.translation() =
+            //     ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() *
+            //      p_gps_in_ECEF_hom)
+            //         .head<3>();
 
             // Eigen::
         }
@@ -320,9 +363,9 @@ int main(int argc, const char **argv) {
         id++;
     }
 
-    // robot::geometry::viz_scene(viz_references_world_from_cam,
-    //                            std::vector<robot::geometry::VizPoint>(), true, true,
-    //                            "References");
+    robot::geometry::viz_scene(viz_references_world_from_cam,
+                               std::vector<robot::geometry::VizPoint>(), cv::viz::Color::brown(),
+                               true, true, "References");
 
     // populate feature_tracks and lmk_id_map
     // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute
@@ -330,7 +373,7 @@ int main(int argc, const char **argv) {
     FeatureTracks feature_tracks;
     FrameLandmarkIdMap lmk_id_map;
     LandmarkId lmk_id = 0;
-    bool exhaustive = true;
+    constexpr bool exhaustive = true;
     if (exhaustive) {
         for (size_t i = 0; i < indices.size() - 1; i++) {
             std::cout << "i: " << i << std::endl;
@@ -340,6 +383,7 @@ int main(int argc, const char **argv) {
                     frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors());
                 // DIAL TO MESS WITH
                 frontend.enforce_bijective_buffer_matches(matches);
+
                 // std::cout << "matching heartbeat" << std::endl;
                 // cv::Mat viz_matches;
                 // Frontend::draw_matches(parser.load_image(i), kpt_list[i], parser.load_image(j),
@@ -349,6 +393,24 @@ int main(int argc, const char **argv) {
                 //           << std::endl;
                 // cv::imshow("viz_matches", viz_matches);
                 // cv::waitKey(0);
+                std::vector<cv::KeyPoint> cv_kpts_1;
+                std::vector<cv::KeyPoint> cv_kpts_2;
+                for (const KeypointCV &kpt : frames[i].get_keypoints()) {
+                    cv::KeyPoint cv_kpt;
+                    cv_kpt.pt = kpt;
+                    cv_kpts_1.push_back(cv_kpt);
+                }
+                for (const KeypointCV &kpt : frames[j].get_keypoints()) {
+                    cv::KeyPoint cv_kpt;
+                    cv_kpt.pt = kpt;
+                    cv_kpts_2.push_back(cv_kpt);
+                }
+                Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix());
+                world_from_camj.linear() =
+                    (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) *
+                     robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat))
+                        .linear();
+                id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
                 std::cout << "still alive with: " << matches.size() << " matches" << std::endl;
                 for (const cv::DMatch match : matches) {
                     const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
@@ -394,9 +456,17 @@ int main(int argc, const char **argv) {
             }
         }
     }
+    std::vector<robot::geometry::VizPose> viz_world_from_cam_init;
+    for (const auto &[id, world_from_cam] : id_to_initial_world_from_cam) {
+        viz_world_from_cam_init.emplace_back(Eigen::Isometry3d(world_from_cam.matrix()),
+                                             std::to_string(id));
+    }
+
+    robot::geometry::viz_scene(viz_world_from_cam_init, std::vector<robot::geometry::VizPoint>(),
+                               cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
     std::cout << "heartbeat" << std::endl;
 
-    // triangulate all of the points
+    // TRIANGULATE all of the points (for initial guess)
     std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;  // points are kpt_in_world
     std::vector<robot::geometry::VizPoint> triangulated_lmks_viz;
     std::vector<gtsam::Point3> triangulated_lmks;
@@ -405,24 +475,28 @@ int main(int argc, const char **argv) {
         std::vector<gtsam::Point2> feat_kpts;
         const LandmarkId lmk_id = lmk_feat.first;
         const FeatureTrack feature_track = lmk_feat.second;
-        gtsam::Point3 kpt_in_world(0, 0, 0);
+        // gtsam::Point3 kpt_in_world(0, 0, 0);
         for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
             world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]);
             feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
         }
-        bool triangulate_success =
-            detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false);
-        if (triangulate_success) {
-            lmk_triangulated_map.emplace(lmk_id, kpt_in_world);
-            triangulated_lmks.push_back(kpt_in_world);
-            triangulated_lmks_viz.emplace_back(kpt_in_world, "lmk " + std::to_string(lmk_id));
+        std::optional<gtsam::Point3> kpt_in_world =
+            detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, false);
+        // bool triangulate_success =
+        // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false);
+        if (kpt_in_world) {
+            std::cout << "triangulation success: " << *kpt_in_world << std::endl;
+            lmk_triangulated_map.emplace(lmk_id, *kpt_in_world);
+            triangulated_lmks.push_back(*kpt_in_world);
+            triangulated_lmks_viz.emplace_back(*kpt_in_world, "lmk " + std::to_string(lmk_id));
         } else {
+            std::cout << "triangulation not success, leaving it out." << std::endl;
             continue;
         }
     }
     std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl;
     robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
-                               cv::viz::Color::black(), true, true,
+                               cv::viz::Color::brown(), true, true,
                                "Unfiltered, triangulated points");
 
     // filter points via variance
@@ -443,8 +517,9 @@ int main(int argc, const char **argv) {
         }
     }
     std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl;
+    std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl;
     robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
-                               cv::viz::Color::black(), true, true,
+                               cv::viz::Color::brown(), true, true,
                                "Filtered, triangulated points");
 
     // ############# BACKEND ###############
@@ -461,6 +536,7 @@ int main(int argc, const char **argv) {
         const gtsam::Symbol symbol_lmk('l', lmk_id);
         for (const auto &[frame_id, obs] : feature_track.obs_) {
             initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
+            symbols_landmarks.push_back(symbol_lmk);
             symbols_landmarks_values_iter.emplace(symbol_lmk,
                                                   std::vector<gtsam::Point3>{kpt_in_world});
             graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
@@ -490,11 +566,10 @@ int main(int argc, const char **argv) {
         // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world));
     }
 
-    // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose,
-    // symbols_landmarks);
+    detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
     std::cout << "heart beat 3" << std::endl;
 
-    bool local_optimizations = false;
+    constexpr bool local_optimizations = false;
     if (local_optimizations) {
         // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number
         // of matches
@@ -579,23 +654,25 @@ int main(int argc, const char **argv) {
                     .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
                         feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K);
 
-                gtsam::Point3 kpt_in_world;
-                bool triangulate_success =
-                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world);
-                if (triangulate_success) {
-                    local_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
-                    viz_lmks.emplace_back(kpt_in_world);
-                }
-                if (symbols_landmarks_values_iter.find(symbol_lmk) !=
-                    symbols_landmarks_values_iter.end()) {
-                    symbols_landmarks_values_iter[symbol_lmk].push_back(kpt_in_world);
-                } else {
-                    symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                          std::vector<gtsam::Point3>{kpt_in_world});
+                std::optional<gtsam::Point3> kpt_in_world =
+                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K);
+                // gtsam::Point3 kpt_in_world;
+                // bool triangulate_success =
+                //     detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world);
+                if (kpt_in_world) {
+                    local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world);
+                    viz_lmks.emplace_back(*kpt_in_world);
+                    if (symbols_landmarks_values_iter.find(symbol_lmk) !=
+                        symbols_landmarks_values_iter.end()) {
+                        symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world);
+                    } else {
+                        symbols_landmarks_values_iter.emplace(
+                            symbol_lmk, std::vector<gtsam::Point3>{*kpt_in_world});
+                    }
                 }
             }
             std::cout << "setup complete!" << std::endl;
-            robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::black(), true,
+            robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), true,
                                        true, "Local Optimization " + std::to_string(i));
 
             const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
@@ -629,5 +706,6 @@ int main(int argc, const char **argv) {
     const gtsam::Values result =
         detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
     std::cout << "about to visualize result" << std::endl;
+    result.print();
     detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/map.hh b/experimental/learn_descriptors/map.hh
new file mode 100644
index 000000000..a22c1e5c7
--- /dev/null
+++ b/experimental/learn_descriptors/map.hh
@@ -0,0 +1,4 @@
+#pragma once
+// #include "Eigen/"
+
+namespace robot::experimental::learn_descriptors {}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/map.proto b/experimental/learn_descriptors/map.proto
new file mode 100644
index 000000000..67d0c5db0
--- /dev/null
+++ b/experimental/learn_descriptors/map.proto
@@ -0,0 +1,15 @@
+
+syntax = "proto3";
+
+package robot.experimental.learn_descriptors;
+
+message Map {
+  message Quaternion {
+    // Represents a quaternion as a + bi + cj + dk
+    double a = 1;
+    double b = 2;
+    double c = 3;
+    double d = 4;
+  }
+  Quaternion quaternion = 1;
+}
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_test.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
similarity index 94%
rename from experimental/learn_descriptors/spatial_test_scene_cube_test.cc
rename to experimental/learn_descriptors/spatial_test_scene_cube_example.cc
index ab7b06208..ad1130338 100644
--- a/experimental/learn_descriptors/spatial_test_scene_cube_test.cc
+++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
@@ -32,6 +32,6 @@ TEST(SpatialTestSceneCubeTest, viz_cube_with_cameras) {
         camera_poses.emplace_back(T_world_cam.pose().matrix());
     }
 
-    // geometry::viz_scene(camera_poses, test_scene.get_points());
+    geometry::viz_scene(camera_poses, test_scene.get_points());
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc
index 9af1f14b9..d8f693ea6 100644
--- a/visualization/opencv/opencv_viz.cc
+++ b/visualization/opencv/opencv_viz.cc
@@ -72,6 +72,7 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
     const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
     for (unsigned int i = 0; i < points_in_world.size(); i++) {
         const Eigen::Vector3d &point = points_in_world[i].t_point_in_world;
+        std::cout << "pt to viz: " << point << std::endl;
         window.showWidget("point_" + std::to_string(i),
                           cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius,
                                            sphere_res, point_color));
@@ -107,11 +108,11 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
                                          cv::Point3d(0, 0, 1), cells, cell_sizes, color));
     }
 
-    std::cout << "viz heartbeat" << std::endl;
+    // std::cout << "viz heartbeat" << std::endl;
     // window.setViewerPose(cv::Affine3d(
     //     eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[0].world_from_pose.matrix()))));
-    window.setViewerPose(cv::Affine3d::Identity());
-    std::cout << "success" << std::endl;
+    // window.setViewerPose(cv::Affine3d::Identity());
+    // std::cout << "success" << std::endl;
 
     window.spin();
 }

From 28bd689d38316985253836307763ec4198d345cb Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 1 Jul 2025 15:48:34 -0400
Subject: [PATCH 26/45] WIP

---
 common/geometry/camera.cc                     |   1 +
 .../four_seasons_parser_example_viz.cc        |  48 ++++-
 experimental/learn_descriptors/frontend.cc    |   3 +-
 experimental/learn_descriptors/frontend.hh    |   3 +-
 .../learn_descriptors/frontend_test.cc        |   4 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  | 181 ++++++++++++------
 experimental/learn_descriptors/sfm_mvp.cc     |   4 +-
 .../structure_from_motion.cc                  |   2 +-
 visualization/opencv/opencv_viz.cc            |   1 -
 9 files changed, 165 insertions(+), 82 deletions(-)

diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
index 7e5a5fb3a..c7908c4f9 100644
--- a/common/geometry/camera.cc
+++ b/common/geometry/camera.cc
@@ -28,6 +28,7 @@ Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0
                                           const std::vector<cv::KeyPoint> &kpts1,
                                           const std::vector<cv::DMatch> &matches,
                                           const cv::Mat &K) {
+    assert(kpts1.size() != 0 && kpts0.size() != 0);
     Eigen::Isometry3d result;
     std::vector<cv::Point2f> pts1;
     std::vector<cv::Point2f> pts2;
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index f0e5c5e5c..639192cd9 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -48,23 +48,53 @@ int main(int argc, const char** argv) {
 
     ROBOT_CHECK(parser.num_images() != 0);
 
-    std::vector<Eigen::Isometry3d> w_from_gnss_cams;  // camera frames from visual world frame
+    // std::vector<robot::geometry::VizPose>
+    //     w_from_gnss_cams;                                   // camera frames from visual world
+    //     frame
+    // std::vector<robot::geometry::VizPose> w_from_gcs_cams;  // camera frames from visual world
+    // frame
+    std::vector<robot::geometry::VizPose> viz_frames;  // camera frames from visual world frame
+
     Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity();
     std::cout << "gnss scale: " << parser.gnss_scale() << std::endl;
     scale_mat.linear() *= parser.gnss_scale();
     std::cout << "scale mat: " << scale_mat.matrix() << std::endl;
-    for (size_t i = 100; i < std::min(static_cast<size_t>(800), parser.num_images()); i += 49) {
+    constexpr size_t START = 100;
+    const size_t END = std::min(static_cast<size_t>(800), parser.num_images());
+    for (size_t i = START; i < END; i += 49) {
         const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
         Eigen::Isometry3d AS_w_from_gnss_cam =
             scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix());
         Eigen::Isometry3d w_from_gnss_cam =
             Eigen::Isometry3d(parser.S_from_AS().matrix()) * AS_w_from_gnss_cam;
-        Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) *
-                                           Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix());
-        w_from_gnss_cams.push_back(w_from_gnss_cam);
-        std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl;
-        std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl;
+        Eigen::Vector3d
+            // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) *
+            //                                    Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix());
+            viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
+        if (i == START) {
+            viz_frames.emplace_back(w_from_gnss_cam, "x_gps_" + std::to_string(i));
+        } else if (img_pt.gps_gcs) {
+            const Eigen::Vector3d gcs_coordinate(
+                img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
+                img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
+            const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate);
+            const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(),
+                                                    ECEF_from_gps.z(), 1);
+            // Eigen::Isometry3d w_from_gcs_gps =
+            Eigen::Vector4d gps_in_w =
+                Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) *
+                ECEF_from_gps_hom;
+            Eigen::Isometry3d w_from_gcs_gps;
+            w_from_gcs_gps.translation() = gps_in_w.head<3>();
+            viz_frames.emplace_back(w_from_gcs_gps, "x_gps_" + std::to_string(i));
+            const Eigen::Vector3d ref_to_gps_in_world =
+                w_from_gcs_gps.translation() - w_from_gnss_cam.translation();
+            std::cout << "ref_" << i << "_to_gps_in_world: " << ref_to_gps_in_world << std::endl;
+        }
+        // std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl;
+        // std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl;
     }
-    std::cout << "got " << w_from_gnss_cams.size() << " poses" << std::endl;
-    robot::geometry::viz_scene(w_from_gnss_cams, std::vector<Eigen::Vector3d>());
+    std::cout << "got " << viz_frames.size() << " poses" << std::endl;
+    robot::geometry::viz_scene(viz_frames, std::vector<robot::geometry::VizPoint>(),
+                               cv::viz::Color::brown());
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 3ef2c408e..134c4914a 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -38,8 +38,7 @@ Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matche
     }
 }
 
-std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
-    const cv::Mat &img) const {
+std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const cv::Mat &img) const {
     std::vector<cv::KeyPoint> keypoints;
     cv::Mat descriptors;
     switch (extractor_type_) {
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index 2b8c7b9f7..12332e4fb 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -18,8 +18,7 @@ class Frontend {
     const ExtractorType &get_extractor_type() const { return extractor_type_; };
     const MatcherType &get_matcher_type() const { return matcher_type_; };
 
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
-        const cv::Mat &img) const;
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> extract_features(const cv::Mat &img) const;
     std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
                                         const cv::Mat &descriptors2) const;
     static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
index d90142e3c..25e60a690 100644
--- a/experimental/learn_descriptors/frontend_test.cc
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -75,8 +75,8 @@ TEST(FrontendTest, pipeline_sweep) {
                 assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
                 continue;
             }
-            keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1);
-            keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2);
+            keypoints_descriptors_pair_1 = frontend.extract_features(image_1);
+            keypoints_descriptors_pair_2 = frontend.extract_features(image_2);
             matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
                                            keypoints_descriptors_pair_2.second);
             frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index 0d8da7623..214cd3105 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -1,3 +1,4 @@
+#include <cmath>
 #include <functional>
 #include <opencv2/calib3d.hpp>
 #include <sstream>
@@ -41,53 +42,81 @@ namespace detail_sfm {
 std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
                                                  const std::vector<gtsam::Point2> &cam_obs,
                                                  gtsam::Cal3_S2::shared_ptr K,
+                                                 const double max_reproj_error = 2.0,
                                                  const bool verbose = true) {
-    std::optional<gtsam::Point3> result;
+    gtsam::Point3 p_lmk_in_world;
     if (cam_poses.size() >= 2) {
         try {
             // Attempt triangulation using DLT (or the GTSAM provided method)
-            gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
+            p_lmk_in_world = gtsam::triangulatePoint3(
                 cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
 
-            result = p_world_lmk;
-
-            // Optional: perform an explicit cheirality check
-            for (const auto &pose : cam_poses) {
-                // Transform point to the camera coordinate system.
-                // transformTo() converts a world point to the camera frame.
-                gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
-                if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
-                    break;
-                }
-            }
         } catch (const gtsam::TriangulationCheiralityException &e) {
             // Handle the exception gracefully by logging and retaining the previous
             // estimate.
             if (verbose)
                 std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
-                          << e.what() << ". Keeping initial landmark estimate." << std::endl;
+                          << e.what() << ". Discarding involved keypoints." << std::endl;
+        }
+    }
+    // Optional: perform an explicit cheirality check
+    for (const auto &pose : cam_poses) {
+        // Transform point to the camera coordinate system.
+        // transformTo() converts a world point to the camera frame.
+        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
+        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            return std::nullopt;
+        }
+    }
+
+    // Cheirality & reprojection checks
+    for (size_t i = 0; i < cam_poses.size(); ++i) {
+        const auto &pose = cam_poses[i];
+        // Cheirality
+        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
+        if (p_cam.z() <= 0) {
+            if (verbose) {
+                std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z()
+                          << ")\n";
+            }
+            return std::nullopt;
+        }
+        // Reprojection error
+        if (max_reproj_error > 0) {
+            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
+            const auto reproj = cam.project(p_lmk_in_world);
+            const double err = (reproj - cam_obs[i]).norm();
+            if (err > max_reproj_error) {
+                if (verbose) {
+                    std::cerr << "[triangulate] reprojection error too large on view " << i << " ("
+                              << err << " px)\n";
+                }
+                return std::nullopt;
+            }
         }
     }
-    return result;
+
+    return p_lmk_in_world;
 }
 
 // bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-//                          const std::vector<gtsam::Point2> &cam_obs, gtsam::Cal3_S2::shared_ptr K,
-//                          gtsam::Point3 &out_p_world_landmark, const bool verbose = true) {
+//                          const std::vector<gtsam::Point2> &cam_obs,
+//                          gtsam::Cal3_S2::shared_ptr K, gtsam::Point3 &out_p_world_landmark,
+//                          const bool verbose = true) {
 //     bool valid = true;
 //     if (cam_poses.size() >= 2) {
 //         try {
 //             // Attempt triangulation using DLT (or the GTSAM provided method)
-//             gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
+//             gtsam::Point3 p_lmk_in_world = gtsam::triangulatePoint3(
 //                 cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
 
-//             out_p_world_landmark = p_world_lmk;
+//             out_p_world_landmark = p_lmk_in_world;
 
 //             // Optional: perform an explicit cheirality check
 //             for (const auto &pose : cam_poses) {
 //                 // Transform point to the camera coordinate system.
 //                 // transformTo() converts a world point to the camera frame.
-//                 gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
+//                 gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
 //                 if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
 //                     valid = false;
 //                     break;
@@ -129,14 +158,13 @@ void graph_values(const gtsam::Values &values, const std::string &window_name,
 gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
                              const std::vector<gtsam::Symbol> &symbols_pose,
                              const std::vector<gtsam::Symbol> &symbols_landmarks,
-                             bool viz_itr = false) {
+                             const int num_epochs = 5, bool viz_itr = false) {
     gtsam::LevenbergMarquardtParams params;
     params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
     params.maxIterations = 1;
     gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
 
     double prev_error = optimizer.error();
-    const int num_steps = 5;
     typedef int epoch;
     std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
                        const std::vector<gtsam::Symbol> &)>
@@ -148,7 +176,7 @@ gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gts
             graph_values(vals, window_name, symbols_pose, symbols_landmarks);
         };
 
-    for (int i = 0; i < num_steps; i++) {
+    for (int i = 0; i < num_epochs; i++) {
         optimizer.iterate();
         double curr_error = optimizer.error();
 
@@ -199,7 +227,19 @@ gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
     return var / points.size();
 }
 
-// const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &) {}
+double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) {
+    Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation();
+
+    // 2. Compute trace and clamp to [-1,1] for numerical safety
+    double tr = R_err.trace();
+    double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0));
+
+    // 3. Recover angle (in radians)
+    return std::acos(cos_theta);
+}
+
+// const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &)
+// {}
 };  // namespace detail_sfm
 
 int main(int argc, const char **argv) {
@@ -235,9 +275,10 @@ int main(int argc, const char **argv) {
     FourSeasonsParser parser(path_data, path_calibration);
 
     std::cout << "incremental_sfm_mvp!" << std::endl;
-    const std::vector<int> indices{581, 609,
-                                   633};  // indices with all data fields on neighborhood_5_train
-    // const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};    // indices with
+    // const std::vector<int> indices{581, 609,
+    //                                633};  // indices with all data fields on neighborhood_5_train
+    const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};  // indices with
+    constexpr bool visualize = false;
     // all data fields on neighborhood_5_train const std::vector<int> indices = []() {
     //     std::vector<int> tmp;
     //     for (int i = 660; i < 681; i += 10) {
@@ -320,13 +361,13 @@ int main(int argc, const char **argv) {
         id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix());
 
         // populate frame
-        cv::Mat img_undistorted = parser.load_image(idx);
-        // cv::Mat img = parser.load_image(idx);
-        // cv::Mat img_undistorted;
-        // cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat);
+        cv::Mat img = parser.load_image(idx);
+        cv::Mat img_undistorted;
+        cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat, K_mat, img.size());
 
         // std::stringstream ss;
-        // ss << "image " << idx;
+        // ss << "image " << idx << " of size "
+        //    << "(" << img_undistorted.cols << ", " << img_undistorted.rows << ")";
         // cv::imshow(ss.str(), img_undistorted);
         // cv::waitKey(0);
         // cv::destroyWindow(ss.str());
@@ -348,7 +389,8 @@ int main(int argc, const char **argv) {
         }
 
         std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.get_keypoints_and_descriptors(img_undistorted);
+            frontend.extract_features(img_undistorted);
+        std::cout << "kpts_descs.size(): " << kpts_descs.first.size() << std::endl;
         kpt_list.push_back(kpts_descs.first);
         KeypointsCV kpts;
         for (const cv::KeyPoint &kpt : kpts_descs.first) {
@@ -362,10 +404,10 @@ int main(int argc, const char **argv) {
 
         id++;
     }
-
-    robot::geometry::viz_scene(viz_references_world_from_cam,
-                               std::vector<robot::geometry::VizPoint>(), cv::viz::Color::brown(),
-                               true, true, "References");
+    if (visualize)
+        robot::geometry::viz_scene(viz_references_world_from_cam,
+                                   std::vector<robot::geometry::VizPoint>(),
+                                   cv::viz::Color::brown(), true, true, "References");
 
     // populate feature_tracks and lmk_id_map
     // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute
@@ -384,15 +426,6 @@ int main(int argc, const char **argv) {
                 // DIAL TO MESS WITH
                 frontend.enforce_bijective_buffer_matches(matches);
 
-                // std::cout << "matching heartbeat" << std::endl;
-                // cv::Mat viz_matches;
-                // Frontend::draw_matches(parser.load_image(i), kpt_list[i], parser.load_image(j),
-                //                        kpt_list[j], matches, viz_matches);
-                // std::cout << "image height: " << viz_matches.rows << " width: " <<
-                // viz_matches.cols
-                //           << std::endl;
-                // cv::imshow("viz_matches", viz_matches);
-                // cv::waitKey(0);
                 std::vector<cv::KeyPoint> cv_kpts_1;
                 std::vector<cv::KeyPoint> cv_kpts_2;
                 for (const KeypointCV &kpt : frames[i].get_keypoints()) {
@@ -411,7 +444,6 @@ int main(int argc, const char **argv) {
                      robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat))
                         .linear();
                 id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
-                std::cout << "still alive with: " << matches.size() << " matches" << std::endl;
                 for (const cv::DMatch match : matches) {
                     const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
                     const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx];
@@ -462,8 +494,11 @@ int main(int argc, const char **argv) {
                                              std::to_string(id));
     }
 
-    robot::geometry::viz_scene(viz_world_from_cam_init, std::vector<robot::geometry::VizPoint>(),
-                               cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
+    std::cout << "pre heartbeat" << std::endl;
+    if (visualize)
+        robot::geometry::viz_scene(viz_world_from_cam_init,
+                                   std::vector<robot::geometry::VizPoint>(),
+                                   cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
     std::cout << "heartbeat" << std::endl;
 
     // TRIANGULATE all of the points (for initial guess)
@@ -485,7 +520,6 @@ int main(int argc, const char **argv) {
         // bool triangulate_success =
         // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false);
         if (kpt_in_world) {
-            std::cout << "triangulation success: " << *kpt_in_world << std::endl;
             lmk_triangulated_map.emplace(lmk_id, *kpt_in_world);
             triangulated_lmks.push_back(*kpt_in_world);
             triangulated_lmks_viz.emplace_back(*kpt_in_world, "lmk " + std::to_string(lmk_id));
@@ -495,9 +529,10 @@ int main(int argc, const char **argv) {
         }
     }
     std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl;
-    robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
-                               cv::viz::Color::brown(), true, true,
-                               "Unfiltered, triangulated points");
+    if (visualize)
+        robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
+                                   cv::viz::Color::brown(), true, true,
+                                   "Unfiltered, triangulated points");
 
     // filter points via variance
     // TODO: filter based on quality and/or quantity of matches
@@ -518,9 +553,10 @@ int main(int argc, const char **argv) {
     }
     std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl;
     std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl;
-    robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
-                               cv::viz::Color::brown(), true, true,
-                               "Filtered, triangulated points");
+    if (visualize)
+        robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
+                                   cv::viz::Color::brown(), true, true,
+                                   "Filtered, triangulated points");
 
     // ############# BACKEND ###############
 
@@ -566,7 +602,7 @@ int main(int argc, const char **argv) {
         // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world));
     }
 
-    detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
+    // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
     std::cout << "heart beat 3" << std::endl;
 
     constexpr bool local_optimizations = false;
@@ -672,8 +708,9 @@ int main(int argc, const char **argv) {
                 }
             }
             std::cout << "setup complete!" << std::endl;
-            robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(), true,
-                                       true, "Local Optimization " + std::to_string(i));
+            if (visualize)
+                robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(),
+                                           true, true, "Local Optimization " + std::to_string(i));
 
             const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
                 local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
@@ -704,8 +741,26 @@ int main(int argc, const char **argv) {
 
     // do global optimization
     const gtsam::Values result =
-        detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
-    std::cout << "about to visualize result" << std::endl;
-    result.print();
-    detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+        detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0);
+
+    if (visualize) {
+        std::cout << "about to visualize result" << std::endl;
+        result.print();
+        detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+    }
+
+    // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
+    double sum_traj_error = 0;
+    double sum_rot_error = 0;
+    for (size_t i = 0; i < symbols_pose.size(); i++) {
+        const gtsam::Pose3 traj_pose = result.at<gtsam::Pose3>(symbols_pose[i]);
+        sum_traj_error += std::pow(
+            (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2);
+        sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i],
+                                                    Eigen::Isometry3d(traj_pose.matrix()));
+    }
+    std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
+    double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
+    double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
+    std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index 21966cf4a..9d41270d7 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -238,7 +238,7 @@ TEST(SFMMvp, sfm_building_manual_global) {
         Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
 
         std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.get_keypoints_and_descriptors(img_undistorted);
+            frontend.extract_features(img_undistorted);
         KeypointsCV kpts;
         for (const cv::KeyPoint &kpt : kpts_descs.first) {
             kpts.push_back(kpt.pt);
@@ -432,7 +432,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
 
         std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.get_keypoints_and_descriptors(img_undistorted);
+            frontend.extract_features(img_undistorted);
         KeypointsCV kpts;
         for (const cv::KeyPoint &kpt : kpts_descs.first) {
             kpts.push_back(kpt.pt);
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 02dc28a2d..df84bb7cf 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -59,7 +59,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     cv::Mat img_undistorted;
     cv::undistort(img, img_undistorted, K_, D_);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
-        frontend_.get_keypoints_and_descriptors(img);
+        frontend_.extract_features(img);
     feature_manager_->append_img_data(keypoints_and_descriptors.first,
                                       keypoints_and_descriptors.second);
     keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc
index d8f693ea6..d750555ed 100644
--- a/visualization/opencv/opencv_viz.cc
+++ b/visualization/opencv/opencv_viz.cc
@@ -72,7 +72,6 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
     const cv::viz::Color point_color = cv::viz::Color::celestial_blue();
     for (unsigned int i = 0; i < points_in_world.size(); i++) {
         const Eigen::Vector3d &point = points_in_world[i].t_point_in_world;
-        std::cout << "pt to viz: " << point << std::endl;
         window.showWidget("point_" + std::to_string(i),
                           cv::viz::WSphere(cv::Point3d(point[0], point[1], point[2]), point_radius,
                                            sphere_res, point_color));

From dc8dd641033394bdae52c0ae2d5b8deb697fc657 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Wed, 2 Jul 2025 14:43:03 -0400
Subject: [PATCH 27/45] Debugging discrepancy between gps data and reference
 data

---
 .../learn_descriptors/four_seasons_parser.cc  | 22 ++----
 .../four_seasons_parser_example_viz.cc        | 67 ++++++++++++++-----
 experimental/learn_descriptors/image_point.hh |  1 +
 3 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index 75886a552..8c850aa2e 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -103,23 +103,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
         while (gps_idx < gps_time_list.size() && time_key > gps_time_list[gps_idx].first &&
                time_key - gps_time_list[gps_idx].first > (1.0 / IMU_HZ) * 1e9) {
             gps_idx++;
-            // if (gps_idx < 200) {
-            //     std::stringstream ss;
-            //     ss << "uh oh: " << gps_idx;
-            //     if (time_key > gps_time_list[gps_idx].first) {
-            //         ss << "\ttime_key - gps_time: " << (time_key - gps_time_list[gps_idx].first);
-            //     } else {
-            //         ss << "\tgps_time - time_key: " << (gps_time_list[gps_idx].first - time_key);
-            //     }
-            //     std::cout << ss.str() << std::endl;
-            // }
         }  // find the closest gps point whose time is before the current image capture time
         if (gps_idx < gps_time_list.size() &&
             time_key - gps_time_list[gps_idx].first <= (1.0 / IMU_HZ) * 1e9) {
             img_pt.gps_gcs = gps_time_list[gps_idx].second;
-            // std::cout << "adding gps data at img_pt: " << img_pt.id << " with gps_idx: " <<
-            // gps_idx
-            //           << std::endl;
             gps_idx++;
         }
         img_pt_vector_.push_back(img_pt);
@@ -325,11 +312,12 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) {
         if (nmea_sentence->type() == "GGA") {
             nmea::gga gga(*nmea_sentence);
             if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) {
-                ImagePoint::GPSData gps_data;
-                gps_data.latitude = gga.latitude.get();
-                gps_data.longitude = gga.longitude.get();
-                if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
                 if (gga.utc.get() == time_of_day_last) {  // GGA messages for this dataset come
+                    ImagePoint::GPSData gps_data;
+                    gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get());
+                    gps_data.latitude = gga.latitude.get();
+                    gps_data.longitude = gga.longitude.get();
+                    if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
                     // after RMC messages
                     // std::cout << "adding gps data with time of day: " << gga.utc.get()
                     //           << " and unix time: "
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index 639192cd9..bfa8bdf3d 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -53,7 +53,8 @@ int main(int argc, const char** argv) {
     //     frame
     // std::vector<robot::geometry::VizPose> w_from_gcs_cams;  // camera frames from visual world
     // frame
-    std::vector<robot::geometry::VizPose> viz_frames;  // camera frames from visual world frame
+    std::vector<robot::geometry::VizPose> viz_frames;   // camera frames from visual world frame
+    std::vector<robot::geometry::VizPoint> viz_points;  // camera frames from visual world frame
 
     Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity();
     std::cout << "gnss scale: " << parser.gnss_scale() << std::endl;
@@ -61,22 +62,58 @@ int main(int argc, const char** argv) {
     std::cout << "scale mat: " << scale_mat.matrix() << std::endl;
     constexpr size_t START = 100;
     const size_t END = std::min(static_cast<size_t>(800), parser.num_images());
+    std::optional<double> altitude_gps_from_gnss_cam;
     for (size_t i = START; i < END; i += 49) {
+        std::cout << "\nImage point " << i << std::endl;
         const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
+        std::cout << "Image point seq: " << img_pt.seq << std::endl;
+        if (!img_pt.AS_w_from_gnss_cam) {
+            continue;
+        }
         Eigen::Isometry3d AS_w_from_gnss_cam =
             scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix());
+        // Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix());
+        std::cout << "img_pt.AS_w_from_gnss_cam->matrix()" << img_pt.AS_w_from_gnss_cam->matrix()
+                  << std::endl;
+        std::cout << "AS_w_from_gnss_cam: " << AS_w_from_gnss_cam.matrix() << std::endl;
         Eigen::Isometry3d w_from_gnss_cam =
             Eigen::Isometry3d(parser.S_from_AS().matrix()) * AS_w_from_gnss_cam;
-        Eigen::Vector3d
-            // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) *
-            //                                    Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix());
-            viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
+        w_from_gnss_cam.translation().x() += 1.2;
+        w_from_gnss_cam.translation().y() += 1.2;
+        // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) *
+        //                                    Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix());
+        std::cout << "w_from_gnss_cam: " << w_from_gnss_cam.matrix() << std::endl;
+        viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
+        viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
         if (i == START) {
-            viz_frames.emplace_back(w_from_gnss_cam, "x_gps_" + std::to_string(i));
+            break;
+            viz_points.emplace_back(w_from_gnss_cam.translation(), "x_gps_" + std::to_string(i));
         } else if (img_pt.gps_gcs) {
-            const Eigen::Vector3d gcs_coordinate(
+            std::cout << "gps time: " << img_pt.gps_gcs->seq << std::endl;
+            Eigen::Vector3d gcs_coordinate(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
                 img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
+
+            std::cout << "gcs_coordinate altitude: "
+                      << (img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0) << std::endl;
+
+            // compare the reference in gcs to the gps in gcs
+            Eigen::Isometry3d ECEF_from_gnss_cam =
+                Eigen::Isometry3d(
+                    (parser.e_from_gpsw() * parser.w_from_gpsw().inverse()).matrix()) *
+                w_from_gnss_cam;
+            const Eigen::Vector3d gnss_cam_in_gcs =
+                parser.gcs_from_ECEF(ECEF_from_gnss_cam.translation());
+
+            if (!altitude_gps_from_gnss_cam) {
+                altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude);
+            }
+            gcs_coordinate.z() += *altitude_gps_from_gnss_cam;
+
+            std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs
+                      << "\ngps_gcs: " << gcs_coordinate
+                      << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl;
+
             const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate);
             const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(),
                                                     ECEF_from_gps.z(), 1);
@@ -84,17 +121,15 @@ int main(int argc, const char** argv) {
             Eigen::Vector4d gps_in_w =
                 Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) *
                 ECEF_from_gps_hom;
-            Eigen::Isometry3d w_from_gcs_gps;
-            w_from_gcs_gps.translation() = gps_in_w.head<3>();
-            viz_frames.emplace_back(w_from_gcs_gps, "x_gps_" + std::to_string(i));
-            const Eigen::Vector3d ref_to_gps_in_world =
-                w_from_gcs_gps.translation() - w_from_gnss_cam.translation();
-            std::cout << "ref_" << i << "_to_gps_in_world: " << ref_to_gps_in_world << std::endl;
+            viz_points.emplace_back(gps_in_w.head<3>(), "x_gps_" + std::to_string(i));
+            const Eigen::Vector3d gps_from_ref_in_world =
+                gps_in_w.head<3>() - w_from_gnss_cam.translation();
+            std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl;
         }
         // std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl;
         // std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl;
     }
-    std::cout << "got " << viz_frames.size() << " poses" << std::endl;
-    robot::geometry::viz_scene(viz_frames, std::vector<robot::geometry::VizPoint>(),
-                               cv::viz::Color::brown());
+    std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl;
+    std::cout << "got " << viz_points.size() << " points" << std::endl;
+    robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown());
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index 109f49296..33d48e9f3 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -15,6 +15,7 @@
 namespace robot::experimental::learn_descriptors {
 struct ImagePoint {
     struct GPSData {
+        size_t seq;  // time in nanoseconds, may differ from shutter time
         double latitude;
         double longitude;
         std::optional<double> altitude;  // meters above sea level

From e90e90956a53be8c551634c505e829b1a95338b7 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 3 Jul 2025 16:42:48 -0400
Subject: [PATCH 28/45] incorporated new four seasons parser changes

---
 experimental/learn_descriptors/BUILD          | 218 +-----------------
 .../learn_descriptors/four_seasons_parser.cc  | 128 +++++++---
 .../learn_descriptors/four_seasons_parser.hh  |  19 +-
 .../four_seasons_parser_example.cc            |   3 +-
 .../four_seasons_parser_example_viz.cc        |  80 ++++---
 .../four_seasons_parser_test.cc               |  18 +-
 experimental/learn_descriptors/image_point.hh |  63 ++++-
 7 files changed, 240 insertions(+), 289 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index bc7e84bd5..01d7e9a83 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -75,7 +75,6 @@ cc_test(
   deps = [
     "@com_google_googletest//:gtest_main",
     ":symphony_lake_parser",
-    "//common:check",
     "//visualization/opencv:opencv_viz"
   ]
 )
@@ -111,7 +110,8 @@ cc_library(
       "@com_google_absl//absl/strings:str_format",
       ":image_point",
       "@nmea",
-      "@geographiclib"
+      "@geographiclib",
+      "//common:check"
     ]
 )
 
@@ -150,218 +150,6 @@ cc_test(
     ":four_seasons_parser",
     "@nmea",
     "@geographiclib",
-  ]
-)
-
-cc_library(
-  name = "frontend_definitions",
-  hdrs = ["frontend_definitions.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",    
-    ":structure_from_motion_types"
-  ]
-)
-
-cc_library(
-  name = "structure_from_motion_types",
-  hdrs = ["structure_from_motion_types.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",    
-    "@opencv//:opencv"
-  ]
-)
-
-cc_library(
-  name = "frame",
-  hdrs = ["frame.hh"],
-  srcs = ["frame.cc"],
-  visibility = ["//visibility:public"],
-  deps = [             
-    "@opencv//:opencv",
-    ":frontend_definitions",
-    "@gtsam//:gtsam"
-  ]
-)
-
-cc_test(
-  name = "sfm_mvp",
-  srcs = ["sfm_mvp.cc"],
-    copts = [
-    "-Wno-error=unused-parameter",
-    "-Wno-error=unused-variable",
-  ],
-  deps = [
-    ":frontend",
-    "@com_google_googletest//:gtest_main",
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",
-    "@eigen",
-    ":spatial_test_scene_cube",
-    ":symphony_lake_parser",
-    ":frame", 
-    ":structure_from_motion_types",
-    "//visualization/opencv:opencv_viz"
-  ]
-)
-
-
-cc_library(
-  name = "feature_set",
-  hdrs = ["feature_set.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",
-    "@opencv//:opencv"
-  ]
-)
-
-cc_library(
-  name = "feature_manager",
-  hdrs = ["feature_manager.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",
-    ":feature_set",
-  ]
-)
-
-cc_test(
-  name = "feature_manager_test",
-  srcs = ["feature_manager_test.cc"],
-  deps = [
-    ":feature_manager",
-    "@com_google_googletest//:gtest_main",
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",
-    "@eigen",
-    ":spatial_test_scene_cube"
-  ]
-)
-
-
-cc_library(
-  name = "spatial_test_scene",
-  hdrs = ["spatial_test_scene.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["spatial_test_scene.cc"],
-  deps = [        
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",    
-    "@eigen"
-  ]
-)
-
-cc_library(
-  name = "spatial_test_scene_cube",
-  hdrs = ["spatial_test_scene_cube.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@eigen",
-    ":spatial_test_scene"
-  ]
-)
-
-cc_binary(
-  name = "spatial_test_scene_cube_example",
-  srcs = ["spatial_test_scene_cube_example.cc"],
-  deps = [    
-    "@com_google_googletest//:gtest_main",    
-    ":spatial_test_scene_cube",
-    "//visualization/opencv:opencv_viz",
-    "@eigen"
-  ]
-)
-
-cc_library(
-  name = "frontend",
-  hdrs = ["frontend.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["frontend.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@opencv//:opencv",    
-  ]
-)
-
-
-cc_test(
-  name = "frontend_test",
-  srcs = ["frontend_test.cc"],
-  deps = [    
-    "@com_google_googletest//:gtest_main",    
-    ":frontend"
-  ]
-)
-
-cc_library(
-  name = "backend",
-  hdrs = ["backend.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["backend.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@opencv//:opencv",    
-    "@gtsam//:gtsam",
-    "//common/geometry:camera",
-    "@boost//:smart_ptr",
-    ":feature_manager"
-  ]
-)
-
-
-cc_test(
-  name = "backend_test",
-  srcs = ["backend_test.cc"],
-    copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [    
-    "@com_google_googletest//:gtest_main",    
-    "@opencv//:opencv",
-    ":backend",
-    ":feature_manager",
-    ":spatial_test_scene_cube",
-    "//visualization/opencv:opencv_viz"
-  ]
-)
-
-cc_binary(
-  name = "get_colmap_groundtruth",
-  srcs = ["get_colmap_groundtruth.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    ":symphony_lake_parser"
-  ]
-)
-
-cc_binary(
-  name = "incremental_sfm_mvp",
-  srcs = ["incremental_sfm_mvp.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-    "-Wno-error=unused-function",
-  ],
-  deps = [
-    ":four_seasons_parser",
-    "@eigen",
-    "@opencv",    
-    "@gtsam",
-    "//visualization/opencv:opencv_viz",
-    ":spatial_test_scene_cube",
-    ":frame", 
-    ":structure_from_motion_types",
-    ":frontend",
-    "@boost//:smart_ptr",    
-    "@cxxopts",
-    "//common/geometry:camera"
+    "//common:check"
   ]
 )
diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index 8c850aa2e..fdf76afde 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -1,13 +1,15 @@
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 
-#include <limits.h>
-
 #include <cmath>
+#include <cstddef>
+#include <cstdlib>
 #include <exception>
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <utility>
 
@@ -17,14 +19,13 @@
 #include "GeographicLib/LocalCartesian.hpp"
 #include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
+#include "common/check.hh"
 #include "common/liegroups/se3.hh"
 #include "common/time/robot_time.hh"
 #include "nmea/message/gga.hpp"
 #include "nmea/message/rmc.hpp"
 #include "nmea/sentence.hpp"
 
-#define IMU_HZ 30.0
-
 namespace robot::experimental::learn_descriptors {
 using namespace detail;
 FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
@@ -45,12 +46,9 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
     txt_parser_help::TimeDataMap vio_poses_time_map =
         txt_parser_help::create_vio_time_data_map(path_vio, min_time_sig_figs);
     gps_parser_help::TimeGPSList gps_time_list =
-        gps_parser_help::create_gps_time_data_map(path_gps);
-
-    // std::cout << "imu dt in nanoseconds: " << std::to_string((1.0 / IMU_HZ) * 1e9) << std::endl;
+        gps_parser_help::create_gps_time_data_list(path_gps);
 
     size_t id = 0;
-    size_t gps_idx = 0;
     for (const std::pair<size_t, std::vector<std::string>>& pair_time_data : img_time_list) {
         const size_t time_key = pair_time_data.first;
         ImagePoint img_pt;
@@ -100,22 +98,33 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
             std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id
                       << std::endl;
         }
-        while (gps_idx < gps_time_list.size() && time_key > gps_time_list[gps_idx].first &&
-               time_key - gps_time_list[gps_idx].first > (1.0 / IMU_HZ) * 1e9) {
-            gps_idx++;
-        }  // find the closest gps point whose time is before the current image capture time
-        if (gps_idx < gps_time_list.size() &&
-            time_key - gps_time_list[gps_idx].first <= (1.0 / IMU_HZ) * 1e9) {
-            img_pt.gps_gcs = gps_time_list[gps_idx].second;
-            gps_idx++;
-        }
         img_pt_vector_.push_back(img_pt);
         id++;
     }
+    // popoulate gps to nearest img time
+    // TODO: could be linear time... but good enough
+    for (const auto& [time_unix_ns, gps_data] : gps_time_list) {
+        auto it = std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns,
+                                   [](const ImagePoint& img_pt, const size_t query_unix_time) {
+                                       return img_pt.seq < query_unix_time;
+                                   });
+        size_t insert_idx = std::distance(img_pt_vector_.begin(), it);
+        if (it != img_pt_vector_.begin() &&
+            detail::abs_diff(it->seq, time_unix_ns) >
+                detail::abs_diff(std::prev(it)->seq, time_unix_ns)) {
+            insert_idx--;
+        }
+        // NOTE: in future, could perhaps use gps data that isn't associated with an img_pt in some
+        // way. maybe to help with interpolation, estimate velocity
+        if (detail::abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) <
+            FourSeasonsParser::CAM_CAP_DELTA_NS) {
+            img_pt_vector_[insert_idx].gps_gcs = gps_data;
+        }
+    }
 }
 
-cv::Mat FourSeasonsParser::load_image(const size_t idx) const {
-    return get_image_point(idx).load_image(img_dir_);
+cv::Mat FourSeasonsParser::load_image(const size_t m) const {
+    return get_image_point(m).load_image(img_dir_);
 }
 
 FourSeasonsParser::FourSeasonsTransforms::FourSeasonsTransforms(
@@ -291,8 +300,8 @@ size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_da
     std::chrono::sys_time<std::chrono::nanoseconds> timestamp = date + utc_time_day_ns;
     return timestamp.time_since_epoch().count();
 }
-TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) {
-    TimeGPSList time_map_gps;
+TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) {
+    TimeGPSList time_list_gps;
     std::ifstream file_gps(path_gps);
     std::string line;
     std::optional<nmea::sentence> nmea_sentence;
@@ -300,7 +309,7 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) {
     date_last.day = 255;
     date_last.month = 255;
     date_last.year = 255;
-    double time_of_day_last = 0;
+    double time_of_day_last = -1.0;
     while (std::getline(file_gps, line) && !line.empty()) {
         try {
             nmea_sentence = nmea::sentence(line);
@@ -312,18 +321,14 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) {
         if (nmea_sentence->type() == "GGA") {
             nmea::gga gga(*nmea_sentence);
             if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) {
-                if (gga.utc.get() == time_of_day_last) {  // GGA messages for this dataset come
+                if (std::abs(gga.utc.get() - time_of_day_last) <
+                    1e-3) {  // GGA messages for this dataset come second
                     ImagePoint::GPSData gps_data;
                     gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get());
                     gps_data.latitude = gga.latitude.get();
                     gps_data.longitude = gga.longitude.get();
                     if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
-                    // after RMC messages
-                    // std::cout << "adding gps data with time of day: " << gga.utc.get()
-                    //           << " and unix time: "
-                    //           << gps_utc_to_unix_time(date_last, gga.utc.get()) << std::endl;
-                    time_map_gps.push_back(
-                        std::make_pair(gps_utc_to_unix_time(date_last, gga.utc.get()), gps_data));
+                    time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data));
                 }
             }
         } else if (nmea_sentence->type() == "RMC") {
@@ -332,10 +337,73 @@ TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps) {
                 date_last = rmc.date.get();
                 time_of_day_last = rmc.utc.get();
             }
+        } else if (nmea_sentence->type() == "GST") {
+            std::optional<GSTData> gst_data = parse_gpgst(nmea_sentence->nmea_string());
+            if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) <
+                                1e-3) {  // GST message for this dataset come third
+                size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns);
+                if (time_list_gps.back().first == unix_time_ns) {
+                    time_list_gps.back().second.uncertainty.emplace(
+                        gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m,
+                        gst_data->error_orientation_deg, gst_data->rms_range_error_m);
+                }
+            }
         }
     }
-    return time_map_gps;
+    return time_list_gps;
 }
+
+std::vector<std::string> split_nmea_sentence(const std::string& sentence) {
+    std::vector<std::string> fields;
+    std::string field;
+    std::stringstream ss(sentence);
+
+    while (std::getline(ss, field, ',')) {
+        // remove checksum from the last field if present
+        auto asterisk = field.find('*');
+        if (asterisk != std::string::npos) field = field.substr(0, asterisk);
+        fields.push_back(field);
+    }
+
+    return fields;
+}
+double time_of_day_seconds(const double utc_time_hhmmss) {
+    int hours = static_cast<int>(utc_time_hhmmss / 10000);
+    int minutes = static_cast<int>((utc_time_hhmmss - hours * 10000) / 100);
+    double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100;
+    return hours * 3600 + minutes * 60 + seconds;
+}
+std::optional<GSTData> parse_gpgst(const std::string& sentence) {
+    if (sentence.substr(0, 6) != "$GPGST") {
+        return std::nullopt;
+    }
+
+    std::vector<std::string> fields = split_nmea_sentence(sentence);
+    if (fields.size() != 9) {
+        return std::nullopt;
+    }
+    for (size_t i = 0; i < fields.size(); i++) {
+        if (fields[i].empty()) return std::nullopt;
+    }
+
+    GSTData gst;
+    gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1]));
+    gst.rms_range_error_m = std::stod(fields[2]);
+    gst.error_semi_major_m = std::stod(fields[3]);
+    gst.error_semi_minor_m = std::stod(fields[4]);
+    gst.error_orientation_deg = std::stod(fields[5]);
+    gst.sigma_lat_m = std::stod(fields[6]);
+    gst.sigma_lon_m = std::stod(fields[7]);
+    gst.sigma_alt_m = std::stod(fields[8]);
+
+    return gst;
+}
+
 }  // namespace gps_parser_help
+
+template <typename T>
+size_t abs_diff(const T& a, const T& b) {
+    return a > b ? a - b : b - a;
+}
 }  // namespace detail
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh
index 0f3d2c7fb..9709bc396 100644
--- a/experimental/learn_descriptors/four_seasons_parser.hh
+++ b/experimental/learn_descriptors/four_seasons_parser.hh
@@ -17,6 +17,8 @@
 namespace robot::experimental::learn_descriptors {
 class FourSeasonsParser {
    public:
+    static constexpr double CAM_HZ = 30.0;
+    static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ;
     struct CameraCalibrationFisheye {
         double fx, fy, cx, cy, k1, k2, k3, k4;
     };
@@ -70,6 +72,8 @@ class FourSeasonsParser {
 };
 
 namespace detail {
+template <typename T>
+std::size_t abs_diff(const T& a, const T& b);
 namespace txt_parser_help {
 using TimeDataMap = std::unordered_map<size_t, std::vector<std::string>>;
 using TimeDataList = std::vector<std::pair<size_t, std::vector<std::string>>>;
@@ -120,8 +124,21 @@ const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio
 }  // namespace txt_parser_help
 namespace gps_parser_help {
 using TimeGPSList = std::vector<std::pair<size_t, ImagePoint::GPSData>>;
+struct GSTData {
+    double utc_time_ns;
+    double rms_range_error_m;
+    double error_semi_major_m;
+    double error_semi_minor_m;
+    double error_orientation_deg;
+    double sigma_lat_m;
+    double sigma_lon_m;
+    double sigma_alt_m;
+};
+std::vector<std::string> split_nmea_sentence(const std::string& nmea_sentence);
+double time_of_day_seconds(const double utc_time_hhmmss);
+std::optional<GSTData> parse_gpgst(const std::string& nmea_sentence);
 size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds);
-TimeGPSList create_gps_time_data_map(const std::filesystem::path& path_gps);
+TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps);
 }  // namespace gps_parser_help
 }  // namespace detail
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc
index 48c496579..064f404ef 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example.cc
@@ -98,8 +98,7 @@ int main(int argc, const char** argv) {
                     0.5, cv::Scalar(0, 255, 0), 2);
         cv::putText(img, ss_gps.str(), cv::Point(10, 80), cv::FONT_HERSHEY_SIMPLEX, 0.5,
                     cv::Scalar(0, 255, 0), 2);
-        std::cout << "img_pt " << i << "\n\t" << ss_AS_w_from_gnss_cam.str() << "\n\t"
-                  << ss_AS_w_from_vio_cam.str() << "\n\t" << ss_gps.str() << std::endl;
+        std::cout << img_pt.to_string() << std::endl;
         cv::imshow("FourSeasonsParserExample", img);
         int key = cv::waitKey(0);
 
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index bfa8bdf3d..bca7ec2c4 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -2,6 +2,7 @@
 #include <cstddef>
 #include <filesystem>
 #include <iostream>
+#include <numeric>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -48,11 +49,6 @@ int main(int argc, const char** argv) {
 
     ROBOT_CHECK(parser.num_images() != 0);
 
-    // std::vector<robot::geometry::VizPose>
-    //     w_from_gnss_cams;                                   // camera frames from visual world
-    //     frame
-    // std::vector<robot::geometry::VizPose> w_from_gcs_cams;  // camera frames from visual world
-    // frame
     std::vector<robot::geometry::VizPose> viz_frames;   // camera frames from visual world frame
     std::vector<robot::geometry::VizPoint> viz_points;  // camera frames from visual world frame
 
@@ -60,43 +56,32 @@ int main(int argc, const char** argv) {
     std::cout << "gnss scale: " << parser.gnss_scale() << std::endl;
     scale_mat.linear() *= parser.gnss_scale();
     std::cout << "scale mat: " << scale_mat.matrix() << std::endl;
-    constexpr size_t START = 100;
-    const size_t END = std::min(static_cast<size_t>(800), parser.num_images());
     std::optional<double> altitude_gps_from_gnss_cam;
-    for (size_t i = START; i < END; i += 49) {
-        std::cout << "\nImage point " << i << std::endl;
+    std::vector<double> gps_ns_delta_from_shutter;
+    for (size_t i = 0; i < parser.num_images(); i += 1) {
         const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
-        std::cout << "Image point seq: " << img_pt.seq << std::endl;
+        std::cout << img_pt.to_string() << std::endl;
         if (!img_pt.AS_w_from_gnss_cam) {
             continue;
         }
-        Eigen::Isometry3d AS_w_from_gnss_cam =
-            scale_mat * Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix());
-        // Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix());
-        std::cout << "img_pt.AS_w_from_gnss_cam->matrix()" << img_pt.AS_w_from_gnss_cam->matrix()
-                  << std::endl;
-        std::cout << "AS_w_from_gnss_cam: " << AS_w_from_gnss_cam.matrix() << std::endl;
-        Eigen::Isometry3d w_from_gnss_cam =
-            Eigen::Isometry3d(parser.S_from_AS().matrix()) * AS_w_from_gnss_cam;
-        w_from_gnss_cam.translation().x() += 1.2;
-        w_from_gnss_cam.translation().y() += 1.2;
-        // Eigen::Isometry3d w_from_vio_cam = Eigen::Isometry3d(parser.S_from_AS().matrix()) *
-        //                                    Eigen::Isometry3d(img_pt.AS_w_from_vio_cam->matrix());
-        std::cout << "w_from_gnss_cam: " << w_from_gnss_cam.matrix() << std::endl;
+        Eigen::Isometry3d w_from_gnss_cam(Eigen::Isometry3d(parser.S_from_AS().matrix()) *
+                                          scale_mat *
+                                          Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()));
+
         viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
         viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
-        if (i == START) {
-            break;
-            viz_points.emplace_back(w_from_gnss_cam.translation(), "x_gps_" + std::to_string(i));
-        } else if (img_pt.gps_gcs) {
-            std::cout << "gps time: " << img_pt.gps_gcs->seq << std::endl;
+        if (img_pt.gps_gcs) {
+            if (img_pt.gps_gcs->seq > img_pt.seq) {
+                gps_ns_delta_from_shutter.push_back(
+                    static_cast<double>(img_pt.gps_gcs->seq - img_pt.seq));
+            } else {
+                gps_ns_delta_from_shutter.push_back(
+                    -static_cast<double>(img_pt.seq - img_pt.gps_gcs->seq));
+            }
             Eigen::Vector3d gcs_coordinate(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
                 img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
 
-            std::cout << "gcs_coordinate altitude: "
-                      << (img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0) << std::endl;
-
             // compare the reference in gcs to the gps in gcs
             Eigen::Isometry3d ECEF_from_gnss_cam =
                 Eigen::Isometry3d(
@@ -109,7 +94,6 @@ int main(int argc, const char** argv) {
                 altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude);
             }
             gcs_coordinate.z() += *altitude_gps_from_gnss_cam;
-
             std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs
                       << "\ngps_gcs: " << gcs_coordinate
                       << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl;
@@ -117,7 +101,6 @@ int main(int argc, const char** argv) {
             const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate);
             const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(),
                                                     ECEF_from_gps.z(), 1);
-            // Eigen::Isometry3d w_from_gcs_gps =
             Eigen::Vector4d gps_in_w =
                 Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) *
                 ECEF_from_gps_hom;
@@ -126,10 +109,33 @@ int main(int argc, const char** argv) {
                 gps_in_w.head<3>() - w_from_gnss_cam.translation();
             std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl;
         }
-        // std::cout << "\npose gnss metric " << i << w_from_gnss_cam.matrix() << std::endl;
-        // std::cout << "pose vio" << i << w_from_vio_cam.matrix() << std::endl;
     }
+    const double sum =
+        std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0);
+    double avg_ns_gps_delta = sum / gps_ns_delta_from_shutter.size();
+    double var_ns_gps_delta;
+    double var_sum = 0.0;
+    size_t num_greater_cam_hz = 0;
+
+    for (const double delta : gps_ns_delta_from_shutter) {
+        var_sum += std::pow(delta - avg_ns_gps_delta, 2);
+        if (delta > lrn_desc::FourSeasonsParser::CAM_CAP_DELTA_NS) {
+            num_greater_cam_hz++;
+        }
+    }
+    var_ns_gps_delta = var_sum / gps_ns_delta_from_shutter.size();
+    double max_delta =
+        *std::max_element(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end());
+    ROBOT_CHECK(max_delta < lrn_desc::FourSeasonsParser::CAM_CAP_DELTA_NS);
+    std::cout << "\nGPS Analysis: " << std::endl;
+    std::cout << "\tavg delta time ns img_pt_seq from gps_gcs_seq: " << avg_ns_gps_delta
+              << std::endl;
+    std::cout << "\tstd_var delta time ns img_pt_seq from gps_gcs_seq: "
+              << std::pow(var_ns_gps_delta, 0.5) << std::endl;
+    std::cout << "\tnum deltas greater than cam_hz: " << num_greater_cam_hz << std::endl;
+    std::cout << "max delta: " << max_delta << std::endl;
     std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl;
     std::cout << "got " << viz_points.size() << " points" << std::endl;
-    robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown());
-}
\ No newline at end of file
+    robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true,
+                               "Viz Trajectory + GPS Points");
+}
diff --git a/experimental/learn_descriptors/four_seasons_parser_test.cc b/experimental/learn_descriptors/four_seasons_parser_test.cc
index b5662039e..ff49d935d 100644
--- a/experimental/learn_descriptors/four_seasons_parser_test.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_test.cc
@@ -16,6 +16,7 @@
 #include "GeographicLib/LocalCartesian.hpp"
 #include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
+#include "common/check.hh"
 #include "common/liegroups/se3.hh"
 #include "gtest/gtest.h"
 #include "nmea/message/gga.hpp"
@@ -104,6 +105,8 @@ TEST(FourSeasonsParserTest, parser_test) {
         (parser.e_from_gpsw() * parser.w_from_gpsw().inverse() * parser.S_from_AS()).matrix() *
         scale_mat);
 
+    std::vector<double> gps_ns_delta_from_shutter;
+
     for (size_t i = 0; i < parser.num_images(); i++) {
         const ImagePoint& img_pt = parser.get_image_point(i);
         const cv::Mat img = parser.load_image(i);
@@ -181,10 +184,23 @@ TEST(FourSeasonsParserTest, parser_test) {
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
                 img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
 
-            EXPECT_NEAR(raw_gps_gcs.x(), gnss_gps_gcs.x(), 1e-4);  // lattitude
+            EXPECT_NEAR(raw_gps_gcs.x(), gnss_gps_gcs.x(), 1e-4);  // latitude
             EXPECT_NEAR(raw_gps_gcs.y(), gnss_gps_gcs.y(), 1e-4);  // longitude
             EXPECT_NEAR(raw_gps_gcs.z(), gnss_gps_gcs.z(), 60);    // height above sea level
         }
+        if (img_pt.gps_gcs) {
+            if (img_pt.gps_gcs->seq > img_pt.seq) {
+                gps_ns_delta_from_shutter.push_back(
+                    static_cast<double>(img_pt.gps_gcs->seq - img_pt.seq));
+            } else {
+                gps_ns_delta_from_shutter.push_back(
+                    -static_cast<double>(img_pt.seq - img_pt.gps_gcs->seq));
+            }
+        }
     }
+
+    double max_delta =
+        *std::max_element(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end());
+    ROBOT_CHECK(max_delta < FourSeasonsParser::CAM_CAP_DELTA_NS);
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index 33d48e9f3..c46708d17 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -15,13 +15,62 @@
 namespace robot::experimental::learn_descriptors {
 struct ImagePoint {
     struct GPSData {
-        size_t seq;  // time in nanoseconds, may differ from shutter time
+        struct Uncertainty {
+            double sigma_lat_mitude;
+            double sigma_longitude;
+            double sigma_altitude;
+            double orientation_deg;
+            double rms_range_error_m;
+
+            // diagonal latitude, longitude, altitude covariance in meters squared
+            Eigen::Matrix3d to_LLA_covariance() const {
+                Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
+                cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude;
+                cov(1, 1) = sigma_longitude * sigma_longitude;
+                cov(2, 2) = sigma_altitude * sigma_altitude;
+                return cov;
+            }
+
+            // Convert lat/lon covariance to ENU at given latitude
+            Eigen::Matrix3d to_ENU_covariance(double lat_deg) const {
+                double lat_rad = lat_deg * M_PI / 180.0;
+
+                // meters per degree scaling
+                double kLat =
+                    111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad);
+                double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad);
+
+                Eigen::Matrix3d lla_cov = to_LLA_covariance();
+
+                // rotate lat/lon covariance (upper-left 2x2) using orientation
+                double theta = orientation_deg * M_PI / 180.0;
+                Eigen::Matrix2d R;
+                R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
+
+                Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
+                Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose();
+
+                // apply linear scaling to convert degrees to meters in EN
+                Eigen::Matrix2d J;
+                J << kLat, 0, 0, kLon;
+
+                Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose();
+
+                Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero();
+                enu_cov.topLeftCorner<2, 2>() = en_cov;
+                enu_cov(2, 2) = lla_cov(2, 2);  // alt variance unchanged
+
+                return enu_cov;
+            }
+        };
+        size_t seq;  // time in nanoseconds, may differ from image capturetime
         double latitude;
         double longitude;
         std::optional<double> altitude;  // meters above sea level
+        std::optional<Uncertainty> uncertainty;
     };
     size_t id;   // idx for DB
-    size_t seq;  // time in nanoseconds (also the name of image)
+    size_t seq;  // time in nanoseconds of image capture (also the name of image)
     std::optional<liegroups::SE3>
         AS_w_from_gnss_cam;  // globally opimized arbitrary scale visual world (gnss + VIO + loop
                              // closure + etc.) from cam
@@ -54,13 +103,21 @@ struct ImagePoint {
         }
         ss << "\n\tgps_gcs: ";
         if (gps_gcs) {
+            ss << "\n\t\tseq: " << gps_gcs->seq;
             ss << "\n\t\t" << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t";
             if (gps_gcs->altitude) {
                 ss << *(gps_gcs->altitude);
             } else {
                 ss << "alt N/A";
             }
-
+            ss << "\n\t\tsigma: ";
+            if (gps_gcs->uncertainty) {
+                ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t"
+                   << gps_gcs->uncertainty->sigma_longitude << "\t"
+                   << gps_gcs->uncertainty->sigma_altitude;
+            } else {
+                ss << "N/A";
+            }
         } else {
             ss << "N/A";
         }

From fa8ba85d0ea4b3970dd7b67446a190d63f6d17ec Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 8 Jul 2025 17:30:48 -0400
Subject: [PATCH 29/45] WIP make SFM object-oriented

---
 common/geometry/BUILD                         |   5 +-
 common/geometry/camera.cc                     |  44 +++-
 common/geometry/camera.hh                     |   9 +-
 common/geometry/camera_test.cc                |  29 ++-
 experimental/learn_descriptors/BUILD          | 216 ++++++++++++++++++
 experimental/learn_descriptors/frontend.cc    | 150 +++++++++---
 experimental/learn_descriptors/frontend.hh    |  57 +++--
 .../learn_descriptors/frontend_definitions.hh |   2 -
 .../learn_descriptors/frontend_test.cc        |   4 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  |  66 ++++--
 experimental/learn_descriptors/sfm_mvp.cc     |   6 +-
 .../structure_from_motion.cc                  |   8 +-
 .../structure_from_motion.hh                  |   4 +-
 13 files changed, 489 insertions(+), 111 deletions(-)

diff --git a/common/geometry/BUILD b/common/geometry/BUILD
index 2ef17fb83..6b11a2b02 100644
--- a/common/geometry/BUILD
+++ b/common/geometry/BUILD
@@ -48,9 +48,10 @@ cc_library(
   visibility = ["//visibility:public"],
   deps = [
     "@eigen",     
-    "@opencv//:opencv",
+    "@opencv",
     ":translate_types",
-    "@gtsam"
+    "@gtsam",
+    "//common:check"
   ]
 )
 
diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
index c7908c4f9..38733d6f4 100644
--- a/common/geometry/camera.cc
+++ b/common/geometry/camera.cc
@@ -1,5 +1,9 @@
 #include "common/geometry/camera.hh"
 
+#include <exception>
+#include <optional>
+
+#include "common/check.hh"
 #include "common/geometry/translate_types.hh"
 
 namespace robot::geometry {
@@ -24,11 +28,11 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel
     return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.);
 }
 
-Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
-                                          const std::vector<cv::KeyPoint> &kpts1,
-                                          const std::vector<cv::DMatch> &matches,
-                                          const cv::Mat &K) {
-    assert(kpts1.size() != 0 && kpts0.size() != 0);
+std::optional<Eigen::Isometry3d> estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                                         const std::vector<cv::KeyPoint> &kpts1,
+                                                         const std::vector<cv::DMatch> &matches,
+                                                         const cv::Mat &K) {
+    ROBOT_CHECK(matches.size() >= 5 && kpts1.size() >= 5 && kpts0.size() >= 5);
     Eigen::Isometry3d result;
     std::vector<cv::Point2f> pts1;
     std::vector<cv::Point2f> pts2;
@@ -36,12 +40,28 @@ Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0
         pts1.push_back(kpts0[match.queryIdx].pt);
         pts2.push_back(kpts1[match.trainIdx].pt);
     }
-    cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0);
-    cv::Mat R_c1_c0, t_c1_c0;
-    cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0);
-    result.linear() = cv_to_eigen_mat(R_c1_c0);
-    result.translation() = cv_to_eigen_mat(t_c1_c0);
-    result = result.inverse();
-    return result;
+    ROBOT_CHECK(pts1.size() == pts2.size() && pts1.size() >= 5);
+    // std::cout << "heartbeat 3" << std::endl;
+    try {
+        cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0);
+        cv::Mat R_c1_c0, t_c1_c0;
+        // std::cout << "heartbest 4" << std::endl;
+        ROBOT_CHECK(!E.empty(), "Essential matrix is empty.");
+        // TOOD: handle multiple returned candidate E matrices better (they are stacked on top of
+        // each other in E)
+        if (E.rows > 3) {
+            E = E.rowRange(0, 3);
+        }
+        // std::cout << E << std::endl;
+        cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0);
+        result.linear() = cv_to_eigen_mat(R_c1_c0);
+        result.translation() = cv_to_eigen_mat(t_c1_c0);
+        result = result.inverse();
+        return result;
+    } catch (const std::exception e) {
+        std::cerr << "Failed to estimate pose up to scale cam0_from_cam1.\n"
+                  << e.what() << std::endl;
+        return std::nullopt;
+    }
 }
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh
index 9164e4129..8048bac87 100644
--- a/common/geometry/camera.hh
+++ b/common/geometry/camera.hh
@@ -1,4 +1,6 @@
 #pragma once
+#include <optional>
+
 #include "Eigen/Core"
 #include "Eigen/Geometry"
 #include "gtsam/geometry/Cal3_S2.h"
@@ -21,7 +23,8 @@ Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel
 Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
                           const double depth);
 
-Eigen::Isometry3d estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
-                                          const std::vector<cv::KeyPoint> &kpts1,
-                                          const std::vector<cv::DMatch> &matches, const cv::Mat &K);
+std::optional<Eigen::Isometry3d> estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                                         const std::vector<cv::KeyPoint> &kpts1,
+                                                         const std::vector<cv::DMatch> &matches,
+                                                         const cv::Mat &K);
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc
index 19f5389cd..4d8b810fa 100644
--- a/common/geometry/camera_test.cc
+++ b/common/geometry/camera_test.cc
@@ -1,5 +1,7 @@
 #include "common/geometry/camera.hh"
 
+#include <optional>
+
 #include "common/geometry/translate_types.hh"
 #include "gtest/gtest.h"
 #include "visualization/opencv/opencv_viz.hh"
@@ -111,20 +113,23 @@ TEST(CameraTest, test_estimate_pose) {
         Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2);
         if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) &&
             CameraTestHelper::pixel_in_range(pxl_c1_pcube, img_width, img_height)) {
-            kpts0.push_back(cv::KeyPoint(pxl_c0_pcube[0], pxl_c0_pcube[1], 3));
-            kpts1.push_back(cv::KeyPoint(pxl_c1_pcube[0], pxl_c1_pcube[1], 3));
-            matches.emplace_back(cv::DMatch(kpts0.size() - 1, kpts1.size() - 1, 0));
+            kpts0.emplace_back(pxl_c0_pcube[0], pxl_c0_pcube[1], 3);
+            kpts1.emplace_back(pxl_c1_pcube[0], pxl_c1_pcube[1], 3);
+            matches.emplace_back(kpts0.size() - 1, kpts1.size() - 1, 0);
         }
     }
 
-    Eigen::Isometry3d cam0_from_cam1_estimate = estimate_cam0_from_cam1(kpts0, kpts1, matches, K);
-    Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1;
-    cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm();
-    EXPECT_TRUE(
-        cam0_from_cam1_estimate.translation().isApprox(cam0_from_cam1.translation(), 0.000001));
-    EXPECT_TRUE(cam0_from_cam1_estimate.linear().isApprox(cam0_from_cam1.linear(), 0.001));
-
-    poses.emplace_back(world_from_cam0 * cam0_from_cam1_estimate);
-    viz_scene(poses, p_W_cube);
+    std::optional<Eigen::Isometry3d> cam0_from_cam1_estimate =
+        estimate_cam0_from_cam1(kpts0, kpts1, matches, K);
+    if (cam0_from_cam1_estimate) {
+        Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1;
+        cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm();
+        EXPECT_TRUE(cam0_from_cam1_estimate->translation().isApprox(cam0_from_cam1.translation(),
+                                                                    0.000001));
+        EXPECT_TRUE(cam0_from_cam1_estimate->linear().isApprox(cam0_from_cam1.linear(), 0.001));
+
+        poses.emplace_back(world_from_cam0 * *cam0_from_cam1_estimate);
+        viz_scene(poses, p_W_cube);
+    }
 }
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 01d7e9a83..5d3fbf77c 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -153,3 +153,219 @@ cc_test(
     "//common:check"
   ]
 )
+
+cc_library(
+  name = "frontend_definitions",
+  hdrs = ["frontend_definitions.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",    
+    ":structure_from_motion_types"
+  ]
+)
+
+cc_library(
+  name = "structure_from_motion_types",
+  hdrs = ["structure_from_motion_types.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",    
+    "@opencv//:opencv"
+  ]
+)
+
+cc_library(
+  name = "frame",
+  hdrs = ["frame.hh"],
+  srcs = ["frame.cc"],
+  visibility = ["//visibility:public"],
+  deps = [             
+    "@opencv//:opencv",
+    ":frontend_definitions",
+    "@gtsam//:gtsam"
+  ]
+)
+
+cc_test(
+  name = "sfm_mvp",
+  srcs = ["sfm_mvp.cc"],
+    copts = [
+    "-Wno-error=unused-parameter",
+    "-Wno-error=unused-variable",
+  ],
+  deps = [
+    ":frontend",
+    "@com_google_googletest//:gtest_main",
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    "@eigen",
+    ":spatial_test_scene_cube",
+    ":symphony_lake_parser",
+    ":frame", 
+    ":structure_from_motion_types",
+    "//visualization/opencv:opencv_viz"
+  ]
+)
+
+
+cc_library(
+  name = "feature_set",
+  hdrs = ["feature_set.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",
+    "@opencv//:opencv"
+  ]
+)
+
+cc_library(
+  name = "feature_manager",
+  hdrs = ["feature_manager.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    ":feature_set",
+  ]
+)
+
+cc_test(
+  name = "feature_manager_test",
+  srcs = ["feature_manager_test.cc"],
+  deps = [
+    ":feature_manager",
+    "@com_google_googletest//:gtest_main",
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",
+    "@eigen",
+    ":spatial_test_scene_cube"
+  ]
+)
+
+
+cc_library(
+  name = "spatial_test_scene",
+  hdrs = ["spatial_test_scene.hh"],
+  visibility = ["//visibility:public"],
+  srcs = ["spatial_test_scene.cc"],
+  deps = [        
+    "@gtsam//:gtsam",
+    "@opencv//:opencv",    
+    "@eigen"
+  ]
+)
+
+cc_library(
+  name = "spatial_test_scene_cube",
+  hdrs = ["spatial_test_scene_cube.hh"],
+  visibility = ["//visibility:public"],
+  deps = [        
+    "@eigen",
+    ":spatial_test_scene"
+  ]
+)
+
+cc_binary(
+  name = "spatial_test_scene_cube_example",
+  srcs = ["spatial_test_scene_cube_example.cc"],
+  deps = [    
+    "@com_google_googletest//:gtest_main",    
+    ":spatial_test_scene_cube",
+    "//visualization/opencv:opencv_viz",
+    "@eigen"
+  ]
+)
+
+cc_library(
+  name = "frontend",
+  hdrs = ["frontend.hh"],
+  visibility = ["//visibility:public"],
+  srcs = ["frontend.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    "@opencv//:opencv",    
+    ":frame",
+    ":frontend_definitions",
+    ":structure_from_motion_types"
+  ]
+)
+
+
+cc_test(
+  name = "frontend_test",
+  srcs = ["frontend_test.cc"],
+  deps = [    
+    "@com_google_googletest//:gtest_main",    
+    ":frontend"
+  ]
+)
+
+cc_library(
+  name = "backend",
+  hdrs = ["backend.hh"],
+  visibility = ["//visibility:public"],
+  srcs = ["backend.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    "@opencv//:opencv",    
+    "@gtsam//:gtsam",
+    "//common/geometry:camera",
+    "@boost//:smart_ptr",
+    ":feature_manager"
+  ]
+)
+
+
+cc_test(
+  name = "backend_test",
+  srcs = ["backend_test.cc"],
+    copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [    
+    "@com_google_googletest//:gtest_main",    
+    "@opencv//:opencv",
+    ":backend",
+    ":feature_manager",
+    ":spatial_test_scene_cube",
+    "//visualization/opencv:opencv_viz"
+  ]
+)
+
+cc_binary(
+  name = "get_colmap_groundtruth",
+  srcs = ["get_colmap_groundtruth.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+  ],
+  deps = [
+    ":symphony_lake_parser"
+  ]
+)
+
+cc_binary(
+  name = "incremental_sfm_mvp",
+  srcs = ["incremental_sfm_mvp.cc"],
+  copts = [
+    "-Wno-error=unused-parameter",
+    "-Wno-error=unused-function",
+  ],
+  deps = [
+    ":four_seasons_parser",
+    "@eigen",
+    "@opencv",    
+    "@gtsam",
+    "//visualization/opencv:opencv_viz",
+    ":spatial_test_scene_cube",
+    ":frame", 
+    ":structure_from_motion_types",
+    ":frontend",
+    "@boost//:smart_ptr",    
+    "@cxxopts",
+    "//common/geometry:camera"
+  ]
+)
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 134c4914a..194122420 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -4,30 +4,27 @@
 
 namespace robot::experimental::learn_descriptors {
 
-Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
-    extractor_type_ = frontend_algorithm;
-    matcher_type_ = frontend_matcher;
-
-    switch (extractor_type_) {
-        case ExtractorType::SIFT:
+Frontend::Frontend(FrontendParams params_) : params_(params_) {
+    switch (params_.extractor_type) {
+        case FrontendParams::ExtractorType::SIFT:
             feature_extractor_ = cv::SIFT::create();
             break;
-        case ExtractorType::ORB:
+        case FrontendParams::ExtractorType::ORB:
             feature_extractor_ = cv::ORB::create();
             break;
         default:
             // Error handling needed?
             break;
     }
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
+    switch (params_.matcher_type) {
+        case FrontendParams::MatcherType::BRUTE_FORCE:
             descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
             break;
-        case MatcherType::KNN:
+        case FrontendParams::MatcherType::KNN:
             descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
             break;
-        case MatcherType::FLANN:
-            if (frontend_algorithm == ExtractorType::ORB) {
+        case FrontendParams::MatcherType::FLANN:
+            if (params_.extractor_type == FrontendParams::ExtractorType::ORB) {
                 throw std::invalid_argument("FLANN can not be used with ORB.");
             }
             descriptor_matcher_ = cv::FlannBasedMatcher::create();
@@ -38,10 +35,101 @@ Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matche
     }
 }
 
+void Frontend::populate_frames() {
+    for (const cv::Mat &img : images_) {
+    }
+}
+
+void Frontend::match_frames_and_build_tracks() {
+    if (params_.exhaustive) {
+        for (size_t i = 0; i < indices.size() - 1; i += 4) {
+            // std::cout << "i: " << i << std::endl;
+            for (size_t j = i + 1; j < indices.size(); j++) {
+                // std::cout << "j: " << j << std::endl;
+                std::vector<cv::DMatch> matches = frontend.compute_matches(
+                    frames[i].get_descriptors(), frames[j].get_descriptors());
+                // DIAL TO MESS WITH
+                frontend.enforce_bijective_buffer_matches(matches);
+
+                if (matches.size() < 5) {
+                    continue;
+                }
+
+                std::vector<cv::KeyPoint> cv_kpts_1;
+                std::vector<cv::KeyPoint> cv_kpts_2;
+                for (const KeypointCV &kpt : frames[i].get_keypoints()) {
+                    cv::KeyPoint cv_kpt;
+                    cv_kpt.pt = kpt;
+                    cv_kpts_1.push_back(cv_kpt);
+                }
+                for (const KeypointCV &kpt : frames[j].get_keypoints()) {
+                    cv::KeyPoint cv_kpt;
+                    cv_kpt.pt = kpt;
+                    cv_kpts_2.push_back(cv_kpt);
+                }
+                Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix());
+                // std::cout << "heartbeat" << std::endl;
+                std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
+                    robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
+                if (!scale_cam0_from_cam1) {
+                    continue;
+                }
+                world_from_camj.linear() =
+                    (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) *
+                     *scale_cam0_from_cam1)
+                        .linear();
+                // std::cout << "heartbeat 2" << std::endl;
+                id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
+                for (const cv::DMatch match : matches) {
+                    const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx];
+
+                    auto key = std::make_pair(frames[i].id_, kpt_cam0);
+                    if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                        auto id = lmk_id_map.at(key);
+                        feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
+                        feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1);
+                    } else {
+                        FeatureTrack feature_track(i, kpt_cam0);
+                        feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1);
+                        feature_tracks.emplace(lmk_id, feature_track);
+                        lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
+                        lmk_id++;
+                    }
+                }
+            }
+        }
+        std::cout << "done processing matches" << std::endl;
+    } else {  // successive only
+        for (size_t i = 0; i < indices.size() - 1; i++) {
+            std::vector<cv::DMatch> matches = frontend.compute_matches(
+                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.enforce_bijective_buffer_matches(matches);
+            for (const cv::DMatch match : matches) {
+                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+
+                auto key = std::make_pair(frames[i].id_, kpt_cam0);
+                if (lmk_id_map.find(key) != lmk_id_map.end()) {
+                    auto id = lmk_id_map.at(key);
+                    feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
+                    feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                } else {
+                    FeatureTrack feature_track(i, kpt_cam0);
+                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                    feature_tracks.emplace(lmk_id, feature_track);
+                    lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
+                    lmk_id++;
+                }
+            }
+        }
+    }
+}
+
 std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const cv::Mat &img) const {
     std::vector<cv::KeyPoint> keypoints;
     cv::Mat descriptors;
-    switch (extractor_type_) {
+    switch (params_.extractor_type) {
         default:  // the opencv extractors have the same function signature
             feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
             break;
@@ -49,18 +137,18 @@ std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const c
     return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
 }
 
-std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
-                                              const cv::Mat &descriptors2) const {
+std::vector<cv::DMatch> Frontend::compute_matches(const cv::Mat &descriptors1,
+                                                  const cv::Mat &descriptors2) const {
     std::vector<cv::DMatch> matches;
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            get_brute_matches(descriptors1, descriptors2, matches);
+    switch (params_.matcher_type) {
+        case FrontendParams::MatcherType::BRUTE_FORCE:
+            compute_brute_matches(descriptors1, descriptors2, matches);
             break;
-        case MatcherType::KNN:
-            get_KNN_matches(descriptors1, descriptors2, matches);
+        case FrontendParams::MatcherType::KNN:
+            compute_KNN_matches(descriptors1, descriptors2, matches);
             break;
-        case MatcherType::FLANN:
-            get_FLANN_matches(descriptors1, descriptors2, matches);
+        case FrontendParams::MatcherType::FLANN:
+            compute_FLANN_matches(descriptors1, descriptors2, matches);
             break;
         default:
             break;
@@ -152,9 +240,9 @@ void Frontend::enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches
     }
 }
 
-bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
+bool Frontend::compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                     std::vector<cv::DMatch> &matches_out) const {
+    if (params_.matcher_type != FrontendParams::MatcherType::BRUTE_FORCE) {
         return false;
     }
     matches_out.clear();
@@ -162,9 +250,9 @@ bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &des
     return true;
 }
 
-bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                               std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::KNN) {
+bool Frontend::compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                   std::vector<cv::DMatch> &matches_out) const {
+    if (params_.matcher_type != FrontendParams::MatcherType::KNN) {
         return false;
     }
     std::vector<std::vector<cv::DMatch>> knn_matches;
@@ -180,9 +268,9 @@ bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descr
     return true;
 }
 
-bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::FLANN) {
+bool Frontend::compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                     std::vector<cv::DMatch> &matches_out) const {
+    if (params_.matcher_type != FrontendParams::MatcherType::FLANN) {
         return false;
     }
     std::vector<std::vector<cv::DMatch>> knn_matches;
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index 12332e4fb..85b112c66 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -3,24 +3,43 @@
 #include <utility>
 #include <vector>
 
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
 #include "opencv2/opencv.hpp"
 
 namespace robot::experimental::learn_descriptors {
-class Frontend {
-   public:
+struct FrontendParams {
     enum class ExtractorType { SIFT, ORB };
     enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
-
-    Frontend(){};
-    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
+    ExtractorType extractor_type = ExtractorType::SIFT;
+    MatcherType matcher_type = MatcherType::KNN;
+    bool exhaustive = true;
+    bool incremental = false;  // a.k.a. images are successive
+};
+class Frontend {
+   public:
+    static constexpr char symbol_pose_char = 'x';
+    static constexpr char symbol_lmk_char = 'l';
+    explicit Frontend(FrontendParams params);
     ~Frontend(){};
 
-    const ExtractorType &get_extractor_type() const { return extractor_type_; };
-    const MatcherType &get_matcher_type() const { return matcher_type_; };
+    void populate_frames();
+    void match_frames_and_build_tracks();
+
+    void add_image(const cv::Mat &img) { images_.push_back(img); };
+    void add_images(const std::vector<cv::Mat> &imgs) {
+        for (const cv::Mat &img : imgs) {
+            images_.push_back(img);
+        }
+    };
+
+    const FrontendParams::ExtractorType &extractor_type() const { return params_.extractor_type; };
+    const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; };
 
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> extract_features(const cv::Mat &img) const;
-    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
-                                        const cv::Mat &descriptors2) const;
+    std::vector<cv::DMatch> compute_matches(const cv::Mat &descriptors1,
+                                            const cv::Mat &descriptors2) const;
     static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
     static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
     static void enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches);
@@ -36,16 +55,20 @@ class Frontend {
     }
 
    private:
-    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                         std::vector<cv::DMatch> &matches_out) const;
-    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    ExtractorType extractor_type_;
-    MatcherType matcher_type_;
+    bool compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const;
+    bool compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                             std::vector<cv::DMatch> &matches_out) const;
+    bool compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const;
+    FrontendParams params_;
 
     cv::Ptr<cv::Feature2D> feature_extractor_;
     cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
+
+    std::vector<cv::Mat> images_;
+    FeatureTracks feature_tracks_;
+    FrameLandmarkIdMap lmk_id_map_;
+    std::vector<Frame> frames_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
index fac8b67b3..2965d6c97 100644
--- a/experimental/learn_descriptors/frontend_definitions.hh
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -38,6 +38,4 @@ class FeatureTrack {
 
 using FeatureTracks = std::unordered_map<LandmarkId, FeatureTrack>;
 using FrameLandmarkIdMap = std::unordered_map<std::pair<FrameId, KeypointCV>, LandmarkId>;
-using LandmarkFrameIdMap = std::unordered_map<LandmarkId, std::pair<FrameId, KeypointCV>>;
-
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
index 25e60a690..28e7dc0c8 100644
--- a/experimental/learn_descriptors/frontend_test.cc
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -77,8 +77,8 @@ TEST(FrontendTest, pipeline_sweep) {
             }
             keypoints_descriptors_pair_1 = frontend.extract_features(image_1);
             keypoints_descriptors_pair_2 = frontend.extract_features(image_2);
-            matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
-                                           keypoints_descriptors_pair_2.second);
+            matches = frontend.compute_matches(keypoints_descriptors_pair_1.second,
+                                               keypoints_descriptors_pair_2.second);
             frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
                                     img_keypoints_out_1);
             frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index 214cd3105..ff7df085c 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -1,6 +1,7 @@
 #include <cmath>
 #include <functional>
 #include <opencv2/calib3d.hpp>
+#include <optional>
 #include <sstream>
 #include <vector>
 
@@ -277,7 +278,18 @@ int main(int argc, const char **argv) {
     std::cout << "incremental_sfm_mvp!" << std::endl;
     // const std::vector<int> indices{581, 609,
     //                                633};  // indices with all data fields on neighborhood_5_train
-    const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};  // indices with
+    // const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};  // indices with
+    // get indices with fully populated img_pt containing full gps data and reference
+    const std::vector<int> indices = [&parser]() -> std::vector<int> {
+        std::vector<int> tmp;
+        for (size_t i = 0; i < parser.num_images(); i++) {
+            const ImagePoint img_pt = parser.get_image_point(i);
+            if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs &&
+                img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude)
+                tmp.push_back(i);
+        }
+        return tmp;
+    }();
     constexpr bool visualize = false;
     // all data fields on neighborhood_5_train const std::vector<int> indices = []() {
     //     std::vector<int> tmp;
@@ -300,7 +312,9 @@ int main(int argc, const char **argv) {
 
     // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
     //                         gtsam::Pose3(T_world_camera0.matrix()));
-    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
+    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
+                          true, false};
+    Frontend frontend(params);
 
     gtsam::Values initial_estimate_;
     gtsam::NonlinearFactorGraph graph_;
@@ -349,7 +363,7 @@ int main(int argc, const char **argv) {
             world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
                                                 img_pt.AS_w_from_gnss_cam->matrix())
                                                    .matrix());
-            std::cout << "ok" << std::endl;
+            // std::cout << "ok" << std::endl;
             // world_from_cam.translation() =
             //     ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() *
             //      p_gps_in_ECEF_hom)
@@ -417,15 +431,19 @@ int main(int argc, const char **argv) {
     LandmarkId lmk_id = 0;
     constexpr bool exhaustive = true;
     if (exhaustive) {
-        for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::cout << "i: " << i << std::endl;
+        for (size_t i = 0; i < indices.size() - 1; i += 4) {
+            // std::cout << "i: " << i << std::endl;
             for (size_t j = i + 1; j < indices.size(); j++) {
-                std::cout << "j: " << j << std::endl;
-                std::vector<cv::DMatch> matches =
-                    frontend.get_matches(frames[i].get_descriptors(), frames[j].get_descriptors());
+                // std::cout << "j: " << j << std::endl;
+                std::vector<cv::DMatch> matches = frontend.compute_matches(
+                    frames[i].get_descriptors(), frames[j].get_descriptors());
                 // DIAL TO MESS WITH
                 frontend.enforce_bijective_buffer_matches(matches);
 
+                if (matches.size() < 5) {
+                    continue;
+                }
+
                 std::vector<cv::KeyPoint> cv_kpts_1;
                 std::vector<cv::KeyPoint> cv_kpts_2;
                 for (const KeypointCV &kpt : frames[i].get_keypoints()) {
@@ -439,10 +457,17 @@ int main(int argc, const char **argv) {
                     cv_kpts_2.push_back(cv_kpt);
                 }
                 Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix());
+                // std::cout << "heartbeat" << std::endl;
+                std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
+                    robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
+                if (!scale_cam0_from_cam1) {
+                    continue;
+                }
                 world_from_camj.linear() =
                     (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) *
-                     robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat))
+                     *scale_cam0_from_cam1)
                         .linear();
+                // std::cout << "heartbeat 2" << std::endl;
                 id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
                 for (const cv::DMatch match : matches) {
                     const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
@@ -466,8 +491,8 @@ int main(int argc, const char **argv) {
         std::cout << "done processing matches" << std::endl;
     } else {  // successive only
         for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::vector<cv::DMatch> matches =
-                frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            std::vector<cv::DMatch> matches = frontend.compute_matches(
+                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
             frontend.enforce_bijective_buffer_matches(matches);
             for (const cv::DMatch match : matches) {
                 const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
@@ -494,12 +519,12 @@ int main(int argc, const char **argv) {
                                              std::to_string(id));
     }
 
-    std::cout << "pre heartbeat" << std::endl;
+    // std::cout << "pre heartbeat" << std::endl;
     if (visualize)
         robot::geometry::viz_scene(viz_world_from_cam_init,
                                    std::vector<robot::geometry::VizPoint>(),
                                    cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
-    std::cout << "heartbeat" << std::endl;
+    // std::cout << "heartbeat" << std::endl;
 
     // TRIANGULATE all of the points (for initial guess)
     std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;  // points are kpt_in_world
@@ -636,8 +661,8 @@ int main(int argc, const char **argv) {
             local_estimate_.insert_or_assign(symbols_poses[1],
                                              id_to_initial_world_from_cam.at(i + 1));
 
-            std::vector<cv::DMatch> matches =
-                frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            std::vector<cv::DMatch> matches = frontend.compute_matches(
+                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
             // DIAL TO MESS WITH
             frontend.enforce_bijective_matches(matches);
             std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
@@ -743,12 +768,6 @@ int main(int argc, const char **argv) {
     const gtsam::Values result =
         detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0);
 
-    if (visualize) {
-        std::cout << "about to visualize result" << std::endl;
-        result.print();
-        detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
-    }
-
     // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
     double sum_traj_error = 0;
     double sum_rot_error = 0;
@@ -763,4 +782,9 @@ int main(int argc, const char **argv) {
     double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
     double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
     std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
+
+    std::cout << "about to visualize result" << std::endl;
+    result.print();
+    detail_sfm::graph_values(result, "Result", symbols_pose, std::vector<gtsam::Symbol>());
+    // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index 9d41270d7..b4f98a01c 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -259,7 +259,7 @@ TEST(SFMMvp, sfm_building_manual_global) {
     LandmarkId lmk_id = 0;
     for (size_t i = 0; i < indices.size() - 1; i++) {
         std::vector<cv::DMatch> matches =
-            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
         // frontend.enforce_bijective_matches(matches);
         frontend.enforce_bijective_buffer_matches(matches);
         for (const cv::DMatch match : matches) {
@@ -453,7 +453,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
     LandmarkId lmk_id = 0;
     for (size_t i = 0; i < indices.size() - 1; i++) {
         std::vector<cv::DMatch> matches =
-            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
         frontend.enforce_bijective_buffer_matches(matches);
         for (const cv::DMatch match : matches) {
             const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
@@ -582,7 +582,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1));
 
         std::vector<cv::DMatch> matches =
-            frontend.get_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
         frontend.enforce_bijective_matches(matches);
         std::vector<gtsam::Pose3> feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)};
 
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index df84bb7cf..9ed6f6e5f 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -68,8 +68,8 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     std::cout << "current index " << idx_img_current << std::endl;
     if (idx_img_current > 0) {
         std::vector<cv::DMatch> matches =
-            get_matches(img_keypoints_and_descriptors_.back().second,
-                        keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
+            compute_matches(img_keypoints_and_descriptors_.back().second,
+                            keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
 
         const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1);
         const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current);
@@ -125,10 +125,10 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
     img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
 }
 
-std::vector<cv::DMatch> StructureFromMotion::get_matches(
+std::vector<cv::DMatch> StructureFromMotion::compute_matches(
     const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
     std::optional<StructureFromMotion::match_function> post_process_func) {
-    std::vector<cv::DMatch> matches = frontend_.get_matches(descriptors_1, descriptors_2);
+    std::vector<cv::DMatch> matches = frontend_.compute_matches(descriptors_1, descriptors_2);
     if (post_process_func) {
         (*post_process_func)(matches);
     }
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index a4e3850d2..90b6bcfbd 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -40,7 +40,7 @@ class StructureFromMotion {
         std::optional<Backend::graph_step_debug_func> inter_debug_func = std::nullopt);
     const gtsam::Values &get_structure_result() { return backend_.get_result(); };
     using match_function = std::function<void(std::vector<cv::DMatch> &)>;
-    std::vector<cv::DMatch> get_matches(
+    std::vector<cv::DMatch> compute_matches(
         const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
         std::optional<match_function> post_process_func = std::nullopt);
     void graph_values(const gtsam::Values &values, const std::string &window_name = "graph values");
@@ -49,7 +49,7 @@ class StructureFromMotion {
     Backend get_backend() { return backend_; }
     size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); };
     size_t get_landmark_count() { return landmark_count_; };
-    const std::vector<std::vector<cv::DMatch>> get_matches() { return matches_; };
+    const std::vector<std::vector<cv::DMatch>> compute_matches() { return matches_; };
 
    private:
     std::shared_ptr<FeatureManager> feature_manager_;

From 99c99f869ec5c331c0c84f41924867510b75a888 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 10 Jul 2025 17:31:18 -0400
Subject: [PATCH 30/45] WIP

---
 common/gps/BUILD                              |  23 +
 common/gps/frame_translation.cc               |  32 +
 common/gps/frame_translation.hh               |   8 +
 common/gps/frame_translation_test.cc          |  29 +
 common/gps/geographiclib_test.cc              |   4 +-
 common/gps/nmea_test.cc                       |   4 +-
 experimental/learn_descriptors/BUILD          | 627 ++++++++++--------
 .../learn_descriptors/camera_calibration.hh   |  19 +
 .../learn_descriptors/four_seasons_parser.cc  | 323 +--------
 .../learn_descriptors/four_seasons_parser.hh  | 128 +---
 .../four_seasons_parser_detail.cc             | 223 +++++++
 .../four_seasons_parser_detail.hh             |  86 +++
 .../four_seasons_parser_example.cc            |   4 +-
 .../four_seasons_parser_example_viz.cc        |   7 +-
 .../four_seasons_parser_test.cc               | 104 ++-
 .../four_seasons_transforms.cc                |  67 ++
 .../four_seasons_transforms.hh                |  29 +
 experimental/learn_descriptors/frame.hh       |  15 +-
 experimental/learn_descriptors/frontend.cc    | 328 +++++++--
 experimental/learn_descriptors/frontend.hh    |  18 +-
 .../learn_descriptors/frontend_definitions.hh |   2 +-
 experimental/learn_descriptors/gps_data.cc    |   0
 experimental/learn_descriptors/gps_data.hh    |  65 ++
 experimental/learn_descriptors/image_point.hh | 129 +---
 .../image_point_four_seasons.cc               |  49 ++
 .../image_point_four_seasons.hh               |  90 +++
 .../learn_descriptors/incremental_sfm_mvp.cc  |  10 +-
 .../learn_descriptors/refactor_sfm_mvp.cc     | 333 ++++++++++
 experimental/learn_descriptors/sfm_mvp.cc     |   2 +-
 .../spatial_test_scene_cube_example.cc        |   3 +-
 .../structure_from_motion_types.hh            |  10 +-
 31 files changed, 1851 insertions(+), 920 deletions(-)
 create mode 100644 common/gps/frame_translation.cc
 create mode 100644 common/gps/frame_translation.hh
 create mode 100644 common/gps/frame_translation_test.cc
 create mode 100644 experimental/learn_descriptors/camera_calibration.hh
 create mode 100644 experimental/learn_descriptors/four_seasons_parser_detail.cc
 create mode 100644 experimental/learn_descriptors/four_seasons_parser_detail.hh
 create mode 100644 experimental/learn_descriptors/four_seasons_transforms.cc
 create mode 100644 experimental/learn_descriptors/four_seasons_transforms.hh
 create mode 100644 experimental/learn_descriptors/gps_data.cc
 create mode 100644 experimental/learn_descriptors/gps_data.hh
 create mode 100644 experimental/learn_descriptors/image_point_four_seasons.cc
 create mode 100644 experimental/learn_descriptors/image_point_four_seasons.hh
 create mode 100644 experimental/learn_descriptors/refactor_sfm_mvp.cc

diff --git a/common/gps/BUILD b/common/gps/BUILD
index ab02194e9..934bc7bec 100644
--- a/common/gps/BUILD
+++ b/common/gps/BUILD
@@ -16,4 +16,27 @@ cc_test(
     "@com_google_googletest//:gtest_main",
     "@geographiclib//:geographiclib",
   ]
+)
+
+cc_library(
+  name = "frame_translation",
+  srcs = ["frame_translation.cc"],
+  hdrs = ["frame_translation.hh"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@com_google_googletest//:gtest_main",
+    "@geographiclib//:geographiclib",
+    "@eigen"
+  ]
+)
+
+cc_test(
+  name = "frame_translation_test",
+  srcs = ["frame_translation_test.cc"],
+  deps = [
+    "@com_google_googletest//:gtest_main",
+    ":frame_translation",
+    "@eigen",
+    "//common:check"
+  ]
 )
\ No newline at end of file
diff --git a/common/gps/frame_translation.cc b/common/gps/frame_translation.cc
new file mode 100644
index 000000000..28c0875ea
--- /dev/null
+++ b/common/gps/frame_translation.cc
@@ -0,0 +1,32 @@
+#include "common/gps/frame_translation.hh"
+
+#include "GeographicLib/Constants.hpp"
+#include "GeographicLib/Geocentric.hpp"
+#include "GeographicLib/Geodesic.hpp"
+#include "GeographicLib/LocalCartesian.hpp"
+
+namespace robot::gps {
+Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF) {
+    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
+
+    double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z();
+
+    double lat_deg, lon_deg, alt_m;
+    earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m);
+
+    return Eigen::Vector3d(lat_deg, lon_deg, alt_m);
+}
+
+Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate) {
+    double lat_deg = gcs_coordinate.x();
+    double lon_deg = gcs_coordinate.y();
+    double alt_m = gcs_coordinate.z();
+
+    double x, y, z;
+
+    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
+    earth.Forward(lat_deg, lon_deg, alt_m, x, y, z);
+
+    return Eigen::Vector3d(x, y, z);
+}
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/common/gps/frame_translation.hh b/common/gps/frame_translation.hh
new file mode 100644
index 000000000..60d03c8e9
--- /dev/null
+++ b/common/gps/frame_translation.hh
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "Eigen/Core"
+
+namespace robot::gps {
+Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF);
+Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate);
+}  // namespace robot::gps
diff --git a/common/gps/frame_translation_test.cc b/common/gps/frame_translation_test.cc
new file mode 100644
index 000000000..7e6afa47c
--- /dev/null
+++ b/common/gps/frame_translation_test.cc
@@ -0,0 +1,29 @@
+#include "common/gps/frame_translation.hh"
+
+#include "Eigen/Core"
+#include "common/check.hh"
+#include "gtest/gtest.h"
+
+namespace robot::gps {
+TEST(frame_translation_test, lla_from_ecef) {
+    const Eigen::Vector3d lla_easthampton_ma(42.2608, -72.6634, 51.82);  //
+    const Eigen::Vector3d ecef_easthampton_ma = ecef_from_lla(lla_easthampton_ma);
+
+    const Eigen::Vector3d ecef_expected_easthampton_ma(1408753.87, -4512832.60, 4267122.34);
+
+    constexpr double tol = 1e-9;
+    ROBOT_CHECK(ecef_easthampton_ma.isApprox(ecef_expected_easthampton_ma, tol),
+                ecef_easthampton_ma, ecef_expected_easthampton_ma);
+}
+
+TEST(frame_translation_test, ecef_from_lla) {
+    const Eigen::Vector3d ecef_palace_versailles(4205987.87, 155694.68, 4776399.15);
+    const Eigen::Vector3d lla_palace_versailles = lla_from_ecef(ecef_palace_versailles);
+
+    const Eigen::Vector3d expected_lla_palace_versailles(48.8049440, 2.1199720, 131.985);
+
+    constexpr double tol = 1e-4;
+    ROBOT_CHECK(lla_palace_versailles.isApprox(expected_lla_palace_versailles, tol),
+                lla_palace_versailles, expected_lla_palace_versailles);
+}
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/common/gps/geographiclib_test.cc b/common/gps/geographiclib_test.cc
index 5ef8a1fb3..19bc98a2b 100644
--- a/common/gps/geographiclib_test.cc
+++ b/common/gps/geographiclib_test.cc
@@ -8,7 +8,7 @@
 #include "GeographicLib/LocalCartesian.hpp"
 #include "gtest/gtest.h"
 
-namespace robot::experimental::learn_descriptors {
+namespace robot::gps {
 TEST(GeographiclibTest, airport_dist) {
     const GeographicLib::Geodesic& geod = GeographicLib::Geodesic::WGS84();
     // Distance from JFK to LHR
@@ -58,4 +58,4 @@ TEST(GeographiclibTest, local_cartestian) {
         EXPECT_NEAR(calais_height_meters, 264.915, 1e-3);
     }
 }
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/common/gps/nmea_test.cc b/common/gps/nmea_test.cc
index c1be0aca4..25b38b09f 100644
--- a/common/gps/nmea_test.cc
+++ b/common/gps/nmea_test.cc
@@ -2,7 +2,7 @@
 #include "nmea/message/gga.hpp"
 #include "nmea/sentence.hpp"
 
-namespace robot::experimental::learn_descriptors {
+namespace robot::gps {
 TEST(NmeaTest, parse) {
     // Read an NMEA string from your serial port
     std::string nmea_string =
@@ -44,4 +44,4 @@ TEST(NmeaTest, generate) {
     EXPECT_EQ(nmea_sentence.get_field(1), "abc");
     EXPECT_EQ(nmea_sentence.get_field(2), "");
 }
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 5d3fbf77c..facd64bdf 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -1,371 +1,440 @@
-package(features=["warning_compile_flags"])
+package(features = ["warning_compile_flags"])
 
 cc_library(
-  name = "learn_descriptors",
-  hdrs = ["learn_descriptors.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["learn_descriptors.cc"],
+    name = "learn_descriptors",
+    srcs = ["learn_descriptors.cc"],
+    hdrs = ["learn_descriptors.hh"],
+    visibility = ["//visibility:public"],
 )
 
 cc_test(
-  name = "learn_descriptors_test",
-  srcs = ["learn_descriptors_test.cc"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":learn_descriptors",
-  ]
+    name = "learn_descriptors_test",
+    srcs = ["learn_descriptors_test.cc"],
+    deps = [
+        ":learn_descriptors",
+        "@com_google_googletest//:gtest_main",
+    ],
 )
 
 cc_library(
-  name = "structure_from_motion",
-  hdrs = ["structure_from_motion.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["structure_from_motion.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@opencv//:opencv",
-    "@gtsam//:gtsam",
-    "@eigen",
-    "//visualization/opencv:opencv_viz",
-    ":feature_manager",
-    ":frontend",
-    ":backend"
-  ]
+    name = "structure_from_motion",
+    srcs = ["structure_from_motion.cc"],
+    hdrs = ["structure_from_motion.hh"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":backend",
+        ":feature_manager",
+        ":frontend",
+        "//visualization/opencv:opencv_viz",
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_test(
-  name = "structure_from_motion_test",
-  srcs = ["structure_from_motion_test.cc"],  
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":symphony_lake_parser",
-    ":structure_from_motion",
-    "//visualization/opencv:opencv_viz",
-    "//common/geometry:translate_types",
-    ":frame"
-  ]
+    name = "structure_from_motion_test",
+    srcs = ["structure_from_motion_test.cc"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    deps = [
+        ":frame",
+        ":structure_from_motion",
+        ":symphony_lake_parser",
+        "//common/geometry:translate_types",
+        "//visualization/opencv:opencv_viz",
+        "@com_google_googletest//:gtest_main",
+    ],
 )
 
 cc_library(
-  name = "symphony_lake_parser",
-  hdrs = ["symphony_lake_parser.hh"],
-  copts = [
-    "-Wno-unused-parameter",
-  ],
-  data = ["@symphony_lake_snippet//:files"],
-  visibility = ["//visibility:public"],
-  srcs = ["symphony_lake_parser.cc"],
-  deps = [
-    "@symphony_lake_parser",
-    "//common:check",
-    "@eigen"
-  ]
+    name = "symphony_lake_parser",
+    srcs = ["symphony_lake_parser.cc"],
+    hdrs = ["symphony_lake_parser.hh"],
+    copts = [
+        "-Wno-unused-parameter",
+    ],
+    data = ["@symphony_lake_snippet//:files"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//common:check",
+        "@eigen",
+        "@symphony_lake_parser",
+    ],
 )
 
 cc_test(
-  name = "symphony_lake_parser_test",
-  srcs = ["symphony_lake_parser_test.cc"],
-  copts = ["-Wno-unused-parameter"],
-  data = ["@symphony_lake_snippet//:files"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":symphony_lake_parser",
-    "//visualization/opencv:opencv_viz"
-  ]
+    name = "symphony_lake_parser_test",
+    srcs = ["symphony_lake_parser_test.cc"],
+    copts = ["-Wno-unused-parameter"],
+    data = ["@symphony_lake_snippet//:files"],
+    deps = [
+        ":symphony_lake_parser",
+        "//visualization/opencv:opencv_viz",
+        "@com_google_googletest//:gtest_main",
+    ],
 )
 
 cc_test(
-  name = "gtsam_test",
-  srcs = ["gtsam_test.cc"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    "@gtsam//:gtsam",
-  ]
+    name = "gtsam_test",
+    srcs = ["gtsam_test.cc"],
+    deps = [
+        "@com_google_googletest//:gtest_main",
+        "@gtsam",
+    ],
 )
 
 cc_library(
-    name = "image_point", 
+    name = "image_point",
     hdrs = ["image_point.hh"],
+    visibility = ["//visibility:public"],
     deps = [
-      "//common/liegroups:se3",
-      "@opencv//:opencv",
-      "@eigen"
+        "//common/liegroups:se3",
+        "@eigen",
+    ],
+)
+
+cc_library(
+    name = "four_seasons_transforms",
+    hdrs = ["four_seasons_transforms.hh"],
+    srcs = ["four_seasons_transforms.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":four_seasons_parser_detail",
+        "//common/liegroups:se3",
+        "//common:check",
+        "@eigen",
+    ],
+)
+
+cc_library(
+    name = "image_point_four_seasons",
+    hdrs = ["image_point_four_seasons.hh"],
+    srcs = ["image_point_four_seasons.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":image_point",
+        ":gps_data",
+        ":four_seasons_transforms",
+        "//common/liegroups:se3",
+        "//common/gps:frame_translation",
+        "@eigen",
+        "@opencv",
+        "@geographiclib",
+    ]
+)
+
+cc_library(
+    name = "gps_data",
+    hdrs = ["gps_data.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@eigen",
     ]
 )
 
 cc_library(
-    name = "four_seasons_parser", 
+    name = "four_seasons_parser",
     hdrs = ["four_seasons_parser.hh"],
     srcs = ["four_seasons_parser.cc"],
+    visibility = ["//visibility:public"],
     deps = [
-      "@eigen",
-      "@opencv//:opencv",
-      "//common/liegroups:se3",
-      "//common/time:robot_time",
-      "@com_google_absl//absl/strings:str_format",
-      ":image_point",
-      "@nmea",
-      "@geographiclib",
-      "//common:check"
-    ]
+        ":image_point_four_seasons",
+        ":four_seasons_transforms",
+        ":four_seasons_parser_detail",
+        "//common:check",
+        "//common/liegroups:se3",
+        "@eigen",
+        "@opencv",
+        ":camera_calibration"
+    ],
+)
+
+cc_library(
+    name = "four_seasons_parser_detail",
+    hdrs = ["four_seasons_parser_detail.hh"],
+    srcs = ["four_seasons_parser_detail.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":camera_calibration",
+        ":gps_data",
+        "//common/liegroups:se3",
+        "@com_google_absl//absl/strings:str_format",
+        "@geographiclib",
+        "@nmea",
+    ],
 )
 
 cc_binary(
-  name = "four_seasons_parser_example",
-  srcs = ["four_seasons_parser_example.cc"],
-  deps = [
-    ":four_seasons_parser",
-    "@cxxopts//:cxxopts",
-    "//common:check",
-    "@geographiclib"
-  ]
+    name = "four_seasons_parser_example",
+    srcs = ["four_seasons_parser_example.cc"],
+    deps = [
+        ":four_seasons_parser",
+        ":gps_data",
+        "//common:check",
+        "@cxxopts",
+    ],
 )
 
 cc_binary(
-  name = "four_seasons_parser_example_viz",
-  srcs = ["four_seasons_parser_example_viz.cc"],
-  deps = [
-    ":four_seasons_parser",
-    "@cxxopts//:cxxopts",
-    "//common:check",
-    "//visualization/opencv:opencv_viz",
-    "@eigen",    
-  ]
+    name = "four_seasons_parser_example_viz",
+    srcs = ["four_seasons_parser_example_viz.cc"],
+    deps = [
+        ":four_seasons_parser",
+        "//common:check",
+        "//common/gps:frame_translation",
+        "//visualization/opencv:opencv_viz",
+        "@cxxopts",
+        "@eigen",
+    ],
 )
 
 cc_test(
-  name = "four_seasons_parser_test",
-  srcs = ["four_seasons_parser_test.cc"],
-  data = ["@four_seasons_snippet//:files"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    "//common/liegroups:se3",
-    "@com_google_absl//absl/strings:str_format",
-    "@eigen",
-    ":four_seasons_parser",
-    "@nmea",
-    "@geographiclib",
-    "//common:check"
-  ]
+    name = "four_seasons_parser_test",
+    srcs = ["four_seasons_parser_test.cc"],
+    data = ["@four_seasons_snippet//:files"],
+    deps = [
+        ":four_seasons_parser",
+        ":four_seasons_parser_detail",
+        ":image_point_four_seasons",
+        "//common:check",
+        "//common/gps:frame_translation",
+        "//common/liegroups:se3",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_googletest//:gtest_main",
+        "@eigen",
+        "@geographiclib",
+        "@nmea",
+        ":camera_calibration"
+    ],
 )
 
 cc_library(
-  name = "frontend_definitions",
-  hdrs = ["frontend_definitions.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",    
-    ":structure_from_motion_types"
-  ]
+    name = "frontend_definitions",
+    hdrs = ["frontend_definitions.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":structure_from_motion_types",
+        "@gtsam",
+    ],
 )
 
 cc_library(
-  name = "structure_from_motion_types",
-  hdrs = ["structure_from_motion_types.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",    
-    "@opencv//:opencv"
-  ]
+    name = "structure_from_motion_types",
+    hdrs = ["structure_from_motion_types.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_library(
-  name = "frame",
-  hdrs = ["frame.hh"],
-  srcs = ["frame.cc"],
-  visibility = ["//visibility:public"],
-  deps = [             
-    "@opencv//:opencv",
-    ":frontend_definitions",
-    "@gtsam//:gtsam"
-  ]
+    name = "frame",
+    srcs = ["frame.cc"],
+    hdrs = ["frame.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":frontend_definitions",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_test(
-  name = "sfm_mvp",
-  srcs = ["sfm_mvp.cc"],
+    name = "sfm_mvp",
+    srcs = ["sfm_mvp.cc"],
     copts = [
-    "-Wno-error=unused-parameter",
-    "-Wno-error=unused-variable",
-  ],
-  deps = [
-    ":frontend",
-    "@com_google_googletest//:gtest_main",
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",
-    "@eigen",
-    ":spatial_test_scene_cube",
-    ":symphony_lake_parser",
-    ":frame", 
-    ":structure_from_motion_types",
-    "//visualization/opencv:opencv_viz"
-  ]
+        "-Wno-error=unused-parameter",
+        "-Wno-error=unused-variable",
+    ],
+    deps = [
+        ":frame",
+        ":frontend",
+        ":spatial_test_scene_cube",
+        ":structure_from_motion_types",
+        ":symphony_lake_parser",
+        "//visualization/opencv:opencv_viz",
+        "@com_google_googletest//:gtest_main",
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
-
 cc_library(
-  name = "feature_set",
-  hdrs = ["feature_set.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",
-    "@opencv//:opencv"
-  ]
+    name = "feature_set",
+    hdrs = ["feature_set.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_library(
-  name = "feature_manager",
-  hdrs = ["feature_manager.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",
-    ":feature_set",
-  ]
+    name = "feature_manager",
+    hdrs = ["feature_manager.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":feature_set",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_test(
-  name = "feature_manager_test",
-  srcs = ["feature_manager_test.cc"],
-  deps = [
-    ":feature_manager",
-    "@com_google_googletest//:gtest_main",
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",
-    "@eigen",
-    ":spatial_test_scene_cube"
-  ]
+    name = "feature_manager_test",
+    srcs = ["feature_manager_test.cc"],
+    deps = [
+        ":feature_manager",
+        ":spatial_test_scene_cube",
+        "@com_google_googletest//:gtest_main",
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
-
 cc_library(
-  name = "spatial_test_scene",
-  hdrs = ["spatial_test_scene.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["spatial_test_scene.cc"],
-  deps = [        
-    "@gtsam//:gtsam",
-    "@opencv//:opencv",    
-    "@eigen"
-  ]
+    name = "spatial_test_scene",
+    srcs = ["spatial_test_scene.cc"],
+    hdrs = ["spatial_test_scene.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_library(
-  name = "spatial_test_scene_cube",
-  hdrs = ["spatial_test_scene_cube.hh"],
-  visibility = ["//visibility:public"],
-  deps = [        
-    "@eigen",
-    ":spatial_test_scene"
-  ]
+    name = "spatial_test_scene_cube",
+    hdrs = ["spatial_test_scene_cube.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":spatial_test_scene",
+        "@eigen",
+    ],
 )
 
 cc_binary(
-  name = "spatial_test_scene_cube_example",
-  srcs = ["spatial_test_scene_cube_example.cc"],
-  deps = [    
-    "@com_google_googletest//:gtest_main",    
-    ":spatial_test_scene_cube",
-    "//visualization/opencv:opencv_viz",
-    "@eigen"
-  ]
+    name = "spatial_test_scene_cube_example",
+    srcs = ["spatial_test_scene_cube_example.cc"],
+    deps = [
+        ":spatial_test_scene_cube",
+        "//visualization/opencv:opencv_viz",
+        "@com_google_googletest//:gtest_main",
+        "@eigen",
+    ],
 )
 
 cc_library(
-  name = "frontend",
-  hdrs = ["frontend.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["frontend.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@opencv//:opencv",    
-    ":frame",
-    ":frontend_definitions",
-    ":structure_from_motion_types"
-  ]
+    name = "frontend",
+    srcs = ["frontend.cc"],
+    hdrs = ["frontend.hh"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":frame",
+        ":frontend_definitions",
+        ":image_point_four_seasons",
+        ":structure_from_motion_types",
+        "//common:check",
+        "@opencv",
+        "@gtsam",
+        "@eigen",
+    ],
 )
 
-
 cc_test(
-  name = "frontend_test",
-  srcs = ["frontend_test.cc"],
-  deps = [    
-    "@com_google_googletest//:gtest_main",    
-    ":frontend"
-  ]
+    name = "frontend_test",
+    srcs = ["frontend_test.cc"],
+    deps = [
+        ":frontend",
+        "@com_google_googletest//:gtest_main",
+    ],
 )
 
 cc_library(
-  name = "backend",
-  hdrs = ["backend.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["backend.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    "@opencv//:opencv",    
-    "@gtsam//:gtsam",
-    "//common/geometry:camera",
-    "@boost//:smart_ptr",
-    ":feature_manager"
-  ]
+    name = "backend",
+    srcs = ["backend.cc"],
+    hdrs = ["backend.hh"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":feature_manager",
+        "//common/geometry:camera",
+        "@boost//:smart_ptr",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
-
 cc_test(
-  name = "backend_test",
-  srcs = ["backend_test.cc"],
+    name = "backend_test",
+    srcs = ["backend_test.cc"],
     copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [    
-    "@com_google_googletest//:gtest_main",    
-    "@opencv//:opencv",
-    ":backend",
-    ":feature_manager",
-    ":spatial_test_scene_cube",
-    "//visualization/opencv:opencv_viz"
-  ]
+        "-Wno-error=unused-parameter",
+    ],
+    deps = [
+        ":backend",
+        ":feature_manager",
+        ":spatial_test_scene_cube",
+        "//visualization/opencv:opencv_viz",
+        "@com_google_googletest//:gtest_main",
+        "@opencv",
+    ],
 )
 
 cc_binary(
-  name = "get_colmap_groundtruth",
-  srcs = ["get_colmap_groundtruth.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-  ],
-  deps = [
-    ":symphony_lake_parser"
-  ]
+    name = "get_colmap_groundtruth",
+    srcs = ["get_colmap_groundtruth.cc"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    deps = [
+        ":symphony_lake_parser",
+    ],
 )
 
 cc_binary(
-  name = "incremental_sfm_mvp",
-  srcs = ["incremental_sfm_mvp.cc"],
-  copts = [
-    "-Wno-error=unused-parameter",
-    "-Wno-error=unused-function",
-  ],
-  deps = [
-    ":four_seasons_parser",
-    "@eigen",
-    "@opencv",    
-    "@gtsam",
-    "//visualization/opencv:opencv_viz",
-    ":spatial_test_scene_cube",
-    ":frame", 
-    ":structure_from_motion_types",
-    ":frontend",
-    "@boost//:smart_ptr",    
-    "@cxxopts",
-    "//common/geometry:camera"
-  ]
+    name = "incremental_sfm_mvp",
+    srcs = ["incremental_sfm_mvp.cc"],
+    copts = [
+        "-Wno-error=unused-parameter",
+        "-Wno-error=unused-function",
+    ],
+    deps = [
+        ":four_seasons_parser",
+        ":frame",
+        ":frontend",
+        ":spatial_test_scene_cube",
+        ":structure_from_motion_types",
+        "//common/geometry:camera",
+        "//visualization/opencv:opencv_viz",
+        "@boost//:smart_ptr",
+        "@cxxopts",
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+        ":camera_calibration"
+    ],
 )
+
+cc_library(
+    name = "camera_calibration",
+    hdrs = ["camera_calibration.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@opencv"
+    ]
+)
\ No newline at end of file
diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh
new file mode 100644
index 000000000..cfbd6fb42
--- /dev/null
+++ b/experimental/learn_descriptors/camera_calibration.hh
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+struct CameraCalibrationFisheye {
+    double fx, fy, cx, cy, k1, k2, k3, k4;
+
+    cv::Mat k_mat() {
+        cv::Mat K_mat = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, py, 0, 0, 1);
+        return K_mat;
+    }
+
+    cv::Mat d_mat() {
+        cv::Mat D_mat = (cv::Mat_<double>(4, 1) << k1, k2, k3, k4);
+        return D_mat;
+    }
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index fdf76afde..a295851f0 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -1,58 +1,59 @@
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 
-#include <cmath>
 #include <cstddef>
 #include <cstdlib>
 #include <exception>
 #include <fstream>
 #include <iostream>
 #include <limits>
+#include <memory>
 #include <sstream>
 #include <string>
-#include <type_traits>
 #include <unordered_map>
 #include <utility>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
-#include "GeographicLib/Geocentric.hpp"
-#include "GeographicLib/LocalCartesian.hpp"
-#include "absl/strings/str_split.h"
-#include "absl/strings/strip.h"
-#include "common/check.hh"
 #include "common/liegroups/se3.hh"
-#include "common/time/robot_time.hh"
-#include "nmea/message/gga.hpp"
-#include "nmea/message/rmc.hpp"
-#include "nmea/sentence.hpp"
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
 
 namespace robot::experimental::learn_descriptors {
-using namespace detail;
+using namespace detail::four_seasons_parser;
 FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                                      const std::filesystem::path& calibration_dir)
     : root_dir_(root_dir),
       img_dir_(root_dir / "distorted_images" / "cam0"),
       cal_(txt_parser_help::load_camera_calibration(calibration_dir)),
-      transforms_(root_dir / "Transformations.txt") {
+      shared_transforms_(std::make_shared<FourSeasonsTransforms::StaticTransforms>(
+          root_dir / "Transformations.txt")) {
     const std::filesystem::path path_img = root_dir_ / "times.txt";
     const std::filesystem::path path_gnss = root_dir_ / "GNSSPoses.txt";
     const std::filesystem::path path_vio = root_dir_ / "result.txt";
     const std::filesystem::path path_gps = root_dir_ / "septentrio.nmea";
     const size_t min_time_sig_figs = txt_parser_help::min_sig_figs_result_time(path_vio);
+    std::cout << "heartbeat 0" << std::endl;
     txt_parser_help::TimeDataList img_time_list =
         txt_parser_help::create_img_time_data_list(path_img, min_time_sig_figs);
+    std::cout << "heartbeat 1" << std::endl;
     txt_parser_help::TimeDataMap gnss_poses_time_map =
         txt_parser_help::create_gnss_poses_time_data_map(path_gnss, min_time_sig_figs);
+    std::cout << "heartbeat 2" << std::endl;
     txt_parser_help::TimeDataMap vio_poses_time_map =
         txt_parser_help::create_vio_time_data_map(path_vio, min_time_sig_figs);
+    std::cout << "heartbeat 3" << std::endl;
     gps_parser_help::TimeGPSList gps_time_list =
         gps_parser_help::create_gps_time_data_list(path_gps);
+    std::cout << "heartbeat 4" << std::endl;
 
     size_t id = 0;
     for (const std::pair<size_t, std::vector<std::string>>& pair_time_data : img_time_list) {
         const size_t time_key = pair_time_data.first;
-        ImagePoint img_pt;
+        ImagePointFourSeasons img_pt;
         img_pt.id = id;
+        img_pt.K = cal_;
+        std::cout << "heartbeat 5" << std::endl;
+        img_pt.shared_static_transforms = shared_transforms_;
         img_pt.seq = std::stoull(
             pair_time_data.second[static_cast<size_t>(txt_parser_help::ImgIdx::TIME_NS)]);
         if (gnss_poses_time_map.find(time_key) != gnss_poses_time_map.end()) {
@@ -104,19 +105,19 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
     // popoulate gps to nearest img time
     // TODO: could be linear time... but good enough
     for (const auto& [time_unix_ns, gps_data] : gps_time_list) {
-        auto it = std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns,
-                                   [](const ImagePoint& img_pt, const size_t query_unix_time) {
-                                       return img_pt.seq < query_unix_time;
-                                   });
+        auto it =
+            std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns,
+                             [](const ImagePointFourSeasons& img_pt, const size_t query_unix_time) {
+                                 return img_pt.seq < query_unix_time;
+                             });
         size_t insert_idx = std::distance(img_pt_vector_.begin(), it);
         if (it != img_pt_vector_.begin() &&
-            detail::abs_diff(it->seq, time_unix_ns) >
-                detail::abs_diff(std::prev(it)->seq, time_unix_ns)) {
+            abs_diff(it->seq, time_unix_ns) > abs_diff(std::prev(it)->seq, time_unix_ns)) {
             insert_idx--;
         }
         // NOTE: in future, could perhaps use gps data that isn't associated with an img_pt in some
         // way. maybe to help with interpolation, estimate velocity
-        if (detail::abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) <
+        if (abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) <
             FourSeasonsParser::CAM_CAP_DELTA_NS) {
             img_pt_vector_[insert_idx].gps_gcs = gps_data;
         }
@@ -126,284 +127,4 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
 cv::Mat FourSeasonsParser::load_image(const size_t m) const {
     return get_image_point(m).load_image(img_dir_);
 }
-
-FourSeasonsParser::FourSeasonsTransforms::FourSeasonsTransforms(
-    const std::filesystem::path& path_transforms) {
-    std::ifstream file_transforms(path_transforms);
-    std::string line;
-    while (std::getline(file_transforms, line)) {
-        if (line.find("transform_S_AS") != std::string::npos) {
-            std::getline(file_transforms, line);
-            S_from_AS = get_transform_from_line(line);
-        } else if (line.find("TS_cam_imu") != std::string::npos) {
-            std::getline(file_transforms, line);
-            cam_from_imu = get_transform_from_line(line);
-        } else if (line.find("transform_w_gpsw") != std::string::npos) {
-            std::getline(file_transforms, line);
-            w_from_gpsw = get_transform_from_line(line);
-        } else if (line.find("transform_gps_imu") != std::string::npos) {
-            std::getline(file_transforms, line);
-            gps_from_imu = get_transform_from_line(line);
-        } else if (line.find("transform_e_gpsw") != std::string::npos) {
-            std::getline(file_transforms, line);
-            e_from_gpsw = get_transform_from_line(line);
-        } else if (line.find("GNSS scale") != std::string::npos) {
-            std::getline(file_transforms, line);
-            gnss_scale = std::stod(line);
-        }
-    }
-}
-
-liegroups::SE3 FourSeasonsParser::FourSeasonsTransforms::get_transform_from_line(
-    const std::string& line) {
-    enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W };
-    std::vector<std::string> parsed_transform_line = txt_parser_help::parse_line_adv(line, ",");
-    if (parsed_transform_line.size() < 7) {
-        std::stringstream error_stream;
-        error_stream << "parsed_transform_line doesn't have sufficient entries for "
-                        "transform! parsed_transform_line.size(): "
-                     << parsed_transform_line.size() << std::endl;
-        throw std::runtime_error(error_stream.str());
-    }
-    std::vector<double> transform_nums;
-    for (const std::string& num : parsed_transform_line) {
-        transform_nums.push_back(static_cast<double>(std::stod(num)));
-    }
-    Eigen::Vector3d translation(transform_nums[TransformEntry::T_X],
-                                transform_nums[TransformEntry::T_Y],
-                                transform_nums[TransformEntry::T_Z]);
-    Eigen::Quaterniond rotation(
-        transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X],
-        transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]);
-    return liegroups::SE3(rotation, translation);
-}
-
-Eigen::Vector3d FourSeasonsParser::gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF) {
-    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
-
-    double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z();
-
-    double lat_deg, lon_deg, alt_m;
-    earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m);
-
-    return Eigen::Vector3d(lat_deg, lon_deg, alt_m);
-}
-
-Eigen::Vector3d FourSeasonsParser::ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate) {
-    double lat_deg = gcs_coordinate.x();
-    double lon_deg = gcs_coordinate.y();
-    double alt_m = gcs_coordinate.z();
-
-    double x, y, z;
-
-    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
-    earth.Forward(lat_deg, lon_deg, alt_m, x, y, z);
-
-    return Eigen::Vector3d(x, y, z);
-}
-
-namespace detail {
-namespace txt_parser_help {
-
-std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim) {
-    if (delim == " ") {
-        return std::vector<std::string>(
-            absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace()));
-    }
-    return std::vector<std::string>(absl::StrSplit(line, delim, absl::SkipWhitespace()));
-}
-
-FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration(
-    const std::filesystem::path& calibration_dir) {
-    std::ifstream file_calibration(calibration_dir / "calib_0.txt");
-    std::string line_calibration;
-    std::getline(file_calibration, line_calibration);
-    std::vector<std::string> parsed_calib_line = parse_line_adv(line_calibration, " ");
-    return FourSeasonsParser::CameraCalibrationFisheye{
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FX)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FY)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CX)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CY)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K1)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K2)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K3)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K4)])};
-}
-
-// minimum sig figs of time in seconds (of type double) for all entries of results.txt
-size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) {
-    std::ifstream file_vio(path_vio);
-    std::string line;
-    int min_figs = INT_MAX;
-    while (std::getline(file_vio, line)) {
-        const std::vector<std::string> parsed_line = parse_line_adv(line, " ");
-        const std::string str_time_seconds = parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)];
-        const int figs =
-            str_time_seconds.size() +
-            (str_time_seconds.find('.') != std::string::npos ? -1 : 0);  // '.' is not a fig
-        min_figs = std::min(min_figs, figs);
-    }
-    return min_figs;
-}
-
-const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
-                                             const size_t time_sig_figs) {
-    TimeDataList time_map_img;
-    std::ifstream file_img(path_img);
-    std::string line;
-    while (std::getline(file_img, line)) {
-        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
-        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(ImgIdx::TIME_NS)]);
-        time_map_img.push_back(
-            std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line));
-    }
-    return time_map_img;
-}
-
-const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
-                                                  const size_t time_sig_figs) {
-    TimeDataMap time_map_gnss_poses;
-    std::ifstream file_gnss_poses(path_gnss);
-    std::string line;
-    std::getline(file_gnss_poses,
-                 line);  // advance a line to get past the top comment in GNSSPoses.txt
-    while (std::getline(file_gnss_poses, line)) {
-        std::vector<std::string> parsed_line = parse_line_adv(line, ",");
-        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(GPSIdx::TIME_NS)]);
-        time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
-    }
-    return time_map_gnss_poses;
-}
-
-const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
-                                           const size_t time_sig_figs) {
-    TimeDataMap time_map_vio;
-    std::ifstream file_vio(path_vio);
-    std::string line;
-    while (std::getline(file_vio, line)) {
-        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
-        double time_seconds = std::stod(parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)]);
-        size_t time_ns = time_seconds * 1e9;
-        time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
-    }
-    return time_map_vio;
-}
-}  // namespace txt_parser_help
-
-namespace gps_parser_help {
-size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) {
-    std::chrono::sys_days date = std::chrono::year_month_day{
-        std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month},
-        std::chrono::day{utc_date.day}};
-    std::chrono::nanoseconds utc_time_day_ns =
-        std::chrono::nanoseconds{static_cast<size_t>(utc_time_day_seconds * 1e9)};
-    std::chrono::sys_time<std::chrono::nanoseconds> timestamp = date + utc_time_day_ns;
-    return timestamp.time_since_epoch().count();
-}
-TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) {
-    TimeGPSList time_list_gps;
-    std::ifstream file_gps(path_gps);
-    std::string line;
-    std::optional<nmea::sentence> nmea_sentence;
-    nmea::date date_last;
-    date_last.day = 255;
-    date_last.month = 255;
-    date_last.year = 255;
-    double time_of_day_last = -1.0;
-    while (std::getline(file_gps, line) && !line.empty()) {
-        try {
-            nmea_sentence = nmea::sentence(line);
-        } catch (const std::exception& e) {
-            std::cerr << "failed to parse line as nmea sentence: " << e.what()
-                      << "\ncontinuing to next line\n";
-            continue;
-        }
-        if (nmea_sentence->type() == "GGA") {
-            nmea::gga gga(*nmea_sentence);
-            if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) {
-                if (std::abs(gga.utc.get() - time_of_day_last) <
-                    1e-3) {  // GGA messages for this dataset come second
-                    ImagePoint::GPSData gps_data;
-                    gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get());
-                    gps_data.latitude = gga.latitude.get();
-                    gps_data.longitude = gga.longitude.get();
-                    if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
-                    time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data));
-                }
-            }
-        } else if (nmea_sentence->type() == "RMC") {
-            nmea::rmc rmc(*nmea_sentence);
-            if (rmc.utc.exists() && rmc.date.exists()) {
-                date_last = rmc.date.get();
-                time_of_day_last = rmc.utc.get();
-            }
-        } else if (nmea_sentence->type() == "GST") {
-            std::optional<GSTData> gst_data = parse_gpgst(nmea_sentence->nmea_string());
-            if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) <
-                                1e-3) {  // GST message for this dataset come third
-                size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns);
-                if (time_list_gps.back().first == unix_time_ns) {
-                    time_list_gps.back().second.uncertainty.emplace(
-                        gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m,
-                        gst_data->error_orientation_deg, gst_data->rms_range_error_m);
-                }
-            }
-        }
-    }
-    return time_list_gps;
-}
-
-std::vector<std::string> split_nmea_sentence(const std::string& sentence) {
-    std::vector<std::string> fields;
-    std::string field;
-    std::stringstream ss(sentence);
-
-    while (std::getline(ss, field, ',')) {
-        // remove checksum from the last field if present
-        auto asterisk = field.find('*');
-        if (asterisk != std::string::npos) field = field.substr(0, asterisk);
-        fields.push_back(field);
-    }
-
-    return fields;
-}
-double time_of_day_seconds(const double utc_time_hhmmss) {
-    int hours = static_cast<int>(utc_time_hhmmss / 10000);
-    int minutes = static_cast<int>((utc_time_hhmmss - hours * 10000) / 100);
-    double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100;
-    return hours * 3600 + minutes * 60 + seconds;
-}
-std::optional<GSTData> parse_gpgst(const std::string& sentence) {
-    if (sentence.substr(0, 6) != "$GPGST") {
-        return std::nullopt;
-    }
-
-    std::vector<std::string> fields = split_nmea_sentence(sentence);
-    if (fields.size() != 9) {
-        return std::nullopt;
-    }
-    for (size_t i = 0; i < fields.size(); i++) {
-        if (fields[i].empty()) return std::nullopt;
-    }
-
-    GSTData gst;
-    gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1]));
-    gst.rms_range_error_m = std::stod(fields[2]);
-    gst.error_semi_major_m = std::stod(fields[3]);
-    gst.error_semi_minor_m = std::stod(fields[4]);
-    gst.error_orientation_deg = std::stod(fields[5]);
-    gst.sigma_lat_m = std::stod(fields[6]);
-    gst.sigma_lon_m = std::stod(fields[7]);
-    gst.sigma_alt_m = std::stod(fields[8]);
-
-    return gst;
-}
-
-}  // namespace gps_parser_help
-
-template <typename T>
-size_t abs_diff(const T& a, const T& b) {
-    return a > b ? a - b : b - a;
-}
-}  // namespace detail
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh
index 9709bc396..4458215c8 100644
--- a/experimental/learn_descriptors/four_seasons_parser.hh
+++ b/experimental/learn_descriptors/four_seasons_parser.hh
@@ -1,144 +1,56 @@
 #pragma once
 
-#include <stdio.h>
-
 #include <cstddef>
 #include <filesystem>
+#include <memory>
 #include <opencv2/opencv.hpp>
 #include <string>
 #include <vector>
 
 #include "common/liegroups/se3.hh"
-#include "experimental/learn_descriptors/image_point.hh"
-#include "nmea/message/gga.hpp"
-#include "nmea/message/rmc.hpp"
-#include "nmea/sentence.hpp"
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
 
 namespace robot::experimental::learn_descriptors {
-class FourSeasonsParser {
+class FourSeasonsParser : std::enable_shared_from_this<FourSeasonsParser> {
    public:
     static constexpr double CAM_HZ = 30.0;
     static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ;
-    struct CameraCalibrationFisheye {
-        double fx, fy, cx, cy, k1, k2, k3, k4;
-    };
+
     FourSeasonsParser(const std::filesystem::path& root_dir,
                       const std::filesystem::path& calibration_dir);
+    std::shared_ptr<FourSeasonsParser> get_shared() { return shared_from_this(); }
     cv::Mat load_image(const size_t idx) const;
-    const ImagePoint& get_image_point(const size_t idx) const { return img_pt_vector_[idx]; };
+    const ImagePointFourSeasons& get_image_point(const size_t idx) const {
+        return img_pt_vector_[idx];
+    };
     size_t num_images() const { return img_pt_vector_.size(); };
-    const CameraCalibrationFisheye& camera_calibration() const { return cal_; };
+    const std::shared_ptr<CameraCalibrationFisheye>& camera_calibration() const { return cal_; };
     const liegroups::SE3& S_from_AS() const {
-        return transforms_.S_from_AS;
+        return shared_transforms_->S_from_AS;
     };  // metric scale from arbitrary (internal slam) scale
-    const liegroups::SE3& cam_from_imu() const { return transforms_.cam_from_imu; };
+    const liegroups::SE3& cam_from_imu() const { return shared_transforms_->cam_from_imu; };
     const liegroups::SE3& w_from_gpsw() const {
-        return transforms_.w_from_gpsw;
+        return shared_transforms_->w_from_gpsw;
     };  // visual world from local gps (ENU)
     const liegroups::SE3& gps_from_imu() const {
-        return transforms_.gps_from_imu;
+        return shared_transforms_->gps_from_imu;
     };  // phsyical onboard gps from physical onboard imu
     const liegroups::SE3& e_from_gpsw() const {
-        return transforms_.e_from_gpsw;
+        return shared_transforms_->e_from_gpsw;
     };  // ECEF from local gps (ENU)
     double gnss_scale() const {
-        return transforms_.gnss_scale;
+        return shared_transforms_->gnss_scale;
     };  // scale from vio frame to gnss frame. WARNING: will require retooling if the scales per
         // keyframe (pose) are not all one value. See more here:
         // https://github.com/pmwenzel/4seasons-dataset
 
-    static Eigen::Vector3d gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF);
-    static Eigen::Vector3d ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate);
-
    protected:
-    struct FourSeasonsTransforms {
-        liegroups::SE3 S_from_AS;
-        liegroups::SE3 cam_from_imu;
-        liegroups::SE3 w_from_gpsw;
-        liegroups::SE3 gps_from_imu;
-        liegroups::SE3 e_from_gpsw;
-        double gnss_scale;
-
-        FourSeasonsTransforms(const std::filesystem::path& path_transforms);
-
-       private:
-        static liegroups::SE3 get_transform_from_line(const std::string& line);
-    };
     const std::filesystem::path root_dir_;
     const std::filesystem::path img_dir_;
-    const CameraCalibrationFisheye cal_;
-    const FourSeasonsTransforms transforms_;
-    ImagePointVector img_pt_vector_;
-};
-
-namespace detail {
-template <typename T>
-std::size_t abs_diff(const T& a, const T& b);
-namespace txt_parser_help {
-using TimeDataMap = std::unordered_map<size_t, std::vector<std::string>>;
-using TimeDataList = std::vector<std::pair<size_t, std::vector<std::string>>>;
-enum class GPSIdx {
-    TIME_NS = 0,
-    TRAN_X = 1,
-    TRAN_Y = 2,
-    TRAN_Z = 3,
-    QUAT_X = 4,
-    QUAT_Y = 5,
-    QUAT_Z = 6,
-    QUAT_W = 7,
-};
-enum class ImgIdx {
-    TIME_NS = 0,
-    TIME_SEC = 1,
-    EXPOSURE_TIME = 2  // I'm not completely sure if this is correct - Nico
-};
-enum class ResultIdx {
-    TIME_SEC = 0,
-    TRAN_X = 1,
-    TRAN_Y = 2,
-    TRAN_Z = 3,
-    QUAT_X = 4,
-    QUAT_Y = 5,
-    QUAT_Z = 6,
-    QUAT_W = 7,
-};
-enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 };
-std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim = " ");
-FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration(
-    const std::filesystem::path& calibration_dir);
-template <typename T>
-T round_to_sig_figs(T val, int n) {
-    if (val == 0) return 0;
-    double d = static_cast<double>(val);
-    int exponent = static_cast<int>(std::floor(std::log10(std::abs(d))));
-    double multiplier = std::pow(10.0, n - exponent - 1);
-    return static_cast<T>(std::round(d * multiplier) / multiplier);
-}
-size_t min_sig_figs_result_time(const std::filesystem::path& path_vio);
-const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
-                                             const size_t time_sig_figs);
-const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
-                                                  const size_t time_sig_figs);
-const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
-                                           const size_t time_sig_figs);
-}  // namespace txt_parser_help
-namespace gps_parser_help {
-using TimeGPSList = std::vector<std::pair<size_t, ImagePoint::GPSData>>;
-struct GSTData {
-    double utc_time_ns;
-    double rms_range_error_m;
-    double error_semi_major_m;
-    double error_semi_minor_m;
-    double error_orientation_deg;
-    double sigma_lat_m;
-    double sigma_lon_m;
-    double sigma_alt_m;
+    const std::shared_ptr<CameraCalibrationFisheye> cal_;
+    const std::shared_ptr<FourSeasonsTransforms::StaticTransforms> shared_transforms_;
+    ImagePointFourSeasonsVector img_pt_vector_;
 };
-std::vector<std::string> split_nmea_sentence(const std::string& nmea_sentence);
-double time_of_day_seconds(const double utc_time_hhmmss);
-std::optional<GSTData> parse_gpgst(const std::string& nmea_sentence);
-size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds);
-TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps);
-}  // namespace gps_parser_help
-}  // namespace detail
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc
new file mode 100644
index 000000000..d9dc824a8
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc
@@ -0,0 +1,223 @@
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
+
+#include <chrono>
+#include <cmath>
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_split.h"
+#include "absl/strings/strip.h"
+#include "nmea/message/gga.hpp"
+#include "nmea/message/rmc.hpp"
+#include "nmea/sentence.hpp"
+
+namespace robot::experimental::learn_descriptors::detail::four_seasons_parser {
+
+// template <typename T>
+// size_t abs_diff(const T& a, const T& b) {
+//     return a > b ? a - b : b - a;
+// }
+
+namespace txt_parser_help {
+
+std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim) {
+    if (delim == " ") {
+        return std::vector<std::string>(
+            absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace()));
+    }
+    return std::vector<std::string>(absl::StrSplit(line, delim, absl::SkipWhitespace()));
+}
+
+std::shared_ptr<CameraCalibrationFisheye> load_camera_calibration(
+    const std::filesystem::path& calibration_dir) {
+    std::ifstream file_calibration(calibration_dir / "calib_0.txt");
+    std::string line_calibration;
+    std::getline(file_calibration, line_calibration);
+    std::vector<std::string> parsed_calib_line = parse_line_adv(line_calibration, " ");
+    return std::make_shared<CameraCalibrationFisheye>(
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FX)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FY)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CX)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CY)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K1)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K2)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K3)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K4)]));
+}
+
+// minimum sig figs of time in seconds (of type double) for all entries of results.txt
+size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) {
+    std::ifstream file_vio(path_vio);
+    std::string line;
+    int min_figs = INT_MAX;
+    while (std::getline(file_vio, line)) {
+        const std::vector<std::string> parsed_line = parse_line_adv(line, " ");
+        const std::string str_time_seconds = parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)];
+        const int figs =
+            str_time_seconds.size() +
+            (str_time_seconds.find('.') != std::string::npos ? -1 : 0);  // '.' is not a fig
+        min_figs = std::min(min_figs, figs);
+    }
+    return min_figs;
+}
+
+const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
+                                             const size_t time_sig_figs) {
+    TimeDataList time_map_img;
+    std::ifstream file_img(path_img);
+    std::string line;
+    while (std::getline(file_img, line)) {
+        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
+        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(ImgIdx::TIME_NS)]);
+        time_map_img.push_back(
+            std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line));
+    }
+    return time_map_img;
+}
+
+const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
+                                                  const size_t time_sig_figs) {
+    TimeDataMap time_map_gnss_poses;
+    std::ifstream file_gnss_poses(path_gnss);
+    std::string line;
+    std::getline(file_gnss_poses,
+                 line);  // advance a line to get past the top comment in GNSSPoses.txt
+    while (std::getline(file_gnss_poses, line)) {
+        std::vector<std::string> parsed_line = parse_line_adv(line, ",");
+        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(GPSIdx::TIME_NS)]);
+        time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
+    }
+    return time_map_gnss_poses;
+}
+
+const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
+                                           const size_t time_sig_figs) {
+    TimeDataMap time_map_vio;
+    std::ifstream file_vio(path_vio);
+    std::string line;
+    while (std::getline(file_vio, line)) {
+        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
+        double time_seconds = std::stod(parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)]);
+        size_t time_ns = time_seconds * 1e9;
+        time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
+    }
+    return time_map_vio;
+}
+}  // namespace txt_parser_help
+
+namespace gps_parser_help {
+size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) {
+    std::chrono::sys_days date = std::chrono::year_month_day{
+        std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month},
+        std::chrono::day{utc_date.day}};
+    std::chrono::nanoseconds utc_time_day_ns =
+        std::chrono::nanoseconds{static_cast<size_t>(utc_time_day_seconds * 1e9)};
+    std::chrono::sys_time<std::chrono::nanoseconds> timestamp = date + utc_time_day_ns;
+    return timestamp.time_since_epoch().count();
+}
+TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) {
+    TimeGPSList time_list_gps;
+    std::ifstream file_gps(path_gps);
+    std::string line;
+    std::optional<nmea::sentence> nmea_sentence;
+    nmea::date date_last;
+    date_last.day = 255;
+    date_last.month = 255;
+    date_last.year = 255;
+    double time_of_day_last = -1.0;
+    while (std::getline(file_gps, line) && !line.empty()) {
+        try {
+            nmea_sentence = nmea::sentence(line);
+        } catch (const std::exception& e) {
+            std::cerr << "failed to parse line as nmea sentence: " << e.what()
+                      << "\ncontinuing to next line\n";
+            continue;
+        }
+        if (nmea_sentence->type() == "GGA") {
+            nmea::gga gga(*nmea_sentence);
+            if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) {
+                if (std::abs(gga.utc.get() - time_of_day_last) <
+                    1e-3) {  // GGA messages for this dataset come second
+                    GPSData gps_data;
+                    gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get());
+                    gps_data.latitude = gga.latitude.get();
+                    gps_data.longitude = gga.longitude.get();
+                    if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
+                    time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data));
+                }
+            }
+        } else if (nmea_sentence->type() == "RMC") {
+            nmea::rmc rmc(*nmea_sentence);
+            if (rmc.utc.exists() && rmc.date.exists()) {
+                date_last = rmc.date.get();
+                time_of_day_last = rmc.utc.get();
+            }
+        } else if (nmea_sentence->type() == "GST") {
+            std::optional<GSTData> gst_data = parse_gpgst(nmea_sentence->nmea_string());
+            if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) <
+                                1e-3) {  // GST message for this dataset come third
+                size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns);
+                if (time_list_gps.back().first == unix_time_ns) {
+                    time_list_gps.back().second.uncertainty.emplace(
+                        gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m,
+                        gst_data->error_orientation_deg, gst_data->rms_range_error_m);
+                }
+            }
+        }
+    }
+    return time_list_gps;
+}
+
+std::vector<std::string> split_nmea_sentence(const std::string& sentence) {
+    std::vector<std::string> fields;
+    std::string field;
+    std::stringstream ss(sentence);
+
+    while (std::getline(ss, field, ',')) {
+        // remove checksum from the last field if present
+        auto asterisk = field.find('*');
+        if (asterisk != std::string::npos) field = field.substr(0, asterisk);
+        fields.push_back(field);
+    }
+
+    return fields;
+}
+double time_of_day_seconds(const double utc_time_hhmmss) {
+    int hours = static_cast<int>(utc_time_hhmmss / 10000);
+    int minutes = static_cast<int>((utc_time_hhmmss - hours * 10000) / 100);
+    double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100;
+    return hours * 3600 + minutes * 60 + seconds;
+}
+std::optional<GSTData> parse_gpgst(const std::string& sentence) {
+    if (sentence.substr(0, 6) != "$GPGST") {
+        return std::nullopt;
+    }
+
+    std::vector<std::string> fields = split_nmea_sentence(sentence);
+    if (fields.size() != 9) {
+        return std::nullopt;
+    }
+    for (size_t i = 0; i < fields.size(); i++) {
+        if (fields[i].empty()) return std::nullopt;
+    }
+
+    GSTData gst;
+    gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1]));
+    gst.rms_range_error_m = std::stod(fields[2]);
+    gst.error_semi_major_m = std::stod(fields[3]);
+    gst.error_semi_minor_m = std::stod(fields[4]);
+    gst.error_orientation_deg = std::stod(fields[5]);
+    gst.sigma_lat_m = std::stod(fields[6]);
+    gst.sigma_lon_m = std::stod(fields[7]);
+    gst.sigma_alt_m = std::stod(fields[8]);
+
+    return gst;
+}
+
+}  // namespace gps_parser_help
+}  // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.hh b/experimental/learn_descriptors/four_seasons_parser_detail.hh
new file mode 100644
index 000000000..0f0c30ad5
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_parser_detail.hh
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <cstddef>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/gps_data.hh"
+#include "nmea/object/date.hpp"
+
+namespace robot::experimental::learn_descriptors::detail::four_seasons_parser {
+template <typename T>
+std::size_t abs_diff(const T& a, const T& b) {
+    return a > b ? a - b : b - a;
+};
+namespace txt_parser_help {
+using TimeDataMap = std::unordered_map<size_t, std::vector<std::string>>;
+using TimeDataList = std::vector<std::pair<size_t, std::vector<std::string>>>;
+enum class GPSIdx {
+    TIME_NS = 0,
+    TRAN_X = 1,
+    TRAN_Y = 2,
+    TRAN_Z = 3,
+    QUAT_X = 4,
+    QUAT_Y = 5,
+    QUAT_Z = 6,
+    QUAT_W = 7,
+};
+enum class ImgIdx {
+    TIME_NS = 0,
+    TIME_SEC = 1,
+    EXPOSURE_TIME = 2  // I'm not completely sure if this is correct - Nico
+};
+enum class ResultIdx {
+    TIME_SEC = 0,
+    TRAN_X = 1,
+    TRAN_Y = 2,
+    TRAN_Z = 3,
+    QUAT_X = 4,
+    QUAT_Y = 5,
+    QUAT_Z = 6,
+    QUAT_W = 7,
+};
+enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 };
+std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim = " ");
+std::shared_ptr<CameraCalibrationFisheye> load_camera_calibration(
+    const std::filesystem::path& calibration_dir);
+template <typename T>
+T round_to_sig_figs(T val, int n) {
+    if (val == 0) return 0;
+    double d = static_cast<double>(val);
+    int exponent = static_cast<int>(std::floor(std::log10(std::abs(d))));
+    double multiplier = std::pow(10.0, n - exponent - 1);
+    return static_cast<T>(std::round(d * multiplier) / multiplier);
+}
+size_t min_sig_figs_result_time(const std::filesystem::path& path_vio);
+const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
+                                             const size_t time_sig_figs);
+const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
+                                                  const size_t time_sig_figs);
+const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
+                                           const size_t time_sig_figs);
+}  // namespace txt_parser_help
+namespace gps_parser_help {
+using TimeGPSList = std::vector<std::pair<size_t, GPSData>>;
+struct GSTData {
+    double utc_time_ns;
+    double rms_range_error_m;
+    double error_semi_major_m;
+    double error_semi_minor_m;
+    double error_orientation_deg;
+    double sigma_lat_m;
+    double sigma_lon_m;
+    double sigma_alt_m;
+};
+std::vector<std::string> split_nmea_sentence(const std::string& nmea_sentence);
+double time_of_day_seconds(const double utc_time_hhmmss);
+std::optional<GSTData> parse_gpgst(const std::string& nmea_sentence);
+size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds);
+TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps);
+}  // namespace gps_parser_help
+}  // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser
diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc
index 064f404ef..e49467bfd 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example.cc
@@ -64,7 +64,7 @@ int main(int argc, const char** argv) {
         const std::string img_str = "img " + std::to_string(i);
         cv::putText(img, img_str, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5,
                     cv::Scalar(0, 255, 0), 2);
-        const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
+        const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         std::stringstream ss_AS_w_from_gnss_cam;
         ss_AS_w_from_gnss_cam << "AS_w_from_gnss_cam: ";
         if (img_pt.AS_w_from_gnss_cam) {
@@ -84,7 +84,7 @@ int main(int argc, const char** argv) {
         std::stringstream ss_gps;
         ss_gps << "gps_gcs: ";
         if (img_pt.gps_gcs) {
-            const lrn_desc::ImagePoint::GPSData& gps_data = *img_pt.gps_gcs;
+            const lrn_desc::GPSData& gps_data = *img_pt.gps_gcs;
             ss_gps << "long: " << gps_data.longitude << ", lat: " << gps_data.latitude;
             if (gps_data.altitude) {
                 ss_gps << ", alt: " << *gps_data.altitude;
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index bca7ec2c4..9befd4f2a 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "common/check.hh"
+#include "common/gps/frame_translation.hh"
 #include "cxxopts.hpp"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 #include "opencv2/opencv.hpp"
@@ -59,7 +60,7 @@ int main(int argc, const char** argv) {
     std::optional<double> altitude_gps_from_gnss_cam;
     std::vector<double> gps_ns_delta_from_shutter;
     for (size_t i = 0; i < parser.num_images(); i += 1) {
-        const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
+        const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         std::cout << img_pt.to_string() << std::endl;
         if (!img_pt.AS_w_from_gnss_cam) {
             continue;
@@ -88,7 +89,7 @@ int main(int argc, const char** argv) {
                     (parser.e_from_gpsw() * parser.w_from_gpsw().inverse()).matrix()) *
                 w_from_gnss_cam;
             const Eigen::Vector3d gnss_cam_in_gcs =
-                parser.gcs_from_ECEF(ECEF_from_gnss_cam.translation());
+                robot::gps::lla_from_ecef(ECEF_from_gnss_cam.translation());
 
             if (!altitude_gps_from_gnss_cam) {
                 altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude);
@@ -98,7 +99,7 @@ int main(int argc, const char** argv) {
                       << "\ngps_gcs: " << gcs_coordinate
                       << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl;
 
-            const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate);
+            const Eigen::Vector3d ECEF_from_gps = robot::gps::ecef_from_lla(gcs_coordinate);
             const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(),
                                                     ECEF_from_gps.z(), 1);
             Eigen::Vector4d gps_in_w =
diff --git a/experimental/learn_descriptors/four_seasons_parser_test.cc b/experimental/learn_descriptors/four_seasons_parser_test.cc
index ff49d935d..1b1a940b9 100644
--- a/experimental/learn_descriptors/four_seasons_parser_test.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_test.cc
@@ -10,14 +10,12 @@
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
-#include "GeographicLib/Constants.hpp"
-#include "GeographicLib/Geocentric.hpp"
-#include "GeographicLib/Geodesic.hpp"
-#include "GeographicLib/LocalCartesian.hpp"
 #include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
 #include "common/check.hh"
+#include "common/gps/frame_translation.hh"
 #include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
 #include "gtest/gtest.h"
 #include "nmea/message/gga.hpp"
 #include "nmea/message/rmc.hpp"
@@ -35,6 +33,7 @@ static bool images_equal(cv::Mat img1, cv::Mat img2) {
 }
 
 namespace robot::experimental::learn_descriptors {
+using namespace detail::four_seasons_parser;
 TEST(FourSeasonsParserTest, parser_test) {
     const std::filesystem::path dir_snippet =
         "external/four_seasons_snippet/recording_2020-04-07_11-33-45";
@@ -43,6 +42,7 @@ TEST(FourSeasonsParserTest, parser_test) {
     EXPECT_TRUE(std::filesystem::exists(dir_snippet));
     EXPECT_TRUE(std::filesystem::exists(dir_calibration));
 
+    std::cout << "heartbeat -1" << std::endl;
     FourSeasonsParser parser(dir_snippet, dir_calibration);
 
     // transformations test
@@ -65,17 +65,17 @@ TEST(FourSeasonsParserTest, parser_test) {
     EXPECT_DOUBLE_EQ(gnss_scale, parser.gnss_scale());
 
     // calibration test
-    const FourSeasonsParser::CameraCalibrationFisheye calibration_target =
-        detail::txt_parser_help::load_camera_calibration(dir_calibration);
-    const FourSeasonsParser::CameraCalibrationFisheye calibration = parser.camera_calibration();
-    EXPECT_DOUBLE_EQ(calibration_target.cx, calibration.cx);
-    EXPECT_DOUBLE_EQ(calibration_target.cy, calibration.cy);
-    EXPECT_DOUBLE_EQ(calibration_target.fx, calibration.fx);
-    EXPECT_DOUBLE_EQ(calibration_target.fy, calibration.fy);
-    EXPECT_DOUBLE_EQ(calibration_target.k1, calibration.k1);
-    EXPECT_DOUBLE_EQ(calibration_target.k2, calibration.k2);
-    EXPECT_DOUBLE_EQ(calibration_target.k3, calibration.k3);
-    EXPECT_DOUBLE_EQ(calibration_target.k4, calibration.k4);
+    const std::shared_ptr<CameraCalibrationFisheye> calibration_target =
+        txt_parser_help::load_camera_calibration(dir_calibration);
+    const std::shared_ptr<CameraCalibrationFisheye> calibration = parser.camera_calibration();
+    EXPECT_DOUBLE_EQ(calibration_target->cx, calibration->cx);
+    EXPECT_DOUBLE_EQ(calibration_target->cy, calibration->cy);
+    EXPECT_DOUBLE_EQ(calibration_target->fx, calibration->fx);
+    EXPECT_DOUBLE_EQ(calibration_target->fy, calibration->fy);
+    EXPECT_DOUBLE_EQ(calibration_target->k1, calibration->k1);
+    EXPECT_DOUBLE_EQ(calibration_target->k2, calibration->k2);
+    EXPECT_DOUBLE_EQ(calibration_target->k3, calibration->k3);
+    EXPECT_DOUBLE_EQ(calibration_target->k4, calibration->k4);
 
     EXPECT_NE(parser.num_images(), 0);
 
@@ -87,17 +87,16 @@ TEST(FourSeasonsParserTest, parser_test) {
     const std::filesystem::path path_img_time = dir_snippet / "times.txt";
     const std::filesystem::path path_gnss_poses = dir_snippet / "GNSSPoses.txt";
     const std::filesystem::path path_vio = dir_snippet / "result.txt";
-    const size_t min_sig_figs_time = detail::txt_parser_help::min_sig_figs_result_time(path_vio);
+    const size_t min_sig_figs_time = txt_parser_help::min_sig_figs_result_time(path_vio);
 
     // fetch data for all the fields (AS_w_from_gnss_cam, img times, result)
-    const detail::txt_parser_help::TimeDataList img_time_list =
-        detail::txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time);
+    const txt_parser_help::TimeDataList img_time_list =
+        txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time);
     EXPECT_EQ(img_time_list.size(), parser.num_images());
-    const detail::txt_parser_help::TimeDataMap gnss_poses_time_map =
-        detail::txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses,
-                                                                 min_sig_figs_time);
-    const detail::txt_parser_help::TimeDataMap vio_time_map =
-        detail::txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time);
+    const txt_parser_help::TimeDataMap gnss_poses_time_map =
+        txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses, min_sig_figs_time);
+    const txt_parser_help::TimeDataMap vio_time_map =
+        txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time);
 
     Eigen::Matrix4d scale_mat = Eigen::Matrix4d::Identity();
     scale_mat(0, 0) = scale_mat(1, 1) = scale_mat(2, 2) = parser.gnss_scale();
@@ -108,7 +107,7 @@ TEST(FourSeasonsParserTest, parser_test) {
     std::vector<double> gps_ns_delta_from_shutter;
 
     for (size_t i = 0; i < parser.num_images(); i++) {
-        const ImagePoint& img_pt = parser.get_image_point(i);
+        const ImagePointFourSeasons& img_pt = parser.get_image_point(i);
         const cv::Mat img = parser.load_image(i);
 
         // seq (time in nanoseconds) test
@@ -124,20 +123,22 @@ TEST(FourSeasonsParserTest, parser_test) {
             const std::vector<std::string>& parsed_line_gnss_poses =
                 gnss_poses_time_map.at(img_time_list[i].first);
 
-            Eigen::Vector3d gps_translation(std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                                detail::txt_parser_help::GPSIdx::TRAN_X)]),
-                                            std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                                detail::txt_parser_help::GPSIdx::TRAN_Y)]),
-                                            std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                                detail::txt_parser_help::GPSIdx::TRAN_Z)]));
-            Eigen::Quaterniond gps_quat(std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_W)]),
-                                        std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_X)]),
-                                        std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_Y)]),
-                                        std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_Z)]));
+            Eigen::Vector3d gps_translation(
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::TRAN_X)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::TRAN_Y)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::TRAN_Z)]));
+            Eigen::Quaterniond gps_quat(
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_W)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_X)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Y)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Z)]));
             EXPECT_TRUE(liegroups::SE3(gps_quat, gps_translation)
                             .matrix()
                             .isApprox(img_pt.AS_w_from_gnss_cam->matrix()));
@@ -148,20 +149,16 @@ TEST(FourSeasonsParserTest, parser_test) {
             const std::vector<std::string>& parsed_line_gps =
                 vio_time_map.at(img_time_list[i].first);
             Eigen::Vector3d ground_truth_translation(
-                std::stod(parsed_line_gps[static_cast<size_t>(
-                    detail::txt_parser_help::ResultIdx::TRAN_X)]),
-                std::stod(parsed_line_gps[static_cast<size_t>(
-                    detail::txt_parser_help::ResultIdx::TRAN_Y)]),
-                std::stod(parsed_line_gps[static_cast<size_t>(
-                    detail::txt_parser_help::ResultIdx::TRAN_Z)]));
-            Eigen::Quaterniond ground_truth_quat(std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_W)]),
-                                                 std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_X)]),
-                                                 std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_Y)]),
-                                                 std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_Z)]));
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::TRAN_X)]),
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::TRAN_Y)]),
+                std::stod(
+                    parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::TRAN_Z)]));
+            Eigen::Quaterniond ground_truth_quat(
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_W)]),
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_X)]),
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Y)]),
+                std::stod(
+                    parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Z)]));
             EXPECT_TRUE(liegroups::SE3(ground_truth_quat, ground_truth_translation)
                             .matrix()
                             .isApprox(img_pt.AS_w_from_vio_cam->matrix()));
@@ -177,8 +174,7 @@ TEST(FourSeasonsParserTest, parser_test) {
                 ECEF_from_gnss_cam * gps_from_cam.inverse();
 
             const Eigen::Vector3d& t_ECEF_from_gnss_gps = ECEF_from_gnss_gps.translation();
-            const Eigen::Vector3d gnss_gps_gcs =
-                FourSeasonsParser::gcs_from_ECEF(t_ECEF_from_gnss_gps);
+            const Eigen::Vector3d gnss_gps_gcs = gps::lla_from_ecef(t_ECEF_from_gnss_gps);
 
             const Eigen::Vector3d raw_gps_gcs(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
diff --git a/experimental/learn_descriptors/four_seasons_transforms.cc b/experimental/learn_descriptors/four_seasons_transforms.cc
new file mode 100644
index 000000000..c6dc25ac4
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_transforms.cc
@@ -0,0 +1,67 @@
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
+
+namespace robot::experimental::learn_descriptors {
+FourSeasonsTransforms::StaticTransforms::StaticTransforms(
+    const std::filesystem::path& path_transforms) {
+    std::ifstream file_transforms(path_transforms);
+    ROBOT_CHECK(file_transforms, path_transforms);
+    std::string line;
+    while (std::getline(file_transforms, line)) {
+        if (line.find("transform_S_AS") != std::string::npos) {
+            std::getline(file_transforms, line);
+            S_from_AS = get_transform_from_line(line);
+        } else if (line.find("TS_cam_imu") != std::string::npos) {
+            std::getline(file_transforms, line);
+            cam_from_imu = get_transform_from_line(line);
+        } else if (line.find("transform_w_gpsw") != std::string::npos) {
+            std::getline(file_transforms, line);
+            w_from_gpsw = get_transform_from_line(line);
+        } else if (line.find("transform_gps_imu") != std::string::npos) {
+            std::getline(file_transforms, line);
+            gps_from_imu = get_transform_from_line(line);
+        } else if (line.find("transform_e_gpsw") != std::string::npos) {
+            std::getline(file_transforms, line);
+            e_from_gpsw = get_transform_from_line(line);
+        } else if (line.find("GNSS scale") != std::string::npos) {
+            std::getline(file_transforms, line);
+            gnss_scale = std::stod(line);
+        }
+    }
+}
+
+liegroups::SE3 FourSeasonsTransforms::StaticTransforms::get_transform_from_line(
+    const std::string& line) {
+    enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W };
+    std::vector<std::string> parsed_transform_line =
+        detail::four_seasons_parser::txt_parser_help::parse_line_adv(line, ",");
+    if (parsed_transform_line.size() < 7) {
+        std::stringstream error_stream;
+        error_stream << "parsed_transform_line doesn't have sufficient entries for "
+                        "transform! parsed_transform_line.size(): "
+                     << parsed_transform_line.size() << std::endl;
+        throw std::runtime_error(error_stream.str());
+    }
+    std::vector<double> transform_nums;
+    for (const std::string& num : parsed_transform_line) {
+        transform_nums.push_back(static_cast<double>(std::stod(num)));
+    }
+    Eigen::Vector3d translation(transform_nums[TransformEntry::T_X],
+                                transform_nums[TransformEntry::T_Y],
+                                transform_nums[TransformEntry::T_Z]);
+    Eigen::Quaterniond rotation(
+        transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X],
+        transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]);
+    return liegroups::SE3(rotation, translation);
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_transforms.hh b/experimental/learn_descriptors/four_seasons_transforms.hh
new file mode 100644
index 000000000..9e8ab1bde
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_transforms.hh
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <filesystem>
+#include <string>
+
+#include "common/liegroups/se3.hh"
+
+namespace robot::experimental::learn_descriptors {
+struct FourSeasonsTransforms {
+    struct StaticTransforms {
+        liegroups::SE3 S_from_AS;
+        liegroups::SE3 cam_from_imu;
+        liegroups::SE3 w_from_gpsw;
+        liegroups::SE3 gps_from_imu;
+        liegroups::SE3 e_from_gpsw;
+        double gnss_scale;  // scale from vio frame to gnss frame. WARNING: will require retooling
+                            // if the scales per
+        // keyframe (pose) are not all one value. See more here:
+        // https://github.com/pmwenzel/4seasons-dataset
+
+        StaticTransforms(const std::filesystem::path& path_transforms);
+
+       private:
+        static liegroups::SE3 get_transform_from_line(const std::string& line);
+    };
+    // might consider defining a nested "DynamicTransforms" or "PerImageTransforms" at some point -
+    // Nico
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index 5668da713..322a823ac 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -2,19 +2,15 @@
 #include <optional>
 
 #include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "gtsam/base/Matrix.h"
 #include "gtsam/geometry/Cal3_S2.h"
 #include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
 #include "opencv2/opencv.hpp"
 
 namespace robot::experimental::learn_descriptors {
 class Frame {
    public:
-    Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K,
-          const gtsam::Pose3& T_wrld_grnd_truth)
-        : id_(id), img_(img_undistorted), K_(K), groundtruth_(T_wrld_grnd_truth) {}
-    Frame(const FrameId& id, const cv::Mat& img_undistorted, const gtsam::Cal3_S2::shared_ptr& K)
-        : id_(id), img_(img_undistorted), K_(K) {}
-
     void add_keypoints(const KeypointsCV& kpts);
     void assign_descriptors(const cv::Mat& descriptors);
 
@@ -22,10 +18,13 @@ class Frame {
     const cv::Mat get_descriptors() { return descriptors_; };
 
     const FrameId id_;
-    const cv::Mat img_;
+    const cv::Mat undistorted_img_;
     gtsam::Cal3_S2::shared_ptr K_;
     KeypointsCV kpts_;
     cv::Mat descriptors_;
-    std::optional<gtsam::Pose3> groundtruth_;
+    std::optional<gtsam::Pose3> world_from_cam_groundtruth_;
+    std::optional<gtsam::Point3> cam_in_world_initial_guess_;
+    std::optional<gtsam::Matrix3> translation_covariance_in_cam_;
+    std::optional<gtsam::Rot3> world_from_cam_initial_guess_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 194122420..dfe24fa1d 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -1,6 +1,188 @@
 #include "experimental/learn_descriptors/frontend.hh"
 
+#include <cstddef>
+#include <optional>
 #include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "opencv2/opencv.hpp"
+
+namespace detail_sfm {
+
+std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+                                                 const std::vector<gtsam::Point2> &cam_obs,
+                                                 gtsam::Cal3_S2::shared_ptr K,
+                                                 const double max_reproj_error = 2.0,
+                                                 const bool verbose = true) {
+    gtsam::Point3 p_lmk_in_world;
+    if (cam_poses.size() >= 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_lmk_in_world = gtsam::triangulatePoint3(
+                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+        } catch (const gtsam::TriangulationCheiralityException &e) {
+            // Handle the exception gracefully by logging and retaining the previous
+            // estimate.
+            if (verbose)
+                std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
+                          << e.what() << ". Discarding involved keypoints." << std::endl;
+        }
+    }
+    // Optional: perform an explicit cheirality check
+    for (const auto &pose : cam_poses) {
+        // Transform point to the camera coordinate system.
+        // transformTo() converts a world point to the camera frame.
+        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
+        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            return std::nullopt;
+        }
+    }
+
+    // Cheirality & reprojection checks
+    for (size_t i = 0; i < cam_poses.size(); ++i) {
+        const auto &pose = cam_poses[i];
+        // Cheirality
+        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
+        if (p_cam.z() <= 0) {
+            if (verbose) {
+                std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z()
+                          << ")\n";
+            }
+            return std::nullopt;
+        }
+        // Reprojection error
+        if (max_reproj_error > 0) {
+            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
+            const auto reproj = cam.project(p_lmk_in_world);
+            const double err = (reproj - cam_obs[i]).norm();
+            if (err > max_reproj_error) {
+                if (verbose) {
+                    std::cerr << "[triangulate] reprojection error too large on view " << i << " ("
+                              << err << " px)\n";
+                }
+                return std::nullopt;
+            }
+        }
+    }
+
+    return p_lmk_in_world;
+}
+
+void graph_values(const gtsam::Values &values, const std::string &window_name,
+                  const std::vector<gtsam::Symbol> &symbols_pose,
+                  const std::vector<gtsam::Symbol> &symbols_landmarks) {
+    std::vector<robot::geometry::VizPose> final_poses;
+    std::vector<robot::geometry::VizPoint> final_lmks;
+    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
+        final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
+                                 symbol_pose.string());
+    }
+    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
+        if (!values.exists(symbol_lmk)) {
+            std::cout << "WTF " << symbol_lmk << std::endl;
+        }
+        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
+    }
+    std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
+    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
+                               window_name);
+}
+
+gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
+                             const std::vector<gtsam::Symbol> &symbols_pose,
+                             const std::vector<gtsam::Symbol> &symbols_landmarks,
+                             const int num_epochs = 5, bool viz_itr = false) {
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
+
+    double prev_error = optimizer.error();
+    typedef int epoch;
+    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
+                       const std::vector<gtsam::Symbol> &)>
+        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
+                                   const std::vector<gtsam::Symbol> &symbols_pose,
+                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
+            std::cout << "iteration " << iter << " complete!";
+            std::string window_name = "Iteration_" + std::to_string(iter);
+            graph_values(vals, window_name, symbols_pose, symbols_landmarks);
+        };
+
+    for (int i = 0; i < num_epochs; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+
+        if (viz_itr) {
+            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
+        }
+        if (std::abs(prev_error - curr_error) < 1e-6) {
+            std::cout << "Converged at iteration " << i << "\n";
+            break;
+        }
+    }
+    return optimizer.values();
+}
+
+gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
+    if (poses.empty()) throw std::runtime_error("Empty pose vector");
+
+    gtsam::Pose3 mean = poses[0];
+
+    for (int iter = 0; iter < maxIter; ++iter) {
+        gtsam::Vector6 total = gtsam::Vector6::Zero();
+
+        for (const auto &pose : poses) {
+            gtsam::Pose3 delta = mean.between(pose);
+            total += gtsam::Pose3::Logmap(delta);
+        }
+
+        total /= poses.size();
+        mean = mean.compose(gtsam::Pose3::Expmap(total));
+    }
+
+    return mean;
+}
+
+gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
+    if (points.empty()) throw std::runtime_error("Empty point vector");
+    gtsam::Point3 sum(0, 0, 0);
+    for (const auto &pt : points) sum += pt;
+    return sum / points.size();
+}
+
+gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
+    const gtsam::Point3 mean = averagePoints(points);
+    gtsam::Point3 var(0, 0, 0);
+    for (const gtsam::Point3 &pt : points) {
+        var += (mean - pt).array().square().matrix();
+    }
+    return var / points.size();
+}
+
+double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) {
+    Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation();
+
+    // 2. Compute trace and clamp to [-1,1] for numerical safety
+    double tr = R_err.trace();
+    double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0));
+
+    // 3. Recover angle (in radians)
+    return std::acos(cos_theta);
+}
+
+// const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &)
+// {}
+}  // namespace detail_sfm
 
 namespace robot::experimental::learn_descriptors {
 
@@ -36,94 +218,152 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) {
 }
 
 void Frontend::populate_frames() {
-    for (const cv::Mat &img : images_) {
+    FrameId id = frames_.size();
+    for (const ImageAndPoint &img_and_pt : image_and_points_) {
+        const ImagePoint &img_pt = img_and_pt.img_pt;
+        cv::Mat &img_undistorted;
+        cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->K_mat(),
+                                    img_pt.K->D_mat(), img_pt.K->K_mat(), img.size());
+        auto [kpts, features] = extract_feaures(img_undistorted);
+
+        gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(
+            img_pt.K->fx, img_pt.K->fy, 0, img_pt.K->cx, img_pt.K->cy);
+
+        Frame frame{id, img_undistorted, K, kpts, features};
+        const std::optional<Eigen::Isometry3d> maybe_world_from_cam_grnd_trth =
+            img_pt.world_from_cam_ground_truth();
+        if (maybe_world_from_cam_grnd_trth) {
+            frame.world_from_cam_ground_truth =
+                gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix());
+        }
+        const std::optional<Eigen::Vector3d> maybe_cam_in_world = img_pt.cam_in_world();
+        if (maybe_cam_in_world) {
+            frame.cam_in_world_initial_guess = *maybe_cam_in_world;
+        }
+        const std::optional<gtsam::Matrix3> maybe_translation_covariance_in_cam =
+            img_pt.translation_covariance_in_cam();
+        if (maybe_translation_covariance_in_cam) {
+            frame.translation_covariance_in_cam = *maybe_translation_covariance_in_cam;
+        }
+        frames_.push_back(frame);
+        id++;
     }
 }
 
 void Frontend::match_frames_and_build_tracks() {
     if (params_.exhaustive) {
-        for (size_t i = 0; i < indices.size() - 1; i += 4) {
-            // std::cout << "i: " << i << std::endl;
-            for (size_t j = i + 1; j < indices.size(); j++) {
-                // std::cout << "j: " << j << std::endl;
+        for (size_t i = 0; i < frames_.size() - 1; i++) {
+            for (size_t j = i + 1; j < frames_.size(); j++) {
                 std::vector<cv::DMatch> matches = frontend.compute_matches(
-                    frames[i].get_descriptors(), frames[j].get_descriptors());
-                // DIAL TO MESS WITH
+                    frames_[i].get_descriptors(), frames_[j].get_descriptors());
                 frontend.enforce_bijective_buffer_matches(matches);
-
                 if (matches.size() < 5) {
                     continue;
                 }
 
                 std::vector<cv::KeyPoint> cv_kpts_1;
                 std::vector<cv::KeyPoint> cv_kpts_2;
-                for (const KeypointCV &kpt : frames[i].get_keypoints()) {
+                for (const KeypointCV &kpt : frames_[i].get_keypoints()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_1.push_back(cv_kpt);
                 }
-                for (const KeypointCV &kpt : frames[j].get_keypoints()) {
+                for (const KeypointCV &kpt : frames_[j].get_keypoints()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_2.push_back(cv_kpt);
                 }
-                Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix());
-                // std::cout << "heartbeat" << std::endl;
                 std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
                     robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
                 if (!scale_cam0_from_cam1) {
                     continue;
                 }
-                world_from_camj.linear() =
-                    (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) *
-                     *scale_cam0_from_cam1)
-                        .linear();
-                // std::cout << "heartbeat 2" << std::endl;
-                id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
+                ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_,
+                            "This rotation should be populated.");
+                // this could use some work to verify the quality of the output, particularly inside
+                // of estiamte_cam0_from_cam1
+                // also, at the moment I am not accounting for any covariance between the gps
+                // measurement and the unit translation vector from estimate_cam0_from_cam1
+
+                // can try averaging poses here as well
+                frames[j].world_from_cam_initial_guess_.emplace(
+                    frames_[i].world_from_cam_initial_guess_->matrix() *
+                    scale_cam0_from_cam1->matrix());
                 for (const cv::DMatch match : matches) {
-                    const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-                    const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx];
-
-                    auto key = std::make_pair(frames[i].id_, kpt_cam0);
-                    if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                        auto id = lmk_id_map.at(key);
-                        feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-                        feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1);
+                    const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = frames_[j].get_keypoints()[match.trainIdx];
+
+                    auto key = std::make_pair(frames_[i].id_, kpt_cam0);
+                    if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
+                        const size_t id = lmk_id_map_.at(key);
+                        ROBOT_CHECK(id < feature_tracks_.size(),
+                                    "lmk_id_map_ id's shuold not exceed feature_tracks_ size!");
+                        feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0);
+                        feature_tracks_[id].obs_.emplace_back(frames_[j].id_, kpt_cam1);
                     } else {
                         FeatureTrack feature_track(i, kpt_cam0);
-                        feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1);
-                        feature_tracks.emplace(lmk_id, feature_track);
-                        lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-                        lmk_id++;
+                        feature_track.obs_.emplace_back(frames_[j].id_, kpt_cam1);
+                        feature_tracks_.push_back(feature_track);
+                        lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0),
+                                            feature_tracks_.size() - 1);
                     }
                 }
             }
         }
-        std::cout << "done processing matches" << std::endl;
     } else {  // successive only
         for (size_t i = 0; i < indices.size() - 1; i++) {
             std::vector<cv::DMatch> matches = frontend.compute_matches(
-                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+                frames_[i].get_descriptors(), frames_[i + 1].get_descriptors());
             frontend.enforce_bijective_buffer_matches(matches);
+            if (matches.size() < 5) {
+                continue;
+            }
+
+            std::vector<cv::KeyPoint> cv_kpts_1;
+            std::vector<cv::KeyPoint> cv_kpts_2;
+            for (const KeypointCV &kpt : frames_[i].get_keypoints()) {
+                cv::KeyPoint cv_kpt;
+                cv_kpt.pt = kpt;
+                cv_kpts_1.push_back(cv_kpt);
+            }
+            for (const KeypointCV &kpt : frames_[i + 1].get_keypoints()) {
+                cv::KeyPoint cv_kpt;
+                cv_kpt.pt = kpt;
+                cv_kpts_2.push_back(cv_kpt);
+            }
+            std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
+                robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
+            if (!scale_cam0_from_cam1) {
+                continue;
+            }
+            ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_,
+                        "This rotation should be populated.");
+            frames_[i + 1].world_from_cam_initial_guess_.emplace(
+                frames_[i].world_from_cam_initial_guess_->matrix() *
+                scale_cam0_from_cam1->matrix());
+
             for (const cv::DMatch match : matches) {
-                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-
-                auto key = std::make_pair(frames[i].id_, kpt_cam0);
-                if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                    auto id = lmk_id_map.at(key);
-                    feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-                    feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames_[i + 1].get_keypoints()[match.trainIdx];
+
+                auto key = std::make_pair(frames_[i].id_, kpt_cam0);
+                if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
+                    const size_t id = lmk_id_map_.at(key);
+                    ROBOT_CHECK(id < feature_tracks_.size(),
+                                "lmk_id_map_ id's shuold not exceed feature_tracks_ size!");
+                    feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0);
+                    feature_tracks_[id].obs_.emplace_back(frames_[i + 1].id_, kpt_cam1);
                 } else {
                     FeatureTrack feature_track(i, kpt_cam0);
-                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                    feature_tracks.emplace(lmk_id, feature_track);
-                    lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-                    lmk_id++;
+                    feature_track.obs_.emplace_back(frames_[i + 1].id_, kpt_cam1);
+                    feature_tracks_.emplace(lmk_id, feature_track);
+                    lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0),
+                                        feature_tracks_.size() - 1);
                 }
             }
         }
     }
+    std::cout << "done processing matches" << std::endl;
 }
 
 std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const cv::Mat &img) const {
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index 85b112c66..d71338936 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -5,6 +5,7 @@
 
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/image_point.hh"
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
 #include "opencv2/opencv.hpp"
 
@@ -17,6 +18,10 @@ struct FrontendParams {
     bool exhaustive = true;
     bool incremental = false;  // a.k.a. images are successive
 };
+struct ImageAndPoint {
+    cv::Mat image_distorted;
+    ImagePoint img_pt;
+};
 class Frontend {
    public:
     static constexpr char symbol_pose_char = 'x';
@@ -27,15 +32,18 @@ class Frontend {
     void populate_frames();
     void match_frames_and_build_tracks();
 
-    void add_image(const cv::Mat &img) { images_.push_back(img); };
-    void add_images(const std::vector<cv::Mat> &imgs) {
-        for (const cv::Mat &img : imgs) {
-            images_.push_back(img);
+    void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); };
+    void add_images(const std::vector<ImageAndPoint> &img_and_pts) {
+        for (const ImageAndPoint &img_and_pt : img_and_pts) {
+            images_and_points_.push_back(img_and_pt);
         }
     };
 
     const FrontendParams::ExtractorType &extractor_type() const { return params_.extractor_type; };
     const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; };
+    const FeatureTracks &feature_tracks() const { return feature_tracks_; };
+    const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; };
+    const std::vector<Frame> &frames() const { return frames_; };
 
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> extract_features(const cv::Mat &img) const;
     std::vector<cv::DMatch> compute_matches(const cv::Mat &descriptors1,
@@ -66,7 +74,7 @@ class Frontend {
     cv::Ptr<cv::Feature2D> feature_extractor_;
     cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
 
-    std::vector<cv::Mat> images_;
+    std::vector<ImageAndPoint> images_and_points_;
     FeatureTracks feature_tracks_;
     FrameLandmarkIdMap lmk_id_map_;
     std::vector<Frame> frames_;
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
index 2965d6c97..e2822ef42 100644
--- a/experimental/learn_descriptors/frontend_definitions.hh
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -36,6 +36,6 @@ class FeatureTrack {
     }
 };
 
-using FeatureTracks = std::unordered_map<LandmarkId, FeatureTrack>;
+using FeatureTracks = std::vector<FeatureTrack>;  // each idx is the FeatureTracks's LandmarkId
 using FrameLandmarkIdMap = std::unordered_map<std::pair<FrameId, KeypointCV>, LandmarkId>;
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/gps_data.cc b/experimental/learn_descriptors/gps_data.cc
new file mode 100644
index 000000000..e69de29bb
diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh
new file mode 100644
index 000000000..13b032c25
--- /dev/null
+++ b/experimental/learn_descriptors/gps_data.hh
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <optional>
+
+#include "Eigen/Core"
+
+namespace robot::experimental::learn_descriptors {
+struct GPSData {
+    struct Uncertainty {
+        double sigma_lat_mitude;
+        double sigma_longitude;
+        double sigma_altitude;
+        double orientation_deg;
+        double rms_range_error_m;
+
+        // diagonal latitude, longitude, altitude covariance in meters squared
+        Eigen::Matrix3d to_LLA_covariance() const {
+            Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
+            cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude;
+            cov(1, 1) = sigma_longitude * sigma_longitude;
+            cov(2, 2) = sigma_altitude * sigma_altitude;
+            return cov;
+        }
+
+        // Convert lat/lon covariance to ENU at given latitude
+        Eigen::Matrix3d to_ENU_covariance(double lat_deg) const {
+            double lat_rad = lat_deg * M_PI / 180.0;
+
+            // meters per degree scaling
+            double kLat =
+                111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad);
+            double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad);
+
+            Eigen::Matrix3d lla_cov = to_LLA_covariance();
+
+            // rotate lat/lon covariance (upper-left 2x2) using orientation
+            double theta = orientation_deg * M_PI / 180.0;
+            Eigen::Matrix2d R;
+            R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
+
+            Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
+            Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose();
+
+            // apply linear scaling to convert degrees to meters in EN
+            Eigen::Matrix2d J;
+            J << kLat, 0, 0, kLon;
+
+            Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose();
+
+            Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero();
+            enu_cov.topLeftCorner<2, 2>() = en_cov;
+            enu_cov(2, 2) = lla_cov(2, 2);  // alt variance unchanged
+
+            return enu_cov;
+        }
+    };
+    size_t seq;  // time in nanoseconds, may differ from image capturetime
+    double latitude;
+    double longitude;
+    std::optional<double> altitude;  // meters above sea level
+    std::optional<Uncertainty> uncertainty;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index c46708d17..2fbaf52cb 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -2,132 +2,65 @@
 
 #include <cstddef>
 #include <filesystem>
-#include <opencv2/opencv.hpp>
+#include <memory>
 #include <optional>
 #include <sstream>
 #include <string>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "camera_calibration.hh"
 #include "common/liegroups/se3.hh"
-#include "common/time/robot_time.hh"
+#include "experimental/learn_descriptors/camera_calibration.hh"
 
 namespace robot::experimental::learn_descriptors {
 struct ImagePoint {
-    struct GPSData {
-        struct Uncertainty {
-            double sigma_lat_mitude;
-            double sigma_longitude;
-            double sigma_altitude;
-            double orientation_deg;
-            double rms_range_error_m;
+    virtual ~ImagePoint() = default;
 
-            // diagonal latitude, longitude, altitude covariance in meters squared
-            Eigen::Matrix3d to_LLA_covariance() const {
-                Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
-                cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude;
-                cov(1, 1) = sigma_longitude * sigma_longitude;
-                cov(2, 2) = sigma_altitude * sigma_altitude;
-                return cov;
-            }
-
-            // Convert lat/lon covariance to ENU at given latitude
-            Eigen::Matrix3d to_ENU_covariance(double lat_deg) const {
-                double lat_rad = lat_deg * M_PI / 180.0;
-
-                // meters per degree scaling
-                double kLat =
-                    111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad);
-                double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad);
-
-                Eigen::Matrix3d lla_cov = to_LLA_covariance();
-
-                // rotate lat/lon covariance (upper-left 2x2) using orientation
-                double theta = orientation_deg * M_PI / 180.0;
-                Eigen::Matrix2d R;
-                R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
-
-                Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
-                Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose();
-
-                // apply linear scaling to convert degrees to meters in EN
-                Eigen::Matrix2d J;
-                J << kLat, 0, 0, kLon;
-
-                Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose();
-
-                Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero();
-                enu_cov.topLeftCorner<2, 2>() = en_cov;
-                enu_cov(2, 2) = lla_cov(2, 2);  // alt variance unchanged
-
-                return enu_cov;
-            }
-        };
-        size_t seq;  // time in nanoseconds, may differ from image capturetime
-        double latitude;
-        double longitude;
-        std::optional<double> altitude;  // meters above sea level
-        std::optional<Uncertainty> uncertainty;
+    size_t id;
+    std::shared_ptr<CameraCalibrationFisheye> K;
+    virtual std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const {
+        return std::nullopt;
     };
-    size_t id;   // idx for DB
-    size_t seq;  // time in nanoseconds of image capture (also the name of image)
-    std::optional<liegroups::SE3>
-        AS_w_from_gnss_cam;  // globally opimized arbitrary scale visual world (gnss + VIO + loop
-                             // closure + etc.) from cam
-    std::optional<liegroups::SE3> AS_w_from_vio_cam;  // arbitrary scale world from vio result cam
-    std::optional<GPSData> gps_gcs;  // raw gps measurement in gcs (global cordinate system)
-
-    const std::string to_string() const {
-        auto se3_to_str = [](const liegroups::SE3& se3) {
-            const Eigen::Vector3d& t = se3.translation();
-            const Eigen::Quaterniond& r = se3.so3().unit_quaternion();
+    virtual std::optional<Eigen::Vector3d> cam_in_world() const { return std::nullopt; };
+    virtual std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const {
+        return std::nullopt;
+    };
+    std::string to_string() const {
+        auto vec3d_to_str = [](const Eigen::Vector3d& vec3d) {
+            std::stringstream ss;
+            ss << "[" << vec3d.x() << ", " << vec3d.y() << ", " << vec3d.z() << "]";
+            return ss.str();
+        };
+        auto isom3d_to_str = [&vec3d_to_str](const Eigen::Isometry3d& isom3d) {
+            const Eigen::Vector3d& t = isom3d.translation();
+            const Eigen::Quaterniond r(isom3d.rotation());
             std::stringstream ss;
-            ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n";
+            ss << "\t\tt: " << vec3d_to_str(t) << "\n";
             ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]";
             return ss.str();
         };
         std::stringstream ss;
         ss << "Image Point " << id << ":\n";
-        ss << "\tseq: " << seq << "\n";
-        ss << "\tAS_w_from_gnss_cam: ";
-        if (AS_w_from_gnss_cam) {
-            ss << "\n" << se3_to_str(*AS_w_from_gnss_cam);
+        ss << "\tworld_from_cam_ground_truth: ";
+        if (world_from_cam_ground_truth()) {
+            ss << "\n" << isom3d_to_str(*world_from_cam_ground_truth());
         } else {
             ss << "N/A";
         }
-        ss << "\n\tAS_w_from_vio_cam: ";
-        if (AS_w_from_vio_cam) {
-            ss << "\n" << se3_to_str(*AS_w_from_vio_cam);
+        ss << "\n\tcam_in_world: ";
+        if (cam_in_world()) {
+            ss << vec3d_to_str(*cam_in_world());
         } else {
             ss << "N/A";
         }
-        ss << "\n\tgps_gcs: ";
-        if (gps_gcs) {
-            ss << "\n\t\tseq: " << gps_gcs->seq;
-            ss << "\n\t\t" << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t";
-            if (gps_gcs->altitude) {
-                ss << *(gps_gcs->altitude);
-            } else {
-                ss << "alt N/A";
-            }
-            ss << "\n\t\tsigma: ";
-            if (gps_gcs->uncertainty) {
-                ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t"
-                   << gps_gcs->uncertainty->sigma_longitude << "\t"
-                   << gps_gcs->uncertainty->sigma_altitude;
-            } else {
-                ss << "N/A";
-            }
+        ss << "\n\ttranslation_covariance_in_cam: ";
+        if (translation_covariance_in_cam()) {
+            ss << *translation_covariance_in_cam();
         } else {
             ss << "N/A";
         }
         return ss.str();
     }
-
-    cv::Mat load_image(const std::filesystem::path& img_dir) const {
-        const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png"));
-        return cv::imread(path);
-    }
 };
-typedef std::vector<ImagePoint> ImagePointVector;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc
new file mode 100644
index 000000000..73f531d93
--- /dev/null
+++ b/experimental/learn_descriptors/image_point_four_seasons.cc
@@ -0,0 +1,49 @@
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
+
+#include <optional>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/gps/frame_translation.hh"
+#include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+
+namespace robot::experimental::learn_descriptors {
+ImagePointFourSeasons::~ImagePointFourSeasons() = default;
+
+std::optional<Eigen::Isometry3d> ImagePointFourSeasons::world_from_cam_ground_truth() const {
+    if (!AS_w_from_gnss_cam) return std::nullopt;
+    Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
+    scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
+        shared_static_transforms->gnss_scale;
+    Eigen::Isometry3d world_from_cam_ground_truth(
+        (shared_static_transforms->S_from_AS.matrix() * scale_mat_reference *
+         AS_w_from_gnss_cam->matrix())
+            .matrix());
+    return world_from_cam_ground_truth;
+}
+
+std::optional<Eigen::Vector3d> ImagePointFourSeasons::cam_in_world() const {
+    if (!gps_gcs) return std::nullopt;
+    const Eigen::Vector3d gps_in_ECEF(gps::ecef_from_lla(
+        Eigen::Vector3d(gps_gcs->latitude, gps_gcs->longitude, *gps_gcs->altitude)));
+    Eigen::Vector3d cam_in_world(shared_static_transforms->cam_from_imu *
+                                 shared_static_transforms->gps_from_imu.inverse() *
+                                 shared_static_transforms->w_from_gpsw *
+                                 shared_static_transforms->e_from_gpsw.inverse() * gps_in_ECEF);
+    return cam_in_world;
+}
+
+std::optional<Eigen::Matrix3d> ImagePointFourSeasons::translation_covariance_in_cam() const {
+    return gps_covariance_in_world();
+}
+
+std::optional<Eigen::Matrix3d> ImagePointFourSeasons::gps_covariance_in_world() const {
+    if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt;
+    const Eigen::Matrix3d ENU_covariance(
+        gps_gcs->uncertainty->to_ENU_covariance(gps_gcs->latitude));
+    const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix());
+    const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose());
+    return w_covariance;
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
new file mode 100644
index 000000000..b636cd120
--- /dev/null
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <cstddef>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <sstream>
+#include <string>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+#include "experimental/learn_descriptors/gps_data.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class FourSeasonsParser;
+struct ImagePointFourSeasons : ImagePoint {
+    virtual ~ImagePointFourSeasons();
+
+    size_t id;   // idx for DB
+    size_t seq;  // time in nanoseconds of image capture (also the name of image)
+    std::optional<liegroups::SE3>
+        AS_w_from_gnss_cam;  // globally opimized arbitrary scale visual world (gnss + VIO + loop
+                             // closure + etc.) from cam
+    std::optional<liegroups::SE3> AS_w_from_vio_cam;  // arbitrary scale world from vio result cam
+    std::optional<GPSData> gps_gcs;  // raw gps measurement in gcs (global cordinate system)
+    std::shared_ptr<FourSeasonsTransforms::StaticTransforms> shared_static_transforms;
+
+    std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const override;
+    std::optional<Eigen::Vector3d> cam_in_world() const override;
+    std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const override;
+    std::optional<Eigen::Matrix3d> gps_covariance_in_world() const;
+
+    std::string to_string() const {
+        auto se3_to_str = [](const liegroups::SE3& se3) {
+            const Eigen::Vector3d& t = se3.translation();
+            const Eigen::Quaterniond& r = se3.so3().unit_quaternion();
+            std::stringstream ss;
+            ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n";
+            ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]";
+            return ss.str();
+        };
+        std::stringstream ss;
+        ss << "Image Point " << id << ":\n";
+        ss << "\tseq: " << seq << "\n";
+        ss << "\tAS_w_from_gnss_cam: ";
+        if (AS_w_from_gnss_cam) {
+            ss << "\n" << se3_to_str(*AS_w_from_gnss_cam);
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tAS_w_from_vio_cam: ";
+        if (AS_w_from_vio_cam) {
+            ss << "\n" << se3_to_str(*AS_w_from_vio_cam);
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tgps_gcs: ";
+        if (gps_gcs) {
+            ss << "\n\t\tseq: " << gps_gcs->seq;
+            ss << "\n\t\tval: " << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t";
+            if (gps_gcs->altitude) {
+                ss << *(gps_gcs->altitude);
+            } else {
+                ss << "alt N/A";
+            }
+            ss << "\n\t\tsigma: ";
+            if (gps_gcs->uncertainty) {
+                ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t"
+                   << gps_gcs->uncertainty->sigma_longitude << "\t"
+                   << gps_gcs->uncertainty->sigma_altitude;
+            } else {
+                ss << "N/A";
+            }
+        } else {
+            ss << "N/A";
+        }
+        return ss.str();
+    }
+
+    cv::Mat load_image(const std::filesystem::path& img_dir) const {
+        const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png"));
+        return cv::imread(path);
+    }
+};
+typedef std::vector<ImagePointFourSeasons> ImagePointFourSeasonsVector;
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index ff7df085c..2e056b71a 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -9,6 +9,7 @@
 #include "Eigen/Geometry"
 #include "common/geometry/camera.hh"
 #include "cxxopts.hpp"
+#include "experimental/learn_descriptors/camera_calibration.hh"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend.hh"
@@ -283,7 +284,7 @@ int main(int argc, const char **argv) {
     const std::vector<int> indices = [&parser]() -> std::vector<int> {
         std::vector<int> tmp;
         for (size_t i = 0; i < parser.num_images(); i++) {
-            const ImagePoint img_pt = parser.get_image_point(i);
+            const ImagePointFourSeasons img_pt = parser.get_image_point(i);
             if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs &&
                 img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude)
                 tmp.push_back(i);
@@ -301,8 +302,7 @@ int main(int argc, const char **argv) {
     // FourSeasonsParser parser()
     // const size_t img_width = img_pt_first.width, img_height =
     // img_pt_first.height;
-    const FourSeasonsParser::CameraCalibrationFisheye cal_parser_left_cam =
-        parser.camera_calibration();
+    const CameraCalibrationFisheye cal_parser_left_cam = parser.camera_calibration();
     gtsam::Cal3_S2::shared_ptr K =
         boost::make_shared<gtsam::Cal3_S2>(cal_parser_left_cam.fx, cal_parser_left_cam.fy, 0,
                                            cal_parser_left_cam.cx, cal_parser_left_cam.cy);
@@ -343,7 +343,7 @@ int main(int argc, const char **argv) {
         parser.gnss_scale();
     std::vector<std::vector<cv::KeyPoint>> kpt_list;
     for (const int &idx : indices) {
-        const ImagePoint img_pt = parser.get_image_point(idx);
+        const ImagePointFourSeasons img_pt = parser.get_image_point(idx);
         std::cout << img_pt.to_string() << std::endl;
         //
         Eigen::Isometry3d world_from_cam;
@@ -357,7 +357,7 @@ int main(int argc, const char **argv) {
             const Eigen::Vector3d gps_gcs(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
                 img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
-            const Eigen::Vector3d p_gps_in_ECEF = parser.ECEF_from_gcs(gps_gcs);
+            const Eigen::Vector3d p_gps_in_ECEF = parser.ecef_from_lla(gps_gcs);
             const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(),
                                                     p_gps_in_ECEF.z(), 1.0);
             world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
new file mode 100644
index 000000000..1abcba3b9
--- /dev/null
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -0,0 +1,333 @@
+#include <cmath>
+#include <functional>
+#include <opencv2/calib3d.hpp>
+#include <optional>
+#include <sstream>
+#include <vector>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/geometry/camera.hh"
+#include "cxxopts.hpp"
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/four_seasons_parser.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Point2.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "gtsam/geometry/triangulation.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/linear/NoiseModel.h"
+#include "gtsam/navigation/GPSFactor.h"
+#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
+#include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/BetweenFactor.h"
+#include "gtsam/slam/PriorFactor.h"
+#include "gtsam/slam/ProjectionFactor.h"
+#include "visualization/opencv/opencv_viz.hh"
+
+int main(int argc, const char **argv) {
+    using namespace robot::experimental::learn_descriptors;
+    // clang-format off
+    cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser");
+    options.add_options()
+    ("data_dir", "Path to dataset root directory", cxxopts::value<std::string>())
+    ("calibration_dir", "Path to dataset calibration directory", cxxopts::value<std::string>())
+    ("help", "Print usage");
+    // clang-format on
+
+    auto args = options.parse(argc, argv);
+
+    const auto check_required = [&](const std::string &opt) {
+        if (args.count(opt) == 0) {
+            std::cout << "Missing " << opt << " argument" << std::endl;
+            std::cout << options.help() << std::endl;
+            std::exit(1);
+        }
+    };
+
+    if (args.count("help")) {
+        std::cout << options.help() << std::endl;
+        return 0;
+    }
+
+    check_required("data_dir");
+    check_required("calibration_dir");
+
+    const std::filesystem::path path_data = args["data_dir"].as<std::string>();
+    const std::filesystem::path path_calibration = args["calibration_dir"].as<std::string>();
+    FourSeasonsParser parser(path_data, path_calibration);
+
+    std::cout << "refactor_sfm_mvp!" << std::endl;
+    // const std::vector<int> indices{581, 609,
+    //                                633};  // indices with all data fields on neighborhood_5_train
+    // const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};  // indices with
+    // get indices with fully populated img_pt containing full gps data and reference
+    // const std::vector<int> indices = [&parser]() -> std::vector<int> {
+    //     std::vector<int> tmp;
+    //     for (size_t i = 0; i < parser.num_images(); i++) {
+    //         const ImagePointFourSeasons img_pt = parser.get_image_point(i);
+    //         if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs &&
+    //             img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude)
+    //             tmp.push_back(i);
+    //     }
+    //     return tmp;
+    // }();
+    constexpr bool visualize = false;
+    // all data fields on neighborhood_5_train const std::vector<int> indices = []() {
+    //     std::vector<int> tmp;
+    //     for (int i = 660; i < 681; i += 10) {
+    //         tmp.push_back(i);
+    //     }
+    //     return tmp;
+    // }();
+    // FourSeasonsParser parser()
+    // const size_t img_width = img_pt_first.width, img_height =
+    // img_pt_first.height;
+
+    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
+    //                         gtsam::Pose3(T_world_camera0.matrix()));
+    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
+                          true, false};
+    Frontend frontend(params);
+
+    for (size_t i = 660, i < 1000; i += 5) {
+        frontend.add_images(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)});
+    }
+    frontend.populate_frames();
+    frontend.match_frames_and_build_tracks();
+
+    // ############# BACKEND ###############
+    gtsam::Values initial_estimate_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::Vector6 prior_sigmas;
+    prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
+    gtsam::Vector3 gps_sigmas;
+    gps_sigmas << 2.1, 2.1, 0.1;
+    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
+    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
+        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
+
+    // add gps factors
+    const std::vector<Frame> &frames = frontned.frames();
+    graph.add(PriorFactor<Pose3>(Symbol('x', 0), gtsam::Pose3::Identity(),
+                                 noiseModel::Constrained::All(6)  // effectively fixes the pose
+                                 ));
+    initial_estimate_.insert(Symbol('x', 0), gtsam::Pose3::Identity());
+    for (size_t i = 1; i < frames.size(); i++) {
+        if (frames[i].) graph.add(gtsam::GPSFactor(Symbol('x', i), ));
+    }
+
+    // add filtered points to graph
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
+    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
+    std::vector<gtsam::Symbol> symbols_pose;
+    std::vector<gtsam::Symbol> symbols_landmarks;
+    for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) {
+        // LandmarkId lmk_id = lmk_id_pt.first;
+        // const gtsam::Point3 kpt_in_world = lmk_id_pt.second;
+        FeatureTrack feature_track = feature_tracks.at(lmk_id);
+        const gtsam::Symbol symbol_lmk('l', lmk_id);
+        for (const auto &[frame_id, obs] : feature_track.obs_) {
+            initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
+            symbols_landmarks.push_back(symbol_lmk);
+            symbols_landmarks_values_iter.emplace(symbol_lmk,
+                                                  std::vector<gtsam::Point3>{kpt_in_world});
+            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id),
+                symbol_lmk, K);
+        }
+    }
+
+    // add gps factors
+    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+        gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise);
+    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0));
+    symbols_pose.push_back(gtsam::Symbol('x', 0));
+    symbols_poses_values_iter.emplace(
+        gtsam::Symbol('x', 0), std::vector<gtsam::Pose3>{id_to_initial_world_from_cam.at(0)});
+    for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) {
+        if (frame_id == 0) {
+            continue;
+        }
+        const gtsam::Symbol key_cam('x', frame_id);
+        symbols_pose.push_back(key_cam);
+        gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation();
+        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_cam_in_world, gps_noise);
+        initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam);
+        symbols_poses_values_iter.emplace(key_cam,
+                                          std::vector<gtsam::Pose3>{estimate_world_from_cam});
+        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world));
+    }
+
+    // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
+    std::cout << "heart beat 3" << std::endl;
+
+    constexpr bool local_optimizations = false;
+    if (local_optimizations) {
+        // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number
+        // of matches
+        // do local optimizations and add to iter cache
+        // const int window = 2;
+        std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl;
+        for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) {
+            std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl;
+        }
+        for (size_t i = 0; i < indices.size() - 1; i++) {
+            std::cout << "Local optimization " << i << std::endl;
+            gtsam::Values local_estimate_;
+            gtsam::NonlinearFactorGraph local_graph_;
+
+            std::vector<gtsam::Symbol> symbols_poses{gtsam::Symbol('x', i),
+                                                     gtsam::Symbol('x', i + 1)};
+            std::vector<gtsam::Symbol> symbols_lmks;
+
+            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+                gtsam::PriorFactor<gtsam::Pose3>(
+                    symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise));
+            std::cout << "fuck" << std::endl;
+            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
+                gtsam::PriorFactor<gtsam::Pose3>(
+                    symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise));
+            std::cout << "mega fuck" << std::endl;
+            local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i));
+            local_estimate_.insert_or_assign(symbols_poses[1],
+                                             id_to_initial_world_from_cam.at(i + 1));
+
+            std::vector<cv::DMatch> matches = frontend.compute_matches(
+                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            // DIAL TO MESS WITH
+            frontend.enforce_bijective_matches(matches);
+            std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
+                                                      id_to_initial_world_from_cam.at(i + 1)};
+
+            std::vector<robot::geometry::VizPose> viz_world_from_cams{
+                {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"},
+                {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}};
+            std::vector<robot::geometry::VizPoint> viz_lmks;
+            for (const cv::DMatch match : matches) {
+                std::vector<gtsam::Point2> feat_kpts;
+                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+                feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
+                feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
+
+                const std::pair<FrameId, KeypointCV> key_lmk_id =
+                    std::make_pair(frames[i].id_, kpt_cam0);
+                if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) {
+                    LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id);
+                    if (lmk_triangulated_map_filtered.find(match_lmk_id) ==
+                        lmk_triangulated_map_filtered.end()) {
+                        continue;
+                    }
+                    std::cout << "good" << std::endl;
+                    // do nothing
+                    // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
+                    // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
+                } else {
+                    std::cerr << "ERROR: this shouldn't happen right?" << std::endl;
+                    FeatureTrack feature_track(frames[i].id_, kpt_cam0);
+                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
+                    feature_tracks.emplace(lmk_id, feature_track);
+                    lmk_id_map.emplace(key_lmk_id, lmk_id);
+                    lmk_id++;
+                }
+                std::cout << "oog" << std::endl;
+                const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id));
+                // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
+                //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
+                //     std::cerr << "UH OH" << std::endl;
+                // } else {
+                //     std::cout << "cool" << std::endl;
+                // }
+                symbols_lmks.push_back(symbol_lmk);
+                local_graph_
+                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                        feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K);
+                local_graph_
+                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
+                        feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K);
+
+                std::optional<gtsam::Point3> kpt_in_world =
+                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K);
+                // gtsam::Point3 kpt_in_world;
+                // bool triangulate_success =
+                //     detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world);
+                if (kpt_in_world) {
+                    local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world);
+                    viz_lmks.emplace_back(*kpt_in_world);
+                    if (symbols_landmarks_values_iter.find(symbol_lmk) !=
+                        symbols_landmarks_values_iter.end()) {
+                        symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world);
+                    } else {
+                        symbols_landmarks_values_iter.emplace(
+                            symbol_lmk, std::vector<gtsam::Point3>{*kpt_in_world});
+                    }
+                }
+            }
+            std::cout << "setup complete!" << std::endl;
+            if (visualize)
+                robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(),
+                                           true, true, "Local Optimization " + std::to_string(i));
+
+            const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
+                local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
+
+            for (const gtsam::Symbol &symbol_pose : symbols_poses) {
+                const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
+                symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
+            }
+            for (const gtsam::Symbol &symbol_lmk : symbols_lmks) {
+                const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
+                symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
+            }
+        }
+
+        std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
+
+        for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose :
+             symbols_poses_values_iter) {
+            initial_estimate_.insert_or_assign(sym_pose.first,
+                                               detail_sfm::averagePoses(sym_pose.second));
+        }
+        for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
+             symbols_landmarks_values_iter) {
+            initial_estimate_.insert_or_assign(sym_lmk.first,
+                                               detail_sfm::averagePoints(sym_lmk.second));
+        }
+    }
+
+    // do global optimization
+    const gtsam::Values result =
+        detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0);
+
+    // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
+    double sum_traj_error = 0;
+    double sum_rot_error = 0;
+    for (size_t i = 0; i < symbols_pose.size(); i++) {
+        const gtsam::Pose3 traj_pose = result.at<gtsam::Pose3>(symbols_pose[i]);
+        sum_traj_error += std::pow(
+            (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2);
+        sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i],
+                                                    Eigen::Isometry3d(traj_pose.matrix()));
+    }
+    std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
+    double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
+    double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
+    std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
+
+    std::cout << "about to visualize result" << std::endl;
+    result.print();
+    detail_sfm::graph_values(result, "Result", symbols_pose, std::vector<gtsam::Symbol>());
+    // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index b4f98a01c..a63015b66 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -235,7 +235,7 @@ TEST(SFMMvp, sfm_building_manual_global) {
         // graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
         //                                                         T_world_cam_gtsam, pose_noise);
 
-        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
+        Frame frame{id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())};
 
         std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
             frontend.extract_features(img_undistorted);
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
index ad1130338..c4f56d736 100644
--- a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
+++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
@@ -1,8 +1,7 @@
-#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
-
 #include <vector>
 
 #include "Eigen/Dense"
+#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
 #include "gtest/gtest.h"
 #include "visualization/opencv/opencv_viz.hh"
 
diff --git a/experimental/learn_descriptors/structure_from_motion_types.hh b/experimental/learn_descriptors/structure_from_motion_types.hh
index 582a3c70a..e8d0595bc 100644
--- a/experimental/learn_descriptors/structure_from_motion_types.hh
+++ b/experimental/learn_descriptors/structure_from_motion_types.hh
@@ -1,16 +1,16 @@
 #pragma once
 
-#include <cstdint>
+#include <cstddef>
 #include <vector>
 
 #include "gtsam/geometry/Point3.h"
 #include "opencv2/opencv.hpp"
 
-using Timestamp = std::int64_t;
+using Timestamp = size_t;
 
-using FrameId = std::uint64_t;  // Frame id is used as the index of gtsam symbol
-                                // (not as a gtsam key).
-using LandmarkId = long;        // -1 for invalid landmarks
+using FrameId = size_t;  // Frame id is used as the index of gtsam symbol
+                         // (not as a gtsam key).
+using LandmarkId = size_t;
 using LandmarkIds = std::vector<LandmarkId>;
 using Landmark = gtsam::Point3;
 using Landmarks = std::vector<Landmark, Eigen::aligned_allocator<Landmark>>;

From 241aefe9d880eaeedd6989f294f0bccd9ef44305 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 11 Jul 2025 15:08:25 -0400
Subject: [PATCH 31/45] WIP

---
 experimental/learn_descriptors/BUILD          |  25 +
 .../learn_descriptors/camera_calibration.hh   |   2 +-
 .../learn_descriptors/four_seasons_parser.cc  |  17 +-
 .../four_seasons_parser_detail.cc             |   5 +-
 .../four_seasons_parser_example_viz.cc        |   7 +-
 experimental/learn_descriptors/frame.hh       |  14 +-
 experimental/learn_descriptors/frontend.cc    | 134 ++---
 .../learn_descriptors/frontend_definitions.hh |   3 +-
 .../image_point_four_seasons.cc               |  11 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  |  41 +-
 .../learn_descriptors/refactor_sfm_mvp.cc     | 456 +++++++++---------
 experimental/learn_descriptors/sfm_mvp.cc     |  18 +-
 12 files changed, 366 insertions(+), 367 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index facd64bdf..2c647712b 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -347,6 +347,7 @@ cc_library(
         ":image_point_four_seasons",
         ":structure_from_motion_types",
         "//common:check",
+        "//common/geometry:camera",
         "@opencv",
         "@gtsam",
         "@eigen",
@@ -437,4 +438,28 @@ cc_library(
     deps = [
         "@opencv"
     ]
+)
+
+cc_binary(
+    name = "refactor_sfm_mvp",
+    srcs = ["refactor_sfm_mvp.cc"],
+    copts = [
+        "-Wno-error=unused-parameter",
+        "-Wno-error=unused-function",
+    ],
+    deps = [
+        ":four_seasons_parser",
+        ":frame",
+        ":frontend",
+        ":spatial_test_scene_cube",
+        ":structure_from_motion_types",
+        "//common/geometry:camera",
+        "//visualization/opencv:opencv_viz",
+        "@boost//:smart_ptr",
+        "@cxxopts",
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+        ":camera_calibration"
+    ],
 )
\ No newline at end of file
diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh
index cfbd6fb42..1e985acad 100644
--- a/experimental/learn_descriptors/camera_calibration.hh
+++ b/experimental/learn_descriptors/camera_calibration.hh
@@ -7,7 +7,7 @@ struct CameraCalibrationFisheye {
     double fx, fy, cx, cy, k1, k2, k3, k4;
 
     cv::Mat k_mat() {
-        cv::Mat K_mat = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, py, 0, 0, 1);
+        cv::Mat K_mat = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
         return K_mat;
     }
 
diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index a295851f0..0e2eb2be1 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -52,7 +52,6 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
         ImagePointFourSeasons img_pt;
         img_pt.id = id;
         img_pt.K = cal_;
-        std::cout << "heartbeat 5" << std::endl;
         img_pt.shared_static_transforms = shared_transforms_;
         img_pt.seq = std::stoull(
             pair_time_data.second[static_cast<size_t>(txt_parser_help::ImgIdx::TIME_NS)]);
@@ -77,10 +76,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                 std::stod(
                     parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Z)]));
             img_pt.AS_w_from_gnss_cam = liegroups::SE3(R_gps_cam_from_AS_w, t_gps_cam_from_AS_w);
-        } else {
-            std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id
-                      << std::endl;
-        }
+        }  // else {
+        //     std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id
+        //               << std::endl;
+        // }
         if (vio_poses_time_map.find(time_key) != vio_poses_time_map.end()) {
             const std::vector<std::string>& parsed_line_vio = vio_poses_time_map.at(time_key);
             Eigen::Vector3d t_AS_w_from_vio_cam(
@@ -95,10 +94,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                 std::stod(
                     parsed_line_vio[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Z)]));
             img_pt.AS_w_from_vio_cam = liegroups::SE3(R_AS_w_from_vio_cam, t_AS_w_from_vio_cam);
-        } else {
-            std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id
-                      << std::endl;
-        }
+        }  // else {
+        //     std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id
+        //               << std::endl;
+        // }
         img_pt_vector_.push_back(img_pt);
         id++;
     }
diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc
index d9dc824a8..65dac2893 100644
--- a/experimental/learn_descriptors/four_seasons_parser_detail.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc
@@ -111,6 +111,7 @@ const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio
 }  // namespace txt_parser_help
 
 namespace gps_parser_help {
+
 size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) {
     std::chrono::sys_days date = std::chrono::year_month_day{
         std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month},
@@ -120,6 +121,7 @@ size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_da
     std::chrono::sys_time<std::chrono::nanoseconds> timestamp = date + utc_time_day_ns;
     return timestamp.time_since_epoch().count();
 }
+
 TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) {
     TimeGPSList time_list_gps;
     std::ifstream file_gps(path_gps);
@@ -187,12 +189,14 @@ std::vector<std::string> split_nmea_sentence(const std::string& sentence) {
 
     return fields;
 }
+
 double time_of_day_seconds(const double utc_time_hhmmss) {
     int hours = static_cast<int>(utc_time_hhmmss / 10000);
     int minutes = static_cast<int>((utc_time_hhmmss - hours * 10000) / 100);
     double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100;
     return hours * 3600 + minutes * 60 + seconds;
 }
+
 std::optional<GSTData> parse_gpgst(const std::string& sentence) {
     if (sentence.substr(0, 6) != "$GPGST") {
         return std::nullopt;
@@ -218,6 +222,5 @@ std::optional<GSTData> parse_gpgst(const std::string& sentence) {
 
     return gst;
 }
-
 }  // namespace gps_parser_help
 }  // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index 9befd4f2a..88511a6e3 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -69,7 +69,6 @@ int main(int argc, const char** argv) {
                                           scale_mat *
                                           Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()));
 
-        viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
         viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
         if (img_pt.gps_gcs) {
             if (img_pt.gps_gcs->seq > img_pt.seq) {
@@ -105,7 +104,11 @@ int main(int argc, const char** argv) {
             Eigen::Vector4d gps_in_w =
                 Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) *
                 ECEF_from_gps_hom;
-            viz_points.emplace_back(gps_in_w.head<3>(), "x_gps_" + std::to_string(i));
+            const Eigen::Isometry3d cam_from_gps(
+                (parser.cam_from_imu() * parser.gps_from_imu().inverse()).matrix());
+            Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() + cam_from_gps.translation();
+            viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i));
+            viz_points.emplace_back(gps_in_w.head<3>(), "x_gps" + std::to_string(i));
             const Eigen::Vector3d gps_from_ref_in_world =
                 gps_in_w.head<3>() - w_from_gnss_cam.translation();
             std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl;
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index 322a823ac..70be07d27 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -11,11 +11,19 @@
 namespace robot::experimental::learn_descriptors {
 class Frame {
    public:
+    Frame(FrameId id, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K, const KeypointsCV kpts,
+          const cv::Mat descriptors)
+        : id_(id),
+          undistorted_img_(img),
+          K_(std::move(K)),
+          kpts_(kpts),
+          descriptors_(descriptors) {}
+
     void add_keypoints(const KeypointsCV& kpts);
     void assign_descriptors(const cv::Mat& descriptors);
 
-    const KeypointsCV get_keypoints() { return kpts_; };
-    const cv::Mat get_descriptors() { return descriptors_; };
+    const KeypointsCV& keypoint() { return kpts_; };
+    const cv::Mat& descriptors() { return descriptors_; };
 
     const FrameId id_;
     const cv::Mat undistorted_img_;
@@ -24,7 +32,7 @@ class Frame {
     cv::Mat descriptors_;
     std::optional<gtsam::Pose3> world_from_cam_groundtruth_;
     std::optional<gtsam::Point3> cam_in_world_initial_guess_;
-    std::optional<gtsam::Matrix3> translation_covariance_in_cam_;
     std::optional<gtsam::Rot3> world_from_cam_initial_guess_;
+    std::optional<gtsam::Matrix3> translation_covariance_in_cam_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index dfe24fa1d..8b7d5077a 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -1,5 +1,6 @@
 #include "experimental/learn_descriptors/frontend.hh"
 
+#include <cmath>
 #include <cstddef>
 #include <optional>
 #include <unordered_map>
@@ -9,10 +10,13 @@
 #include "Eigen/Core"
 #include "Eigen/Geometry"
 #include "common/check.hh"
+#include "common/geometry/camera.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/image_point.hh"
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
 #include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/triangulation.h"
 #include "opencv2/opencv.hpp"
 
 namespace detail_sfm {
@@ -77,62 +81,6 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
     return p_lmk_in_world;
 }
 
-void graph_values(const gtsam::Values &values, const std::string &window_name,
-                  const std::vector<gtsam::Symbol> &symbols_pose,
-                  const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<robot::geometry::VizPose> final_poses;
-    std::vector<robot::geometry::VizPoint> final_lmks;
-    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
-        final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
-                                 symbol_pose.string());
-    }
-    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
-        if (!values.exists(symbol_lmk)) {
-            std::cout << "WTF " << symbol_lmk << std::endl;
-        }
-        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
-    }
-    std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
-    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
-                               window_name);
-}
-
-gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
-                             const std::vector<gtsam::Symbol> &symbols_pose,
-                             const std::vector<gtsam::Symbol> &symbols_landmarks,
-                             const int num_epochs = 5, bool viz_itr = false) {
-    gtsam::LevenbergMarquardtParams params;
-    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-    params.maxIterations = 1;
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
-
-    double prev_error = optimizer.error();
-    typedef int epoch;
-    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
-                       const std::vector<gtsam::Symbol> &)>
-        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
-                                   const std::vector<gtsam::Symbol> &symbols_pose,
-                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
-            std::cout << "iteration " << iter << " complete!";
-            std::string window_name = "Iteration_" + std::to_string(iter);
-            graph_values(vals, window_name, symbols_pose, symbols_landmarks);
-        };
-
-    for (int i = 0; i < num_epochs; i++) {
-        optimizer.iterate();
-        double curr_error = optimizer.error();
-
-        if (viz_itr) {
-            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
-        }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
-        }
-    }
-    return optimizer.values();
-}
-
 gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
     if (poses.empty()) throw std::runtime_error("Empty pose vector");
 
@@ -219,31 +167,39 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) {
 
 void Frontend::populate_frames() {
     FrameId id = frames_.size();
-    for (const ImageAndPoint &img_and_pt : image_and_points_) {
+    for (const ImageAndPoint &img_and_pt : images_and_points_) {
         const ImagePoint &img_pt = img_and_pt.img_pt;
-        cv::Mat &img_undistorted;
-        cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->K_mat(),
-                                    img_pt.K->D_mat(), img_pt.K->K_mat(), img.size());
-        auto [kpts, features] = extract_feaures(img_undistorted);
+        cv::Mat img_undistorted;
+        cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->k_mat(),
+                                    img_pt.K->d_mat(), img_pt.K->k_mat(),
+                                    img_and_pt.image_distorted.size());
+        auto [kpts, descriptors] = extract_features(img_undistorted);
+        KeypointsCV kpts_cv;
+        for (const cv::KeyPoint &kpt : kpts) {
+            kpts_cv.push_back(kpt.pt);
+        }
 
         gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(
             img_pt.K->fx, img_pt.K->fy, 0, img_pt.K->cx, img_pt.K->cy);
 
-        Frame frame{id, img_undistorted, K, kpts, features};
+        if (!K) std::cout << "UH OH" << std::endl;
+
+        Frame frame(id, img_undistorted, K, kpts_cv, descriptors);
         const std::optional<Eigen::Isometry3d> maybe_world_from_cam_grnd_trth =
             img_pt.world_from_cam_ground_truth();
         if (maybe_world_from_cam_grnd_trth) {
-            frame.world_from_cam_ground_truth =
+            frame.world_from_cam_groundtruth_ =
                 gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix());
         }
         const std::optional<Eigen::Vector3d> maybe_cam_in_world = img_pt.cam_in_world();
         if (maybe_cam_in_world) {
-            frame.cam_in_world_initial_guess = *maybe_cam_in_world;
+            frame.cam_in_world_initial_guess_ = *maybe_cam_in_world;
         }
+        if (id == 0) frame.world_from_cam_initial_guess_ = gtsam::Rot3::Identity();
         const std::optional<gtsam::Matrix3> maybe_translation_covariance_in_cam =
             img_pt.translation_covariance_in_cam();
         if (maybe_translation_covariance_in_cam) {
-            frame.translation_covariance_in_cam = *maybe_translation_covariance_in_cam;
+            frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam;
         }
         frames_.push_back(frame);
         id++;
@@ -254,27 +210,32 @@ void Frontend::match_frames_and_build_tracks() {
     if (params_.exhaustive) {
         for (size_t i = 0; i < frames_.size() - 1; i++) {
             for (size_t j = i + 1; j < frames_.size(); j++) {
-                std::vector<cv::DMatch> matches = frontend.compute_matches(
-                    frames_[i].get_descriptors(), frames_[j].get_descriptors());
-                frontend.enforce_bijective_buffer_matches(matches);
+                std::vector<cv::DMatch> matches =
+                    compute_matches(frames_[i].descriptors(), frames_[j].descriptors());
+                enforce_bijective_buffer_matches(matches);
                 if (matches.size() < 5) {
                     continue;
                 }
 
                 std::vector<cv::KeyPoint> cv_kpts_1;
                 std::vector<cv::KeyPoint> cv_kpts_2;
-                for (const KeypointCV &kpt : frames_[i].get_keypoints()) {
+                for (const KeypointCV &kpt : frames_[i].keypoint()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_1.push_back(cv_kpt);
                 }
-                for (const KeypointCV &kpt : frames_[j].get_keypoints()) {
+                for (const KeypointCV &kpt : frames_[j].keypoint()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_2.push_back(cv_kpt);
                 }
                 std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
-                    robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
+                    robot::geometry::estimate_cam0_from_cam1(
+                        cv_kpts_1, cv_kpts_2, matches,
+                        images_and_points_[i]
+                            .img_pt.K->k_mat());  // fine for now since all cameras have the same K,
+                                                  // how to reconcile later though...?
+
                 if (!scale_cam0_from_cam1) {
                     continue;
                 }
@@ -286,12 +247,12 @@ void Frontend::match_frames_and_build_tracks() {
                 // measurement and the unit translation vector from estimate_cam0_from_cam1
 
                 // can try averaging poses here as well
-                frames[j].world_from_cam_initial_guess_.emplace(
+                frames_[j].world_from_cam_initial_guess_.emplace(
                     frames_[i].world_from_cam_initial_guess_->matrix() *
-                    scale_cam0_from_cam1->matrix());
+                    scale_cam0_from_cam1->linear().matrix());
                 for (const cv::DMatch match : matches) {
-                    const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx];
-                    const KeypointCV kpt_cam1 = frames_[j].get_keypoints()[match.trainIdx];
+                    const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = frames_[j].keypoint()[match.trainIdx];
 
                     auto key = std::make_pair(frames_[i].id_, kpt_cam0);
                     if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
@@ -311,28 +272,29 @@ void Frontend::match_frames_and_build_tracks() {
             }
         }
     } else {  // successive only
-        for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::vector<cv::DMatch> matches = frontend.compute_matches(
-                frames_[i].get_descriptors(), frames_[i + 1].get_descriptors());
-            frontend.enforce_bijective_buffer_matches(matches);
+        for (size_t i = 0; i < frames_.size() - 1; i++) {
+            std::vector<cv::DMatch> matches =
+                compute_matches(frames_[i].descriptors(), frames_[i + 1].descriptors());
+            enforce_bijective_buffer_matches(matches);
             if (matches.size() < 5) {
                 continue;
             }
 
             std::vector<cv::KeyPoint> cv_kpts_1;
             std::vector<cv::KeyPoint> cv_kpts_2;
-            for (const KeypointCV &kpt : frames_[i].get_keypoints()) {
+            for (const KeypointCV &kpt : frames_[i].keypoint()) {
                 cv::KeyPoint cv_kpt;
                 cv_kpt.pt = kpt;
                 cv_kpts_1.push_back(cv_kpt);
             }
-            for (const KeypointCV &kpt : frames_[i + 1].get_keypoints()) {
+            for (const KeypointCV &kpt : frames_[i + 1].keypoint()) {
                 cv::KeyPoint cv_kpt;
                 cv_kpt.pt = kpt;
                 cv_kpts_2.push_back(cv_kpt);
             }
             std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
-                robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
+                robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches,
+                                                         images_and_points_[i].img_pt.K->k_mat());
             if (!scale_cam0_from_cam1) {
                 continue;
             }
@@ -340,11 +302,11 @@ void Frontend::match_frames_and_build_tracks() {
                         "This rotation should be populated.");
             frames_[i + 1].world_from_cam_initial_guess_.emplace(
                 frames_[i].world_from_cam_initial_guess_->matrix() *
-                scale_cam0_from_cam1->matrix());
+                scale_cam0_from_cam1->linear().matrix());
 
             for (const cv::DMatch match : matches) {
-                const KeypointCV kpt_cam0 = frames_[i].get_keypoints()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames_[i + 1].get_keypoints()[match.trainIdx];
+                const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames_[i + 1].keypoint()[match.trainIdx];
 
                 auto key = std::make_pair(frames_[i].id_, kpt_cam0);
                 if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
@@ -356,7 +318,7 @@ void Frontend::match_frames_and_build_tracks() {
                 } else {
                     FeatureTrack feature_track(i, kpt_cam0);
                     feature_track.obs_.emplace_back(frames_[i + 1].id_, kpt_cam1);
-                    feature_tracks_.emplace(lmk_id, feature_track);
+                    feature_tracks_.push_back(feature_track);
                     lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0),
                                         feature_tracks_.size() - 1);
                 }
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
index e2822ef42..1a96aa6bb 100644
--- a/experimental/learn_descriptors/frontend_definitions.hh
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -5,6 +5,7 @@
 #include <vector>
 
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Point3.h"
 #include "opencv2/opencv.hpp"
 
 namespace std {
@@ -29,7 +30,7 @@ class FeatureTrack {
 
     void print() const {
         std::cout << "Feature track with cameras: ";
-        for (size_t i = 0u; i < obs_.size(); i++) {
+        for (size_t i = 0; i < obs_.size(); i++) {
             std::cout << " " << obs_[i].first << " ";
         }
         std::cout << std::endl;
diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc
index 73f531d93..2ad0468fa 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.cc
+++ b/experimental/learn_descriptors/image_point_four_seasons.cc
@@ -27,10 +27,13 @@ std::optional<Eigen::Vector3d> ImagePointFourSeasons::cam_in_world() const {
     if (!gps_gcs) return std::nullopt;
     const Eigen::Vector3d gps_in_ECEF(gps::ecef_from_lla(
         Eigen::Vector3d(gps_gcs->latitude, gps_gcs->longitude, *gps_gcs->altitude)));
-    Eigen::Vector3d cam_in_world(shared_static_transforms->cam_from_imu *
-                                 shared_static_transforms->gps_from_imu.inverse() *
-                                 shared_static_transforms->w_from_gpsw *
-                                 shared_static_transforms->e_from_gpsw.inverse() * gps_in_ECEF);
+    const Eigen::Vector3d gps_in_world(shared_static_transforms->w_from_gpsw *
+                                       shared_static_transforms->e_from_gpsw.inverse() *
+                                       gps_in_ECEF);
+    const Eigen::Isometry3d cam_from_gps(
+        (shared_static_transforms->cam_from_imu * shared_static_transforms->gps_from_imu.inverse())
+            .matrix());
+    const Eigen::Vector3d cam_in_world(gps_in_world + cam_from_gps.translation());
     return cam_in_world;
 }
 
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index 2e056b71a..bafd667b5 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -302,13 +302,14 @@ int main(int argc, const char **argv) {
     // FourSeasonsParser parser()
     // const size_t img_width = img_pt_first.width, img_height =
     // img_pt_first.height;
-    const CameraCalibrationFisheye cal_parser_left_cam = parser.camera_calibration();
+    const std::shared_ptr<CameraCalibrationFisheye> cal_parser_left_cam =
+        parser.camera_calibration();
     gtsam::Cal3_S2::shared_ptr K =
-        boost::make_shared<gtsam::Cal3_S2>(cal_parser_left_cam.fx, cal_parser_left_cam.fy, 0,
-                                           cal_parser_left_cam.cx, cal_parser_left_cam.cy);
+        boost::make_shared<gtsam::Cal3_S2>(cal_parser_left_cam->fx, cal_parser_left_cam->fy, 0,
+                                           cal_parser_left_cam->cx, cal_parser_left_cam->cy);
     cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
-    cv::Mat D_mat = (cv::Mat_<double>(4, 1) << cal_parser_left_cam.k1, cal_parser_left_cam.k2,
-                     cal_parser_left_cam.k3, cal_parser_left_cam.k4);
+    cv::Mat D_mat = (cv::Mat_<double>(4, 1) << cal_parser_left_cam->k1, cal_parser_left_cam->k2,
+                     cal_parser_left_cam->k3, cal_parser_left_cam->k4);
 
     // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
     //                         gtsam::Pose3(T_world_camera0.matrix()));
@@ -435,8 +436,8 @@ int main(int argc, const char **argv) {
             // std::cout << "i: " << i << std::endl;
             for (size_t j = i + 1; j < indices.size(); j++) {
                 // std::cout << "j: " << j << std::endl;
-                std::vector<cv::DMatch> matches = frontend.compute_matches(
-                    frames[i].get_descriptors(), frames[j].get_descriptors());
+                std::vector<cv::DMatch> matches =
+                    frontend.compute_matches(frames[i].descriptors(), frames[j].descriptors());
                 // DIAL TO MESS WITH
                 frontend.enforce_bijective_buffer_matches(matches);
 
@@ -446,12 +447,12 @@ int main(int argc, const char **argv) {
 
                 std::vector<cv::KeyPoint> cv_kpts_1;
                 std::vector<cv::KeyPoint> cv_kpts_2;
-                for (const KeypointCV &kpt : frames[i].get_keypoints()) {
+                for (const KeypointCV &kpt : frames[i].keypoint()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_1.push_back(cv_kpt);
                 }
-                for (const KeypointCV &kpt : frames[j].get_keypoints()) {
+                for (const KeypointCV &kpt : frames[j].keypoint()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_2.push_back(cv_kpt);
@@ -470,8 +471,8 @@ int main(int argc, const char **argv) {
                 // std::cout << "heartbeat 2" << std::endl;
                 id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
                 for (const cv::DMatch match : matches) {
-                    const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-                    const KeypointCV kpt_cam1 = frames[j].get_keypoints()[match.trainIdx];
+                    const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = frames[j].keypoint()[match.trainIdx];
 
                     auto key = std::make_pair(frames[i].id_, kpt_cam0);
                     if (lmk_id_map.find(key) != lmk_id_map.end()) {
@@ -491,12 +492,12 @@ int main(int argc, const char **argv) {
         std::cout << "done processing matches" << std::endl;
     } else {  // successive only
         for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::vector<cv::DMatch> matches = frontend.compute_matches(
-                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            std::vector<cv::DMatch> matches =
+                frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
             frontend.enforce_bijective_buffer_matches(matches);
             for (const cv::DMatch match : matches) {
-                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+                const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
 
                 auto key = std::make_pair(frames[i].id_, kpt_cam0);
                 if (lmk_id_map.find(key) != lmk_id_map.end()) {
@@ -661,8 +662,8 @@ int main(int argc, const char **argv) {
             local_estimate_.insert_or_assign(symbols_poses[1],
                                              id_to_initial_world_from_cam.at(i + 1));
 
-            std::vector<cv::DMatch> matches = frontend.compute_matches(
-                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            std::vector<cv::DMatch> matches =
+                frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
             // DIAL TO MESS WITH
             frontend.enforce_bijective_matches(matches);
             std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
@@ -674,8 +675,8 @@ int main(int argc, const char **argv) {
             std::vector<robot::geometry::VizPoint> viz_lmks;
             for (const cv::DMatch match : matches) {
                 std::vector<gtsam::Point2> feat_kpts;
-                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+                const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
+                const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
                 feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
                 feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
 
@@ -695,7 +696,7 @@ int main(int argc, const char **argv) {
                     std::cerr << "ERROR: this shouldn't happen right?" << std::endl;
                     FeatureTrack feature_track(frames[i].id_, kpt_cam0);
                     feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                    feature_tracks.emplace(lmk_id, feature_track);
+                    feature_tracks.push_back(feature_track);
                     lmk_id_map.emplace(key_lmk_id, lmk_id);
                     lmk_id++;
                 }
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index 1abcba3b9..22d63079c 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -1,5 +1,6 @@
 #include <cmath>
 #include <functional>
+#include <iostream>
 #include <opencv2/calib3d.hpp>
 #include <optional>
 #include <sstream>
@@ -32,6 +33,136 @@
 #include "gtsam/slam/ProjectionFactor.h"
 #include "visualization/opencv/opencv_viz.hh"
 
+std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+                                                 const std::vector<gtsam::Point2> &cam_obs,
+                                                 gtsam::Cal3_S2::shared_ptr K,
+                                                 const double max_reproj_error = 2.0,
+                                                 const bool verbose = true) {
+    gtsam::Point3 p_lmk_in_world;
+    if (cam_poses.size() >= 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_lmk_in_world = gtsam::triangulatePoint3(
+                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+        } catch (const gtsam::TriangulationCheiralityException &e) {
+            // Handle the exception gracefully by logging and retaining the previous
+            // estimate or discard
+            if (verbose) {
+                std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: "
+                          << e.what() << ". Discarding involved keypoints." << std::endl;
+            }
+            return std::nullopt;
+        }
+    } else {
+        return std::nullopt;
+    }
+    // Optional: perform an explicit cheirality check
+    for (const auto &pose : cam_poses) {
+        // Transform point to the camera coordinate system.
+        // transformTo() converts a world point to the camera frame.
+        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
+        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            return std::nullopt;
+        }
+    }
+
+    // Cheirality & reprojection checks
+    for (size_t i = 0; i < cam_poses.size(); ++i) {
+        const auto &pose = cam_poses[i];
+        // Cheirality
+        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
+        if (p_cam.z() <= 0) {
+            if (verbose) {
+                std::cerr << "[attempt_triangulate] point behind camera " << i
+                          << " (z=" << p_cam.z() << ")\n";
+            }
+            return std::nullopt;
+        }
+        // Reprojection error
+        if (max_reproj_error > 0) {
+            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
+            const auto reproj = cam.project(p_lmk_in_world);
+            const double err = (reproj - cam_obs[i]).norm();
+            if (err > max_reproj_error) {
+                if (verbose) {
+                    std::cerr << "[attempt_triangulate] reprojection error too large on view " << i
+                              << " (" << err << " px)\n";
+                }
+                return std::nullopt;
+            }
+        }
+    }
+    return p_lmk_in_world;
+}
+
+void graph_values(const gtsam::Values &values, const std::string &window_name,
+                  const std::vector<gtsam::Symbol> &symbols_pose,
+                  const std::vector<gtsam::Symbol> &symbols_landmarks) {
+    std::vector<robot::geometry::VizPose> final_poses;
+    std::vector<robot::geometry::VizPoint> final_lmks;
+    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
+        final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
+                                 symbol_pose.string());
+    }
+    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
+        if (!values.exists(symbol_lmk)) {
+            std::cout << "WTF " << symbol_lmk << std::endl;
+        }
+        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
+    }
+    std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
+    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
+                               window_name);
+}
+
+gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
+                             const std::vector<gtsam::Symbol> &symbols_pose,
+                             const std::vector<gtsam::Symbol> &symbols_landmarks,
+                             const int num_epochs = 5, bool viz_itr = false) {
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
+
+    double prev_error = optimizer.error();
+    typedef int epoch;
+    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
+                       const std::vector<gtsam::Symbol> &)>
+        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
+                                   const std::vector<gtsam::Symbol> &symbols_pose,
+                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
+            std::cout << "iteration " << iter << " complete!";
+            std::string window_name = "Iteration_" + std::to_string(iter);
+            graph_values(vals, window_name, symbols_pose, symbols_landmarks);
+        };
+
+    for (int i = 0; i < num_epochs; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+
+        if (viz_itr) {
+            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
+        }
+        if (std::abs(prev_error - curr_error) < 1e-6) {
+            std::cout << "Converged at iteration " << i << "\n";
+            break;
+        }
+    }
+    return optimizer.values();
+}
+
+double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) {
+    Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation();
+
+    // 2. Compute trace and clamp to [-1,1] for numerical safety
+    double tr = R_err.trace();
+    double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0));
+
+    // 3. Recover angle (in radians)
+    return std::acos(cos_theta);
+}
+
 int main(int argc, const char **argv) {
     using namespace robot::experimental::learn_descriptors;
     // clang-format off
@@ -65,269 +196,132 @@ int main(int argc, const char **argv) {
     FourSeasonsParser parser(path_data, path_calibration);
 
     std::cout << "refactor_sfm_mvp!" << std::endl;
-    // const std::vector<int> indices{581, 609,
-    //                                633};  // indices with all data fields on neighborhood_5_train
-    // const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};  // indices with
-    // get indices with fully populated img_pt containing full gps data and reference
-    // const std::vector<int> indices = [&parser]() -> std::vector<int> {
-    //     std::vector<int> tmp;
-    //     for (size_t i = 0; i < parser.num_images(); i++) {
-    //         const ImagePointFourSeasons img_pt = parser.get_image_point(i);
-    //         if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs &&
-    //             img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude)
-    //             tmp.push_back(i);
-    //     }
-    //     return tmp;
-    // }();
-    constexpr bool visualize = false;
-    // all data fields on neighborhood_5_train const std::vector<int> indices = []() {
-    //     std::vector<int> tmp;
-    //     for (int i = 660; i < 681; i += 10) {
-    //         tmp.push_back(i);
-    //     }
-    //     return tmp;
-    // }();
-    // FourSeasonsParser parser()
-    // const size_t img_width = img_pt_first.width, img_height =
-    // img_pt_first.height;
-
-    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-    //                         gtsam::Pose3(T_world_camera0.matrix()));
+
     FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
                           true, false};
     Frontend frontend(params);
 
-    for (size_t i = 660, i < 1000; i += 5) {
-        frontend.add_images(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)});
+    for (size_t i = 660; i < 1000; i += 5) {
+        // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl;
+        frontend.add_image(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)});
     }
     frontend.populate_frames();
     frontend.match_frames_and_build_tracks();
 
+    std::cout << "first frame translation: " << *frontend.frames()[0].cam_in_world_initial_guess_
+              << std::endl;
+    std::cout << "ground truth translation: "
+              << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl;
+
+    std::vector<robot::geometry::VizPose> viz_poses_init;
+    for (const Frame &frame : frontend.frames()) {
+        Eigen::Isometry3d world_from_cam_init;
+        world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
+        world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_;
+        viz_poses_init.emplace_back(world_from_cam_init,
+                                    gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string());
+    }
+    robot::geometry::viz_scene(viz_poses_init, std::vector<robot::geometry::VizPoint>(),
+                               cv::viz::Color::brown(), true, true, "Frontend initial guesses");
+
     // ############# BACKEND ###############
     gtsam::Values initial_estimate_;
     gtsam::NonlinearFactorGraph graph_;
 
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = gtsam::noiseModel::Isotropic::Sigma(
+        2, 1.0);  // should probably mess with this, could also use essential matrix values to guide
+                  // this potentially
     gtsam::Vector6 prior_sigmas;
     prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
-    gtsam::Vector3 gps_sigmas;
-    gps_sigmas << 2.1, 2.1, 0.1;
+    gtsam::Vector3 gps_sigmas_fallback;
+    gps_sigmas_fallback << 0.5, 0.5, 1.5;
     gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
         gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
     gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
+        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas_fallback);
 
     // add gps factors
-    const std::vector<Frame> &frames = frontned.frames();
-    graph.add(PriorFactor<Pose3>(Symbol('x', 0), gtsam::Pose3::Identity(),
-                                 noiseModel::Constrained::All(6)  // effectively fixes the pose
-                                 ));
-    initial_estimate_.insert(Symbol('x', 0), gtsam::Pose3::Identity());
+    const std::vector<Frame> &frames = frontend.frames();
+    graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
+        gtsam::Symbol('x', 0), gtsam::Pose3::Identity(),
+        gtsam::noiseModel::Constrained::All(6)  // effectively fixes the pose
+        ));
+    initial_estimate_.insert(gtsam::Symbol('x', 0), gtsam::Pose3::Identity());
+    std::vector<gtsam::Symbol> symbols_pose{gtsam::Symbol('x', 0)};
+    std::vector<gtsam::Pose3> world_from_cam_initial_guess;
     for (size_t i = 1; i < frames.size(); i++) {
-        if (frames[i].) graph.add(gtsam::GPSFactor(Symbol('x', i), ));
+        // should do some interpolation here most likely
+        const gtsam::Pose3 world_from_cam_estimate(
+            frames[i].world_from_cam_initial_guess_ ? *frames[i].world_from_cam_initial_guess_
+                                                    : gtsam::Rot3::Identity(),
+            frames[i].cam_in_world_initial_guess_ ? *frames[i].cam_in_world_initial_guess_
+                                                  : gtsam::Point3());
+        world_from_cam_initial_guess.push_back(world_from_cam_estimate);
+        gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
+            frames[i].translation_covariance_in_cam_
+                ? gtsam::Vector3((*frames[i].translation_covariance_in_cam_)(0, 0),
+                                 (*frames[i].translation_covariance_in_cam_)(1, 1),
+                                 (*frames[i].translation_covariance_in_cam_)(2, 2))
+                : gps_sigmas_fallback);
+        const gtsam::Symbol cam_symbol('x', i);
+        if (frames[i].cam_in_world_initial_guess_)
+            graph_.add(
+                gtsam::GPSFactor(cam_symbol, *frames[i].cam_in_world_initial_guess_, gps_noise));
+        initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
+        symbols_pose.push_back(cam_symbol);
     }
 
-    // add filtered points to graph
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
-    std::vector<gtsam::Symbol> symbols_pose;
-    std::vector<gtsam::Symbol> symbols_landmarks;
-    for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) {
-        // LandmarkId lmk_id = lmk_id_pt.first;
-        // const gtsam::Point3 kpt_in_world = lmk_id_pt.second;
-        FeatureTrack feature_track = feature_tracks.at(lmk_id);
-        const gtsam::Symbol symbol_lmk('l', lmk_id);
-        for (const auto &[frame_id, obs] : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
-            symbols_landmarks.push_back(symbol_lmk);
-            symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                  std::vector<gtsam::Point3>{kpt_in_world});
-            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id),
-                symbol_lmk, K);
-        }
-    }
-
-    // add gps factors
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-        gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise);
-    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0));
-    symbols_pose.push_back(gtsam::Symbol('x', 0));
-    symbols_poses_values_iter.emplace(
-        gtsam::Symbol('x', 0), std::vector<gtsam::Pose3>{id_to_initial_world_from_cam.at(0)});
-    for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) {
-        if (frame_id == 0) {
-            continue;
-        }
-        const gtsam::Symbol key_cam('x', frame_id);
-        symbols_pose.push_back(key_cam);
-        gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation();
-        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_cam_in_world, gps_noise);
-        initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam);
-        symbols_poses_values_iter.emplace(key_cam,
-                                          std::vector<gtsam::Pose3>{estimate_world_from_cam});
-        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world));
-    }
+    std::cout << "heartbeat pre lmks in graph" << std::endl;
 
-    // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
-    std::cout << "heart beat 3" << std::endl;
-
-    constexpr bool local_optimizations = false;
-    if (local_optimizations) {
-        // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number
-        // of matches
-        // do local optimizations and add to iter cache
-        // const int window = 2;
-        std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl;
-        for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) {
-            std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl;
+    // add landmarks to graph
+    std::vector<gtsam::Symbol> symbols_lmks;
+    const FeatureTracks feature_tracks = frontend.feature_tracks();
+    for (size_t i = 0; i < feature_tracks.size(); i++) {
+        std::vector<gtsam::Pose3> world_from_lmk_cams;
+        std::vector<gtsam::Point2> lmk_observations;
+        for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
+            world_from_lmk_cams.push_back(world_from_cam_initial_guess[frame_id]);
+            lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y);
         }
-        for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::cout << "Local optimization " << i << std::endl;
-            gtsam::Values local_estimate_;
-            gtsam::NonlinearFactorGraph local_graph_;
-
-            std::vector<gtsam::Symbol> symbols_poses{gtsam::Symbol('x', i),
-                                                     gtsam::Symbol('x', i + 1)};
-            std::vector<gtsam::Symbol> symbols_lmks;
-
-            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-                gtsam::PriorFactor<gtsam::Pose3>(
-                    symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise));
-            std::cout << "fuck" << std::endl;
-            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-                gtsam::PriorFactor<gtsam::Pose3>(
-                    symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise));
-            std::cout << "mega fuck" << std::endl;
-            local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i));
-            local_estimate_.insert_or_assign(symbols_poses[1],
-                                             id_to_initial_world_from_cam.at(i + 1));
-
-            std::vector<cv::DMatch> matches = frontend.compute_matches(
-                frames[i].get_descriptors(), frames[i + 1].get_descriptors());
-            // DIAL TO MESS WITH
-            frontend.enforce_bijective_matches(matches);
-            std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
-                                                      id_to_initial_world_from_cam.at(i + 1)};
-
-            std::vector<robot::geometry::VizPose> viz_world_from_cams{
-                {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"},
-                {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}};
-            std::vector<robot::geometry::VizPoint> viz_lmks;
-            for (const cv::DMatch match : matches) {
-                std::vector<gtsam::Point2> feat_kpts;
-                const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
-                feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
-                feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
-
-                const std::pair<FrameId, KeypointCV> key_lmk_id =
-                    std::make_pair(frames[i].id_, kpt_cam0);
-                if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) {
-                    LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id);
-                    if (lmk_triangulated_map_filtered.find(match_lmk_id) ==
-                        lmk_triangulated_map_filtered.end()) {
-                        continue;
-                    }
-                    std::cout << "good" << std::endl;
-                    // do nothing
-                    // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-                    // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-                } else {
-                    std::cerr << "ERROR: this shouldn't happen right?" << std::endl;
-                    FeatureTrack feature_track(frames[i].id_, kpt_cam0);
-                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                    feature_tracks.emplace(lmk_id, feature_track);
-                    lmk_id_map.emplace(key_lmk_id, lmk_id);
-                    lmk_id++;
-                }
-                std::cout << "oog" << std::endl;
-                const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id));
-                // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
-                //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
-                //     std::cerr << "UH OH" << std::endl;
-                // } else {
-                //     std::cout << "cool" << std::endl;
-                // }
-                symbols_lmks.push_back(symbol_lmk);
-                local_graph_
-                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                        feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K);
-                local_graph_
-                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                        feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K);
-
-                std::optional<gtsam::Point3> kpt_in_world =
-                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K);
-                // gtsam::Point3 kpt_in_world;
-                // bool triangulate_success =
-                //     detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world);
-                if (kpt_in_world) {
-                    local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world);
-                    viz_lmks.emplace_back(*kpt_in_world);
-                    if (symbols_landmarks_values_iter.find(symbol_lmk) !=
-                        symbols_landmarks_values_iter.end()) {
-                        symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world);
-                    } else {
-                        symbols_landmarks_values_iter.emplace(
-                            symbol_lmk, std::vector<gtsam::Point3>{*kpt_in_world});
-                    }
-                }
-            }
-            std::cout << "setup complete!" << std::endl;
-            if (visualize)
-                robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(),
-                                           true, true, "Local Optimization " + std::to_string(i));
-
-            const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
-                local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
-
-            for (const gtsam::Symbol &symbol_pose : symbols_poses) {
-                const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
-                symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
-            }
-            for (const gtsam::Symbol &symbol_lmk : symbols_lmks) {
-                const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
-                symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
+        std::optional<gtsam::Point3> landmark_estimate = attempt_triangulate(
+            world_from_lmk_cams, lmk_observations, frames[0].K_);  // all K are the same for now...
+        if (landmark_estimate) {
+            const gtsam::Symbol lmk_symbol(Frontend::symbol_lmk_char, i);
+            // if (!frames[i].K_) std::cout << "UH OH" << std::endl;
+            for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
+                graph_.add(gtsam::GenericProjectionFactor(
+                    gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise,
+                    gtsam::Symbol(Frontend::symbol_pose_char, frame_id), lmk_symbol, frames[0].K_));
             }
+            initial_estimate_.insert(lmk_symbol, *landmark_estimate);
+            symbols_lmks.push_back(lmk_symbol);
         }
-
-        std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
-
-        for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose :
-             symbols_poses_values_iter) {
-            initial_estimate_.insert_or_assign(sym_pose.first,
-                                               detail_sfm::averagePoses(sym_pose.second));
-        }
-        for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
-             symbols_landmarks_values_iter) {
-            initial_estimate_.insert_or_assign(sym_lmk.first,
-                                               detail_sfm::averagePoints(sym_lmk.second));
-        }
+        std::cout << "idx " << i << " out of " << feature_tracks.size() << std::endl;
     }
 
+    std::cout << "heartbeat" << std::endl;
+
     // do global optimization
     const gtsam::Values result =
-        detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0);
-
-    // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
-    double sum_traj_error = 0;
-    double sum_rot_error = 0;
-    for (size_t i = 0; i < symbols_pose.size(); i++) {
-        const gtsam::Pose3 traj_pose = result.at<gtsam::Pose3>(symbols_pose[i]);
-        sum_traj_error += std::pow(
-            (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2);
-        sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i],
-                                                    Eigen::Isometry3d(traj_pose.matrix()));
-    }
-    std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
-    double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
-    double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
-    std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
+        optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 0);
+
+    // // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
+    // double sum_traj_error = 0;
+    // double sum_rot_error = 0;
+    // for (size_t i = 0; i < symbols_pose.size(); i++) {
+    //     const gtsam::Pose3 traj_pose = result.at<gtsam::Pose3>(symbols_pose[i]);
+    //     sum_traj_error += std::pow(
+    //         (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2);
+    //     sum_rot_error +=
+    //         rotation_error(references_world_from_cam[i], Eigen::Isometry3d(traj_pose.matrix()));
+    // }
+    // std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
+    // double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
+    // double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
+    // std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
 
     std::cout << "about to visualize result" << std::endl;
     result.print();
-    detail_sfm::graph_values(result, "Result", symbols_pose, std::vector<gtsam::Symbol>());
+    graph_values(result, "Result", symbols_pose, std::vector<gtsam::Symbol>());
     // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
index a63015b66..9771f2a29 100644
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ b/experimental/learn_descriptors/sfm_mvp.cc
@@ -259,12 +259,12 @@ TEST(SFMMvp, sfm_building_manual_global) {
     LandmarkId lmk_id = 0;
     for (size_t i = 0; i < indices.size() - 1; i++) {
         std::vector<cv::DMatch> matches =
-            frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
         // frontend.enforce_bijective_matches(matches);
         frontend.enforce_bijective_buffer_matches(matches);
         for (const cv::DMatch match : matches) {
-            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+            const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
 
             auto key = std::make_pair(i, kpt_cam0);
             if (lmk_id_map.find(key) != lmk_id_map.end()) {
@@ -453,11 +453,11 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
     LandmarkId lmk_id = 0;
     for (size_t i = 0; i < indices.size() - 1; i++) {
         std::vector<cv::DMatch> matches =
-            frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
         frontend.enforce_bijective_buffer_matches(matches);
         for (const cv::DMatch match : matches) {
-            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+            const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
 
             auto key = std::make_pair(frames[i].id_, kpt_cam0);
             if (lmk_id_map.find(key) != lmk_id_map.end()) {
@@ -582,7 +582,7 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1));
 
         std::vector<cv::DMatch> matches =
-            frontend.compute_matches(frames[i].get_descriptors(), frames[i + 1].get_descriptors());
+            frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
         frontend.enforce_bijective_matches(matches);
         std::vector<gtsam::Pose3> feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)};
 
@@ -591,8 +591,8 @@ TEST(SFMMvp, sfm_building_manual_incremental) {
         std::vector<Eigen::Vector3d> viz_lmks;
         for (const cv::DMatch match : matches) {
             std::vector<gtsam::Point2> feat_kpts;
-            const KeypointCV kpt_cam0 = frames[i].get_keypoints()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].get_keypoints()[match.trainIdx];
+            const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
+            const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
             feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
             feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
 

From 66ec22aff4ba3444467441ca1a3b5e5f09c8a7ea Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Mon, 14 Jul 2025 17:32:11 -0400
Subject: [PATCH 32/45] WIP. Use 5pt alg 'heading' to improve camera
 orientation initial guess

---
 experimental/learn_descriptors/BUILD          |  3 +
 .../learn_descriptors/four_seasons_parser.cc  |  5 --
 .../four_seasons_parser_example_viz.cc        | 60 +++++++++++++-
 experimental/learn_descriptors/frontend.cc    | 32 ++++----
 experimental/learn_descriptors/frontend.hh    |  3 +-
 experimental/learn_descriptors/image_point.hh |  2 +-
 .../image_point_four_seasons.cc               |  4 +-
 .../image_point_four_seasons.hh               |  2 +-
 .../learn_descriptors/refactor_sfm_mvp.cc     | 81 ++++++++++++-------
 9 files changed, 134 insertions(+), 58 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 2c647712b..d17e1f8af 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -185,6 +185,7 @@ cc_binary(
     srcs = ["four_seasons_parser_example_viz.cc"],
     deps = [
         ":four_seasons_parser",
+        ":frontend",
         "//common:check",
         "//common/gps:frame_translation",
         "//visualization/opencv:opencv_viz",
@@ -453,6 +454,8 @@ cc_binary(
         ":frontend",
         ":spatial_test_scene_cube",
         ":structure_from_motion_types",
+        ":image_point",
+        ":image_point_four_seasons",
         "//common/geometry:camera",
         "//visualization/opencv:opencv_viz",
         "@boost//:smart_ptr",
diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index 0e2eb2be1..d2bb76ffb 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -32,19 +32,14 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
     const std::filesystem::path path_vio = root_dir_ / "result.txt";
     const std::filesystem::path path_gps = root_dir_ / "septentrio.nmea";
     const size_t min_time_sig_figs = txt_parser_help::min_sig_figs_result_time(path_vio);
-    std::cout << "heartbeat 0" << std::endl;
     txt_parser_help::TimeDataList img_time_list =
         txt_parser_help::create_img_time_data_list(path_img, min_time_sig_figs);
-    std::cout << "heartbeat 1" << std::endl;
     txt_parser_help::TimeDataMap gnss_poses_time_map =
         txt_parser_help::create_gnss_poses_time_data_map(path_gnss, min_time_sig_figs);
-    std::cout << "heartbeat 2" << std::endl;
     txt_parser_help::TimeDataMap vio_poses_time_map =
         txt_parser_help::create_vio_time_data_map(path_vio, min_time_sig_figs);
-    std::cout << "heartbeat 3" << std::endl;
     gps_parser_help::TimeGPSList gps_time_list =
         gps_parser_help::create_gps_time_data_list(path_gps);
-    std::cout << "heartbeat 4" << std::endl;
 
     size_t id = 0;
     for (const std::pair<size_t, std::vector<std::string>>& pair_time_data : img_time_list) {
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index 88511a6e3..d9a6b89f1 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -1,5 +1,7 @@
 #include <algorithm>
+#include <chrono>
 #include <cstddef>
+#include <cstdlib>
 #include <filesystem>
 #include <iostream>
 #include <numeric>
@@ -11,6 +13,7 @@
 #include "common/gps/frame_translation.hh"
 #include "cxxopts.hpp"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
+#include "experimental/learn_descriptors/frontend.hh"
 #include "opencv2/opencv.hpp"
 #include "visualization/opencv/opencv_viz.hh"
 
@@ -48,6 +51,35 @@ int main(int argc, const char** argv) {
 
     lrn_desc::FourSeasonsParser parser(path_data, path_calibration);
 
+    lrn_desc::FrontendParams params{lrn_desc::FrontendParams::ExtractorType::SIFT,
+                                    lrn_desc::FrontendParams::MatcherType::KNN, true, false};
+    lrn_desc::Frontend frontend(params);
+    constexpr size_t NUM_IMAGES = 100;
+    (void)NUM_IMAGES;
+    auto start = std::chrono::high_resolution_clock::now();
+    for (size_t i = 0; i < NUM_IMAGES; i += 1) {
+        // for (const size_t i : std::vector<size_t>{74, 82, 92}) {
+        const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
+        frontend.add_image(lrn_desc::ImageAndPoint{
+            parser.load_image(i), std::make_shared<lrn_desc::ImagePointFourSeasons>(img_pt)});
+    }
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+        std::chrono::high_resolution_clock::now() - start);
+    std::cout << "done adding! took " << duration.count() << " milliseconds" << std::endl;
+    frontend.populate_frames();
+    start = std::chrono::high_resolution_clock::now();
+    duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+        std::chrono::high_resolution_clock::now() - start);
+    std::cout << "done populating frames! took " << duration.count() << " milliseconds"
+              << std::endl;
+    // frontend.match_frames_and_build_tracks();
+    // start = std::chrono::high_resolution_clock::now();
+    // duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+    //     std::chrono::high_resolution_clock::now() - start);
+    // std::cout << "done matching and building tracks! took " << duration.count() << "
+    // milliseconds"
+    //           << std::endl;
+
     ROBOT_CHECK(parser.num_images() != 0);
 
     std::vector<robot::geometry::VizPose> viz_frames;   // camera frames from visual world frame
@@ -57,9 +89,12 @@ int main(int argc, const char** argv) {
     std::cout << "gnss scale: " << parser.gnss_scale() << std::endl;
     scale_mat.linear() *= parser.gnss_scale();
     std::cout << "scale mat: " << scale_mat.matrix() << std::endl;
+    std::optional<Eigen::Vector3d> first_gps_to_cam;
     std::optional<double> altitude_gps_from_gnss_cam;
     std::vector<double> gps_ns_delta_from_shutter;
-    for (size_t i = 0; i < parser.num_images(); i += 1) {
+    // double ate_cam = 0.0, ate_gps = 0.0;
+    for (size_t i = 0; i < NUM_IMAGES; i += 1) {
+        // for (const size_t i : std::vector<size_t>{74, 82, 92}) {
         const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         std::cout << img_pt.to_string() << std::endl;
         if (!img_pt.AS_w_from_gnss_cam) {
@@ -68,8 +103,18 @@ int main(int argc, const char** argv) {
         Eigen::Isometry3d w_from_gnss_cam(Eigen::Isometry3d(parser.S_from_AS().matrix()) *
                                           scale_mat *
                                           Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()));
-
         viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
+        Eigen::Isometry3d frontend_w_from_cam_groundtruth(
+            frontend.frames()[i].world_from_cam_groundtruth_->matrix());
+        ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()),
+                    frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix());
+        // Eigen::Isometry3d frontend_w_from_cam_groundtruth(*img_pt.world_from_cam_ground_truth());
+        viz_frames.emplace_back(frontend_w_from_cam_groundtruth,
+                                "x_frontend_ref_" + std::to_string(i));
+        // std::cout << "frontend_w_from_cam_groundtruth: " <<
+        // frontend_w_from_cam_groundtruth.matrix()
+        //           << std::endl;
+
         if (img_pt.gps_gcs) {
             if (img_pt.gps_gcs->seq > img_pt.seq) {
                 gps_ns_delta_from_shutter.push_back(
@@ -93,6 +138,10 @@ int main(int argc, const char** argv) {
             if (!altitude_gps_from_gnss_cam) {
                 altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude);
             }
+            if (!first_gps_to_cam) {
+                first_gps_to_cam = frontend.frames()[i].world_from_cam_groundtruth_->translation() -
+                                   *frontend.frames()[i].cam_in_world_initial_guess_;
+            }
             gcs_coordinate.z() += *altitude_gps_from_gnss_cam;
             std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs
                       << "\ngps_gcs: " << gcs_coordinate
@@ -106,14 +155,17 @@ int main(int argc, const char** argv) {
                 ECEF_from_gps_hom;
             const Eigen::Isometry3d cam_from_gps(
                 (parser.cam_from_imu() * parser.gps_from_imu().inverse()).matrix());
-            Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() + cam_from_gps.translation();
+            Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() - cam_from_gps.translation();
             viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i));
-            viz_points.emplace_back(gps_in_w.head<3>(), "x_gps" + std::to_string(i));
+            viz_points.emplace_back(
+                *frontend.frames()[i].cam_in_world_initial_guess_ + *first_gps_to_cam,
+                "x_frontend_cam_gps" + std::to_string(i));
             const Eigen::Vector3d gps_from_ref_in_world =
                 gps_in_w.head<3>() - w_from_gnss_cam.translation();
             std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl;
         }
     }
+    // std::cout << "ate_cam: " << ate_cam << "\tate_gps: " << ate_gps << std::endl;
     const double sum =
         std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0);
     double avg_ns_gps_delta = sum / gps_ns_delta_from_shutter.size();
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 8b7d5077a..a42a819bd 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -2,6 +2,7 @@
 
 #include <cmath>
 #include <cstddef>
+#include <memory>
 #include <optional>
 #include <unordered_map>
 #include <utility>
@@ -168,36 +169,36 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) {
 void Frontend::populate_frames() {
     FrameId id = frames_.size();
     for (const ImageAndPoint &img_and_pt : images_and_points_) {
-        const ImagePoint &img_pt = img_and_pt.img_pt;
+        const std::shared_ptr<ImagePoint> shared_img_pt = img_and_pt.shared_img_pt;
+        std::cout << shared_img_pt->to_string() << std::endl;
         cv::Mat img_undistorted;
-        cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted, img_pt.K->k_mat(),
-                                    img_pt.K->d_mat(), img_pt.K->k_mat(),
-                                    img_and_pt.image_distorted.size());
+        cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted,
+                                    shared_img_pt->K->k_mat(), shared_img_pt->K->d_mat(),
+                                    shared_img_pt->K->k_mat(), img_and_pt.image_distorted.size());
         auto [kpts, descriptors] = extract_features(img_undistorted);
         KeypointsCV kpts_cv;
         for (const cv::KeyPoint &kpt : kpts) {
             kpts_cv.push_back(kpt.pt);
         }
 
-        gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(
-            img_pt.K->fx, img_pt.K->fy, 0, img_pt.K->cx, img_pt.K->cy);
-
-        if (!K) std::cout << "UH OH" << std::endl;
+        gtsam::Cal3_S2::shared_ptr K =
+            boost::make_shared<gtsam::Cal3_S2>(shared_img_pt->K->fx, shared_img_pt->K->fy, 0,
+                                               shared_img_pt->K->cx, shared_img_pt->K->cy);
 
         Frame frame(id, img_undistorted, K, kpts_cv, descriptors);
         const std::optional<Eigen::Isometry3d> maybe_world_from_cam_grnd_trth =
-            img_pt.world_from_cam_ground_truth();
+            shared_img_pt->world_from_cam_ground_truth();
         if (maybe_world_from_cam_grnd_trth) {
             frame.world_from_cam_groundtruth_ =
                 gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix());
         }
-        const std::optional<Eigen::Vector3d> maybe_cam_in_world = img_pt.cam_in_world();
+        const std::optional<Eigen::Vector3d> maybe_cam_in_world = shared_img_pt->cam_in_world();
         if (maybe_cam_in_world) {
             frame.cam_in_world_initial_guess_ = *maybe_cam_in_world;
         }
         if (id == 0) frame.world_from_cam_initial_guess_ = gtsam::Rot3::Identity();
         const std::optional<gtsam::Matrix3> maybe_translation_covariance_in_cam =
-            img_pt.translation_covariance_in_cam();
+            shared_img_pt->translation_covariance_in_cam();
         if (maybe_translation_covariance_in_cam) {
             frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam;
         }
@@ -233,8 +234,9 @@ void Frontend::match_frames_and_build_tracks() {
                     robot::geometry::estimate_cam0_from_cam1(
                         cv_kpts_1, cv_kpts_2, matches,
                         images_and_points_[i]
-                            .img_pt.K->k_mat());  // fine for now since all cameras have the same K,
-                                                  // how to reconcile later though...?
+                            .shared_img_pt->K
+                            ->k_mat());  // fine for now since all cameras have the same K,
+                                         // how to reconcile later though...?
 
                 if (!scale_cam0_from_cam1) {
                     continue;
@@ -293,8 +295,8 @@ void Frontend::match_frames_and_build_tracks() {
                 cv_kpts_2.push_back(cv_kpt);
             }
             std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
-                robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches,
-                                                         images_and_points_[i].img_pt.K->k_mat());
+                robot::geometry::estimate_cam0_from_cam1(
+                    cv_kpts_1, cv_kpts_2, matches, images_and_points_[i].shared_img_pt->K->k_mat());
             if (!scale_cam0_from_cam1) {
                 continue;
             }
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index d71338936..a9e1a01c2 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <memory>
 #include <utility>
 #include <vector>
 
@@ -20,7 +21,7 @@ struct FrontendParams {
 };
 struct ImageAndPoint {
     cv::Mat image_distorted;
-    ImagePoint img_pt;
+    std::shared_ptr<ImagePoint> shared_img_pt;
 };
 class Frontend {
    public:
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index 2fbaf52cb..f4a4635b9 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -26,7 +26,7 @@ struct ImagePoint {
     virtual std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const {
         return std::nullopt;
     };
-    std::string to_string() const {
+    virtual std::string to_string() const {
         auto vec3d_to_str = [](const Eigen::Vector3d& vec3d) {
             std::stringstream ss;
             ss << "[" << vec3d.x() << ", " << vec3d.y() << ", " << vec3d.z() << "]";
diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc
index 2ad0468fa..6cdec9d5f 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.cc
+++ b/experimental/learn_descriptors/image_point_four_seasons.cc
@@ -12,7 +12,9 @@ namespace robot::experimental::learn_descriptors {
 ImagePointFourSeasons::~ImagePointFourSeasons() = default;
 
 std::optional<Eigen::Isometry3d> ImagePointFourSeasons::world_from_cam_ground_truth() const {
-    if (!AS_w_from_gnss_cam) return std::nullopt;
+    if (!AS_w_from_gnss_cam) {
+        return std::nullopt;
+    }
     Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
     scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
         shared_static_transforms->gnss_scale;
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
index b636cd120..a0c337c56 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.hh
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -34,7 +34,7 @@ struct ImagePointFourSeasons : ImagePoint {
     std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const override;
     std::optional<Eigen::Matrix3d> gps_covariance_in_world() const;
 
-    std::string to_string() const {
+    std::string to_string() const override {
         auto se3_to_str = [](const liegroups::SE3& se3) {
             const Eigen::Vector3d& t = se3.translation();
             const Eigen::Quaterniond& r = se3.so3().unit_quaternion();
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index 22d63079c..c2270e69a 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -1,9 +1,11 @@
 #include <cmath>
 #include <functional>
 #include <iostream>
+#include <memory>
 #include <opencv2/calib3d.hpp>
 #include <optional>
 #include <sstream>
+#include <unordered_map>
 #include <vector>
 
 #include "Eigen/Core"
@@ -15,6 +17,7 @@
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend.hh"
 #include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
 #include "gtsam/geometry/Cal3_S2.h"
 #include "gtsam/geometry/Point2.h"
@@ -201,9 +204,11 @@ int main(int argc, const char **argv) {
                           true, false};
     Frontend frontend(params);
 
-    for (size_t i = 660; i < 1000; i += 5) {
+    for (size_t i = 660; i < 750; i += 5) {
         // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl;
-        frontend.add_image(ImageAndPoint{parser.load_image(i), parser.get_image_point(i)});
+        frontend.add_image(
+            ImageAndPoint{parser.load_image(i),
+                          std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
     }
     frontend.populate_frames();
     frontend.match_frames_and_build_tracks();
@@ -214,13 +219,29 @@ int main(int argc, const char **argv) {
               << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl;
 
     std::vector<robot::geometry::VizPose> viz_poses_init;
+    std::optional<Eigen::Vector3d> first_point;
+    std::optional<Eigen::Isometry3d> first_grnd_trth;
+    (void)first_point;
     for (const Frame &frame : frontend.frames()) {
-        Eigen::Isometry3d world_from_cam_init;
-        world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
-        world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_;
-        viz_poses_init.emplace_back(world_from_cam_init,
-                                    gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string());
+        if (frame.cam_in_world_initial_guess_) {
+            Eigen::Isometry3d world_from_cam_init;
+            world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
+            if (!first_point) first_point = *frame.cam_in_world_initial_guess_;
+            world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - *first_point;
+            std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() << std::endl;
+            viz_poses_init.emplace_back(
+                world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string());
+        }
+        if (frame.world_from_cam_groundtruth_) {
+            std::cout << "adding a ground truth frame to viz!" << std::endl;
+            Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix());
+            if (!first_grnd_trth) first_grnd_trth = w_from_cam_grnd_trth;
+            w_from_cam_grnd_trth.translation() -= first_grnd_trth->translation();
+            viz_poses_init.emplace_back(w_from_cam_grnd_trth,
+                                        "x_grnd_" + std::to_string(frame.id_));
+        }
     }
+    std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl;
     robot::geometry::viz_scene(viz_poses_init, std::vector<robot::geometry::VizPoint>(),
                                cv::viz::Color::brown(), true, true, "Frontend initial guesses");
 
@@ -242,34 +263,35 @@ int main(int argc, const char **argv) {
 
     // add gps factors
     const std::vector<Frame> &frames = frontend.frames();
-    graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
-        gtsam::Symbol('x', 0), gtsam::Pose3::Identity(),
-        gtsam::noiseModel::Constrained::All(6)  // effectively fixes the pose
-        ));
-    initial_estimate_.insert(gtsam::Symbol('x', 0), gtsam::Pose3::Identity());
-    std::vector<gtsam::Symbol> symbols_pose{gtsam::Symbol('x', 0)};
-    std::vector<gtsam::Pose3> world_from_cam_initial_guess;
-    for (size_t i = 1; i < frames.size(); i++) {
+    std::vector<gtsam::Symbol> symbols_pose;
+    std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_guess;
+    std::optional<gtsam::Point3> cam0_in_w;
+    for (const Frame &frame : frames) {
+        if (!frame.cam_in_world_initial_guess_) continue;
+        if (!cam0_in_w) cam0_in_w = *frame.cam_in_world_initial_guess_;
         // should do some interpolation here most likely
-        const gtsam::Pose3 world_from_cam_estimate(
-            frames[i].world_from_cam_initial_guess_ ? *frames[i].world_from_cam_initial_guess_
-                                                    : gtsam::Rot3::Identity(),
-            frames[i].cam_in_world_initial_guess_ ? *frames[i].cam_in_world_initial_guess_
-                                                  : gtsam::Point3());
-        world_from_cam_initial_guess.push_back(world_from_cam_estimate);
+        const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_
+                                                       ? *frame.world_from_cam_initial_guess_
+                                                       : gtsam::Rot3::Identity(),
+                                                   *frame.cam_in_world_initial_guess_ - *cam0_in_w);
+        world_from_cam_initial_guess[frame.id_] = world_from_cam_estimate;
         gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
-            frames[i].translation_covariance_in_cam_
-                ? gtsam::Vector3((*frames[i].translation_covariance_in_cam_)(0, 0),
-                                 (*frames[i].translation_covariance_in_cam_)(1, 1),
-                                 (*frames[i].translation_covariance_in_cam_)(2, 2))
+            frame.translation_covariance_in_cam_
+                ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0),
+                                 (*frame.translation_covariance_in_cam_)(1, 1),
+                                 (*frame.translation_covariance_in_cam_)(2, 2))
                 : gps_sigmas_fallback);
-        const gtsam::Symbol cam_symbol('x', i);
-        if (frames[i].cam_in_world_initial_guess_)
-            graph_.add(
-                gtsam::GPSFactor(cam_symbol, *frames[i].cam_in_world_initial_guess_, gps_noise));
+        const gtsam::Symbol cam_symbol('x', frame.id_);
+        graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
         initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
         symbols_pose.push_back(cam_symbol);
     }
+    std::vector<robot::geometry::VizPose> viz_pose;
+    for (const auto &[id, pose] : world_from_cam_initial_guess) {
+        viz_pose.emplace_back(Eigen::Isometry3d(pose.matrix()), "x_" + std::to_string(id));
+    }
+    robot::geometry::viz_scene(viz_pose, std::vector<robot::geometry::VizPoint>(),
+                               cv::viz::Color::brown(), true, true, "Initial guesses in backend");
 
     std::cout << "heartbeat pre lmks in graph" << std::endl;
 
@@ -287,7 +309,6 @@ int main(int argc, const char **argv) {
             world_from_lmk_cams, lmk_observations, frames[0].K_);  // all K are the same for now...
         if (landmark_estimate) {
             const gtsam::Symbol lmk_symbol(Frontend::symbol_lmk_char, i);
-            // if (!frames[i].K_) std::cout << "UH OH" << std::endl;
             for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
                 graph_.add(gtsam::GenericProjectionFactor(
                     gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise,

From 4134a65a6feeef8ecec312c67cd3112a2087bef4 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 15 Jul 2025 17:08:17 -0400
Subject: [PATCH 33/45] WIP

---
 experimental/learn_descriptors/BUILD          |   3 +-
 experimental/learn_descriptors/backend.cc     | 258 +++++++-----------
 experimental/learn_descriptors/backend.hh     |  94 +------
 .../learn_descriptors/backend_test.cc         |   4 +-
 experimental/learn_descriptors/frame.hh       |   3 +
 experimental/learn_descriptors/frontend.cc    |   6 +-
 .../structure_from_motion.cc                  |  10 +-
 7 files changed, 134 insertions(+), 244 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index d17e1f8af..06e1d6b5e 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -373,7 +373,8 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        ":feature_manager",
+        ":structure_from_motion_types",
+        ":frame",
         "//common/geometry:camera",
         "@boost//:smart_ptr",
         "@gtsam",
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index b3b50e763..1b3559d0c 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -1,7 +1,11 @@
 #include "experimental/learn_descriptors/backend.hh"
 
+#include <unordered_set>
+
 #include "boost/make_shared.hpp"
 #include "experimental/learn_descriptors/feature_manager.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
 #include "gtsam/geometry/triangulation.h"
 #include "gtsam/navigation/GPSFactor.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
@@ -11,159 +15,6 @@
 
 namespace robot::experimental::learn_descriptors {
 
-Backend::Backend(std::shared_ptr<FeatureManager> feature_manager)
-    : feature_manager_(feature_manager) {
-    const size_t img_width = 640;
-    const size_t img_height = 480;
-    const double fx = 500.0;
-    const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
-
-    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
-    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
-}
-
-Backend::Backend(std::shared_ptr<FeatureManager> feature_manager, gtsam::Cal3_S2 K)
-    : feature_manager_(feature_manager) {
-    K_ = boost::make_shared<gtsam::Cal3_S2>(K);
-    // initial_estimate_.insert(gtsam::Symbol(camera_symbol_char, 0), K);
-}
-
-template <>
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Pose3 &value,
-                               const gtsam::SharedNoiseModel &noise) {
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(symbol, value, noise);
-    initial_estimate_.insert_or_assign(symbol, value);
-    // std::cout << "adding a prior factor! with symbol: " << symbol << std::endl;
-    // initial_estimate_.print("values after adding prior: ");
-}
-
-template <>
-void Backend::add_prior_factor(const gtsam::Symbol &symbol, const gtsam::Point3 &value,
-                               const gtsam::SharedNoiseModel &noise) {
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Point3>>(symbol, value, noise);
-    gtsam::Rot3 R;
-    if (initial_estimate_.exists(symbol)) {
-        R = initial_estimate_.at<gtsam::Pose3>(symbol).rotation();
-    }
-    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R, value));
-}
-
-template <>
-void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &symbol_1,
-                                               const gtsam::Symbol &symbol_2,
-                                               const gtsam::Pose3 &value,
-                                               const gtsam::SharedNoiseModel &model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(symbol_1, symbol_2, value, model);
-    // std::cout << "adding between factor. symbol_1: " << symbol_1 << ". symbol_2: " << symbol_2 <<
-    // std::endl; initial_estimate_.print("values when adding between factor: ");
-    initial_estimate_.insert_or_assign(symbol_2,
-                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(value));
-}
-
-template <>
-void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &symbol_1,
-                                              const gtsam::Symbol &symbol_2,
-                                              const gtsam::Rot3 &value,
-                                              const gtsam::SharedNoiseModel &model) {
-    graph_.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(symbol_1, symbol_2, value, model);
-    initial_estimate_.insert_or_assign(symbol_2,
-                                       initial_estimate_.at<gtsam::Pose3>(symbol_1).compose(
-                                           gtsam::Pose3(value, gtsam::Point3::Zero())));
-}
-
-void Backend::add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &t_world_cam,
-                             const gtsam::SharedNoiseModel &model, const gtsam::Rot3 &R_world_cam) {
-    graph_.emplace_shared<gtsam::GPSFactor>(symbol, t_world_cam, model);
-    initial_estimate_.insert_or_assign(symbol, gtsam::Pose3(R_world_cam, t_world_cam));
-}
-
-std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> Backend::get_obs_for_lmk(
-    const gtsam::Symbol &lmk_symbol) {
-    std::vector<gtsam::Pose3> cam_poses;
-    std::vector<gtsam::Point2> observations;
-
-    // Iterate over all factors in the graph
-    for (const auto &factor : graph_) {
-        auto projFactor = boost::dynamic_pointer_cast<
-            gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(factor);
-
-        if (projFactor && projFactor->keys().at(1) == lmk_symbol) {
-            // Get the camera pose symbol
-            gtsam::Symbol cameraSymbol = projFactor->keys().at(0);
-
-            // Retrieve the camera pose from values
-            if (!initial_estimate_.exists(cameraSymbol)) continue;
-            gtsam::Pose3 cameraPose = initial_estimate_.at<gtsam::Pose3>(cameraSymbol);
-
-            // Get the 2D observation (keypoint measurement)
-            gtsam::Point2 observation = projFactor->measured();
-
-            // Store the pose and corresponding observation
-            observations.push_back(observation);
-            cam_poses.push_back(cameraPose);
-        }
-    }
-    return std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>>(cam_poses,
-                                                                            observations);
-}
-
-void Backend::add_landmarks(const std::vector<Landmark> &landmarks) {
-    for (const Landmark &landmark : landmarks) {
-        add_landmark(landmark);
-    }
-}
-
-void Backend::add_landmark(const Landmark &landmark) {
-    graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-        landmark.projection, landmark_noise_, landmark.cam_pose_symbol, landmark.lmk_factor_symbol,
-        K_);
-    if (!initial_estimate_.exists(landmark.cam_pose_symbol)) {
-        throw std::runtime_error(
-            "landmark.cam_pose_symbol must already exist in Backend.initial_estimate_ before "
-            "add_landmark is called.");
-    }
-    gtsam::Point3 p_world_lmk_estimate =
-        initial_estimate_.at<gtsam::Pose3>(landmark.cam_pose_symbol) * landmark.p_cam_lmk_guess;
-    initial_estimate_.insert_or_assign(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-
-    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> lmk_obs =
-        get_obs_for_lmk(landmark.lmk_factor_symbol);
-    if (lmk_obs.first.size() >= 2) {
-        try {
-            // Attempt triangulation using DLT (or the GTSAM provided method)
-            p_world_lmk_estimate = gtsam::triangulatePoint3(
-                lmk_obs.first, K_,
-                gtsam::Point2Vector(lmk_obs.second.begin(), lmk_obs.second.end()));
-
-            // Optional: perform an explicit cheirality check
-            bool valid = true;
-            for (const auto &pose : lmk_obs.first) {
-                // Transform point to the camera coordinate system.
-                // transformTo() converts a world point to the camera frame.
-                gtsam::Point3 p_cam = pose.transformTo(p_world_lmk_estimate);
-                if (p_cam.z() <= 0) {  // Check that the depth is positive
-                    valid = false;
-                    break;
-                }
-            }
-
-            if (valid) {
-                initial_estimate_.update(landmark.lmk_factor_symbol, p_world_lmk_estimate);
-            } else {
-                std::cerr << "Triangulated landmark failed cheirality check; keeping initial guess."
-                          << std::endl;
-            }
-        } catch (const gtsam::TriangulationCheiralityException &e) {
-            // Handle the exception gracefully by logging and retaining the previous estimate.
-            std::cerr << "Triangulation Cheirality Exception: " << e.what()
-                      << ". Keeping initial landmark estimate." << std::endl;
-        }
-    }
-}
-
 void Backend::solve_graph() {
     gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
     result_ = optimizer.optimize();
@@ -191,4 +42,105 @@ void Backend::solve_graph(const int num_steps,
     }
     result_ = optimizer.values();
 }
+
+gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3>& rotations, int max_iter = 10) {
+    if (rotations.empty()) throw std::runtime_error("No rotations to average");
+    if (rotations.size() == 1) return rotations.front();
+
+    gtsam::Rot3 mean = rotations[0];
+
+    for (int iter = 0; iter < max_iter; ++iter) {
+        Eigen::Vector3d total = Eigen::Vector3d::Zero();
+        for (const gtsam::Rot3& R : rotations) {
+            gtsam::Rot3 delta = mean.between(R);  // R_mean^T * R
+            total += gtsam::Rot3::Logmap(delta);  // Lie algebra element in R³
+        }
+        total /= static_cast<double>(rotations.size());
+        mean = mean.compose(gtsam::Rot3::Expmap(total));  // update estimate
+    }
+
+    return mean;
+}
+
+void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
+    struct FrameVertex {
+        Frame& frame;
+        std::unordered_map<FrameId, gtsam::Rot3>
+            neighbor_id_to_rot;  // rot is frame_from_neighbor_frame
+    };
+    std::unordered_map<FrameId, FrameVertex> frame_tree;
+    // populate all frames and add children to neighbors
+    for (Frame& frame : frames) {
+        FrameVertex frame_vertex{frame};
+        for (const auto [other_frame_id, frame_from_other_frame] : frame.frame_from_other_frames_) {
+            frame_vertex.neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame);
+        }
+        frame_tree.emplace(frame.id_, frame_vertex);
+    }
+    // add parents to neighbors
+    for (const Frame& frame : frames) {
+        for (const) }
+    gtsam::NonlinearFactorGraph graph;
+    gtsam::Values initial;
+    // TODO: somehow get more informative/more robust noise values
+    // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength
+    gtsam::noiseModel::Diagonal::shared_ptr noise =
+        gtsam::noiseModel::Isotropic::Sigma(3, 0.1);  // radians
+
+    std::unordered_set<FrameId> frames_seen;
+    for (size_t i = 0; i < frames.size(); i++) {
+        const Frame& frame = frames[i];
+        if (i == 0) {
+            initial.insert(gtsam::Symbol(symbol_char_rotation, i),
+                           frame.world_from_cam_initial_guess_
+                               ? *frame.world_from_cam_initial_guess_
+                               : gtsam::Rot3::Identity());
+        }
+        std::vector<gtsam::Rot3> candidates_world_from_frame;
+        for (const auto& [other_frame_id, frame_from_other_frame] :
+             frame.frame_from_other_frames_) {
+            if (frames_seen.find(other_frame_id) == frames_seen.end()) {
+                candidates_world_from_frame.push_back(frame_from_other_frame);
+            }
+        }
+        initial.insert(gtsam::Symbol(symbol_char_rotation, frame.id_),
+                       average_rotations(candidates_world_from_frame));
+        frames_seen.insert(frame.id_);
+    }
+
+    // Assume we have relative rotations Rij between node i and j
+    std::vector<std::pair<size_t, size_t>> edges = {{0, 1}, {1, 2}, {2, 3}, {3, 4}};
+    std::vector<gtsam::Rot3> relativeRotations = {
+        Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.3, 0.0, 0.0),
+        Rot3::RzRyRx(0.2, 0.0, 0.0)};
+
+    // Add BetweenFactors for relative rotations
+    for (size_t k = 0; k < edges.size(); ++k) {
+        size_t i = edges[k].first;
+        size_t j = edges[k].second;
+        graph.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(
+            gtsam::Symbol(Backend::symbol_char_rotation, i),
+            gtsam::Symbol(Backend::symbol_char_rotation, j), relativeRotations[k], noise);
+    }
+
+    // Fix the first rotation
+    initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, 0), gtsam::Rot3::Identity());
+
+    // Initialize other rotations naively
+    for (size_t i = 1; i <= 4; ++i) {
+        initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, i),
+                       gtsam::Rot3::RzRyRx(0.0, 0.0, 0.0));
+    }
+
+    // Optimize
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    gtsam::Values result = optimizer.optimize();
+
+    // Output results
+    for (size_t i = 0; i <= 4; ++i) {
+        gtsam::Rot3 Ri = result.at<gtsam::Rot3>(gtsam::Symbol(Backend::symbol_char_rotation, i));
+        std::cout << "Rotation " << i << ": " << Ri.rpy().transpose() << " (roll pitch yaw radians)"
+                  << std::endl;
+    }
+}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
index 4a43ad1ec..c042de3cb 100644
--- a/experimental/learn_descriptors/backend.hh
+++ b/experimental/learn_descriptors/backend.hh
@@ -6,7 +6,7 @@
 #include <vector>
 
 #include "common/geometry/camera.hh"
-#include "experimental/learn_descriptors/feature_manager.hh"
+#include "experimental/learn_descriptors/frame.hh"
 #include "gtsam/geometry/Cal3_S2.h"
 #include "gtsam/geometry/Point2.h"
 #include "gtsam/geometry/Point3.h"
@@ -20,49 +20,12 @@
 namespace robot::experimental::learn_descriptors {
 class Backend {
    public:
-    static constexpr char pose_symbol_char = 'x';
-    static constexpr char pose_rot_symbol_char = 'r';
-    static constexpr char pose_translation_symbol_char = 't';
-    static constexpr char pose_bearing_symbol_char = 'b';
-    static constexpr char landmark_symbol_char = 'l';
-    static constexpr char camera_symbol_char = 'k';
-
-    struct Landmark {
-        Landmark(const gtsam::Symbol &lmk_factor_symbol, const gtsam::Symbol &cam_pose_symbol,
-                 const gtsam::Point2 &projection, const gtsam::Cal3_S2 K,
-                 float initial_depth_guess = 5.0)
-            : lmk_factor_symbol(lmk_factor_symbol),
-              cam_pose_symbol(cam_pose_symbol),
-              projection(projection),
-              p_cam_lmk_guess(robot::geometry::deproject(robot::geometry::get_intrinsic_matrix(K),
-                                                         projection, initial_depth_guess)){};
-        const gtsam::Symbol lmk_factor_symbol;
-        const gtsam::Symbol cam_pose_symbol;
-        const gtsam::Point2 projection;
-        const gtsam::Point3 p_cam_lmk_guess;
-    };
-
-    Backend(std::shared_ptr<FeatureManager> feature_manager = nullptr);
-    Backend(std::shared_ptr<FeatureManager> feature_manager, gtsam::Cal3_S2 K);
-    Backend(gtsam::Cal3_S2::shared_ptr K) : K_(K){};
-    ~Backend(){};
-
-    template <typename T>
-    void add_prior_factor(const gtsam::Symbol &symbol, const T &value,
-                          const gtsam::SharedNoiseModel &model);
-
-    template <typename T>
-    void add_between_factor(const gtsam::Symbol &symbol_1, const gtsam::Symbol &symbol_2,
-                            const T &value, const gtsam::SharedNoiseModel &model);
-
-    void add_factor_GPS(const gtsam::Symbol &symbol, const gtsam::Point3 &p_world_gps,
-                        const gtsam::SharedNoiseModel &model,
-                        const gtsam::Rot3 &R_world_cam = gtsam::Rot3::Identity());
-
-    std::pair<std::vector<gtsam::Pose3>, std::vector<gtsam::Point2>> get_obs_for_lmk(
-        const gtsam::Symbol &lmk_symbol);
-    void add_landmarks(const std::vector<Landmark> &landmarks);
-    void add_landmark(const Landmark &landmark);
+    static constexpr char symbol_char_pose = 'x';
+    static constexpr char symbol_char_rotation = 'r';
+    static constexpr char symbol_char_translation = 't';
+    static constexpr char symbol_char_bearing = 'b';
+    static constexpr char symbol_char_landmark = 'l';
+    static constexpr char symbol_char_cam_cal = 'k';
 
     void solve_graph();
     typedef int epoch;
@@ -70,46 +33,19 @@ class Backend {
     void solve_graph(const int num_steps,
                      std::optional<graph_step_debug_func> inter_debug_func = std::nullopt);
 
-    const gtsam::Values &get_current_initial_values() const { return initial_estimate_; };
-    const gtsam::Values &get_result() const { return result_; };
-    const gtsam::Cal3_S2 &get_K() const { return *K_; };
+    static gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3> &rotations,
+                                         int max_iter = 10);
+    // use rotation averaging to set the rotation initial guess for each frame in the world frame.
+    // the world frame will be the first frame in the vector
+    static void populate_rotation_estimate(std::vector<Frame> &frames);
 
-    const gtsam::SharedNoiseModel get_lmk_noise() { return landmark_noise_; };
-    const gtsam::SharedNoiseModel get_pose_noise() { return pose_noise_; };
-    const gtsam::SharedNoiseModel get_translation_noise() { return translation_noise_; };
-    const gtsam::SharedNoiseModel get_gps_noise() { return gps_noise_; };
+    const gtsam::Values &current_initial_values() const { return initial_estimate_; };
+    const gtsam::Values &result() const { return result_; };
 
    private:
-    std::shared_ptr<FeatureManager> feature_manager_;
-    gtsam::Cal3_S2::shared_ptr K_;
-
     gtsam::Values initial_estimate_;
     gtsam::Values result_;
     gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    gtsam::noiseModel::Diagonal::shared_ptr pose_noise_ =
-        gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6(0.1, 0.1, 0.1, 0.01, 0.01, 0.01));
-    gtsam::noiseModel::Isotropic::shared_ptr translation_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(2, 0.1);
-    gtsam::noiseModel::Isotropic::shared_ptr gps_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(3, 2.);
 };
-
-template <>
-void Backend::add_prior_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Pose3 &,
-                                             const gtsam::SharedNoiseModel &);
-
-template <>
-void Backend::add_prior_factor<gtsam::Point3>(const gtsam::Symbol &, const gtsam::Point3 &,
-                                              const gtsam::SharedNoiseModel &);
-
-template <>
-void Backend::add_between_factor<gtsam::Pose3>(const gtsam::Symbol &, const gtsam::Symbol &,
-                                               const gtsam::Pose3 &,
-                                               const gtsam::SharedNoiseModel &);
-template <>
-void Backend::add_between_factor<gtsam::Rot3>(const gtsam::Symbol &, const gtsam::Symbol &,
-                                              const gtsam::Rot3 &, const gtsam::SharedNoiseModel &);
+using Landmark = gtsam::Point3;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc
index d294fa15e..d1cbec44e 100644
--- a/experimental/learn_descriptors/backend_test.cc
+++ b/experimental/learn_descriptors/backend_test.cc
@@ -31,10 +31,10 @@ TEST(BackendTest, cube) {
     //     camera_poses.emplace_back(T_world_cam.pose().matrix());
     // }
 
-    // backend.add_prior_factor(gtsam::Symbol(backend.camera_symbol_char, 0), camera_poses.front(),
+    // backend.add_prior_factor(gtsam::Symbol(backend.symbol_char_cam_cal, 0), camera_poses.front(),
     //                          backend.get_pose_noise());
     // for (size_t i = 1; i < camera_poses.size(); i++) {
-    //     backend.add_factor_GPS(gtsam::Symbol(backend.camera_symbol_char, i),
+    //     backend.add_factor_GPS(gtsam::Symbol(backend.symbol_char_cam_cal, i),
     //                            camera_poses[i].translation(), backend.get_gps_noise());
 
     //     std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size> cam_and_img_sizes[2] = {
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index 70be07d27..b6eb43a47 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -1,5 +1,6 @@
 #pragma once
 #include <optional>
+#include <unordered_map>
 
 #include "experimental/learn_descriptors/frontend_definitions.hh"
 #include "gtsam/base/Matrix.h"
@@ -30,6 +31,8 @@ class Frame {
     gtsam::Cal3_S2::shared_ptr K_;
     KeypointsCV kpts_;
     cv::Mat descriptors_;
+    std::unordered_map<FrameId, gtsam::Rot3>
+        frame_from_other_frames_;  // map of relative rotation for this_frame_from_frame_[FrameId]
     std::optional<gtsam::Pose3> world_from_cam_groundtruth_;
     std::optional<gtsam::Point3> cam_in_world_initial_guess_;
     std::optional<gtsam::Rot3> world_from_cam_initial_guess_;
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index a42a819bd..1c527f5f2 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -248,10 +248,8 @@ void Frontend::match_frames_and_build_tracks() {
                 // also, at the moment I am not accounting for any covariance between the gps
                 // measurement and the unit translation vector from estimate_cam0_from_cam1
 
-                // can try averaging poses here as well
-                frames_[j].world_from_cam_initial_guess_.emplace(
-                    frames_[i].world_from_cam_initial_guess_->matrix() *
-                    scale_cam0_from_cam1->linear().matrix());
+                frames_[i].frame_from_other_frames_.emplace(
+                    j, gtsam::Rot3(scale_cam0_from_cam1->linear().matrix()));
                 for (const cv::DMatch match : matches) {
                     const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx];
                     const KeypointCV kpt_cam1 = frames_[j].keypoint()[match.trainIdx];
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 9ed6f6e5f..c0c39d2a8 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -51,7 +51,7 @@ StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extrac
 }
 
 void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
-    backend_.add_prior_factor(gtsam::Symbol(Backend::pose_symbol_char, 0), initial_pose,
+    backend_.add_prior_factor(gtsam::Symbol(Backend::symbol_char_pose, 0), initial_pose,
                               gtsam::noiseModel::Isotropic::Sigma(6, 0));
 }
 
@@ -71,8 +71,8 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
             compute_matches(img_keypoints_and_descriptors_.back().second,
                             keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
 
-        const gtsam::Symbol sym_T_w_c0(Backend::pose_symbol_char, idx_img_current - 1);
-        const gtsam::Symbol sym_T_w_c1(Backend::pose_symbol_char, idx_img_current);
+        const gtsam::Symbol sym_T_w_c0(Backend::symbol_char_pose, idx_img_current - 1);
+        const gtsam::Symbol sym_T_w_c1(Backend::symbol_char_pose, idx_img_current);
 
         backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(),
                                 T_world_cam.rotation());
@@ -91,7 +91,7 @@ void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_wo
                 chr = (*maybe_symbol0).chr();
             } else {
                 gtsam::Symbol symbol_temp =
-                    gtsam::Symbol(Backend::landmark_symbol_char, landmark_count_);
+                    gtsam::Symbol(Backend::symbol_char_landmark, landmark_count_);
                 idx = symbol_temp.index();
                 chr = symbol_temp.chr();
                 landmark_count_++;
@@ -142,7 +142,7 @@ void StructureFromMotion::graph_values(const gtsam::Values &values,
     std::vector<Eigen::Vector3d> final_lmks;
     for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) {
         final_poses.emplace_back(
-            values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().pose_symbol_char, i)).matrix());
+            values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().symbol_char_pose, i)).matrix());
     }
     // for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) {
     //     if (!values.exists(lmk_symbol)) {

From 7ef8d0046793413a8c6a24e163aa9dbcf67fb5a1 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 17 Jul 2025 17:01:26 -0400
Subject: [PATCH 34/45] WIP

---
 experimental/learn_descriptors/BUILD          |  10 +-
 experimental/learn_descriptors/backend.cc     | 112 +++++++++---------
 .../learn_descriptors/backend_test.cc         |   6 +-
 .../four_seasons_parser_example_viz.cc        |   9 +-
 experimental/learn_descriptors/frontend.cc    |   4 +-
 experimental/learn_descriptors/frontend.hh    |   2 +-
 .../image_point_four_seasons.hh               |  14 +++
 .../learn_descriptors/incremental_sfm_mvp.cc  |  47 ++++----
 .../learn_descriptors/refactor_sfm_mvp.cc     |  33 +++---
 .../learn_descriptors/spatial_test_scene.cc   | 105 ++++++++++++----
 .../learn_descriptors/spatial_test_scene.hh   |  72 +++++++++--
 .../spatial_test_scene_cube.hh                |  16 +--
 .../spatial_test_scene_cube_example.cc        |  48 +++++---
 visualization/opencv/opencv_viz.cc            |  13 +-
 visualization/opencv/opencv_viz.hh            |   4 +-
 visualization/opencv/opencv_viz_test.cc       |   4 +-
 16 files changed, 327 insertions(+), 172 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 06e1d6b5e..f6bd3d71d 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -122,6 +122,7 @@ cc_library(
         ":four_seasons_transforms",
         "//common/liegroups:se3",
         "//common/gps:frame_translation",
+        "//common:check",
         "@eigen",
         "@opencv",
         "@geographiclib",
@@ -310,6 +311,7 @@ cc_library(
         "@eigen",
         "@gtsam",
         "@opencv",
+        "//common:check"
     ],
 )
 
@@ -329,8 +331,9 @@ cc_binary(
     deps = [
         ":spatial_test_scene_cube",
         "//visualization/opencv:opencv_viz",
-        "@com_google_googletest//:gtest_main",
+        "//common:check",
         "@eigen",
+        "@gtsam"
     ],
 )
 
@@ -450,13 +453,14 @@ cc_binary(
         "-Wno-error=unused-function",
     ],
     deps = [
+        ":backend",
         ":four_seasons_parser",
         ":frame",
         ":frontend",
-        ":spatial_test_scene_cube",
-        ":structure_from_motion_types",
         ":image_point",
         ":image_point_four_seasons",
+        ":spatial_test_scene_cube",
+        ":structure_from_motion_types",
         "//common/geometry:camera",
         "//visualization/opencv:opencv_viz",
         "@boost//:smart_ptr",
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index 1b3559d0c..03cfd3622 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -1,12 +1,15 @@
 #include "experimental/learn_descriptors/backend.hh"
 
+#include <memory>
+#include <optional>
+#include <queue>
 #include <unordered_set>
 
 #include "boost/make_shared.hpp"
-#include "experimental/learn_descriptors/feature_manager.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
 #include "gtsam/geometry/triangulation.h"
+#include "gtsam/linear/NoiseModel.h"
 #include "gtsam/navigation/GPSFactor.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/slam/BetweenFactor.h"
@@ -63,84 +66,81 @@ gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3>& rotations, int max
 }
 
 void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
+    struct FrameVertex;
+    using SharedFrameVertex = std::shared_ptr<FrameVertex>;
     struct FrameVertex {
+        // std::unique_ptr<Frame> frame;
         Frame& frame;
+        std::optional<gtsam::Rot3> w_from_this;
         std::unordered_map<FrameId, gtsam::Rot3>
             neighbor_id_to_rot;  // rot is frame_from_neighbor_frame
+        std::unordered_set<SharedFrameVertex> parents;
+        std::unordered_set<SharedFrameVertex> children;
     };
-    std::unordered_map<FrameId, FrameVertex> frame_tree;
-    // populate all frames and add children to neighbors
+    std::unordered_map<FrameId, SharedFrameVertex> frame_tree;
     for (Frame& frame : frames) {
-        FrameVertex frame_vertex{frame};
-        for (const auto [other_frame_id, frame_from_other_frame] : frame.frame_from_other_frames_) {
-            frame_vertex.neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame);
-        }
-        frame_tree.emplace(frame.id_, frame_vertex);
+        frame_tree.emplace(frame.id_, std::make_shared<FrameVertex>(frame));
     }
-    // add parents to neighbors
-    for (const Frame& frame : frames) {
-        for (const) }
+    // populate all frames and add children to neighbors
+    std::queue<size_t> q;  // indices of Frame in frames
+    std::unordered_set<FrameId> seen_frame_id;
+    q.push(frames.front().id_);
+
     gtsam::NonlinearFactorGraph graph;
     gtsam::Values initial;
     // TODO: somehow get more informative/more robust noise values
     // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength
-    gtsam::noiseModel::Diagonal::shared_ptr noise =
+    gtsam::noiseModel::Diagonal::shared_ptr noise_rotation =
         gtsam::noiseModel::Isotropic::Sigma(3, 0.1);  // radians
 
-    std::unordered_set<FrameId> frames_seen;
-    for (size_t i = 0; i < frames.size(); i++) {
-        const Frame& frame = frames[i];
-        if (i == 0) {
-            initial.insert(gtsam::Symbol(symbol_char_rotation, i),
-                           frame.world_from_cam_initial_guess_
-                               ? *frame.world_from_cam_initial_guess_
-                               : gtsam::Rot3::Identity());
+    std::vector<gtsam::Symbol> symbols_frames;
+    while (!q.empty()) {
+        Frame frame = frames[q.front()];
+        q.pop();
+        seen_frame_id.insert(frame.id_);
+        SharedFrameVertex frame_vertex = frame_tree.at(frame.id_);
+        if (seen_frame_id.size() == 1) {  // make the first frame rotation identity
+            frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_
+                                            ? *frame_vertex->frame.world_from_cam_initial_guess_
+                                            : gtsam::Rot3::Identity();
         }
-        std::vector<gtsam::Rot3> candidates_world_from_frame;
+        frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this;
+        const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_);
+        symbols_frames.push_back(symbol_frame);
+        initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_);
         for (const auto& [other_frame_id, frame_from_other_frame] :
              frame.frame_from_other_frames_) {
-            if (frames_seen.find(other_frame_id) == frames_seen.end()) {
-                candidates_world_from_frame.push_back(frame_from_other_frame);
+            if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) {
+                q.push(other_frame_id);
+                seen_frame_id.insert(other_frame_id);
+            }
+            frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame);
+            SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id);
+            frame_vertex->children.insert(other_frame_vertex);
+            other_frame_vertex->parents.insert(frame_vertex);
+            other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_,
+                                                           frame_from_other_frame.inverse());
+            if (!other_frame_vertex->w_from_this) {
+                other_frame_vertex->w_from_this =
+                    *frame_vertex->w_from_this * frame_from_other_frame;
             }
-        }
-        initial.insert(gtsam::Symbol(symbol_char_rotation, frame.id_),
-                       average_rotations(candidates_world_from_frame));
-        frames_seen.insert(frame.id_);
-    }
-
-    // Assume we have relative rotations Rij between node i and j
-    std::vector<std::pair<size_t, size_t>> edges = {{0, 1}, {1, 2}, {2, 3}, {3, 4}};
-    std::vector<gtsam::Rot3> relativeRotations = {
-        Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.25, 0.0, 0.0), Rot3::RzRyRx(0.3, 0.0, 0.0),
-        Rot3::RzRyRx(0.2, 0.0, 0.0)};
-
-    // Add BetweenFactors for relative rotations
-    for (size_t k = 0; k < edges.size(); ++k) {
-        size_t i = edges[k].first;
-        size_t j = edges[k].second;
-        graph.emplace_shared<gtsam::BetweenFactor<gtsam::Rot3>>(
-            gtsam::Symbol(Backend::symbol_char_rotation, i),
-            gtsam::Symbol(Backend::symbol_char_rotation, j), relativeRotations[k], noise);
-    }
-
-    // Fix the first rotation
-    initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, 0), gtsam::Rot3::Identity());
 
-    // Initialize other rotations naively
-    for (size_t i = 1; i <= 4; ++i) {
-        initial.insert(gtsam::Symbol(Backend::symbol_char_rotation, i),
-                       gtsam::Rot3::RzRyRx(0.0, 0.0, 0.0));
+            graph.add(gtsam::BetweenFactor<gtsam::Rot3>(
+                symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id),
+                frame_from_other_frame.inverse(), noise_rotation));
+        }
+        frame_tree.emplace(frame.id_, frame_vertex);
     }
 
-    // Optimize
     gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
     gtsam::Values result = optimizer.optimize();
 
-    // Output results
-    for (size_t i = 0; i <= 4; ++i) {
-        gtsam::Rot3 Ri = result.at<gtsam::Rot3>(gtsam::Symbol(Backend::symbol_char_rotation, i));
-        std::cout << "Rotation " << i << ": " << Ri.rpy().transpose() << " (roll pitch yaw radians)"
-                  << std::endl;
+    // Output results and assign results to frames
+    for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
+        gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
+        frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame;
+        // std::cout << "Rotation " << symbol_rotation.string() << ": "
+        //           << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" << std::endl;
     }
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc
index d1cbec44e..48fe2c450 100644
--- a/experimental/learn_descriptors/backend_test.cc
+++ b/experimental/learn_descriptors/backend_test.cc
@@ -24,7 +24,7 @@ TEST(BackendTest, cube) {
     // Backend backend(K);
     // test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K);
 
-    // std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.get_cameras();
+    // std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.cameras()();
     // std::vector<Eigen::Isometry3d> camera_poses;
     // camera_poses.reserve(cameras.size());
     // for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &T_world_cam : cameras) {
@@ -49,6 +49,8 @@ TEST(BackendTest, cube) {
     // }
     // }
 
-    // geometry::viz_scene(camera_poses, test_scene.get_points());
+    // geometry::viz_scene(camera_poses, test_scene.points());
 }
+
+TEST(BackendTest, test_rotation_estimation) {}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index d9a6b89f1..3b85aa540 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -82,8 +82,9 @@ int main(int argc, const char** argv) {
 
     ROBOT_CHECK(parser.num_images() != 0);
 
-    std::vector<robot::geometry::VizPose> viz_frames;   // camera frames from visual world frame
-    std::vector<robot::geometry::VizPoint> viz_points;  // camera frames from visual world frame
+    std::vector<robot::visualization::VizPose> viz_frames;  // camera frames from visual world frame
+    std::vector<robot::visualization::VizPoint>
+        viz_points;  // camera frames from visual world frame
 
     Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity();
     std::cout << "gnss scale: " << parser.gnss_scale() << std::endl;
@@ -192,6 +193,6 @@ int main(int argc, const char** argv) {
     std::cout << "max delta: " << max_delta << std::endl;
     std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl;
     std::cout << "got " << viz_points.size() << " points" << std::endl;
-    robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true,
-                               "Viz Trajectory + GPS Points");
+    robot::visualization::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true,
+                                    "Viz Trajectory + GPS Points");
 }
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 1c527f5f2..4b1376dd5 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -241,8 +241,8 @@ void Frontend::match_frames_and_build_tracks() {
                 if (!scale_cam0_from_cam1) {
                     continue;
                 }
-                ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_,
-                            "This rotation should be populated.");
+                // ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_,
+                //             "This rotation should be populated.");
                 // this could use some work to verify the quality of the output, particularly inside
                 // of estiamte_cam0_from_cam1
                 // also, at the moment I am not accounting for any covariance between the gps
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index a9e1a01c2..352330e3e 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -44,7 +44,7 @@ class Frontend {
     const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; };
     const FeatureTracks &feature_tracks() const { return feature_tracks_; };
     const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; };
-    const std::vector<Frame> &frames() const { return frames_; };
+    std::vector<Frame> &frames() { return frames_; };
 
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> extract_features(const cv::Mat &img) const;
     std::vector<cv::DMatch> compute_matches(const cv::Mat &descriptors1,
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
index a0c337c56..645a2571a 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.hh
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -9,6 +9,7 @@
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "common/check.hh"
 #include "common/liegroups/se3.hh"
 #include "experimental/learn_descriptors/four_seasons_transforms.hh"
 #include "experimental/learn_descriptors/gps_data.hh"
@@ -29,6 +30,19 @@ struct ImagePointFourSeasons : ImagePoint {
     std::optional<GPSData> gps_gcs;  // raw gps measurement in gcs (global cordinate system)
     std::shared_ptr<FourSeasonsTransforms::StaticTransforms> shared_static_transforms;
 
+    std::optional<Eigen::Vector3d> velocity_to(const ImagePointFourSeasons& other_img_pt) const {
+        ROBOT_CHECK(other_img_pt.seq >= seq);
+        if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt;
+        double dt_seconds = static_cast<double>(other_img_pt.seq - seq) * 1e-9;
+        return (*other_img_pt.cam_in_world() - *cam_in_world()) / dt_seconds;
+    };
+    std::optional<Eigen::Vector3d> velocity_from(const ImagePointFourSeasons& other_img_pt) const {
+        ROBOT_CHECK(seq >= other_img_pt.seq);
+        if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt;
+        double dt_seconds = static_cast<double>(seq - other_img_pt.seq) * 1e-9;
+        return (*cam_in_world() - *other_img_pt.cam_in_world()) / dt_seconds;
+    };
+
     std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const override;
     std::optional<Eigen::Vector3d> cam_in_world() const override;
     std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const override;
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index bafd667b5..b3b8a17e6 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -140,8 +140,8 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
 void graph_values(const gtsam::Values &values, const std::string &window_name,
                   const std::vector<gtsam::Symbol> &symbols_pose,
                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<robot::geometry::VizPose> final_poses;
-    std::vector<robot::geometry::VizPoint> final_lmks;
+    std::vector<robot::visualization::VizPose> final_poses;
+    std::vector<robot::visualization::VizPoint> final_lmks;
     for (const gtsam::Symbol &symbol_pose : symbols_pose) {
         final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
                                  symbol_pose.string());
@@ -153,8 +153,8 @@ void graph_values(const gtsam::Values &values, const std::string &window_name,
         final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
     }
     std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
-    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
-                               window_name);
+    robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
+                                    window_name);
 }
 
 gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
@@ -338,7 +338,7 @@ int main(int argc, const char **argv) {
         id_to_initial_world_from_cam;      // these are for initial guesses!!
     Eigen::Vector3d t_first_cam_in_world;  // to create our own world
     std::vector<Eigen::Isometry3d> references_world_from_cam;
-    std::vector<robot::geometry::VizPose> viz_references_world_from_cam;
+    std::vector<robot::visualization::VizPose> viz_references_world_from_cam;
     Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
     scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
         parser.gnss_scale();
@@ -420,9 +420,9 @@ int main(int argc, const char **argv) {
         id++;
     }
     if (visualize)
-        robot::geometry::viz_scene(viz_references_world_from_cam,
-                                   std::vector<robot::geometry::VizPoint>(),
-                                   cv::viz::Color::brown(), true, true, "References");
+        robot::visualization::viz_scene(viz_references_world_from_cam,
+                                        std::vector<robot::visualization::VizPoint>(),
+                                        cv::viz::Color::brown(), true, true, "References");
 
     // populate feature_tracks and lmk_id_map
     // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute
@@ -514,7 +514,7 @@ int main(int argc, const char **argv) {
             }
         }
     }
-    std::vector<robot::geometry::VizPose> viz_world_from_cam_init;
+    std::vector<robot::visualization::VizPose> viz_world_from_cam_init;
     for (const auto &[id, world_from_cam] : id_to_initial_world_from_cam) {
         viz_world_from_cam_init.emplace_back(Eigen::Isometry3d(world_from_cam.matrix()),
                                              std::to_string(id));
@@ -522,14 +522,14 @@ int main(int argc, const char **argv) {
 
     // std::cout << "pre heartbeat" << std::endl;
     if (visualize)
-        robot::geometry::viz_scene(viz_world_from_cam_init,
-                                   std::vector<robot::geometry::VizPoint>(),
-                                   cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
+        robot::visualization::viz_scene(
+            viz_world_from_cam_init, std::vector<robot::visualization::VizPoint>(),
+            cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
     // std::cout << "heartbeat" << std::endl;
 
     // TRIANGULATE all of the points (for initial guess)
     std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;  // points are kpt_in_world
-    std::vector<robot::geometry::VizPoint> triangulated_lmks_viz;
+    std::vector<robot::visualization::VizPoint> triangulated_lmks_viz;
     std::vector<gtsam::Point3> triangulated_lmks;
     for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
         std::vector<gtsam::Pose3> world_from_cams;
@@ -556,9 +556,9 @@ int main(int argc, const char **argv) {
     }
     std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl;
     if (visualize)
-        robot::geometry::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
-                                   cv::viz::Color::brown(), true, true,
-                                   "Unfiltered, triangulated points");
+        robot::visualization::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
+                                        cv::viz::Color::brown(), true, true,
+                                        "Unfiltered, triangulated points");
 
     // filter points via variance
     // TODO: filter based on quality and/or quantity of matches
@@ -580,9 +580,9 @@ int main(int argc, const char **argv) {
     std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl;
     std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl;
     if (visualize)
-        robot::geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
-                                   cv::viz::Color::brown(), true, true,
-                                   "Filtered, triangulated points");
+        robot::visualization::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
+                                        cv::viz::Color::brown(), true, true,
+                                        "Filtered, triangulated points");
 
     // ############# BACKEND ###############
 
@@ -669,10 +669,10 @@ int main(int argc, const char **argv) {
             std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
                                                       id_to_initial_world_from_cam.at(i + 1)};
 
-            std::vector<robot::geometry::VizPose> viz_world_from_cams{
+            std::vector<robot::visualization::VizPose> viz_world_from_cams{
                 {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"},
                 {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}};
-            std::vector<robot::geometry::VizPoint> viz_lmks;
+            std::vector<robot::visualization::VizPoint> viz_lmks;
             for (const cv::DMatch match : matches) {
                 std::vector<gtsam::Point2> feat_kpts;
                 const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
@@ -735,8 +735,9 @@ int main(int argc, const char **argv) {
             }
             std::cout << "setup complete!" << std::endl;
             if (visualize)
-                robot::geometry::viz_scene(viz_world_from_cams, viz_lmks, cv::viz::Color::brown(),
-                                           true, true, "Local Optimization " + std::to_string(i));
+                robot::visualization::viz_scene(viz_world_from_cams, viz_lmks,
+                                                cv::viz::Color::brown(), true, true,
+                                                "Local Optimization " + std::to_string(i));
 
             const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
                 local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index c2270e69a..4071faac6 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -12,6 +12,7 @@
 #include "Eigen/Geometry"
 #include "common/geometry/camera.hh"
 #include "cxxopts.hpp"
+#include "experimental/learn_descriptors/backend.hh"
 #include "experimental/learn_descriptors/camera_calibration.hh"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 #include "experimental/learn_descriptors/frame.hh"
@@ -102,8 +103,8 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
 void graph_values(const gtsam::Values &values, const std::string &window_name,
                   const std::vector<gtsam::Symbol> &symbols_pose,
                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<robot::geometry::VizPose> final_poses;
-    std::vector<robot::geometry::VizPoint> final_lmks;
+    std::vector<robot::visualization::VizPose> final_poses;
+    std::vector<robot::visualization::VizPoint> final_lmks;
     for (const gtsam::Symbol &symbol_pose : symbols_pose) {
         final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
                                  symbol_pose.string());
@@ -115,8 +116,8 @@ void graph_values(const gtsam::Values &values, const std::string &window_name,
         final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
     }
     std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
-    robot::geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
-                               window_name);
+    robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
+                                    window_name);
 }
 
 gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
@@ -204,7 +205,7 @@ int main(int argc, const char **argv) {
                           true, false};
     Frontend frontend(params);
 
-    for (size_t i = 660; i < 750; i += 5) {
+    for (size_t i = 581; i < 750; i += 5) {
         // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl;
         frontend.add_image(
             ImageAndPoint{parser.load_image(i),
@@ -218,7 +219,7 @@ int main(int argc, const char **argv) {
     std::cout << "ground truth translation: "
               << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl;
 
-    std::vector<robot::geometry::VizPose> viz_poses_init;
+    std::vector<robot::visualization::VizPose> viz_poses_init;
     std::optional<Eigen::Vector3d> first_point;
     std::optional<Eigen::Isometry3d> first_grnd_trth;
     (void)first_point;
@@ -242,8 +243,9 @@ int main(int argc, const char **argv) {
         }
     }
     std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl;
-    robot::geometry::viz_scene(viz_poses_init, std::vector<robot::geometry::VizPoint>(),
-                               cv::viz::Color::brown(), true, true, "Frontend initial guesses");
+    robot::visualization::viz_scene(viz_poses_init, std::vector<robot::visualization::VizPoint>(),
+                                    cv::viz::Color::brown(), true, true,
+                                    "Frontend initial guesses");
 
     // ############# BACKEND ###############
     gtsam::Values initial_estimate_;
@@ -262,14 +264,16 @@ int main(int argc, const char **argv) {
         gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas_fallback);
 
     // add gps factors
-    const std::vector<Frame> &frames = frontend.frames();
+    std::vector<Frame> &frames = frontend.frames();
+    frames[0].world_from_cam_initial_guess_ =
+        frames[0].world_from_cam_groundtruth_->rotation();  // make sure first idx has grnd trth
+    Backend::populate_rotation_estimate(frames);
     std::vector<gtsam::Symbol> symbols_pose;
     std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_guess;
     std::optional<gtsam::Point3> cam0_in_w;
     for (const Frame &frame : frames) {
         if (!frame.cam_in_world_initial_guess_) continue;
         if (!cam0_in_w) cam0_in_w = *frame.cam_in_world_initial_guess_;
-        // should do some interpolation here most likely
         const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_
                                                        ? *frame.world_from_cam_initial_guess_
                                                        : gtsam::Rot3::Identity(),
@@ -286,12 +290,13 @@ int main(int argc, const char **argv) {
         initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
         symbols_pose.push_back(cam_symbol);
     }
-    std::vector<robot::geometry::VizPose> viz_pose;
+    std::vector<robot::visualization::VizPose> viz_pose;
     for (const auto &[id, pose] : world_from_cam_initial_guess) {
         viz_pose.emplace_back(Eigen::Isometry3d(pose.matrix()), "x_" + std::to_string(id));
     }
-    robot::geometry::viz_scene(viz_pose, std::vector<robot::geometry::VizPoint>(),
-                               cv::viz::Color::brown(), true, true, "Initial guesses in backend");
+    robot::visualization::viz_scene(viz_pose, std::vector<robot::visualization::VizPoint>(),
+                                    cv::viz::Color::brown(), true, true,
+                                    "Initial guesses in backend");
 
     std::cout << "heartbeat pre lmks in graph" << std::endl;
 
@@ -324,7 +329,7 @@ int main(int argc, const char **argv) {
 
     // do global optimization
     const gtsam::Values result =
-        optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 0);
+        optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 10);
 
     // // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
     // double sum_traj_error = 0;
diff --git a/experimental/learn_descriptors/spatial_test_scene.cc b/experimental/learn_descriptors/spatial_test_scene.cc
index 9d17d1d20..93168e118 100644
--- a/experimental/learn_descriptors/spatial_test_scene.cc
+++ b/experimental/learn_descriptors/spatial_test_scene.cc
@@ -2,6 +2,24 @@
 
 #include <random>
 
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "opencv2/opencv.hpp"
+
+Eigen::Isometry3d isometry_from_vector(const Eigen::VectorXd &pose_vector) {
+    ROBOT_CHECK(pose_vector.size() == 6);
+    Eigen::Isometry3d T = Eigen::Isometry3d::Identity();
+    if (pose_vector.tail<3>().norm() > 1e-6) {
+        T.linear() = Eigen::AngleAxisd(pose_vector.tail<3>().norm(), pose_vector.tail<3>())
+                         .toRotationMatrix();
+    }
+    T.translation() = pose_vector.head<3>();
+    return T;
+}
+
 namespace robot::experimental::learn_descriptors {
 template <typename T>
 std::vector<SpatialTestScene::ProjectedPoint> SpatialTestScene::get_projected_pixels(
@@ -38,22 +56,58 @@ SpatialTestScene::get_corresponding_pixels(
     return corresponding_pixels;
 };
 
-void SpatialTestScene::add_point(Eigen::Vector3d point) { points_.push_back(point); };
+void SpatialTestScene::add_point(Eigen::Vector3d point, std::optional<Noise> point_noise) {
+    points_groundtruth_.push_back(point);
+    if (point_noise) {
+        ROBOT_CHECK(point_noise->dim() == 3);
+        points_.push_back(point + point_noise->sample());
+    } else {
+        points_.push_back(point);
+    }
+};
 
-void SpatialTestScene::add_points(std::vector<Eigen::Vector3d> points) {
-    points_.insert(points_.end(), points.begin(), points.end());
+void SpatialTestScene::add_points(std::vector<Eigen::Vector3d> points,
+                                  std::optional<Noise> point_noise) {
+    for (const Eigen::Vector3d &point : points) {
+        points_groundtruth_.push_back(point);
+        if (point_noise) {
+            ROBOT_CHECK(point_noise->dim() == 3);
+            points_.push_back(point + point_noise->sample());
+        } else {
+            points_.push_back(point);
+        }
+    }
 }
 
-void SpatialTestScene::add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera) {
-    cameras_.push_back(camera);
+void SpatialTestScene::add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera,
+                                  std::optional<Noise> pose_noise) {
+    cameras_groundtruth_.push_back(camera);
+    if (pose_noise) {
+        cameras_.push_back(gtsam::PinholeCamera<gtsam::Cal3_S2>(
+            camera.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()),
+            camera.calibration()));
+    } else {
+        cameras_.push_back(camera);
+    }
 }
 
-void SpatialTestScene::add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras) {
-    cameras_.insert(cameras_.end(), cameras.begin(), cameras.end());
+void SpatialTestScene::add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras,
+                                   std::optional<Noise> pose_noise) {
+    for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &cam : cameras) {
+        cameras_groundtruth_.push_back(cam);
+        if (pose_noise) {
+            cameras_.push_back(gtsam::PinholeCamera<gtsam::Cal3_S2>(
+                cam.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()),
+                cam.calibration()));
+        } else {
+            cameras_.push_back(cam);
+        }
+    }
 }
 
 void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_radius_origin,
                                                     double max_radius_origin,
+                                                    std::optional<Noise> pose_noise,
                                                     const gtsam::Cal3_S2 &K) {
     std::random_device rd;
     std::mt19937 gen(rd());
@@ -61,22 +115,33 @@ void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_
     std::uniform_real_distribution<double> dist_radius(min_radius_origin, max_radius_origin);
     std::uniform_real_distribution<double> dist_omega(0.0, 2 * M_PI);
 
+    const Eigen::Isometry3d world_from_cam_default =
+        Eigen::Translation3d(1.0, 0, 0) * Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) *
+        Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ());
     for (int i = 0; i < num_cameras; i++) {
-        double angle_z = dist_radius(gen);
-        Eigen::Isometry3d R_z;
-        R_z.linear() = Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix();
-        double angle_x = dist_radius(gen);
-        Eigen::Isometry3d R_y;
-        R_y.linear() = Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix();
+        double angle_z = dist_omega(gen);
+        Eigen::Isometry3d longitude_from_world = Eigen::Isometry3d::Identity();
+        longitude_from_world.linear() =
+            Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix();
+        double angle_x = dist_omega(gen);
+        Eigen::Isometry3d latitude_from_world = Eigen::Isometry3d::Identity();
+        latitude_from_world.linear() =
+            Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix();
 
         double radius = dist_radius(gen);
-        Eigen::Isometry3d T_world_cam;
-        T_world_cam.translation() = Eigen::Vector3d(radius, 0, 0);
-        T_world_cam.linear() = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) *
-                                Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ()))
-                                   .matrix();
-        T_world_cam = R_z * R_y * T_world_cam;
-        cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(T_world_cam.matrix()), K));
+        Eigen::Isometry3d world_from_cam(world_from_cam_default);
+        world_from_cam.translation() *= radius;
+        world_from_cam = longitude_from_world * latitude_from_world * world_from_cam;
+        cameras_groundtruth_.push_back(
+            gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K));
+        if (pose_noise) {
+            cameras_.push_back(gtsam::PinholeCamera(
+                gtsam::Pose3(
+                    (world_from_cam * isometry_from_vector(pose_noise->sample())).matrix()),
+                K));
+        } else {
+            cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K));
+        }
     }
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_test_scene.hh b/experimental/learn_descriptors/spatial_test_scene.hh
index 49d033180..4f542223a 100644
--- a/experimental/learn_descriptors/spatial_test_scene.hh
+++ b/experimental/learn_descriptors/spatial_test_scene.hh
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <optional>
+#include <random>
 #include <unordered_map>
 #include <utility>
 #include <vector>
@@ -9,15 +11,58 @@
 #include "gtsam/geometry/PinholeCamera.h"
 #include "opencv2/opencv.hpp"
 
+namespace robot::experimental::learn_descriptors {
 class ProjectionHelper {
    public:
+    static constexpr size_t img_width_default = 640;
+    static constexpr size_t img_height_default = 480;
+    static constexpr double fx_default = 500.0;
+    static constexpr double fy_default = fx_default;
+    static constexpr double cx_default = img_width_default / 2.0;
+    static constexpr double cy_default = img_height_default / 2.0;
+    static const gtsam::Cal3_S2::shared_ptr &K_default() {
+        static gtsam::Cal3_S2::shared_ptr K(
+            new gtsam::Cal3_S2(fx_default, fy_default, 0.0, cx_default, cy_default));
+        return K;
+    }
     static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
         return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height;
     }
 };
-namespace robot::experimental::learn_descriptors {
 class SpatialTestScene {
    public:
+    struct Noise {
+        Noise(const std::vector<double> &sigmas, std::optional<unsigned int> seed = std::nullopt)
+            : sigmas_(sigmas) {
+            if (seed) {
+                gen_.seed(*seed);
+            } else {
+                std::random_device rd;
+                gen_.seed(rd());
+            }
+
+            for (double sigma : sigmas_) {
+                dists_.emplace_back(0.0, sigma);
+            }
+        }
+
+        static Noise constrained(size_t dim = 6) { return Noise(std::vector<double>(dim, 0.0)); }
+
+        Eigen::VectorXd sample() {
+            Eigen::VectorXd result(sigmas_.size());
+            for (size_t i = 0; i < sigmas_.size(); ++i) {
+                result(i) = dists_[i](gen_);
+            }
+            return result;
+        }
+
+        size_t dim() const { return sigmas_.size(); }
+
+       private:
+        std::vector<double> sigmas_;
+        std::vector<std::normal_distribution<double>> dists_;
+        std::mt19937 gen_;
+    };
     struct ProjectedPoint {
         ProjectedPoint(const size_t pt_idx, const Eigen::Vector2d &pixel)
             : pt_idx(pt_idx), pixel(pixel){};
@@ -34,18 +79,29 @@ class SpatialTestScene {
         const std::vector<std::pair<gtsam::PinholeCamera<T>, cv::Size>>
             &cam_and_img_size);  // make this parameter of size 2
 
-    void add_point(Eigen::Vector3d point);
-    void add_points(std::vector<Eigen::Vector3d> points);
-    void add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera);
-    void add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras);
+    void add_point(Eigen::Vector3d point, std::optional<Noise> point_noise = std::nullopt);
+    void add_points(std::vector<Eigen::Vector3d> points,
+                    std::optional<Noise> point_noise = std::nullopt);
+    void add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera,
+                    std::optional<Noise> pose_noise = std::nullopt);
+    void add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras,
+                     std::optional<Noise> pose_noise = std::nullopt);
     void add_rand_cameras_face_origin(int num_cameras, double min_radius_origin,
-                                      double max_radius_origin, const gtsam::Cal3_S2 &K);
+                                      double max_radius_origin,
+                                      std::optional<Noise> pose_noise = std::nullopt,
+                                      const gtsam::Cal3_S2 &K = *ProjectionHelper::K_default());
 
-    std::vector<Eigen::Vector3d> &get_points() { return points_; };
-    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> &get_cameras() { return cameras_; };
+    const std::vector<Eigen::Vector3d> &points() { return points_; };
+    const std::vector<Eigen::Vector3d> &points_groundtruth() { return points_groundtruth_; };
+    const std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> &cameras() { return cameras_; };
+    const std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> &cameras_groundtruth() {
+        return cameras_groundtruth_;
+    };
 
    protected:
     std::vector<Eigen::Vector3d> points_;
+    std::vector<Eigen::Vector3d> points_groundtruth_;
     std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_groundtruth_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh
index 306afae4b..1dd08b190 100644
--- a/experimental/learn_descriptors/spatial_test_scene_cube.hh
+++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh
@@ -8,14 +8,14 @@ namespace robot::experimental::learn_descriptors {
 class SpatialTestSceneCube : public SpatialTestScene {
    public:
     SpatialTestSceneCube(const float cube_size) : SpatialTestScene() {
-        points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2));
-        points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2));
-        points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2));
-        points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2));
-        points_.push_back(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2));
-        points_.push_back(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2));
-        points_.push_back(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2));
-        points_.push_back(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2));
     };
 };
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
index c4f56d736..5ba850db7 100644
--- a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
+++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
@@ -1,36 +1,46 @@
+#include <string>
 #include <vector>
 
 #include "Eigen/Dense"
+#include "common/check.hh"
 #include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
-#include "gtest/gtest.h"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
 #include "visualization/opencv/opencv_viz.hh"
 
-namespace robot::experimental::learn_descriptors {
-TEST(SpatialTestSceneCubeTest, viz_cube) {
-    SpatialTestSceneCube test_scene(1.0f);
-    // geometry::viz_scene(std::vector<Eigen::Isometry3d>(), test_scene.get_points());
-}
+using namespace robot::experimental::learn_descriptors;
 
-TEST(SpatialTestSceneCubeTest, viz_cube_with_cameras) {
+int main() {
     SpatialTestSceneCube test_scene(1.0f);
 
     const size_t img_width = 640;
     const size_t img_height = 480;
     const double fx = 500.0;
     const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
+    const double cx = static_cast<double>(img_width) / 2.0;
+    const double cy = static_cast<double>(img_height) / 2.0;
     gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
 
-    test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K);
+    SpatialTestScene::Noise pose_noise({0.05, 0.05, 0.05, 0.15, 0.15, 0.15});
+    test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, pose_noise, *K);
 
-    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.get_cameras();
-    std::vector<Eigen::Isometry3d> camera_poses;
-    camera_poses.reserve(cameras.size());
-    for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &T_world_cam : cameras) {
-        camera_poses.emplace_back(T_world_cam.pose().matrix());
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.cameras();
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_grndtrth =
+        test_scene.cameras_groundtruth();
+    ROBOT_CHECK(cameras.size() == cameras_grndtrth.size());
+    std::vector<robot::visualization::VizPose> viz_cam_poses;
+    viz_cam_poses.reserve(cameras.size() + cameras_grndtrth.size());
+    for (size_t i = 0; i < cameras.size(); i++) {
+        viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras[i].pose().matrix()),
+                                   "cam_" + std::to_string(i));
+        viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras_grndtrth[i].pose().matrix()),
+                                   "cam_grnd_" + std::to_string(i));
     }
-
-    geometry::viz_scene(camera_poses, test_scene.get_points());
-}
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
+    ROBOT_CHECK(test_scene.points().size() == test_scene.points_groundtruth().size());
+    std::vector<robot::visualization::VizPoint> viz_points;
+    for (size_t i = 0; i < test_scene.points().size(); i++) {
+        viz_points.emplace_back(test_scene.points()[i], "pt_" + std::to_string(i));
+        viz_points.emplace_back(test_scene.points_groundtruth()[i], "pt_grnd_" + std::to_string(i));
+    }
+    robot::visualization::viz_scene(viz_cam_poses, viz_points);
+}
\ No newline at end of file
diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc
index d750555ed..06c46da1f 100644
--- a/visualization/opencv/opencv_viz.cc
+++ b/visualization/opencv/opencv_viz.cc
@@ -2,10 +2,13 @@
 
 #include <iostream>
 
+#include "Eigen/Core"
+#include "Eigen/Geometry"
 #include "common/geometry/translate_types.hh"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry {
+using namespace robot::geometry;
+namespace robot::visualization {
 cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     // Ensure R is a valid rotation matrix
     CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001);
@@ -107,12 +110,6 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
                                          cv::Point3d(0, 0, 1), cells, cell_sizes, color));
     }
 
-    // std::cout << "viz heartbeat" << std::endl;
-    // window.setViewerPose(cv::Affine3d(
-    //     eigen_mat_to_cv(Eigen::Matrix4d(world_from_poses[0].world_from_pose.matrix()))));
-    // window.setViewerPose(cv::Affine3d::Identity());
-    // std::cout << "success" << std::endl;
-
     window.spin();
 }
-}  // namespace robot::geometry
+}  // namespace robot::visualization
diff --git a/visualization/opencv/opencv_viz.hh b/visualization/opencv/opencv_viz.hh
index 28dbc197b..5f5956b43 100644
--- a/visualization/opencv/opencv_viz.hh
+++ b/visualization/opencv/opencv_viz.hh
@@ -7,7 +7,7 @@
 #include "Eigen/Geometry"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry {
+namespace robot::visualization {
 struct VizPose {
     const Eigen::Isometry3d world_from_pose;
     const std::optional<std::string> label;
@@ -27,4 +27,4 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
                const cv::viz::Color color_background = cv::viz::Color::black(),
                const bool show_grid = true, const bool show_origin = true,
                const std::string &window_name = "Viz Window", const double text_scale = 0.1);
-}  // namespace robot::geometry
\ No newline at end of file
+}  // namespace robot::visualization
\ No newline at end of file
diff --git a/visualization/opencv/opencv_viz_test.cc b/visualization/opencv/opencv_viz_test.cc
index 502f8e4f4..832ce2878 100644
--- a/visualization/opencv/opencv_viz_test.cc
+++ b/visualization/opencv/opencv_viz_test.cc
@@ -3,7 +3,7 @@
 #include "gtest/gtest.h"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry {
+namespace robot::visualization {
 
 TEST(OpencvVizTest, demo) {
     cv::viz::Viz3d window("My Window");
@@ -111,4 +111,4 @@ TEST(OpencvVizTest, cube_test_labeled) {
 
     viz_scene(world_from_cams, cube_points_in_world);
 }
-}  // namespace robot::geometry
\ No newline at end of file
+}  // namespace robot::visualization
\ No newline at end of file

From 3a2ba892aa81d12e75596d5192378e19b7dc9e77 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Fri, 18 Jul 2025 18:08:15 -0400
Subject: [PATCH 35/45] added tranlsation interpolation. a handful of initial
 rotations are erroneous. final optimization isn't quite there yet.

---
 experimental/learn_descriptors/BUILD          |   5 +
 experimental/learn_descriptors/backend.cc     |  27 +++--
 .../learn_descriptors/camera_calibration.hh   |   3 +
 experimental/learn_descriptors/frame.hh       |  29 ++++-
 experimental/learn_descriptors/frontend.cc    |  59 +++++++++-
 experimental/learn_descriptors/frontend.hh    |   5 +-
 .../learn_descriptors/frontend_test.cc        | 103 ++++++++++++++++--
 experimental/learn_descriptors/image_point.hh |   7 +-
 .../image_point_four_seasons.hh               |   4 +-
 .../learn_descriptors/refactor_sfm_mvp.cc     |  66 ++++++-----
 10 files changed, 249 insertions(+), 59 deletions(-)

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index f6bd3d71d..140d492d5 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -242,8 +242,10 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":frontend_definitions",
+        "//common:check",
         "@gtsam",
         "@opencv",
+        "@eigen"
     ],
 )
 
@@ -363,7 +365,10 @@ cc_test(
     srcs = ["frontend_test.cc"],
     deps = [
         ":frontend",
+        ":image_point",
         "@com_google_googletest//:gtest_main",
+        "@opencv",
+        "@eigen"
     ],
 )
 
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index 03cfd3622..4dbdbd8d1 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -103,6 +103,9 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
             frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_
                                             ? *frame_vertex->frame.world_from_cam_initial_guess_
                                             : gtsam::Rot3::Identity();
+            graph.add(gtsam::PriorFactor<gtsam::Rot3>(
+                gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_),
+                *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3)));
         }
         frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this;
         const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_);
@@ -123,6 +126,8 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
             if (!other_frame_vertex->w_from_this) {
                 other_frame_vertex->w_from_this =
                     *frame_vertex->w_from_this * frame_from_other_frame;
+                other_frame_vertex->frame.world_from_cam_initial_guess_ =
+                    *frame_vertex->w_from_this * frame_from_other_frame;
             }
 
             graph.add(gtsam::BetweenFactor<gtsam::Rot3>(
@@ -132,15 +137,17 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
         frame_tree.emplace(frame.id_, frame_vertex);
     }
 
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
-    gtsam::Values result = optimizer.optimize();
-
-    // Output results and assign results to frames
-    for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
-        gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
-        frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame;
-        // std::cout << "Rotation " << symbol_rotation.string() << ": "
-        //           << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" << std::endl;
-    }
+    // gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    // gtsam::Values result = optimizer.optimize();
+
+    // // Output results and assign results to frames
+    // for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
+    //     gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
+    //     frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ =
+    //     w_from_frame;
+    //     // std::cout << "Rotation " << symbol_rotation.string() << ": "
+    //     //           << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" <<
+    //     std::endl;
+    // }
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh
index 1e985acad..610dd2d8d 100644
--- a/experimental/learn_descriptors/camera_calibration.hh
+++ b/experimental/learn_descriptors/camera_calibration.hh
@@ -4,6 +4,9 @@
 
 namespace robot::experimental::learn_descriptors {
 struct CameraCalibrationFisheye {
+    CameraCalibrationFisheye(double fx, double fy, double cx, double cy, double k1, double k2,
+                             double k3, double k4)
+        : fx(fx), fy(fy), cx(cx), cy(cy), k1(k1), k2(k2), k3(k3), k4(k4) {}
     double fx, fy, cx, cy, k1, k2, k3, k4;
 
     cv::Mat k_mat() {
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index b6eb43a47..0d7808b01 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -2,6 +2,8 @@
 #include <optional>
 #include <unordered_map>
 
+#include "Eigen/Core"
+#include "common/check.hh"
 #include "experimental/learn_descriptors/frontend_definitions.hh"
 #include "gtsam/base/Matrix.h"
 #include "gtsam/geometry/Cal3_S2.h"
@@ -12,22 +14,41 @@
 namespace robot::experimental::learn_descriptors {
 class Frame {
    public:
-    Frame(FrameId id, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K, const KeypointsCV kpts,
-          const cv::Mat descriptors)
+    Frame(FrameId id, size_t seq, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K,
+          const KeypointsCV kpts, const cv::Mat descriptors)
         : id_(id),
+          seq_(seq),
           undistorted_img_(img),
           K_(std::move(K)),
           kpts_(kpts),
           descriptors_(descriptors) {}
 
+    std::optional<Eigen::Vector3d> velocity_to(const Frame& other_frame) const {
+        ROBOT_CHECK(other_frame.seq_ >= seq_);
+        if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_)
+            return std::nullopt;
+        double dt_seconds = static_cast<double>(other_frame.seq_ - seq_) * 1e-9;
+        return (*other_frame.cam_in_world_initial_guess_ - *cam_in_world_initial_guess_) /
+               dt_seconds;
+    };
+    std::optional<Eigen::Vector3d> velocity_from(const Frame& other_frame) const {
+        ROBOT_CHECK(seq_ >= other_frame.seq_);
+        if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_)
+            return std::nullopt;
+        double dt_seconds = static_cast<double>(seq_ - other_frame.seq_) * 1e-9;
+        return (*cam_in_world_initial_guess_ - *other_frame.cam_in_world_initial_guess_) /
+               dt_seconds;
+    };
+
     void add_keypoints(const KeypointsCV& kpts);
     void assign_descriptors(const cv::Mat& descriptors);
 
     const KeypointsCV& keypoint() { return kpts_; };
     const cv::Mat& descriptors() { return descriptors_; };
 
-    const FrameId id_;
-    const cv::Mat undistorted_img_;
+    FrameId id_;
+    size_t seq_;
+    cv::Mat undistorted_img_;
     gtsam::Cal3_S2::shared_ptr K_;
     KeypointsCV kpts_;
     cv::Mat descriptors_;
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 4b1376dd5..4b10579cd 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -2,6 +2,7 @@
 
 #include <cmath>
 #include <cstddef>
+#include <iostream>
 #include <memory>
 #include <optional>
 #include <unordered_map>
@@ -166,11 +167,13 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) {
     }
 }
 
-void Frontend::populate_frames() {
+void Frontend::populate_frames(bool verbose) {
     FrameId id = frames_.size();
     for (const ImageAndPoint &img_and_pt : images_and_points_) {
         const std::shared_ptr<ImagePoint> shared_img_pt = img_and_pt.shared_img_pt;
-        std::cout << shared_img_pt->to_string() << std::endl;
+        if (verbose) {
+            std::cout << shared_img_pt->to_string() << std::endl;
+        }
         cv::Mat img_undistorted;
         cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted,
                                     shared_img_pt->K->k_mat(), shared_img_pt->K->d_mat(),
@@ -185,7 +188,7 @@ void Frontend::populate_frames() {
             boost::make_shared<gtsam::Cal3_S2>(shared_img_pt->K->fx, shared_img_pt->K->fy, 0,
                                                shared_img_pt->K->cx, shared_img_pt->K->cy);
 
-        Frame frame(id, img_undistorted, K, kpts_cv, descriptors);
+        Frame frame(id, shared_img_pt->seq, img_undistorted, K, kpts_cv, descriptors);
         const std::optional<Eigen::Isometry3d> maybe_world_from_cam_grnd_trth =
             shared_img_pt->world_from_cam_ground_truth();
         if (maybe_world_from_cam_grnd_trth) {
@@ -328,6 +331,56 @@ void Frontend::match_frames_and_build_tracks() {
     std::cout << "done processing matches" << std::endl;
 }
 
+std::optional<std::vector<Eigen::Vector3d>> Frontend::interpolated_initial_translations() {
+    std::vector<Frame *> frames_with_guess;
+    for (Frame &frame :
+         frames_) {  // this assumes that frames are ordered by seq (time), later on may have to
+                     // think about this more if trying to run pipeline on unordered images or sets
+                     // whose capture times are on different days
+        if (frame.cam_in_world_initial_guess_) {
+            frames_with_guess.push_back(&frame);
+        }
+    }
+    if (frames_with_guess.size() <= 2) return std::nullopt;
+    std::vector<Eigen::Vector3d> interpolated_init_guesses;
+    interpolated_init_guesses.reserve(frames_.size());
+    size_t idx_guess = 0;
+
+    for (const Frame &frame : frames_) {
+        Eigen::Vector3d interpolated;
+
+        // advance to next guess window if needed
+        while (idx_guess + 1 < frames_with_guess.size() - 1 &&
+               frame.seq_ > frames_with_guess[idx_guess + 1]->seq_) {
+            idx_guess++;
+        }
+
+        if (frame.cam_in_world_initial_guess_) {
+            interpolated = *frame.cam_in_world_initial_guess_;
+        } else if (frame.seq_ < frames_with_guess[idx_guess]->seq_) {
+            Eigen::Vector3d v_b_to_a =
+                *frames_with_guess[idx_guess]->velocity_from(*frames_with_guess[idx_guess + 1]);
+            double dt = 1e-9 * (frames_with_guess[idx_guess]->seq_ - frame.seq_);
+            interpolated =
+                *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_b_to_a * dt;
+            std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl;
+            std::cout << "\tdt: " << dt << std::endl;
+            std::cout << "\tv_a_to_b: " << v_b_to_a << std::endl;
+        } else {
+            Eigen::Vector3d v_a_to_b =
+                *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]);
+            double dt = 1e-9 * (frame.seq_ - frames_with_guess[idx_guess]->seq_);
+            interpolated =
+                *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_a_to_b * dt;
+            std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl;
+            std::cout << "\tdt: " << dt << std::endl;
+            std::cout << "\tv_a_to_b: " << v_a_to_b << std::endl;
+        }
+        interpolated_init_guesses.push_back(interpolated);
+    }
+    return interpolated_init_guesses;
+}
+
 std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const cv::Mat &img) const {
     std::vector<cv::KeyPoint> keypoints;
     cv::Mat descriptors;
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index 352330e3e..ce2f5c43d 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -1,9 +1,11 @@
 #pragma once
 
 #include <memory>
+#include <optional>
 #include <utility>
 #include <vector>
 
+#include "Eigen/Core"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend_definitions.hh"
 #include "experimental/learn_descriptors/image_point.hh"
@@ -30,8 +32,9 @@ class Frontend {
     explicit Frontend(FrontendParams params);
     ~Frontend(){};
 
-    void populate_frames();
+    void populate_frames(bool verbose = false);
     void match_frames_and_build_tracks();
+    std::optional<std::vector<Eigen::Vector3d>> interpolated_initial_translations();
 
     void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); };
     void add_images(const std::vector<ImageAndPoint> &img_and_pts) {
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
index 28e7dc0c8..a04a443ca 100644
--- a/experimental/learn_descriptors/frontend_test.cc
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -1,9 +1,13 @@
 #include "experimental/learn_descriptors/frontend.hh"
 
+#include <cstddef>
+#include <memory>
 #include <stdexcept>
 #include <utility>
 #include <vector>
 
+#include "Eigen/Core"
+#include "experimental/learn_descriptors/image_point.hh"
 #include "gtest/gtest.h"
 #include "opencv2/opencv.hpp"
 
@@ -52,27 +56,31 @@ TEST(FrontendTest, pipeline_sweep) {
     cv::imshow("Test", img_test_disp);
     cv::waitKey(1000);
 
-    Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
-                                                  Frontend::ExtractorType::ORB};
-    Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE,
-                                              Frontend::MatcherType::FLANN,
-                                              Frontend::MatcherType::KNN};
+    FrontendParams::ExtractorType extractor_types[2] = {FrontendParams::ExtractorType::SIFT,
+                                                        FrontendParams::ExtractorType::ORB};
+    FrontendParams::MatcherType matcher_types[3] = {FrontendParams::MatcherType::BRUTE_FORCE,
+                                                    FrontendParams::MatcherType::FLANN,
+                                                    FrontendParams::MatcherType::KNN};
 
-    Frontend frontend;
+    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
+                          true, false};
+    Frontend frontend(params);
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
     std::vector<cv::DMatch> matches;
     cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
         img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
     cv::Mat img_display_test;
-    for (Frontend::ExtractorType extractor_type : extractor_types) {
-        for (Frontend::MatcherType matcher_type : matcher_types) {
+    for (FrontendParams::ExtractorType extractor_type : extractor_types) {
+        for (FrontendParams::MatcherType matcher_type : matcher_types) {
             printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
                    static_cast<int>(matcher_type));
+            FrontendParams params{extractor_type, matcher_type, true, false};
             try {
-                frontend = Frontend(extractor_type, matcher_type);
+                frontend = Frontend(params);
             } catch (const std::invalid_argument &e) {
-                assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
+                ROBOT_CHECK(std::string(e.what()) ==
+                            "FLANN can not be used with ORB.");  // very jank...
                 continue;
             }
             keypoints_descriptors_pair_1 = frontend.extract_features(image_1);
@@ -98,7 +106,7 @@ TEST(FrontendTest, pipeline_sweep) {
             }
             printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
                    static_cast<int>(matcher_type));
-            if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
+            if (extractor_type != FrontendParams::ExtractorType::ORB) {  // don't check ORB for now
                 for (const cv::DMatch match : matches) {
                     EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
                                     keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
@@ -111,4 +119,77 @@ TEST(FrontendTest, pipeline_sweep) {
         }
     }
 }
+
+TEST(FrontendTest, interpolate_frames) {
+    constexpr size_t width = 800;
+    constexpr size_t height = 400;
+    constexpr double fx = 500.0;
+    constexpr double fy = fx;
+    constexpr double cx = static_cast<double>(width) / 2.0;
+    constexpr double cy = static_cast<double>(height) / 2.0;
+    // gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+    std::shared_ptr<CameraCalibrationFisheye> shared_K =
+        std::make_shared<CameraCalibrationFisheye>(fx, fy, cx, cy, 1, 1, 1, 1);
+    cv::Mat white_image(height, width, CV_8UC3, cv::Scalar(255, 255, 255));
+
+    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
+                          true, false};
+    Frontend frontend(params);
+
+    struct VelocityPoint {
+        double time;               // time in seconds
+        Eigen::Vector3d velocity;  // m/s
+    };
+    std::vector<VelocityPoint> velocity_points{
+        // basically a piecewise constant function
+        VelocityPoint{1.0, Eigen::Vector3d(1, 0, 0)},  // go in x dir from seconds 0 - 1
+        VelocityPoint{2.0, Eigen::Vector3d(0, 1, 0)},  // go in y dir from seconds 1 - 2
+        VelocityPoint{
+            3.0, Eigen::Vector3d(1, 1, 0)},  // go diagonal in first quadrant from seconds 2 - 3
+    };
+    size_t idx_vel_pt = 0;
+    const Eigen::Vector3d pt0_in_world = Eigen::Vector3d::Zero();
+    std::vector<Eigen::Vector3d> pts_in_world{pt0_in_world};
+    double time = 0.0;
+    std::vector<std::shared_ptr<ImagePoint>> img_pts;
+    ImagePoint img_pt_first;
+    img_pt_first.id = 0;
+    img_pt_first.seq = static_cast<size_t>(time * 1e9);
+    img_pt_first.K = shared_K;
+    img_pt_first.set_cam_in_world(pt0_in_world);
+    img_pts.push_back(std::make_shared<ImagePoint>(img_pt_first));
+    frontend.add_image(ImageAndPoint{white_image, img_pts.back()});
+    constexpr double sample_hz = 6.0;
+    constexpr size_t num_samples_skip = 2;
+    ROBOT_CHECK(static_cast<size_t>(sample_hz) % (num_samples_skip + 1) ==
+                0);  // needed so that the interpolation isn't lossy
+    constexpr double dt = 1 / sample_hz;
+    time += dt;
+    while (time < velocity_points.back().time) {
+        while (idx_vel_pt < velocity_points.size() && time > velocity_points[idx_vel_pt].time) {
+            idx_vel_pt++;
+        }
+        pts_in_world.emplace_back(pts_in_world.back() + dt * velocity_points[idx_vel_pt].velocity);
+        ImagePoint img_pt;
+        img_pt.id = img_pts.size();
+        img_pt.seq = static_cast<size_t>(time * 1e9);
+        img_pt.K = shared_K;
+        if (img_pts.size() % (num_samples_skip + 1) == 0) {
+            img_pt.set_cam_in_world(pts_in_world.back());
+        }
+        img_pts.emplace_back(std::make_shared<ImagePoint>(img_pt));
+        frontend.add_image(ImageAndPoint{white_image, img_pts.back()});
+        time += dt;
+    }
+    frontend.populate_frames();
+    std::optional<std::vector<Eigen::Vector3d>> interpolated_pts =
+        frontend.interpolated_initial_translations();
+    ROBOT_CHECK(interpolated_pts);
+    ROBOT_CHECK(interpolated_pts->size() == img_pts.size());
+    constexpr double TOL = 1e-5;
+    for (size_t i = 0; i < pts_in_world.size(); i++) {
+        ROBOT_CHECK(pts_in_world[i].isApprox((*interpolated_pts)[i], TOL), i, pts_in_world[i],
+                    (*interpolated_pts)[i]);
+    }
+}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index f4a4635b9..f5b9a681b 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -18,11 +18,13 @@ struct ImagePoint {
     virtual ~ImagePoint() = default;
 
     size_t id;
+    size_t seq;  // for time. TODO: make a time struct
     std::shared_ptr<CameraCalibrationFisheye> K;
+    void set_cam_in_world(const Eigen::Vector3d& cam_in_world) { cam_in_world_ = cam_in_world; };
     virtual std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const {
         return std::nullopt;
     };
-    virtual std::optional<Eigen::Vector3d> cam_in_world() const { return std::nullopt; };
+    virtual std::optional<Eigen::Vector3d> cam_in_world() const { return cam_in_world_; };
     virtual std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const {
         return std::nullopt;
     };
@@ -62,5 +64,8 @@ struct ImagePoint {
         }
         return ss.str();
     }
+
+   private:
+    std::optional<Eigen::Vector3d> cam_in_world_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
index 645a2571a..4fce4f74a 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.hh
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -21,8 +21,8 @@ class FourSeasonsParser;
 struct ImagePointFourSeasons : ImagePoint {
     virtual ~ImagePointFourSeasons();
 
-    size_t id;   // idx for DB
-    size_t seq;  // time in nanoseconds of image capture (also the name of image)
+    // size_t id;   // idx for DB
+    // size_t seq;  // time in nanoseconds of image capture (also the name of image)
     std::optional<liegroups::SE3>
         AS_w_from_gnss_cam;  // globally opimized arbitrary scale visual world (gnss + VIO + loop
                              // closure + etc.) from cam
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index 4071faac6..c82ccd50f 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -1,4 +1,5 @@
 #include <cmath>
+#include <exception>
 #include <functional>
 #include <iostream>
 #include <memory>
@@ -42,14 +43,15 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
                                                  gtsam::Cal3_S2::shared_ptr K,
                                                  const double max_reproj_error = 2.0,
                                                  const bool verbose = true) {
+    std::cout << "attmepting to triangulate!" << std::endl;
     gtsam::Point3 p_lmk_in_world;
-    if (cam_poses.size() >= 2) {
+    if (cam_poses.size() > 2) {
         try {
             // Attempt triangulation using DLT (or the GTSAM provided method)
             p_lmk_in_world = gtsam::triangulatePoint3(
                 cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
 
-        } catch (const gtsam::TriangulationCheiralityException &e) {
+        } catch (const std::exception &e) {
             // Handle the exception gracefully by logging and retaining the previous
             // estimate or discard
             if (verbose) {
@@ -84,6 +86,7 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
             return std::nullopt;
         }
         // Reprojection error
+        std::cout << "heartbeat cheirality and reprojection checks" << std::endl;
         if (max_reproj_error > 0) {
             gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
             const auto reproj = cam.project(p_lmk_in_world);
@@ -205,11 +208,12 @@ int main(int argc, const char **argv) {
                           true, false};
     Frontend frontend(params);
 
+    std::vector<size_t> idx_img_pts;
     for (size_t i = 581; i < 750; i += 5) {
-        // if (!parser.get_image_point(i).K) std::cout << "UH OH" << std::endl;
         frontend.add_image(
             ImageAndPoint{parser.load_image(i),
                           std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
+        idx_img_pts.push_back(i);
     }
     frontend.populate_frames();
     frontend.match_frames_and_build_tracks();
@@ -224,15 +228,16 @@ int main(int argc, const char **argv) {
     std::optional<Eigen::Isometry3d> first_grnd_trth;
     (void)first_point;
     for (const Frame &frame : frontend.frames()) {
-        if (frame.cam_in_world_initial_guess_) {
-            Eigen::Isometry3d world_from_cam_init;
-            world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
-            if (!first_point) first_point = *frame.cam_in_world_initial_guess_;
-            world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ - *first_point;
-            std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() << std::endl;
-            viz_poses_init.emplace_back(
-                world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char, frame.id_).string());
-        }
+        // if (frame.cam_in_world_initial_guess_) {
+        //     Eigen::Isometry3d world_from_cam_init;
+        //     world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
+        //     if (!first_point) first_point = *frame.cam_in_world_initial_guess_;
+        //     world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ -
+        //     *first_point; std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() <<
+        //     std::endl; viz_poses_init.emplace_back(
+        //         world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char,
+        //         frame.id_).string());
+        // }
         if (frame.world_from_cam_groundtruth_) {
             std::cout << "adding a ground truth frame to viz!" << std::endl;
             Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix());
@@ -270,24 +275,31 @@ int main(int argc, const char **argv) {
     Backend::populate_rotation_estimate(frames);
     std::vector<gtsam::Symbol> symbols_pose;
     std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_guess;
-    std::optional<gtsam::Point3> cam0_in_w;
-    for (const Frame &frame : frames) {
-        if (!frame.cam_in_world_initial_guess_) continue;
-        if (!cam0_in_w) cam0_in_w = *frame.cam_in_world_initial_guess_;
+    std::optional<std::vector<Eigen::Vector3d>> interpolated_cam_in_w =
+        frontend.interpolated_initial_translations();
+    std::cout << "heartbeat" << std::endl;
+    ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w);
+    Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front();
+    for (size_t i = 0; i < frames.size(); i++) {
+        // std::cout << "interpolated translation " << i << ": " << (*interpolated_cam_in_w)[i]
+        //           << std::endl;
+        const Frame &frame = frames[i];
+        const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_);
         const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_
                                                        ? *frame.world_from_cam_initial_guess_
                                                        : gtsam::Rot3::Identity(),
-                                                   *frame.cam_in_world_initial_guess_ - *cam0_in_w);
+                                                   (*interpolated_cam_in_w)[i] - cam0_in_w);
         world_from_cam_initial_guess[frame.id_] = world_from_cam_estimate;
-        gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
-            frame.translation_covariance_in_cam_
-                ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0),
-                                 (*frame.translation_covariance_in_cam_)(1, 1),
-                                 (*frame.translation_covariance_in_cam_)(2, 2))
-                : gps_sigmas_fallback);
-        const gtsam::Symbol cam_symbol('x', frame.id_);
-        graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
         initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
+        if (frame.world_from_cam_initial_guess_) {
+            gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
+                frame.translation_covariance_in_cam_
+                    ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0),
+                                     (*frame.translation_covariance_in_cam_)(1, 1),
+                                     (*frame.translation_covariance_in_cam_)(2, 2))
+                    : gps_sigmas_fallback);
+            graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
+        }
         symbols_pose.push_back(cam_symbol);
     }
     std::vector<robot::visualization::VizPose> viz_pose;
@@ -313,11 +325,11 @@ int main(int argc, const char **argv) {
         std::optional<gtsam::Point3> landmark_estimate = attempt_triangulate(
             world_from_lmk_cams, lmk_observations, frames[0].K_);  // all K are the same for now...
         if (landmark_estimate) {
-            const gtsam::Symbol lmk_symbol(Frontend::symbol_lmk_char, i);
+            const gtsam::Symbol lmk_symbol(Backend::symbol_char_landmark, i);
             for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
                 graph_.add(gtsam::GenericProjectionFactor(
                     gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise,
-                    gtsam::Symbol(Frontend::symbol_pose_char, frame_id), lmk_symbol, frames[0].K_));
+                    gtsam::Symbol(Backend::symbol_char_pose, frame_id), lmk_symbol, frames[0].K_));
             }
             initial_estimate_.insert(lmk_symbol, *landmark_estimate);
             symbols_lmks.push_back(lmk_symbol);

From 901aac8ae2a82a486b5d936eb1fed1736cf8dc0c Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Sun, 20 Jul 2025 23:12:58 -0400
Subject: [PATCH 36/45] WIP

---
 experimental/learn_descriptors/refactor_sfm_mvp.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index c82ccd50f..c45dc0e65 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -280,6 +280,7 @@ int main(int argc, const char **argv) {
     std::cout << "heartbeat" << std::endl;
     ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w);
     Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front();
+    std::vector<robot::visualization::VizPose> viz_pose;
     for (size_t i = 0; i < frames.size(); i++) {
         // std::cout << "interpolated translation " << i << ": " << (*interpolated_cam_in_w)[i]
         //           << std::endl;
@@ -300,12 +301,11 @@ int main(int argc, const char **argv) {
                     : gps_sigmas_fallback);
             graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
         }
+        viz_pose.emplace_back(
+            Eigen::Isometry3d(world_from_cam_estimate.matrix()),
+            (frame.cam_in_world_initial_guess_ ? "xg_" : "x_") + std::to_string(frame.id_));
         symbols_pose.push_back(cam_symbol);
     }
-    std::vector<robot::visualization::VizPose> viz_pose;
-    for (const auto &[id, pose] : world_from_cam_initial_guess) {
-        viz_pose.emplace_back(Eigen::Isometry3d(pose.matrix()), "x_" + std::to_string(id));
-    }
     robot::visualization::viz_scene(viz_pose, std::vector<robot::visualization::VizPoint>(),
                                     cv::viz::Color::brown(), true, true,
                                     "Initial guesses in backend");

From 73ced7875ba9c471a566544022f2329561121290 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Mon, 21 Jul 2025 17:27:21 -0400
Subject: [PATCH 37/45] optimizations look decent. need to fix and tune
 variance figures. need to also implement initial rotation alignment.

---
 experimental/learn_descriptors/BUILD          |   1 +
 experimental/learn_descriptors/backend.cc     |   7 +-
 experimental/learn_descriptors/backend.hh     |   3 +-
 experimental/learn_descriptors/frame.hh       |  42 +++++
 experimental/learn_descriptors/frontend.cc    |  20 +-
 experimental/learn_descriptors/gps_data.cc    |   0
 experimental/learn_descriptors/gps_data.hh    |  59 +++---
 .../image_point_four_seasons.hh               |   6 +-
 .../learn_descriptors/incremental_sfm_mvp.cc  |   3 +-
 .../learn_descriptors/refactor_sfm_mvp.cc     | 171 ++++++++++++------
 10 files changed, 219 insertions(+), 93 deletions(-)
 delete mode 100644 experimental/learn_descriptors/gps_data.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 140d492d5..bb369d6d1 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -431,6 +431,7 @@ cc_binary(
         ":spatial_test_scene_cube",
         ":structure_from_motion_types",
         "//common/geometry:camera",
+        "//common/gps:frame_translation",
         "//visualization/opencv:opencv_viz",
         "@boost//:smart_ptr",
         "@cxxopts",
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index 4dbdbd8d1..30706673a 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -99,7 +99,8 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
         q.pop();
         seen_frame_id.insert(frame.id_);
         SharedFrameVertex frame_vertex = frame_tree.at(frame.id_);
-        if (seen_frame_id.size() == 1) {  // make the first frame rotation identity
+        if (seen_frame_id.size() == 1) {  // make the first frame rotation identity unless the
+                                          // initial guess is set otherwise
             frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_
                                             ? *frame_vertex->frame.world_from_cam_initial_guess_
                                             : gtsam::Rot3::Identity();
@@ -108,6 +109,8 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
                 *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3)));
         }
         frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this;
+        std::cout << frame.to_string() << std::endl;
+
         const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_);
         symbols_frames.push_back(symbol_frame);
         initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_);
@@ -127,7 +130,7 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
                 other_frame_vertex->w_from_this =
                     *frame_vertex->w_from_this * frame_from_other_frame;
                 other_frame_vertex->frame.world_from_cam_initial_guess_ =
-                    *frame_vertex->w_from_this * frame_from_other_frame;
+                    other_frame_vertex->w_from_this;
             }
 
             graph.add(gtsam::BetweenFactor<gtsam::Rot3>(
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
index c042de3cb..9d7d403d9 100644
--- a/experimental/learn_descriptors/backend.hh
+++ b/experimental/learn_descriptors/backend.hh
@@ -36,7 +36,8 @@ class Backend {
     static gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3> &rotations,
                                          int max_iter = 10);
     // use rotation averaging to set the rotation initial guess for each frame in the world frame.
-    // the world frame will be the first frame in the vector
+    // the world frame will be the first frame in the vector. deadreckon_incrementally won't
+    // optimize if true
     static void populate_rotation_estimate(std::vector<Frame> &frames);
 
     const gtsam::Values &current_initial_values() const { return initial_estimate_; };
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index 0d7808b01..0d8390295 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -1,5 +1,7 @@
 #pragma once
 #include <optional>
+#include <sstream>
+#include <string>
 #include <unordered_map>
 
 #include "Eigen/Core"
@@ -46,6 +48,46 @@ class Frame {
     const KeypointsCV& keypoint() { return kpts_; };
     const cv::Mat& descriptors() { return descriptors_; };
 
+    std::string to_string() const {
+        std::stringstream ss;
+        ss << "Frame " << id_ << ":\n";
+        ss << "\tseq: " << seq_ << "\n";
+        ss << "\tK: ";
+        if (K_) {
+            ss << "fx: " << K_->fx() << ", fy: " << K_->fy() << ", s: " << K_->skew()
+               << ", px: " << K_->px() << ", py: " << K_->py();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tkpts_.size(): " << kpts_.size();
+        ss << "\n\tframe_from_other_frames_.size(): " << frame_from_other_frames_.size();
+        ss << "\n\tworld_from_cam_groundtruth_: ";
+        if (world_from_cam_groundtruth_) {
+            ss << "\n" << world_from_cam_groundtruth_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tcam_in_world_initial_guess_: ";
+        if (cam_in_world_initial_guess_) {
+            ss << "\n" << cam_in_world_initial_guess_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tworld_from_cam_initial_guess_: ";
+        if (world_from_cam_initial_guess_) {
+            ss << "\n" << world_from_cam_initial_guess_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\ttranslation_covariance_in_cam_: ";
+        if (translation_covariance_in_cam_) {
+            ss << "\n" << translation_covariance_in_cam_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        return ss.str();
+    }
+
     FrameId id_;
     size_t seq_;
     cv::Mat undistorted_img_;
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 4b10579cd..e42fedb39 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -341,7 +341,7 @@ std::optional<std::vector<Eigen::Vector3d>> Frontend::interpolated_initial_trans
             frames_with_guess.push_back(&frame);
         }
     }
-    if (frames_with_guess.size() <= 2) return std::nullopt;
+    if (frames_with_guess.size() < 2) return std::nullopt;
     std::vector<Eigen::Vector3d> interpolated_init_guesses;
     interpolated_init_guesses.reserve(frames_.size());
     size_t idx_guess = 0;
@@ -357,21 +357,15 @@ std::optional<std::vector<Eigen::Vector3d>> Frontend::interpolated_initial_trans
 
         if (frame.cam_in_world_initial_guess_) {
             interpolated = *frame.cam_in_world_initial_guess_;
-        } else if (frame.seq_ < frames_with_guess[idx_guess]->seq_) {
-            Eigen::Vector3d v_b_to_a =
-                *frames_with_guess[idx_guess]->velocity_from(*frames_with_guess[idx_guess + 1]);
-            double dt = 1e-9 * (frames_with_guess[idx_guess]->seq_ - frame.seq_);
-            interpolated =
-                *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_b_to_a * dt;
-            std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl;
-            std::cout << "\tdt: " << dt << std::endl;
-            std::cout << "\tv_a_to_b: " << v_b_to_a << std::endl;
         } else {
             Eigen::Vector3d v_a_to_b =
                 *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]);
-            double dt = 1e-9 * (frame.seq_ - frames_with_guess[idx_guess]->seq_);
-            interpolated =
-                *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ + v_a_to_b * dt;
+            double dt = (frame.seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) * 1e-9 *
+                        (frames_with_guess[idx_guess]->seq_ > frame.seq_
+                             ? frames_with_guess[idx_guess]->seq_ - frame.seq_
+                             : frame.seq_ - frames_with_guess[idx_guess]->seq_);
+            interpolated = *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ +
+                           v_a_to_b * dt;  // add sign back to dt
             std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl;
             std::cout << "\tdt: " << dt << std::endl;
             std::cout << "\tv_a_to_b: " << v_a_to_b << std::endl;
diff --git a/experimental/learn_descriptors/gps_data.cc b/experimental/learn_descriptors/gps_data.cc
deleted file mode 100644
index e69de29bb..000000000
diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh
index 13b032c25..34f89b9b3 100644
--- a/experimental/learn_descriptors/gps_data.hh
+++ b/experimental/learn_descriptors/gps_data.hh
@@ -9,49 +9,64 @@
 namespace robot::experimental::learn_descriptors {
 struct GPSData {
     struct Uncertainty {
-        double sigma_lat_mitude;
-        double sigma_longitude;
-        double sigma_altitude;
+        double sigma_latitude_deg;
+        double sigma_longitude_deg;
+        double sigma_altitude_m;
         double orientation_deg;
         double rms_range_error_m;
 
         // diagonal latitude, longitude, altitude covariance in meters squared
         Eigen::Matrix3d to_LLA_covariance() const {
             Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
-            cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude;
-            cov(1, 1) = sigma_longitude * sigma_longitude;
-            cov(2, 2) = sigma_altitude * sigma_altitude;
+            cov(0, 0) = sigma_latitude_deg * sigma_latitude_deg;
+            cov(1, 1) = sigma_longitude_deg * sigma_longitude_deg;
+            cov(2, 2) = sigma_altitude_m * sigma_altitude_m;
             return cov;
         }
 
         // Convert lat/lon covariance to ENU at given latitude
         Eigen::Matrix3d to_ENU_covariance(double lat_deg) const {
-            double lat_rad = lat_deg * M_PI / 180.0;
+            // –– Named conversion/constants ––
+            static constexpr double DEG2RAD = M_PI / 180.0;
+            static constexpr double ORIENTATION_OFFSET_DEG =
+                90.0;  // for CW‐from‐North → CCW‐from‐East
 
-            // meters per degree scaling
-            double kLat =
-                111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad);
-            double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad);
+            // WGS‑84 approximate “meters per degree” polynomial coefficients
+            static constexpr double MERIDIONAL_A0 = 111132.92;
+            static constexpr double MERIDIONAL_A2 = -559.82;
+            static constexpr double MERIDIONAL_A4 = 1.175;
+            static constexpr double PARALLEL_B0 = 111412.84;
+            static constexpr double PARALLEL_B2 = -93.5;
 
+            // 1) get LLA covariance in degrees²
             Eigen::Matrix3d lla_cov = to_LLA_covariance();
 
-            // rotate lat/lon covariance (upper-left 2x2) using orientation
-            double theta = orientation_deg * M_PI / 180.0;
-            Eigen::Matrix2d R;
-            R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
+            // 2) convert latitude to radians
+            double phi = lat_deg * DEG2RAD;
 
-            Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
-            Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose();
+            // 3) compute meters/degree at this latitude
+            double kLat = MERIDIONAL_A0 + MERIDIONAL_A2 * std::cos(2.0 * phi) +
+                          MERIDIONAL_A4 * std::cos(4.0 * phi);
+            double kLon = PARALLEL_B0 * std::cos(phi) + PARALLEL_B2 * std::cos(3.0 * phi);
 
-            // apply linear scaling to convert degrees to meters in EN
+            // 4) build the Jacobian for degree→meter scaling
             Eigen::Matrix2d J;
-            J << kLat, 0, 0, kLon;
+            J << kLat, 0.0, 0.0, kLon;
 
-            Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose();
+            // 5) scale the 2×2 lat/lon covariance into meters²
+            Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
+            Eigen::Matrix2d cov_m = J * D * J.transpose();
+
+            // 6) rotate that ellipse into ENU axes (θ = CCW from East)
+            double theta = (ORIENTATION_OFFSET_DEG - orientation_deg) * DEG2RAD;
+            Eigen::Matrix2d R;
+            R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
+            Eigen::Matrix2d horz_cov = R * cov_m * R.transpose();
 
+            // 7) assemble full 3×3 ENU covariance
             Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero();
-            enu_cov.topLeftCorner<2, 2>() = en_cov;
-            enu_cov(2, 2) = lla_cov(2, 2);  // alt variance unchanged
+            enu_cov.topLeftCorner<2, 2>() = horz_cov;
+            enu_cov(2, 2) = lla_cov(2, 2);  // altitude variance already in m²
 
             return enu_cov;
         }
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
index 4fce4f74a..368fe8351 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.hh
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -83,9 +83,9 @@ struct ImagePointFourSeasons : ImagePoint {
             }
             ss << "\n\t\tsigma: ";
             if (gps_gcs->uncertainty) {
-                ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t"
-                   << gps_gcs->uncertainty->sigma_longitude << "\t"
-                   << gps_gcs->uncertainty->sigma_altitude;
+                ss << gps_gcs->uncertainty->sigma_latitude_deg << "\t"
+                   << gps_gcs->uncertainty->sigma_longitude_deg << "\t"
+                   << gps_gcs->uncertainty->sigma_altitude_m;
             } else {
                 ss << "N/A";
             }
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
index b3b8a17e6..4906f0ff1 100644
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ b/experimental/learn_descriptors/incremental_sfm_mvp.cc
@@ -8,6 +8,7 @@
 #include "Eigen/Core"
 #include "Eigen/Geometry"
 #include "common/geometry/camera.hh"
+#include "common/gps/frame_translation.hh"
 #include "cxxopts.hpp"
 #include "experimental/learn_descriptors/camera_calibration.hh"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
@@ -358,7 +359,7 @@ int main(int argc, const char **argv) {
             const Eigen::Vector3d gps_gcs(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
                 img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
-            const Eigen::Vector3d p_gps_in_ECEF = parser.ecef_from_lla(gps_gcs);
+            const Eigen::Vector3d p_gps_in_ECEF = robot::gps::ecef_from_lla(gps_gcs);
             const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(),
                                                     p_gps_in_ECEF.z(), 1.0);
             world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index c45dc0e65..ef87991f1 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -36,6 +36,7 @@
 #include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/PriorFactor.h"
 #include "gtsam/slam/ProjectionFactor.h"
+#include "opencv2/opencv.hpp"
 #include "visualization/opencv/opencv_viz.hh"
 
 std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
@@ -43,7 +44,6 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
                                                  gtsam::Cal3_S2::shared_ptr K,
                                                  const double max_reproj_error = 2.0,
                                                  const bool verbose = true) {
-    std::cout << "attmepting to triangulate!" << std::endl;
     gtsam::Point3 p_lmk_in_world;
     if (cam_poses.size() > 2) {
         try {
@@ -86,7 +86,6 @@ std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3>
             return std::nullopt;
         }
         // Reprojection error
-        std::cout << "heartbeat cheirality and reprojection checks" << std::endl;
         if (max_reproj_error > 0) {
             gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
             const auto reproj = cam.project(p_lmk_in_world);
@@ -209,12 +208,18 @@ int main(int argc, const char **argv) {
     Frontend frontend(params);
 
     std::vector<size_t> idx_img_pts;
-    for (size_t i = 581; i < 750; i += 5) {
+    for (size_t i = 633; i < 646; i += 2) {
         frontend.add_image(
             ImageAndPoint{parser.load_image(i),
                           std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
         idx_img_pts.push_back(i);
     }
+    // for (size_t i = 581; i < 750; i += 5) {
+    //     frontend.add_image(
+    //         ImageAndPoint{parser.load_image(i),
+    //                       std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
+    //     idx_img_pts.push_back(i);
+    // }
     frontend.populate_frames();
     frontend.match_frames_and_build_tracks();
 
@@ -233,8 +238,8 @@ int main(int argc, const char **argv) {
         //     world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
         //     if (!first_point) first_point = *frame.cam_in_world_initial_guess_;
         //     world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ -
-        //     *first_point; std::cout << "world_from_cam_init: " << world_from_cam_init.matrix() <<
-        //     std::endl; viz_poses_init.emplace_back(
+        //     *first_point; std::cout << "world_from_cam_init: " <<
+        //     world_from_cam_init.matrix() << std::endl; viz_poses_init.emplace_back(
         //         world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char,
         //         frame.id_).string());
         // }
@@ -245,14 +250,16 @@ int main(int argc, const char **argv) {
             w_from_cam_grnd_trth.translation() -= first_grnd_trth->translation();
             viz_poses_init.emplace_back(w_from_cam_grnd_trth,
                                         "x_grnd_" + std::to_string(frame.id_));
-        }
+        };
     }
+
     std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl;
     robot::visualization::viz_scene(viz_poses_init, std::vector<robot::visualization::VizPoint>(),
                                     cv::viz::Color::brown(), true, true,
                                     "Frontend initial guesses");
 
     // ############# BACKEND ###############
+    constexpr bool use_grndtrth_only = false;
     gtsam::Values initial_estimate_;
     gtsam::NonlinearFactorGraph graph_;
 
@@ -262,7 +269,7 @@ int main(int argc, const char **argv) {
     gtsam::Vector6 prior_sigmas;
     prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
     gtsam::Vector3 gps_sigmas_fallback;
-    gps_sigmas_fallback << 0.5, 0.5, 1.5;
+    gps_sigmas_fallback << 1.5, 1.5, 1.5;
     gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
         gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
     gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
@@ -272,48 +279,106 @@ int main(int argc, const char **argv) {
     std::vector<Frame> &frames = frontend.frames();
     frames[0].world_from_cam_initial_guess_ =
         frames[0].world_from_cam_groundtruth_->rotation();  // make sure first idx has grnd trth
+
+    // // let's examine the rotations to the first thingy
+    // std::vector<robot::visualization::VizPose> viz_5_pt_rot;
+    // for (const auto &[frame_id, rotation] : frames[0].frame_from_other_frames_) {
+    //     viz_5_pt_rot.emplace_back(
+    //         Eigen::Isometry3d(gtsam::Pose3(rotation, gtsam::Point3()).matrix()),
+    //         "rot_to_" + std::to_string(frame_id));
+    // }
+    // robot::visualization::viz_scene(viz_5_pt_rot, std::vector<robot::visualization::VizPoint>(),
+    //                                 cv::viz::Color::brown(), true, true,
+    //                                 "Frontend initial guesses");
+
     Backend::populate_rotation_estimate(frames);
     std::vector<gtsam::Symbol> symbols_pose;
     std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_guess;
     std::optional<std::vector<Eigen::Vector3d>> interpolated_cam_in_w =
         frontend.interpolated_initial_translations();
-    std::cout << "heartbeat" << std::endl;
     ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w);
+    // std::vector<robot::visualization::VizPoint> viz_pts_interpolated;
+    // for (size_t i = 0; i < interpolated_cam_in_w->size(); i++) {
+    //     viz_pts_interpolated.emplace_back(
+    //         (*interpolated_cam_in_w)[i] - interpolated_cam_in_w->front(),
+    //         "pt_" + std::to_string(i));
+    //     std::cout << "pt_" + std::to_string(i) << ": " << (*interpolated_cam_in_w)[i] <<
+    //     std::endl;
+    // }
+    // robot::visualization::viz_scene(std::vector<robot::visualization::VizPose>(),
+    //                                 viz_pts_interpolated, cv::viz::Color::brown(), true, true,
+    //                                 "Initial guesses in backend");
     Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front();
+    std::optional<Eigen::Vector3d> grndtrth_cam0_in_w;
+    std::vector<size_t> idx_grndtrth_frame;
     std::vector<robot::visualization::VizPose> viz_pose;
-    for (size_t i = 0; i < frames.size(); i++) {
-        // std::cout << "interpolated translation " << i << ": " << (*interpolated_cam_in_w)[i]
-        //           << std::endl;
+    auto noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas(
+        (gtsam::Vector(6) << 0, 0, 0,  // rotation stdev in radians
+         1e-6, 1e-6, 1e-6              // translation stdev in meters
+         )
+            .finished());
+    const gtsam::Pose3 w_from_cam0_init_estimate(*frames[0].world_from_cam_initial_guess_,
+                                                 gtsam::Point3());
+    const gtsam::Symbol symbol_cam0(Backend::symbol_char_pose, 0);
+    graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
+        symbol_cam0, w_from_cam0_init_estimate,
+        noise_tight_prior));  // currently assuming that we have groundtruth on the first pose
+    initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate);
+    std::string str_pose0 =
+        std::string("x_") + (frames[0].world_from_cam_groundtruth_ ? "g_" : "") +
+        (!frames[0].world_from_cam_initial_guess_ ? "norot_" : "") + std::to_string(frames[0].id_);
+    viz_pose.emplace_back(Eigen::Isometry3d(w_from_cam0_init_estimate.matrix()), str_pose0);
+    symbols_pose.push_back(symbol_cam0);
+    for (size_t i = 1; i < frames.size(); i++) {
         const Frame &frame = frames[i];
-        const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_);
-        const gtsam::Pose3 world_from_cam_estimate(frame.world_from_cam_initial_guess_
+        std::optional<gtsam::Pose3> world_from_cam_estimate;
+        if (use_grndtrth_only && !frame.world_from_cam_groundtruth_) {
+            continue;
+        } else if (use_grndtrth_only && frame.world_from_cam_groundtruth_) {
+            if (!grndtrth_cam0_in_w)
+                grndtrth_cam0_in_w = frame.world_from_cam_groundtruth_->translation();
+            world_from_cam_estimate =
+                gtsam::Pose3(frame.world_from_cam_groundtruth_->rotation(),
+                             gtsam::Point3(frame.world_from_cam_groundtruth_->translation() -
+                                           *grndtrth_cam0_in_w));
+            idx_grndtrth_frame.push_back(i);
+            std::cout << "populating groundtruth pose: " << world_from_cam_estimate->translation()
+                      << std::endl;
+        } else {
+            world_from_cam_estimate = gtsam::Pose3(frame.world_from_cam_initial_guess_
                                                        ? *frame.world_from_cam_initial_guess_
                                                        : gtsam::Rot3::Identity(),
                                                    (*interpolated_cam_in_w)[i] - cam0_in_w);
-        world_from_cam_initial_guess[frame.id_] = world_from_cam_estimate;
-        initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
-        if (frame.world_from_cam_initial_guess_) {
+        }
+        std::cout << "world_from_cam_estimate_" << i << ": " << *world_from_cam_estimate
+                  << std::endl;
+        world_from_cam_initial_guess[frame.id_] = *world_from_cam_estimate;
+        const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_);
+        initial_estimate_.insert(cam_symbol, *world_from_cam_estimate);
+        if (frame.cam_in_world_initial_guess_) {
+            if (frame.translation_covariance_in_cam_) {
+                std::cout << "translation covariance: " << *frame.translation_covariance_in_cam_
+                          << std::endl;
+            }
             gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
-                frame.translation_covariance_in_cam_
-                    ? gtsam::Vector3((*frame.translation_covariance_in_cam_)(0, 0),
-                                     (*frame.translation_covariance_in_cam_)(1, 1),
-                                     (*frame.translation_covariance_in_cam_)(2, 2))
-                    : gps_sigmas_fallback);
+                // frame.translation_covariance_in_cam_
+                false ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
+                                       std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
+                                       std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
+                      : gps_sigmas_fallback);
             graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
+            std::cout << "adding gps factor!" << std::endl;
         }
-        viz_pose.emplace_back(
-            Eigen::Isometry3d(world_from_cam_estimate.matrix()),
-            (frame.cam_in_world_initial_guess_ ? "xg_" : "x_") + std::to_string(frame.id_));
+        std::string str_pose = std::string("x_") + (frame.world_from_cam_groundtruth_ ? "g_" : "") +
+                               (!frame.world_from_cam_initial_guess_ ? "norot_" : "") +
+                               std::to_string(frame.id_);
+        viz_pose.emplace_back(Eigen::Isometry3d(world_from_cam_estimate->matrix()), str_pose);
         symbols_pose.push_back(cam_symbol);
     }
-    robot::visualization::viz_scene(viz_pose, std::vector<robot::visualization::VizPoint>(),
-                                    cv::viz::Color::brown(), true, true,
-                                    "Initial guesses in backend");
-
-    std::cout << "heartbeat pre lmks in graph" << std::endl;
 
     // add landmarks to graph
     std::vector<gtsam::Symbol> symbols_lmks;
+    std::vector<robot::visualization::VizPoint> viz_pts;
     const FeatureTracks feature_tracks = frontend.feature_tracks();
     for (size_t i = 0; i < feature_tracks.size(); i++) {
         std::vector<gtsam::Pose3> world_from_lmk_cams;
@@ -322,8 +387,9 @@ int main(int argc, const char **argv) {
             world_from_lmk_cams.push_back(world_from_cam_initial_guess[frame_id]);
             lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y);
         }
-        std::optional<gtsam::Point3> landmark_estimate = attempt_triangulate(
-            world_from_lmk_cams, lmk_observations, frames[0].K_);  // all K are the same for now...
+        std::optional<gtsam::Point3> landmark_estimate =
+            attempt_triangulate(world_from_lmk_cams, lmk_observations, frames[0].K_, 2.0,
+                                false);  // all K are the same for now...
         if (landmark_estimate) {
             const gtsam::Symbol lmk_symbol(Backend::symbol_char_landmark, i);
             for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
@@ -333,33 +399,36 @@ int main(int argc, const char **argv) {
             }
             initial_estimate_.insert(lmk_symbol, *landmark_estimate);
             symbols_lmks.push_back(lmk_symbol);
+            viz_pts.emplace_back(*landmark_estimate, "lmk_" + std::to_string(i));
         }
-        std::cout << "idx " << i << " out of " << feature_tracks.size() << std::endl;
     }
-
-    std::cout << "heartbeat" << std::endl;
+    robot::visualization::viz_scene(viz_pose, viz_pts, cv::viz::Color::brown(), true, true,
+                                    "Initial guesses in backend");
 
     // do global optimization
     const gtsam::Values result =
         optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 10);
 
-    // // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
-    // double sum_traj_error = 0;
-    // double sum_rot_error = 0;
-    // for (size_t i = 0; i < symbols_pose.size(); i++) {
-    //     const gtsam::Pose3 traj_pose = result.at<gtsam::Pose3>(symbols_pose[i]);
-    //     sum_traj_error += std::pow(
-    //         (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2);
-    //     sum_rot_error +=
-    //         rotation_error(references_world_from_cam[i], Eigen::Isometry3d(traj_pose.matrix()));
-    // }
-    // std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
-    // double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
-    // double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
-    // std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
+    // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
+    double sum_traj_error = 0;
+    double sum_rot_error = 0;
+    // for (size_t i = 0; i < idx_grndtrth_frame.size(); i++) {
+    for (const size_t i_grndtrth_frame : idx_grndtrth_frame) {
+        const Frame &frame = frames[i_grndtrth_frame];
+        const gtsam::Pose3 traj_pose =
+            result.at<gtsam::Pose3>(gtsam::Symbol(Backend::symbol_char_pose, frame.id_));
+        sum_traj_error += std::pow(
+            (frame.world_from_cam_groundtruth_->translation() - traj_pose.translation()).norm(), 2);
+        sum_rot_error +=
+            rotation_error(Eigen::Isometry3d(frame.world_from_cam_groundtruth_->matrix()),
+                           Eigen::Isometry3d(traj_pose.matrix()));
+    }
+    std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
+    double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
+    double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
+    std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
 
     std::cout << "about to visualize result" << std::endl;
-    result.print();
-    graph_values(result, "Result", symbols_pose, std::vector<gtsam::Symbol>());
-    // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
+    // result.print();
+    graph_values(result, "Result", symbols_pose, symbols_lmks);
 }
\ No newline at end of file

From 3a5f6c6a38bd88311ed5f0d07dbe6ae31fdb3ee7 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Tue, 22 Jul 2025 18:01:07 -0400
Subject: [PATCH 38/45] WIP. need to fix backend adding frames

---
 experimental/learn_descriptors/BUILD          |  55 ++--
 experimental/learn_descriptors/backend.cc     | 286 ++++++++++++++++--
 experimental/learn_descriptors/backend.hh     |  44 ++-
 .../learn_descriptors/feature_manager.hh      |  49 ---
 .../learn_descriptors/feature_manager_test.cc |  68 -----
 experimental/learn_descriptors/feature_set.hh |  78 -----
 experimental/learn_descriptors/frame.hh       |  21 +-
 experimental/learn_descriptors/frontend.cc    | 120 ++++----
 experimental/learn_descriptors/frontend.hh    |   6 +-
 experimental/learn_descriptors/gps_data.hh    |  83 +++--
 experimental/learn_descriptors/image_point.hh |   1 +
 .../image_point_four_seasons.cc               |   4 +-
 .../image_point_four_seasons.hh               |  21 +-
 .../learn_descriptors/refactor_sfm_mvp.cc     |  63 +---
 .../structure_from_motion.cc                  | 200 ++++--------
 .../structure_from_motion.hh                  |  54 +---
 .../structure_from_motion_example.cc          |  75 +++++
 17 files changed, 606 insertions(+), 622 deletions(-)
 delete mode 100644 experimental/learn_descriptors/feature_manager.hh
 delete mode 100644 experimental/learn_descriptors/feature_manager_test.cc
 delete mode 100644 experimental/learn_descriptors/feature_set.hh
 create mode 100644 experimental/learn_descriptors/structure_from_motion_example.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index bb369d6d1..63a162d08 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -26,8 +26,10 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":backend",
-        ":feature_manager",
         ":frontend",
+        ":frame",
+        ":image_point",
+        "//common:check",
         "//visualization/opencv:opencv_viz",
         "@eigen",
         "@gtsam",
@@ -280,30 +282,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "feature_manager",
-    hdrs = ["feature_manager.hh"],
-    visibility = ["//visibility:public"],
-    deps = [
-        ":feature_set",
-        "@gtsam",
-        "@opencv",
-    ],
-)
-
-cc_test(
-    name = "feature_manager_test",
-    srcs = ["feature_manager_test.cc"],
-    deps = [
-        ":feature_manager",
-        ":spatial_test_scene_cube",
-        "@com_google_googletest//:gtest_main",
-        "@eigen",
-        "@gtsam",
-        "@opencv",
-    ],
-)
-
 cc_library(
     name = "spatial_test_scene",
     srcs = ["spatial_test_scene.cc"],
@@ -383,6 +361,7 @@ cc_library(
     deps = [
         ":structure_from_motion_types",
         ":frame",
+        "//common:check",
         "//common/geometry:camera",
         "@boost//:smart_ptr",
         "@gtsam",
@@ -476,4 +455,30 @@ cc_binary(
         "@opencv",
         ":camera_calibration"
     ],
+)
+
+cc_binary(
+    name = "structure_from_motion_example",
+    srcs = ["structure_from_motion_example.cc"],
+    copts = [
+        "-Wno-error=unused-parameter",
+        "-Wno-error=unused-function",
+    ],
+    deps = [
+        ":structure_from_motion",
+        ":frontend",
+        ":backend",
+        ":four_seasons_parser",
+        ":frame",
+        ":image_point",
+        ":image_point_four_seasons",
+        ":spatial_test_scene_cube",
+        ":structure_from_motion_types",
+        ":camera_calibration",
+        "//visualization/opencv:opencv_viz",
+        "@boost//:smart_ptr",
+        "@cxxopts",
+        "@eigen",
+        "@opencv",
+    ],
 )
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index 30706673a..682c29ec3 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -1,49 +1,93 @@
 #include "experimental/learn_descriptors/backend.hh"
 
+#include <array>
+#include <cstddef>
 #include <memory>
 #include <optional>
 #include <queue>
 #include <unordered_set>
 
 #include "boost/make_shared.hpp"
+#include "common/check.hh"
 #include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Point2.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
 #include "gtsam/geometry/triangulation.h"
+#include "gtsam/inference/Symbol.h"
 #include "gtsam/linear/NoiseModel.h"
 #include "gtsam/navigation/GPSFactor.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
+#include "gtsam/nonlinear/Values.h"
 #include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/PriorFactor.h"
 #include "gtsam/slam/ProjectionFactor.h"
 
 namespace robot::experimental::learn_descriptors {
 
-void Backend::solve_graph() {
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
-    result_ = optimizer.optimize();
-}
-
-void Backend::solve_graph(const int num_steps,
-                          std::optional<Backend::graph_step_debug_func> inter_debug_func) {
-    gtsam::LevenbergMarquardtParams params;
-    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-    params.maxIterations = 1;          // We'll manually step it
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
+std::optional<gtsam::Point3> Backend::attempt_triangulate(
+    const std::vector<gtsam::Pose3>& cam_poses, const std::vector<gtsam::Point2>& cam_obs,
+    gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error, const bool verbose) {
+    gtsam::Point3 p_lmk_in_world;
+    if (cam_poses.size() > 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_lmk_in_world = gtsam::triangulatePoint3(
+                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
 
-    double prev_error = optimizer.error();
-    for (int i = 0; i < num_steps; i++) {
-        optimizer.iterate();
-        double curr_error = optimizer.error();
+        } catch (const std::exception& e) {
+            // Handle the exception gracefully by logging and retaining the previous
+            // estimate or discard
+            if (verbose) {
+                std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: "
+                          << e.what() << ". Discarding involved keypoints." << std::endl;
+            }
+            return std::nullopt;
+        }
+    } else {
+        return std::nullopt;
+    }
+    // Optional: perform an explicit cheirality check
+    for (const auto& pose : cam_poses) {
+        // Transform point to the camera coordinate system.
+        // transformTo() converts a world point to the camera frame.
+        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
+        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            return std::nullopt;
+        }
+    }
 
-        if (inter_debug_func) {
-            (*inter_debug_func)(optimizer.values(), i);
+    // Cheirality & reprojection checks
+    for (size_t i = 0; i < cam_poses.size(); ++i) {
+        const auto& pose = cam_poses[i];
+        // Cheirality
+        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
+        if (p_cam.z() <= 0) {
+            if (verbose) {
+                std::cerr << "[attempt_triangulate] point behind camera " << i
+                          << " (z=" << p_cam.z() << ")\n";
+            }
+            return std::nullopt;
         }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
+        // Reprojection error
+        if (max_reproj_error > 0) {
+            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
+            const auto reproj = cam.project(p_lmk_in_world);
+            const double err = (reproj - cam_obs[i]).norm();
+            if (err > max_reproj_error) {
+                if (verbose) {
+                    std::cerr << "[attempt_triangulate] reprojection error too large on view " << i
+                              << " (" << err << " px)\n";
+                }
+                return std::nullopt;
+            }
         }
     }
-    result_ = optimizer.values();
+    return p_lmk_in_world;
 }
 
 gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3>& rotations, int max_iter = 10) {
@@ -140,17 +184,193 @@ void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
         frame_tree.emplace(frame.id_, frame_vertex);
     }
 
-    // gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
-    // gtsam::Values result = optimizer.optimize();
-
-    // // Output results and assign results to frames
-    // for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
-    //     gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
-    //     frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ =
-    //     w_from_frame;
-    //     // std::cout << "Rotation " << symbol_rotation.string() << ": "
-    //     //           << w_from_frame.rpy().transpose() << " (roll pitch yaw radians)" <<
-    //     std::endl;
-    // }
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    gtsam::Values result = optimizer.optimize();
+
+    // Output results and assign results to frames
+    for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
+        gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
+        frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame;
+    }
+}
+
+void Backend::populate_rotation_estimate(std::vector<SharedFrame>& shared_frames) {
+    struct FrameVertex;
+    using SharedFrameVertex = std::shared_ptr<FrameVertex>;
+    struct FrameVertex {
+        Frame& frame;
+        std::optional<gtsam::Rot3> w_from_this;
+        std::unordered_map<FrameId, gtsam::Rot3>
+            neighbor_id_to_rot;  // rot is frame_from_neighbor_frame
+        std::unordered_set<SharedFrameVertex> parents;
+        std::unordered_set<SharedFrameVertex> children;
+    };
+    std::unordered_map<FrameId, SharedFrameVertex> frame_tree;
+    for (SharedFrame& shared_frame : shared_frames) {
+        frame_tree.emplace(shared_frame->id_, std::make_shared<FrameVertex>(*shared_frame));
+    }
+    // populate all frames and add children to neighbors
+    std::queue<size_t> q;  // indices of Frame in frames
+    std::unordered_set<FrameId> seen_frame_id;
+    q.push(shared_frames.front()->id_);
+
+    gtsam::NonlinearFactorGraph graph;
+    gtsam::Values initial;
+    // TODO: somehow get more informative/more robust noise values
+    // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength
+    gtsam::noiseModel::Diagonal::shared_ptr noise_rotation =
+        gtsam::noiseModel::Isotropic::Sigma(3, 0.1);  // radians
+
+    std::vector<gtsam::Symbol> symbols_frames;
+    while (!q.empty()) {
+        Frame frame = *shared_frames[q.front()];
+        q.pop();
+        seen_frame_id.insert(frame.id_);
+        SharedFrameVertex frame_vertex = frame_tree.at(frame.id_);
+        if (seen_frame_id.size() == 1) {  // make the first frame rotation identity unless the
+                                          // initial guess is set otherwise
+            frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_
+                                            ? *frame_vertex->frame.world_from_cam_initial_guess_
+                                            : gtsam::Rot3::Identity();
+            graph.add(gtsam::PriorFactor<gtsam::Rot3>(
+                gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_),
+                *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3)));
+        }
+        frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this;
+        std::cout << frame.to_string() << std::endl;
+
+        const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_);
+        symbols_frames.push_back(symbol_frame);
+        initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_);
+        for (const auto& [other_frame_id, frame_from_other_frame] :
+             frame.frame_from_other_frames_) {
+            if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) {
+                q.push(other_frame_id);
+                seen_frame_id.insert(other_frame_id);
+            }
+            frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame);
+            SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id);
+            frame_vertex->children.insert(other_frame_vertex);
+            other_frame_vertex->parents.insert(frame_vertex);
+            other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_,
+                                                           frame_from_other_frame.inverse());
+            if (!other_frame_vertex->w_from_this) {
+                other_frame_vertex->w_from_this =
+                    *frame_vertex->w_from_this * frame_from_other_frame;
+                other_frame_vertex->frame.world_from_cam_initial_guess_ =
+                    other_frame_vertex->w_from_this;
+            }
+
+            graph.add(gtsam::BetweenFactor<gtsam::Rot3>(
+                symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id),
+                frame_from_other_frame.inverse(), noise_rotation));
+        }
+        frame_tree.emplace(frame.id_, frame_vertex);
+    }
+
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    gtsam::Values result = optimizer.optimize();
+
+    // Output results and assign results to frames
+    for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
+        gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
+        frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame;
+    }
+}
+
+void Backend::calculate_initial_values() { populate_rotation_estimate(shared_frames_); }
+
+void Backend::populate_graph(const FeatureTracks& feature_tracks) {
+    ROBOT_CHECK(shared_frames_.size() > 3);
+    ROBOT_CHECK(shared_frames_[0]->world_from_cam_groundtruth_,
+                "We're assuming the first camera has groundtruth for now");
+
+    shared_frames_[0]->world_from_cam_initial_guess_ =
+        shared_frames_[0]->world_from_cam_groundtruth_->rotation();  // align rotation
+
+    Backend::populate_rotation_estimate(shared_frames_);
+
+    // add first cam as prior
+    Eigen::Vector3d cam0_in_w = *shared_frames_.front()->cam_in_world_interpolated_guess_;
+    const gtsam::Pose3 w_from_cam0_init_estimate(*shared_frames_[0]->world_from_cam_initial_guess_,
+                                                 gtsam::Point3());
+    const gtsam::Symbol symbol_cam0(symbol_char_pose, 0);
+    graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
+        symbol_cam0, w_from_cam0_init_estimate,
+        noise_tight_prior));  // currently assuming that we have groundtruth on the first pose
+    initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate);
+
+    // add gps factors
+    for (size_t i = 1; i < shared_frames_.size(); i++) {
+        const Frame& frame = *shared_frames_[i];
+        gtsam::Pose3 world_from_cam_estimate(
+            frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_
+                                                : gtsam::Rot3::Identity(),
+            *shared_frames_[i]->cam_in_world_interpolated_guess_ - cam0_in_w);
+        world_from_cam_initial_estimates_[frame.id_] = world_from_cam_estimate;
+        const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_);
+        initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
+        world_from_cam_initial_estimates_.emplace(frame.id_, world_from_cam_estimate);
+        if (frame.cam_in_world_initial_guess_) {
+            gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
+                frame.translation_covariance_in_cam_
+                    ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
+                                     std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
+                                     std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
+                    : gps_sigmas_fallback);
+            graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
+        }
+    }
+
+    // add landmarks
+    for (size_t i = 0; i < feature_tracks.size(); i++) {
+        std::vector<gtsam::Pose3> world_from_lmk_cams;
+        std::vector<gtsam::Point2> lmk_observations;
+        for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) {
+            world_from_lmk_cams.push_back(world_from_cam_initial_estimates_[frame_id]);
+            lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y);
+        }
+        std::optional<gtsam::Point3> landmark_estimate =
+            attempt_triangulate(world_from_lmk_cams, lmk_observations, shared_frames_[0]->K_, 2.0,
+                                false);  // all K are the same for now...
+        if (landmark_estimate) {
+            const gtsam::Symbol lmk_symbol(symbol_char_landmark, i);
+            for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) {
+                graph_.add(gtsam::GenericProjectionFactor(
+                    gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise_,
+                    gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol, shared_frames_[0]->K_));
+            }
+            initial_estimate_.insert(lmk_symbol, *landmark_estimate);
+            lmk_initial_estimates_.emplace(i, *landmark_estimate);
+        }
+    }
+}
+
+void Backend::solve_graph() {
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
+    result_ = optimizer.optimize();
+}
+
+void Backend::solve_graph(const int num_epochs,
+                          std::optional<Backend::graph_step_debug_func> iter_debug_func) {
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;          // We'll manually step it
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
+
+    double prev_error = optimizer.error();
+    for (int i = 0; i < num_epochs; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+
+        if (iter_debug_func) {
+            (*iter_debug_func)(optimizer.values(), i);
+        }
+        if (std::abs(prev_error - curr_error) < 1e-6) {
+            std::cout << "Converged at iteration " << i << "\n";
+            break;
+        }
+    }
+    result_ = optimizer.values();
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
index 9d7d403d9..50a934699 100644
--- a/experimental/learn_descriptors/backend.hh
+++ b/experimental/learn_descriptors/backend.hh
@@ -1,7 +1,9 @@
 #pragma once
 
+#include <cstddef>
 #include <memory>
 #include <optional>
+#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -27,26 +29,56 @@ class Backend {
     static constexpr char symbol_char_landmark = 'l';
     static constexpr char symbol_char_cam_cal = 'k';
 
-    void solve_graph();
-    typedef int epoch;
-    using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
-    void solve_graph(const int num_steps,
-                     std::optional<graph_step_debug_func> inter_debug_func = std::nullopt);
-
+    static std::optional<gtsam::Point3> attempt_triangulate(
+        const std::vector<gtsam::Pose3> &cam_poses, const std::vector<gtsam::Point2> &cam_obs,
+        gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error = 2.0,
+        const bool verbose = true);
     static gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3> &rotations,
                                          int max_iter = 10);
     // use rotation averaging to set the rotation initial guess for each frame in the world frame.
     // the world frame will be the first frame in the vector. deadreckon_incrementally won't
     // optimize if true
     static void populate_rotation_estimate(std::vector<Frame> &frames);
+    // use rotation averaging to set the rotation initial guess for each frame in the world frame.
+    // the world frame will be the first frame in the vector. deadreckon_incrementally won't
+    // optimize if true
+    static void populate_rotation_estimate(std::vector<SharedFrame> &shared_frames);
 
+    void add_frames(std::vector<SharedFrame> &shared_frames) {
+        shared_frames_.reserve(shared_frames_.size() + shared_frames.size());
+        shared_frames_.insert(shared_frames_.end(), shared_frames.begin(), shared_frames.end());
+    };
+    void calculate_initial_values();
+    void populate_graph(const FeatureTracks &feature_tracks);
+    void solve_graph();
+    typedef int epoch;
+    using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
+    void solve_graph(const int num_epochs,
+                     std::optional<graph_step_debug_func> iter_debug_func = std::nullopt);
+    // void imshow_keypoints(const size_t img_id, bool viz = true, bool viz_all = false);
+
+    const std::vector<SharedFrame> &shared_frames() const { return shared_frames_; };
     const gtsam::Values &current_initial_values() const { return initial_estimate_; };
     const gtsam::Values &result() const { return result_; };
 
    private:
+    std::vector<SharedFrame> shared_frames_;
+
     gtsam::Values initial_estimate_;
     gtsam::Values result_;
     gtsam::NonlinearFactorGraph graph_;
+
+    std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_estimates_;
+    std::unordered_map<size_t, gtsam::Point3> lmk_initial_estimates_;
+
+    gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas(
+        (gtsam::Vector(6) << 0, 0, 0,  // rotation stdev in radians
+         1e-6, 1e-6, 1e-6              // translation stdev in meters
+         )
+            .finished());
+    gtsam::Vector3 gps_sigmas_fallback{0.5, 0.5, 0.5};
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
 };
 using Landmark = gtsam::Point3;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_manager.hh b/experimental/learn_descriptors/feature_manager.hh
deleted file mode 100644
index d11fb8132..000000000
--- a/experimental/learn_descriptors/feature_manager.hh
+++ /dev/null
@@ -1,49 +0,0 @@
-#pragma once
-#include <optional>
-#include <sstream>
-#include <stdexcept>
-#include <unordered_map>
-#include <vector>
-
-#include "experimental/learn_descriptors/feature_set.hh"
-#include "gtsam/inference/Symbol.h"
-#include "opencv2/opencv.hpp"
-
-namespace robot::experimental::learn_descriptors {
-class FeatureManager {
-   public:
-    void append_img_data(const std::vector<cv::KeyPoint> &kpts, const cv::Mat &descriptors) {
-        feature_sets_.push_back(FeatureSet(kpts, descriptors));
-    };
-    void insert_symbol(const size_t idx_img, const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) {
-        feature_sets_[idx_img].insert_symbol(kpt, symbol);
-        if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) {
-            symbols_to_idx_img_.at(symbol).push_back(idx_img);
-        } else {
-            symbols_to_idx_img_.emplace(symbol, std::vector<size_t>{idx_img});
-        }
-    };
-
-    std::optional<gtsam::Symbol> get_symbol(const size_t idx_img, const cv::KeyPoint &kpt) {
-        return feature_sets_[idx_img].get_symbol(kpt);
-    };
-
-    size_t get_num_images_added() { return feature_sets_.size(); };
-
-    std::unordered_map<gtsam::Symbol, std::vector<size_t>> get_added_symbols() {
-        return symbols_to_idx_img_;
-    };
-
-    bool get_idxs_for_symbol(const gtsam::Symbol &symbol, std::vector<size_t> &out_cam_idxs) {
-        if (symbols_to_idx_img_.find(symbol) != symbols_to_idx_img_.end()) {
-            out_cam_idxs = symbols_to_idx_img_.at(symbol);
-            return true;
-        }
-        return false;
-    }
-
-   private:
-    std::vector<FeatureSet> feature_sets_;
-    std::unordered_map<gtsam::Symbol, std::vector<size_t>> symbols_to_idx_img_;
-};
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_manager_test.cc b/experimental/learn_descriptors/feature_manager_test.cc
deleted file mode 100644
index b258f558a..000000000
--- a/experimental/learn_descriptors/feature_manager_test.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-#include "experimental/learn_descriptors/feature_manager.hh"
-
-#include "Eigen/Dense"
-#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
-#include "gtest/gtest.h"
-#include "gtsam/geometry/Cal3_S2.h"
-#include "gtsam/geometry/PinholeCamera.h"
-#include "gtsam/geometry/Point3.h"
-#include "gtsam/geometry/Pose3.h"
-#include "gtsam/geometry/Rot3.h"
-#include "opencv2/opencv.hpp"
-
-namespace robot::experimental::learn_descriptors {
-TEST(feature_manager_test, test) {
-    FeatureManager feature_manager;
-    SpatialTestSceneCube test_cube(1.f);
-
-    const size_t img_width = 640;
-    const size_t img_height = 480;
-    const double fx = 500.0;
-    const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
-
-    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
-
-    Eigen::Matrix3d rotation0(
-        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
-        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
-    gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0));
-    gtsam::PinholeCamera<gtsam::Cal3_S2> camera0(pose0, *K);
-
-    gtsam::Pose3 pose1(
-        gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
-                    rotation0),
-        gtsam::Point3(0, 4, 0));
-    gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(pose1, *K);
-
-    gtsam::Pose3 pose2(
-        gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
-                    rotation0),
-        gtsam::Point3(0, 4, 0));
-    gtsam::PinholeCamera<gtsam::Cal3_S2> camera2(pose1, *K);
-
-    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras{camera0, camera1};
-
-    // std::vector<SpatialTestScene::ProjectedPoint> projected_pixels =
-    //     test_cube.get_projected_pixels(camera0, cv::Size(img_width, img_height));
-    // cv::Mat image(480, 640, CV_8UC3, cv::Scalar(255, 255, 255));
-    // for (const SpatialTestScene::ProjectedPoint &projected_pt : projected_pixels) {
-    //     const Eigen::Vector2d &pt = projected_pt.pixel;
-    //     cv::circle(image, cv::Point2i(static_cast<int>(pt[0]), static_cast<int>(pt[1])), 4,
-    //                cv::Scalar(0, 0, 0));
-    // }
-    // cv::imshow("projected cubes", image);
-    // cv::waitKey(0);
-
-    // std::vector<std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>> cams{
-    //     std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>(camera0,
-    //                                                               cv::Size(img_width,
-    //                                                               img_height)),
-    //     std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>(camera1,
-    //                                                               cv::Size(img_width,
-    //                                                               img_height))};
-    // std::vector<std::pair<SpatialTestScene::ProjectedPoint, SpatialTestScene::ProjectedPoint>>
-    //     correspondences = test_cube.get_corresponding_pixels(cams);
-}
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/feature_set.hh b/experimental/learn_descriptors/feature_set.hh
deleted file mode 100644
index 24db3ee0e..000000000
--- a/experimental/learn_descriptors/feature_set.hh
+++ /dev/null
@@ -1,78 +0,0 @@
-#pragma once
-#include <optional>
-#include <sstream>
-#include <stdexcept>
-#include <unordered_map>
-
-#include "gtsam/geometry/Pose3.h"
-#include "gtsam/inference/Symbol.h"
-#include "opencv2/opencv.hpp"
-
-namespace std {
-template <>
-struct hash<cv::KeyPoint> {
-    size_t operator()(const cv::KeyPoint &kp) const {
-        size_t h1 = hash<float>()(kp.pt.x);
-        size_t h2 = hash<float>()(kp.pt.y);
-        size_t h3 = hash<float>()(kp.size);
-        size_t h4 = hash<float>()(kp.angle);
-        size_t h5 = hash<float>()(kp.response);
-        size_t h6 = hash<int>()(kp.octave);
-        size_t h7 = hash<int>()(kp.class_id);
-
-        return (h1 ^ (h2 << 1)) ^ (h3 << 2) ^ (h4 << 3) ^ (h5 << 4) ^ (h6 << 5) ^ (h7 << 6);
-    }
-};
-template <>
-struct hash<gtsam::Symbol> {
-    size_t operator()(const gtsam::Symbol &s) const noexcept {
-        return std::hash<size_t>()(s.key());
-    }
-};
-}  // namespace std
-struct KeyPointEqual {
-    bool operator()(const cv::KeyPoint &kp1, const cv::KeyPoint &kp2) const {
-        return kp1.pt == kp2.pt && kp1.size == kp2.size && kp1.angle == kp2.angle &&
-               kp1.response == kp2.response && kp1.octave == kp2.octave &&
-               kp1.class_id == kp2.class_id;
-    }
-};
-
-namespace robot::experimental::learn_descriptors {
-class FeatureSet {
-   public:
-    struct keypoints_descriptors {
-        keypoints_descriptors(const std::vector<cv::KeyPoint> &keypoints,
-                              const cv::Mat &descriptors)
-            : kpts(keypoints), descriptors(descriptors) {}
-        std::vector<cv::KeyPoint> kpts;
-        cv::Mat descriptors;
-    };
-
-    FeatureSet(const std::vector<cv::KeyPoint> &kpts, const cv::Mat &descriptors)
-        : kpts_descriptors_(keypoints_descriptors(kpts, descriptors)){};
-    ~FeatureSet() = default;
-
-    void insert_symbol(const cv::KeyPoint &kpt, const gtsam::Symbol &symbol) {
-        if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) {
-            std::stringstream error_msg;
-            error_msg << "Keypoint already has symbol! Keypoint has symbol: "
-                      << kpt_to_symbol_.at(kpt);
-            throw std::runtime_error(error_msg.str());
-        }
-        kpt_to_symbol_.emplace(kpt, symbol);
-    };
-    std::optional<gtsam::Symbol> get_symbol(const cv::KeyPoint &kpt) {
-        if (kpt_to_symbol_.find(kpt) != kpt_to_symbol_.end()) {
-            return kpt_to_symbol_.at(kpt);
-        }
-        return std::nullopt;
-    };
-
-   private:
-    keypoints_descriptors kpts_descriptors_;
-    std::unordered_map<cv::KeyPoint, gtsam::Symbol, std::hash<cv::KeyPoint>, KeyPointEqual>
-        kpt_to_symbol_;
-    std::unordered_map<gtsam::Symbol, cv::KeyPoint> symbol_to_kpt_;
-};
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
index 0d8390295..0c5535899 100644
--- a/experimental/learn_descriptors/frame.hh
+++ b/experimental/learn_descriptors/frame.hh
@@ -1,4 +1,5 @@
 #pragma once
+#include <memory>
 #include <optional>
 #include <sstream>
 #include <string>
@@ -21,9 +22,22 @@ class Frame {
         : id_(id),
           seq_(seq),
           undistorted_img_(img),
-          K_(std::move(K)),
+          K_(K),
           kpts_(kpts),
           descriptors_(descriptors) {}
+    Frame(const Frame& other)
+        : id_(other.id_),
+          seq_(other.seq_),
+          undistorted_img_(other.undistorted_img_.clone()),
+          K_(other.K_),
+          kpts_(other.kpts_),
+          descriptors_(other.descriptors_.clone()),
+          frame_from_other_frames_(other.frame_from_other_frames_),
+          world_from_cam_groundtruth_(other.world_from_cam_groundtruth_),
+          cam_in_world_initial_guess_(other.cam_in_world_initial_guess_),
+          cam_in_world_interpolated_guess_(other.cam_in_world_interpolated_guess_),
+          world_from_cam_initial_guess_(other.world_from_cam_initial_guess_),
+          translation_covariance_in_cam_(other.translation_covariance_in_cam_) {}
 
     std::optional<Eigen::Vector3d> velocity_to(const Frame& other_frame) const {
         ROBOT_CHECK(other_frame.seq_ >= seq_);
@@ -97,8 +111,11 @@ class Frame {
     std::unordered_map<FrameId, gtsam::Rot3>
         frame_from_other_frames_;  // map of relative rotation for this_frame_from_frame_[FrameId]
     std::optional<gtsam::Pose3> world_from_cam_groundtruth_;
-    std::optional<gtsam::Point3> cam_in_world_initial_guess_;
+    std::optional<gtsam::Point3>
+        cam_in_world_initial_guess_;  // intended for data from sensors (GPS)
+    std::optional<gtsam::Point3> cam_in_world_interpolated_guess_;
     std::optional<gtsam::Rot3> world_from_cam_initial_guess_;
     std::optional<gtsam::Matrix3> translation_covariance_in_cam_;
 };
+using SharedFrame = std::shared_ptr<Frame>;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index e42fedb39..c6dffd66b 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -168,9 +168,9 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) {
 }
 
 void Frontend::populate_frames(bool verbose) {
-    FrameId id = frames_.size();
+    FrameId id = shared_frames_.size();
     for (const ImageAndPoint &img_and_pt : images_and_points_) {
-        const std::shared_ptr<ImagePoint> shared_img_pt = img_and_pt.shared_img_pt;
+        const std::shared_ptr<ImagePoint> &shared_img_pt = img_and_pt.shared_img_pt;
         if (verbose) {
             std::cout << shared_img_pt->to_string() << std::endl;
         }
@@ -205,17 +205,23 @@ void Frontend::populate_frames(bool verbose) {
         if (maybe_translation_covariance_in_cam) {
             frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam;
         }
-        frames_.push_back(frame);
+        if (id == 0) {
+            std::cout << (frame.world_from_cam_groundtruth_ ? "first frame has grnd"
+                                                            : "first frame doesn't have grnd")
+                      << std::endl;
+        }
+        shared_frames_.push_back(std::make_shared<Frame>(frame));
         id++;
     }
+    interpolate_frames();
 }
 
 void Frontend::match_frames_and_build_tracks() {
     if (params_.exhaustive) {
-        for (size_t i = 0; i < frames_.size() - 1; i++) {
-            for (size_t j = i + 1; j < frames_.size(); j++) {
-                std::vector<cv::DMatch> matches =
-                    compute_matches(frames_[i].descriptors(), frames_[j].descriptors());
+        for (size_t i = 0; i < shared_frames_.size() - 1; i++) {
+            for (size_t j = i + 1; j < shared_frames_.size(); j++) {
+                std::vector<cv::DMatch> matches = compute_matches(shared_frames_[i]->descriptors(),
+                                                                  shared_frames_[j]->descriptors());
                 enforce_bijective_buffer_matches(matches);
                 if (matches.size() < 5) {
                     continue;
@@ -223,12 +229,12 @@ void Frontend::match_frames_and_build_tracks() {
 
                 std::vector<cv::KeyPoint> cv_kpts_1;
                 std::vector<cv::KeyPoint> cv_kpts_2;
-                for (const KeypointCV &kpt : frames_[i].keypoint()) {
+                for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_1.push_back(cv_kpt);
                 }
-                for (const KeypointCV &kpt : frames_[j].keypoint()) {
+                for (const KeypointCV &kpt : shared_frames_[j]->keypoint()) {
                     cv::KeyPoint cv_kpt;
                     cv_kpt.pt = kpt;
                     cv_kpts_2.push_back(cv_kpt);
@@ -244,40 +250,34 @@ void Frontend::match_frames_and_build_tracks() {
                 if (!scale_cam0_from_cam1) {
                     continue;
                 }
-                // ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_,
-                //             "This rotation should be populated.");
-                // this could use some work to verify the quality of the output, particularly inside
-                // of estiamte_cam0_from_cam1
-                // also, at the moment I am not accounting for any covariance between the gps
-                // measurement and the unit translation vector from estimate_cam0_from_cam1
-
-                frames_[i].frame_from_other_frames_.emplace(
+
+                shared_frames_[i]->frame_from_other_frames_.emplace(
                     j, gtsam::Rot3(scale_cam0_from_cam1->linear().matrix()));
                 for (const cv::DMatch match : matches) {
-                    const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx];
-                    const KeypointCV kpt_cam1 = frames_[j].keypoint()[match.trainIdx];
+                    const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = shared_frames_[j]->keypoint()[match.trainIdx];
 
-                    auto key = std::make_pair(frames_[i].id_, kpt_cam0);
+                    auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0);
                     if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
                         const size_t id = lmk_id_map_.at(key);
                         ROBOT_CHECK(id < feature_tracks_.size(),
                                     "lmk_id_map_ id's shuold not exceed feature_tracks_ size!");
-                        feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0);
-                        feature_tracks_[id].obs_.emplace_back(frames_[j].id_, kpt_cam1);
+                        feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0);
+                        feature_tracks_[id].obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1);
                     } else {
                         FeatureTrack feature_track(i, kpt_cam0);
-                        feature_track.obs_.emplace_back(frames_[j].id_, kpt_cam1);
+                        feature_track.obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1);
                         feature_tracks_.push_back(feature_track);
-                        lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0),
+                        lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0),
                                             feature_tracks_.size() - 1);
                     }
                 }
             }
         }
     } else {  // successive only
-        for (size_t i = 0; i < frames_.size() - 1; i++) {
-            std::vector<cv::DMatch> matches =
-                compute_matches(frames_[i].descriptors(), frames_[i + 1].descriptors());
+        for (size_t i = 0; i < shared_frames_.size() - 1; i++) {
+            std::vector<cv::DMatch> matches = compute_matches(shared_frames_[i]->descriptors(),
+                                                              shared_frames_[i + 1]->descriptors());
             enforce_bijective_buffer_matches(matches);
             if (matches.size() < 5) {
                 continue;
@@ -285,12 +285,12 @@ void Frontend::match_frames_and_build_tracks() {
 
             std::vector<cv::KeyPoint> cv_kpts_1;
             std::vector<cv::KeyPoint> cv_kpts_2;
-            for (const KeypointCV &kpt : frames_[i].keypoint()) {
+            for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) {
                 cv::KeyPoint cv_kpt;
                 cv_kpt.pt = kpt;
                 cv_kpts_1.push_back(cv_kpt);
             }
-            for (const KeypointCV &kpt : frames_[i + 1].keypoint()) {
+            for (const KeypointCV &kpt : shared_frames_[i + 1]->keypoint()) {
                 cv::KeyPoint cv_kpt;
                 cv_kpt.pt = kpt;
                 cv_kpts_2.push_back(cv_kpt);
@@ -301,28 +301,28 @@ void Frontend::match_frames_and_build_tracks() {
             if (!scale_cam0_from_cam1) {
                 continue;
             }
-            ROBOT_CHECK(frames_[i].world_from_cam_initial_guess_,
+            ROBOT_CHECK(shared_frames_[i]->world_from_cam_initial_guess_,
                         "This rotation should be populated.");
-            frames_[i + 1].world_from_cam_initial_guess_.emplace(
-                frames_[i].world_from_cam_initial_guess_->matrix() *
+            shared_frames_[i + 1]->world_from_cam_initial_guess_.emplace(
+                shared_frames_[i]->world_from_cam_initial_guess_->matrix() *
                 scale_cam0_from_cam1->linear().matrix());
 
             for (const cv::DMatch match : matches) {
-                const KeypointCV kpt_cam0 = frames_[i].keypoint()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames_[i + 1].keypoint()[match.trainIdx];
+                const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx];
+                const KeypointCV kpt_cam1 = shared_frames_[i + 1]->keypoint()[match.trainIdx];
 
-                auto key = std::make_pair(frames_[i].id_, kpt_cam0);
+                auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0);
                 if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
                     const size_t id = lmk_id_map_.at(key);
                     ROBOT_CHECK(id < feature_tracks_.size(),
                                 "lmk_id_map_ id's shuold not exceed feature_tracks_ size!");
-                    feature_tracks_[id].obs_.emplace_back(frames_[i].id_, kpt_cam0);
-                    feature_tracks_[id].obs_.emplace_back(frames_[i + 1].id_, kpt_cam1);
+                    feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0);
+                    feature_tracks_[id].obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1);
                 } else {
                     FeatureTrack feature_track(i, kpt_cam0);
-                    feature_track.obs_.emplace_back(frames_[i + 1].id_, kpt_cam1);
+                    feature_track.obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1);
                     feature_tracks_.push_back(feature_track);
-                    lmk_id_map_.emplace(std::make_pair(frames_[i].id_, kpt_cam0),
+                    lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0),
                                         feature_tracks_.size() - 1);
                 }
             }
@@ -331,48 +331,42 @@ void Frontend::match_frames_and_build_tracks() {
     std::cout << "done processing matches" << std::endl;
 }
 
-std::optional<std::vector<Eigen::Vector3d>> Frontend::interpolated_initial_translations() {
+void Frontend::interpolate_frames() {
     std::vector<Frame *> frames_with_guess;
-    for (Frame &frame :
-         frames_) {  // this assumes that frames are ordered by seq (time), later on may have to
-                     // think about this more if trying to run pipeline on unordered images or sets
-                     // whose capture times are on different days
-        if (frame.cam_in_world_initial_guess_) {
-            frames_with_guess.push_back(&frame);
+    for (SharedFrame &shared_frame : shared_frames_) {  // this assumes that frames are ordered by
+                                                        // seq (time), later on may have to
+        // think about this more if trying to run pipeline on unordered images or sets
+        // whose capture times are on different days
+        if (shared_frame->cam_in_world_initial_guess_) {
+            frames_with_guess.push_back(&(*shared_frame));
         }
     }
-    if (frames_with_guess.size() < 2) return std::nullopt;
-    std::vector<Eigen::Vector3d> interpolated_init_guesses;
-    interpolated_init_guesses.reserve(frames_.size());
+    if (frames_with_guess.size() < 2) return;
     size_t idx_guess = 0;
-
-    for (const Frame &frame : frames_) {
+    for (SharedFrame &shared_frame : shared_frames_) {
         Eigen::Vector3d interpolated;
 
         // advance to next guess window if needed
         while (idx_guess + 1 < frames_with_guess.size() - 1 &&
-               frame.seq_ > frames_with_guess[idx_guess + 1]->seq_) {
+               shared_frame->seq_ > frames_with_guess[idx_guess + 1]->seq_) {
             idx_guess++;
         }
 
-        if (frame.cam_in_world_initial_guess_) {
-            interpolated = *frame.cam_in_world_initial_guess_;
+        if (shared_frame->cam_in_world_initial_guess_) {
+            interpolated = *shared_frame->cam_in_world_initial_guess_;
         } else {
             Eigen::Vector3d v_a_to_b =
                 *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]);
-            double dt = (frame.seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) * 1e-9 *
-                        (frames_with_guess[idx_guess]->seq_ > frame.seq_
-                             ? frames_with_guess[idx_guess]->seq_ - frame.seq_
-                             : frame.seq_ - frames_with_guess[idx_guess]->seq_);
+            double dt = (shared_frame->seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) *
+                        1e-9 *
+                        (frames_with_guess[idx_guess]->seq_ > shared_frame->seq_
+                             ? frames_with_guess[idx_guess]->seq_ - shared_frame->seq_
+                             : shared_frame->seq_ - frames_with_guess[idx_guess]->seq_);
             interpolated = *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ +
                            v_a_to_b * dt;  // add sign back to dt
-            std::cout << "interpolated frame " << frame.id_ << ": " << interpolated << std::endl;
-            std::cout << "\tdt: " << dt << std::endl;
-            std::cout << "\tv_a_to_b: " << v_a_to_b << std::endl;
         }
-        interpolated_init_guesses.push_back(interpolated);
+        shared_frame->cam_in_world_interpolated_guess_ = interpolated;
     }
-    return interpolated_init_guesses;
 }
 
 std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const cv::Mat &img) const {
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
index ce2f5c43d..e381d8089 100644
--- a/experimental/learn_descriptors/frontend.hh
+++ b/experimental/learn_descriptors/frontend.hh
@@ -34,7 +34,7 @@ class Frontend {
 
     void populate_frames(bool verbose = false);
     void match_frames_and_build_tracks();
-    std::optional<std::vector<Eigen::Vector3d>> interpolated_initial_translations();
+    void interpolate_frames();
 
     void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); };
     void add_images(const std::vector<ImageAndPoint> &img_and_pts) {
@@ -47,7 +47,7 @@ class Frontend {
     const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; };
     const FeatureTracks &feature_tracks() const { return feature_tracks_; };
     const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; };
-    std::vector<Frame> &frames() { return frames_; };
+    std::vector<SharedFrame> &frames() { return shared_frames_; };
 
     std::pair<std::vector<cv::KeyPoint>, cv::Mat> extract_features(const cv::Mat &img) const;
     std::vector<cv::DMatch> compute_matches(const cv::Mat &descriptors1,
@@ -81,6 +81,6 @@ class Frontend {
     std::vector<ImageAndPoint> images_and_points_;
     FeatureTracks feature_tracks_;
     FrameLandmarkIdMap lmk_id_map_;
-    std::vector<Frame> frames_;
+    std::vector<SharedFrame> shared_frames_;
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh
index 34f89b9b3..52d900be1 100644
--- a/experimental/learn_descriptors/gps_data.hh
+++ b/experimental/learn_descriptors/gps_data.hh
@@ -3,14 +3,16 @@
 #include <cmath>
 #include <cstddef>
 #include <optional>
+#include <sstream>
+#include <string>
 
 #include "Eigen/Core"
 
 namespace robot::experimental::learn_descriptors {
 struct GPSData {
     struct Uncertainty {
-        double sigma_latitude_deg;
-        double sigma_longitude_deg;
+        double sigma_latitude_m;
+        double sigma_longitude_m;
         double sigma_altitude_m;
         double orientation_deg;
         double rms_range_error_m;
@@ -18,57 +20,45 @@ struct GPSData {
         // diagonal latitude, longitude, altitude covariance in meters squared
         Eigen::Matrix3d to_LLA_covariance() const {
             Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
-            cov(0, 0) = sigma_latitude_deg * sigma_latitude_deg;
-            cov(1, 1) = sigma_longitude_deg * sigma_longitude_deg;
+            cov(0, 0) = sigma_latitude_m * sigma_latitude_m;
+            cov(1, 1) = sigma_longitude_m * sigma_longitude_m;
             cov(2, 2) = sigma_altitude_m * sigma_altitude_m;
             return cov;
         }
 
-        // Convert lat/lon covariance to ENU at given latitude
-        Eigen::Matrix3d to_ENU_covariance(double lat_deg) const {
-            // –– Named conversion/constants ––
+        // Convert lat/lon covariance to ENU
+        Eigen::Matrix3d to_ENU_covariance() const {
             static constexpr double DEG2RAD = M_PI / 180.0;
-            static constexpr double ORIENTATION_OFFSET_DEG =
-                90.0;  // for CW‐from‐North → CCW‐from‐East
+            static constexpr double ORIENT_OFFSET_DEG = 90.0;  // CW‑from‑North → CCW‑from‑East
 
-            // WGS‑84 approximate “meters per degree” polynomial coefficients
-            static constexpr double MERIDIONAL_A0 = 111132.92;
-            static constexpr double MERIDIONAL_A2 = -559.82;
-            static constexpr double MERIDIONAL_A4 = 1.175;
-            static constexpr double PARALLEL_B0 = 111412.84;
-            static constexpr double PARALLEL_B2 = -93.5;
+            // 1) build the 2×2 variance ellipse in metres²
+            Eigen::Matrix2d D;
+            D << sigma_latitude_m * sigma_latitude_m, 0.0, 0.0,
+                sigma_longitude_m * sigma_longitude_m;
 
-            // 1) get LLA covariance in degrees²
-            Eigen::Matrix3d lla_cov = to_LLA_covariance();
-
-            // 2) convert latitude to radians
-            double phi = lat_deg * DEG2RAD;
-
-            // 3) compute meters/degree at this latitude
-            double kLat = MERIDIONAL_A0 + MERIDIONAL_A2 * std::cos(2.0 * phi) +
-                          MERIDIONAL_A4 * std::cos(4.0 * phi);
-            double kLon = PARALLEL_B0 * std::cos(phi) + PARALLEL_B2 * std::cos(3.0 * phi);
-
-            // 4) build the Jacobian for degree→meter scaling
-            Eigen::Matrix2d J;
-            J << kLat, 0.0, 0.0, kLon;
-
-            // 5) scale the 2×2 lat/lon covariance into meters²
-            Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
-            Eigen::Matrix2d cov_m = J * D * J.transpose();
-
-            // 6) rotate that ellipse into ENU axes (θ = CCW from East)
-            double theta = (ORIENTATION_OFFSET_DEG - orientation_deg) * DEG2RAD;
+            // 2) rotate that into ENU axes
+            double theta = (ORIENT_OFFSET_DEG - orientation_deg) * DEG2RAD;
             Eigen::Matrix2d R;
             R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
-            Eigen::Matrix2d horz_cov = R * cov_m * R.transpose();
 
-            // 7) assemble full 3×3 ENU covariance
-            Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero();
-            enu_cov.topLeftCorner<2, 2>() = horz_cov;
-            enu_cov(2, 2) = lla_cov(2, 2);  // altitude variance already in m²
+            Eigen::Matrix2d horz = R * D * R.transpose();
+
+            // 3) assemble full 3×3 ENU covariance
+            Eigen::Matrix3d enu = Eigen::Matrix3d::Zero();
+            enu.topLeftCorner<2, 2>() = horz;
+            enu(2, 2) = sigma_altitude_m * sigma_altitude_m;
+            return enu;
+        }
 
-            return enu_cov;
+        std::string to_string() const {
+            std::stringstream ss;
+            ss << "Uncertainty:";
+            ss << "\n\tsigma_lat_deg: " << sigma_latitude_m;
+            ss << "\n\tsigma_lon_deg: " << sigma_longitude_m;
+            ss << "\n\tsigma_altitude_m: " << sigma_altitude_m;
+            ss << "\n\torientation_deg: " << orientation_deg;
+            ss << "\n\trms_range_error_m: " << rms_range_error_m;
+            return ss.str();
         }
     };
     size_t seq;  // time in nanoseconds, may differ from image capturetime
@@ -76,5 +66,14 @@ struct GPSData {
     double longitude;
     std::optional<double> altitude;  // meters above sea level
     std::optional<Uncertainty> uncertainty;
+    std::string to_string() const {
+        std::stringstream ss;
+        ss << "GPS Data: ";
+        ss << "\n\tseq: " << seq;
+        ss << "\n\tlla: " << latitude << ", " << longitude << ", "
+           << (altitude ? std::to_string(*altitude) : std::string("N/A"));
+        ss << "\n" << (uncertainty ? uncertainty->to_string() : "Uncertainty: N/A");
+        return ss.str();
+    }
 };
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index f5b9a681b..b9fae431c 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -68,4 +68,5 @@ struct ImagePoint {
    private:
     std::optional<Eigen::Vector3d> cam_in_world_;
 };
+using SharedImagePoint = std::shared_ptr<ImagePoint>;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc
index 6cdec9d5f..1d5643abd 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.cc
+++ b/experimental/learn_descriptors/image_point_four_seasons.cc
@@ -45,8 +45,8 @@ std::optional<Eigen::Matrix3d> ImagePointFourSeasons::translation_covariance_in_
 
 std::optional<Eigen::Matrix3d> ImagePointFourSeasons::gps_covariance_in_world() const {
     if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt;
-    const Eigen::Matrix3d ENU_covariance(
-        gps_gcs->uncertainty->to_ENU_covariance(gps_gcs->latitude));
+    const Eigen::Matrix3d ENU_covariance(gps_gcs->uncertainty->to_ENU_covariance());
+    std::cout << "image point " << id << " ENU_covariance: " << ENU_covariance << std::endl;
     const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix());
     const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose());
     return w_covariance;
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
index 368fe8351..95401d885 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.hh
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -72,26 +72,7 @@ struct ImagePointFourSeasons : ImagePoint {
         } else {
             ss << "N/A";
         }
-        ss << "\n\tgps_gcs: ";
-        if (gps_gcs) {
-            ss << "\n\t\tseq: " << gps_gcs->seq;
-            ss << "\n\t\tval: " << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t";
-            if (gps_gcs->altitude) {
-                ss << *(gps_gcs->altitude);
-            } else {
-                ss << "alt N/A";
-            }
-            ss << "\n\t\tsigma: ";
-            if (gps_gcs->uncertainty) {
-                ss << gps_gcs->uncertainty->sigma_latitude_deg << "\t"
-                   << gps_gcs->uncertainty->sigma_longitude_deg << "\t"
-                   << gps_gcs->uncertainty->sigma_altitude_m;
-            } else {
-                ss << "N/A";
-            }
-        } else {
-            ss << "N/A";
-        }
+        ss << "\n" << (gps_gcs ? gps_gcs->to_string() : "GPS Data: N/A");
         return ss.str();
     }
 
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
index ef87991f1..054c8a197 100644
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ b/experimental/learn_descriptors/refactor_sfm_mvp.cc
@@ -207,42 +207,20 @@ int main(int argc, const char **argv) {
                           true, false};
     Frontend frontend(params);
 
-    std::vector<size_t> idx_img_pts;
     for (size_t i = 633; i < 646; i += 2) {
         frontend.add_image(
             ImageAndPoint{parser.load_image(i),
                           std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
-        idx_img_pts.push_back(i);
+        const ImagePointFourSeasons img_pt = parser.get_image_point(i);
+        std::cout << img_pt.to_string() << std::endl;
     }
-    // for (size_t i = 581; i < 750; i += 5) {
-    //     frontend.add_image(
-    //         ImageAndPoint{parser.load_image(i),
-    //                       std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
-    //     idx_img_pts.push_back(i);
-    // }
     frontend.populate_frames();
     frontend.match_frames_and_build_tracks();
 
-    std::cout << "first frame translation: " << *frontend.frames()[0].cam_in_world_initial_guess_
-              << std::endl;
-    std::cout << "ground truth translation: "
-              << frontend.frames()[0].world_from_cam_groundtruth_->translation() << std::endl;
-
+    // visualization
     std::vector<robot::visualization::VizPose> viz_poses_init;
-    std::optional<Eigen::Vector3d> first_point;
     std::optional<Eigen::Isometry3d> first_grnd_trth;
-    (void)first_point;
     for (const Frame &frame : frontend.frames()) {
-        // if (frame.cam_in_world_initial_guess_) {
-        //     Eigen::Isometry3d world_from_cam_init;
-        //     world_from_cam_init.linear() = frame.world_from_cam_initial_guess_->matrix();
-        //     if (!first_point) first_point = *frame.cam_in_world_initial_guess_;
-        //     world_from_cam_init.translation() = *frame.cam_in_world_initial_guess_ -
-        //     *first_point; std::cout << "world_from_cam_init: " <<
-        //     world_from_cam_init.matrix() << std::endl; viz_poses_init.emplace_back(
-        //         world_from_cam_init, gtsam::Symbol(Frontend::symbol_pose_char,
-        //         frame.id_).string());
-        // }
         if (frame.world_from_cam_groundtruth_) {
             std::cout << "adding a ground truth frame to viz!" << std::endl;
             Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix());
@@ -280,34 +258,13 @@ int main(int argc, const char **argv) {
     frames[0].world_from_cam_initial_guess_ =
         frames[0].world_from_cam_groundtruth_->rotation();  // make sure first idx has grnd trth
 
-    // // let's examine the rotations to the first thingy
-    // std::vector<robot::visualization::VizPose> viz_5_pt_rot;
-    // for (const auto &[frame_id, rotation] : frames[0].frame_from_other_frames_) {
-    //     viz_5_pt_rot.emplace_back(
-    //         Eigen::Isometry3d(gtsam::Pose3(rotation, gtsam::Point3()).matrix()),
-    //         "rot_to_" + std::to_string(frame_id));
-    // }
-    // robot::visualization::viz_scene(viz_5_pt_rot, std::vector<robot::visualization::VizPoint>(),
-    //                                 cv::viz::Color::brown(), true, true,
-    //                                 "Frontend initial guesses");
-
     Backend::populate_rotation_estimate(frames);
     std::vector<gtsam::Symbol> symbols_pose;
     std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_guess;
     std::optional<std::vector<Eigen::Vector3d>> interpolated_cam_in_w =
         frontend.interpolated_initial_translations();
     ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w);
-    // std::vector<robot::visualization::VizPoint> viz_pts_interpolated;
-    // for (size_t i = 0; i < interpolated_cam_in_w->size(); i++) {
-    //     viz_pts_interpolated.emplace_back(
-    //         (*interpolated_cam_in_w)[i] - interpolated_cam_in_w->front(),
-    //         "pt_" + std::to_string(i));
-    //     std::cout << "pt_" + std::to_string(i) << ": " << (*interpolated_cam_in_w)[i] <<
-    //     std::endl;
-    // }
-    // robot::visualization::viz_scene(std::vector<robot::visualization::VizPose>(),
-    //                                 viz_pts_interpolated, cv::viz::Color::brown(), true, true,
-    //                                 "Initial guesses in backend");
+
     Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front();
     std::optional<Eigen::Vector3d> grndtrth_cam0_in_w;
     std::vector<size_t> idx_grndtrth_frame;
@@ -361,11 +318,11 @@ int main(int argc, const char **argv) {
                           << std::endl;
             }
             gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
-                // frame.translation_covariance_in_cam_
-                false ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
-                                       std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
-                                       std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
-                      : gps_sigmas_fallback);
+                frame.translation_covariance_in_cam_
+                    ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
+                                     std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
+                                     std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
+                    : gps_sigmas_fallback);
             graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
             std::cout << "adding gps factor!" << std::endl;
         }
@@ -412,7 +369,6 @@ int main(int argc, const char **argv) {
     // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
     double sum_traj_error = 0;
     double sum_rot_error = 0;
-    // for (size_t i = 0; i < idx_grndtrth_frame.size(); i++) {
     for (const size_t i_grndtrth_frame : idx_grndtrth_frame) {
         const Frame &frame = frames[i_grndtrth_frame];
         const gtsam::Pose3 traj_pose =
@@ -429,6 +385,5 @@ int main(int argc, const char **argv) {
     std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
 
     std::cout << "about to visualize result" << std::endl;
-    // result.print();
     graph_values(result, "Result", symbols_pose, symbols_lmks);
 }
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index c0c39d2a8..a749b6098 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -1,160 +1,82 @@
 #include "experimental/learn_descriptors/structure_from_motion.hh"
 
 #include <filesystem>
+#include <memory>
 #include <sstream>
 #include <stdexcept>
 
+#include "common/check.hh"
 #include "common/geometry/camera.hh"
 #include "common/geometry/translate_types.hh"
+#include "experimental/learn_descriptors/backend.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
 #include "gtsam/geometry/Point2.h"
+#include "opencv2/opencv.hpp"
+#include "opencv2/viz.hpp"
 #include "visualization/opencv/opencv_viz.hh"
 
-namespace fs = std::filesystem;
-
-namespace geom = robot::geometry;
-
 namespace robot::experimental::learn_descriptors {
-
-const Eigen::Isometry3d StructureFromMotion::T_symlake_boat_cam = []() {
-    Eigen::Isometry3d transform = Eigen::Isometry3d::Identity();
-    transform.linear() = (Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d(0, 1, 0)) *
-                          Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d(0, 0, 1)))
-                             .toRotationMatrix();
-    return transform;
-}();
-
-std::string pose_to_string(gtsam::Pose3 pose) {
-    std::stringstream ss;
-    ss << pose;
-    return ss.str();
-};
-
-const gtsam::Pose3 StructureFromMotion::default_initial_pose(
-    StructureFromMotion::T_symlake_boat_cam.matrix());
-
-StructureFromMotion::StructureFromMotion(Frontend::ExtractorType frontend_extractor,
-                                         gtsam::Cal3_S2 K, Eigen::Matrix<double, 5, 1> D,
-                                         gtsam::Pose3 initial_pose,
-                                         Frontend::MatcherType frontend_matcher)
-    : feature_manager_(std::make_shared<FeatureManager>()),
-      initial_pose_(initial_pose),
-      frontend_(frontend_extractor, frontend_matcher),
-      backend_(boost::make_shared<gtsam::Cal3_S2>(K)) {
-    // frontend_ = Frontend(frontend_extractor, frontend_matcher);
-    // backend_ = Backend(K);
-    // backend_ = Backend(feature_manager_, K);
-
-    K_ = (cv::Mat_<double>(3, 3) << K.fx(), 0, K.px(), 0, K.fy(), K.py(), 0, 0, 1);
-    D_ = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
-
-    set_initial_pose(initial_pose);
-}
-
-void StructureFromMotion::set_initial_pose(gtsam::Pose3 initial_pose) {
-    backend_.add_prior_factor(gtsam::Symbol(Backend::symbol_char_pose, 0), initial_pose,
-                              gtsam::noiseModel::Isotropic::Sigma(6, 0));
+void StructureFromMotion::add_image_point(const cv::Mat &img_undistorted,
+                                          const SharedImagePoint &image_point) {
+    frontend_.add_image(ImageAndPoint{img_undistorted, image_point});
 }
-
-void StructureFromMotion::add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam) {
-    cv::Mat img_undistorted;
-    cv::undistort(img, img_undistorted, K_, D_);
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_and_descriptors =
-        frontend_.extract_features(img);
-    feature_manager_->append_img_data(keypoints_and_descriptors.first,
-                                      keypoints_and_descriptors.second);
-    keypoint_to_landmarks_.push_back(std::unordered_map<cv::KeyPoint, Backend::Landmark>());
-    const size_t idx_img_current = get_num_images_added();
-    // const size_t idx_img_current = feature_manager_->get_num_images_added() - 1;
-    std::cout << "current index " << idx_img_current << std::endl;
-    if (idx_img_current > 0) {
-        std::vector<cv::DMatch> matches =
-            compute_matches(img_keypoints_and_descriptors_.back().second,
-                            keypoints_and_descriptors.second, Frontend::enforce_bijective_matches);
-
-        const gtsam::Symbol sym_T_w_c0(Backend::symbol_char_pose, idx_img_current - 1);
-        const gtsam::Symbol sym_T_w_c1(Backend::symbol_char_pose, idx_img_current);
-
-        backend_.add_factor_GPS(sym_T_w_c1, T_world_cam.translation(), backend_.get_gps_noise(),
-                                T_world_cam.rotation());
-
-        for (const cv::DMatch match : matches) {
-            const cv::KeyPoint kpt_cam0 =
-                img_keypoints_and_descriptors_.back().first[match.queryIdx];
-            const cv::KeyPoint kpt_cam1 = keypoints_and_descriptors.first[match.trainIdx];
-
-            size_t idx;
-            unsigned char chr;
-
-            auto maybe_symbol0 = feature_manager_->get_symbol(idx_img_current - 1, kpt_cam0);
-            if (maybe_symbol0) {
-                idx = (*maybe_symbol0).index();
-                chr = (*maybe_symbol0).chr();
-            } else {
-                gtsam::Symbol symbol_temp =
-                    gtsam::Symbol(Backend::symbol_char_landmark, landmark_count_);
-                idx = symbol_temp.index();
-                chr = symbol_temp.chr();
-                landmark_count_++;
-                feature_manager_->insert_symbol(idx_img_current - 1, kpt_cam0, symbol_temp);
-            }
-            gtsam::Symbol symbol_lmk = gtsam::Symbol(chr, idx);
-            std::cout << "value: char: " << chr << " , idx: " << idx << ". " << symbol_lmk
-                      << std::endl;
-            feature_manager_->insert_symbol(idx_img_current, kpt_cam1, symbol_lmk);
-
-            const Backend::Landmark landmark_cam_0(
-                symbol_lmk, sym_T_w_c0,
-                gtsam::Point2(
-                    static_cast<double>(
-                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.x),
-                    static_cast<double>(
-                        img_keypoints_and_descriptors_.back().first[match.queryIdx].pt.y)),
-                backend_.get_K(), 3.0);
-
-            const Backend::Landmark landmark_cam_1(
-                symbol_lmk, sym_T_w_c1,
-                gtsam::Point2(
-                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.x),
-                    static_cast<double>(keypoints_and_descriptors.first[match.trainIdx].pt.y)),
-                backend_.get_K(), 3.0);
-
-            backend_.add_landmark(landmark_cam_0);
-            backend_.add_landmark(landmark_cam_1);
-        }
+void StructureFromMotion::add_image_points(
+    const std::vector<cv::Mat> &imgs_undistorted,
+    const std::vector<SharedImagePoint> &shared_image_points) {
+    ROBOT_CHECK(imgs_undistorted.size() == shared_image_points.size());
+    std::vector<ImageAndPoint> img_and_pts;
+    img_and_pts.reserve(shared_image_points.size());
+    for (size_t i = 0; i < shared_image_points.size(); i++) {
+        img_and_pts.emplace_back(imgs_undistorted[i], shared_image_points[i]);
     }
-    img_keypoints_and_descriptors_.push_back(keypoints_and_descriptors);
+    frontend_.add_images(img_and_pts);
 }
 
-std::vector<cv::DMatch> StructureFromMotion::compute_matches(
-    const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
-    std::optional<StructureFromMotion::match_function> post_process_func) {
-    std::vector<cv::DMatch> matches = frontend_.compute_matches(descriptors_1, descriptors_2);
-    if (post_process_func) {
-        (*post_process_func)(matches);
-    }
-    matches_.push_back(matches);
-    return matches;
+void StructureFromMotion::solve_structure(
+    const int num_steps, std::optional<Backend::graph_step_debug_func> iter_debug_func) {
+    // FRONTEND
+    frontend_.populate_frames();
+    ROBOT_CHECK(frontend_.frames().size() > 2);
+    frontend_.match_frames_and_build_tracks();
+    ROBOT_CHECK(frontend_.frames().front()->world_from_cam_groundtruth_,
+                "Assuming that our first frame has groundtruth for now.",
+                *frontend_.frames().front()->world_from_cam_groundtruth_);
+    // align rotation via the initial guess
+    std::cout << "sfm heartbeat" << std::endl;
+    frontend_.frames().front()->world_from_cam_initial_guess_ =
+        frontend_.frames().front()->world_from_cam_groundtruth_->rotation();
+    std::cout << "sfm heartbeat 2" << std::endl;
+    std::cout << "sfm heartbeat 3" << std::endl;
+    std::cout << "sfm heartbeat 4" << std::endl;
+
+    // BACKEND
+    backend_.add_frames(frontend_.frames());
+    std::cout << "backend_.shared_frames.size(): " << backend_.shared_frames().size() << std::endl;
+    std::cout << "sfm heartbeat 5" << std::endl;
+    backend_.calculate_initial_values();
+    std::cout << "backend.initial_values.size(): " << backend_.current_initial_values().size()
+              << std::endl;
+    std::cout << "sfm heartbeat 6" << std::endl;
+    backend_.solve_graph(10, iter_debug_func);
+    std::cout << "sfm heartbeat 7" << std::endl;
 }
 
-void StructureFromMotion::graph_values(const gtsam::Values &values,
-                                       const std::string &window_name) {
-    std::vector<Eigen::Isometry3d> final_poses;
-    std::vector<Eigen::Vector3d> final_lmks;
-    for (size_t i = 0; i < feature_manager_->get_num_images_added(); i++) {
-        final_poses.emplace_back(
-            values.at<gtsam::Pose3>(gtsam::Symbol(get_backend().symbol_char_pose, i)).matrix());
+void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name,
+                                       const cv::viz::Color color) {
+    std::vector<robot::visualization::VizPose> final_poses;
+    std::vector<robot::visualization::VizPoint> final_lmks;
+    for (const auto &key_value : values) {
+        gtsam::Key key = key_value.key;
+        gtsam::Symbol symbol(key);
+        if (values.exists<gtsam::Pose3>(key)) {
+            const gtsam::Pose3 &pose = values.at<gtsam::Pose3>(key);
+            final_poses.emplace_back(Eigen::Isometry3d(pose.matrix()), symbol.string());
+        } else if (values.exists<gtsam::Point3>(key)) {
+            const gtsam::Point3 &pt = values.at<gtsam::Point3>(key);
+            final_lmks.emplace_back(pt, symbol.string());
+        }
     }
-    // for (const gtsam::Symbol &lmk_symbol : feature_manager_->get_added_symbols()) {
-    //     if (!values.exists(lmk_symbol)) {
-    //         std::cout << "WTF " << lmk_symbol << std::endl;
-    //     }
-    //     final_lmks.emplace_back(values.at<gtsam::Point3>(lmk_symbol));
-    // }
-    geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true, window_name);
-}
-
-void StructureFromMotion::solve_structure(
-    const int num_steps, std::optional<Backend::graph_step_debug_func> inter_debug_func) {
-    backend_.solve_graph(num_steps, inter_debug_func);
+    robot::visualization::viz_scene(final_poses, final_lmks, color, true, true, window_name);
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
index 90b6bcfbd..93ffe0ddb 100644
--- a/experimental/learn_descriptors/structure_from_motion.hh
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -5,65 +5,43 @@
 #include <memory>
 #include <optional>
 #include <set>
+#include <string>
 #include <unordered_map>
 #include <utility>
 
 #include "Eigen/Dense"
 #include "common/geometry/camera.hh"
 #include "experimental/learn_descriptors/backend.hh"
-#include "experimental/learn_descriptors/feature_manager.hh"
+#include "experimental/learn_descriptors/frame.hh"
 #include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/image_point.hh"
 #include "gtsam/geometry/Pose3.h"
 #include "gtsam/inference/Symbol.h"
 #include "gtsam/nonlinear/Values.h"
 #include "opencv2/opencv.hpp"
+#include "opencv2/viz.hpp"
 
 namespace robot::experimental::learn_descriptors {
 class StructureFromMotion {
    public:
-    static const Eigen::Isometry3d T_symlake_boat_cam;
-    static const gtsam::Pose3 default_initial_pose;
-    /**
-     * @param D is vector (5x1) of the distortion coefficients (k1, k2, p1, p2, k3)
-     */
-    StructureFromMotion(Frontend::ExtractorType frontend_extractor, gtsam::Cal3_S2 K,
-                        Eigen::Matrix<double, 5, 1> D,
-                        gtsam::Pose3 initial_pose = default_initial_pose,
-                        Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
-    ~StructureFromMotion(){};
+    StructureFromMotion(FrontendParams frontend_params) : frontend_(Frontend(frontend_params)){};
 
-    void set_initial_pose(gtsam::Pose3 initial_pose);
-    void add_image(const cv::Mat &img, const gtsam::Pose3 &T_world_cam);
-    void solve_structure() { backend_.solve_graph(); };
+    static void graph_values(const gtsam::Values &values,
+                             const std::string &window_name = "Viz Structure",
+                             const cv::viz::Color color = cv::viz::Color::brown());
+
+    void add_image_point(const cv::Mat &img_undistorted, const SharedImagePoint &image_point);
+    void add_image_points(const std::vector<cv::Mat> &imgs_undistorted,
+                          const std::vector<SharedImagePoint> &image_points);
     void solve_structure(
         const int num_steps,
-        std::optional<Backend::graph_step_debug_func> inter_debug_func = std::nullopt);
-    const gtsam::Values &get_structure_result() { return backend_.get_result(); };
-    using match_function = std::function<void(std::vector<cv::DMatch> &)>;
-    std::vector<cv::DMatch> compute_matches(
-        const cv::Mat &descriptors_1, const cv::Mat &descriptors_2,
-        std::optional<match_function> post_process_func = std::nullopt);
-    void graph_values(const gtsam::Values &values, const std::string &window_name = "graph values");
+        std::optional<Backend::graph_step_debug_func> iter_debug_func = std::nullopt);
+    const gtsam::Values &result() { return backend_.result(); };
 
-    Frontend get_frontend() { return frontend_; };
-    Backend get_backend() { return backend_; }
-    size_t get_num_images_added() { return img_keypoints_and_descriptors_.size(); };
-    size_t get_landmark_count() { return landmark_count_; };
-    const std::vector<std::vector<cv::DMatch>> compute_matches() { return matches_; };
+    Frontend frontend() { return frontend_; };
+    Backend backend() { return backend_; }
 
    private:
-    std::shared_ptr<FeatureManager> feature_manager_;
-
-    gtsam::Pose3 initial_pose_;
-    std::vector<std::pair<std::vector<cv::KeyPoint>, cv::Mat>> img_keypoints_and_descriptors_;
-    std::vector<std::vector<cv::DMatch>> matches_;
-    std::vector<std::vector<Backend::Landmark>> landmarks_;
-    std::vector<std::unordered_map<cv::KeyPoint, Backend::Landmark>> keypoint_to_landmarks_;
-    size_t landmark_count_ = 0;
-
-    cv::Mat K_;
-    cv::Mat D_;
-
     Frontend frontend_;
     Backend backend_;
 };
diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc
new file mode 100644
index 000000000..2be249af3
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion_example.cc
@@ -0,0 +1,75 @@
+#include <cstddef>
+#include <functional>
+#include <iostream>
+#include <memory>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "cxxopts.hpp"
+#include "experimental/learn_descriptors/backend.hh"
+#include "experimental/learn_descriptors/four_seasons_parser.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
+#include "experimental/learn_descriptors/structure_from_motion.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "opencv2/opencv.hpp"
+#include "visualization/opencv/opencv_viz.hh"
+
+int main(int argc, const char **argv) {
+    using namespace robot::experimental::learn_descriptors;
+    // clang-format off
+    cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser");
+    options.add_options()
+    ("data_dir", "Path to dataset root directory", cxxopts::value<std::string>())
+    ("calibration_dir", "Path to dataset calibration directory", cxxopts::value<std::string>())
+    ("help", "Print usage");
+    // clang-format on
+
+    auto args = options.parse(argc, argv);
+
+    const auto check_required = [&](const std::string &opt) {
+        if (args.count(opt) == 0) {
+            std::cout << "Missing " << opt << " argument" << std::endl;
+            std::cout << options.help() << std::endl;
+            std::exit(1);
+        }
+    };
+
+    if (args.count("help")) {
+        std::cout << options.help() << std::endl;
+        return 0;
+    }
+
+    check_required("data_dir");
+    check_required("calibration_dir");
+
+    const std::filesystem::path path_data = args["data_dir"].as<std::string>();
+    const std::filesystem::path path_calibration = args["calibration_dir"].as<std::string>();
+    FourSeasonsParser parser(path_data, path_calibration);
+
+    FrontendParams frontend_params{FrontendParams::ExtractorType::SIFT,
+                                   FrontendParams::MatcherType::KNN, true, false};
+    StructureFromMotion sfm(frontend_params);
+
+    for (size_t i = 633; i < 646; i += 2) {
+        const ImagePointFourSeasons img_pt = parser.get_image_point(i);
+        sfm.add_image_point(parser.load_image(i), std::make_shared<ImagePointFourSeasons>(img_pt));
+        std::cout << img_pt.to_string() << std::endl;
+    }
+
+    std::cout << "heartbeat" << std::endl;
+    using epoch = size_t;
+    Backend::graph_step_debug_func graph_itr_debug_func = [&](const gtsam::Values &vals,
+                                                              const epoch iter) {
+        std::cout << "iteration " << iter << " complete!";
+        std::string window_name = "Iteration_" + std::to_string(iter);
+        StructureFromMotion::graph_values(vals, window_name);
+    };
+    // sfm.solve_structure(10, graph_itr_debug_func);
+    sfm.solve_structure(10);
+    std::cout << "heartbeat 2" << std::endl;
+    StructureFromMotion::graph_values(sfm.result());
+    std::cout << "heartbeat 3" << std::endl;
+}
\ No newline at end of file

From 7c53d8ef6fac345381536b49ee958216082d946d Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Wed, 23 Jul 2025 15:57:49 -0400
Subject: [PATCH 39/45] WIP. SFM works on short to medium sequences. Likely
 need to do some sort of incremental initialization and incremental
 optimization(s)

---
 experimental/learn_descriptors/backend.cc     | 97 ++++++++++++++-----
 experimental/learn_descriptors/backend.hh     | 20 ++--
 experimental/learn_descriptors/frontend.cc    |  8 +-
 .../image_point_four_seasons.cc               |  1 -
 .../structure_from_motion.cc                  | 25 ++---
 .../structure_from_motion_example.cc          | 12 ++-
 6 files changed, 107 insertions(+), 56 deletions(-)

diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index 682c29ec3..225aadda5 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -278,46 +278,83 @@ void Backend::populate_rotation_estimate(std::vector<SharedFrame>& shared_frames
     }
 }
 
-void Backend::calculate_initial_values() { populate_rotation_estimate(shared_frames_); }
+void Backend::calculate_initial_values(bool interpolate_gps) {
+    populate_rotation_estimate(shared_frames_);
+    if (interpolate_gps) {
+        std::vector<SharedFrame> frames_with_gps;
+        for (SharedFrame& shared_frame : shared_frames_) {  // this assumes that frames are ordered
+                                                            // by seq (time), later on may have to
+            // think about this more if trying to run pipeline on unordered images or sets
+            // whose capture times are on different days
+            if (shared_frame->cam_in_world_initial_guess_) {
+                frames_with_gps.push_back(shared_frame);
+            }
+        }
+        if (frames_with_gps.size() < 2) return;
+        size_t idx_gps = 0;
+        for (SharedFrame& shared_frame : shared_frames_) {
+            // advance to next guess window if needed
+            while (idx_gps + 1 < frames_with_gps.size() - 1 &&
+                   shared_frame->seq_ > frames_with_gps[idx_gps + 1]->seq_) {
+                idx_gps++;
+            }
+
+            if (shared_frame->cam_in_world_initial_guess_) {
+                continue;
+            }
+            ROBOT_CHECK(
+                shared_frame->cam_in_world_interpolated_guess_,
+                "Currently assuming that the frontend populated interpolated initial guesses");
+            Eigen::Matrix3d interpolated_covariance(
+                *shared_frame->translation_covariance_in_cam_ *
+                100.0);  // very jank for now, not true interpolation
+            shared_frame->translation_covariance_in_cam_ = interpolated_covariance;
+            shared_frame->cam_in_world_initial_guess_ =
+                shared_frame->cam_in_world_interpolated_guess_;
+        }
+    }
+}
 
 void Backend::populate_graph(const FeatureTracks& feature_tracks) {
     ROBOT_CHECK(shared_frames_.size() > 3);
-    ROBOT_CHECK(shared_frames_[0]->world_from_cam_groundtruth_,
+    ROBOT_CHECK(shared_frames_.front()->world_from_cam_groundtruth_,
                 "We're assuming the first camera has groundtruth for now");
 
-    shared_frames_[0]->world_from_cam_initial_guess_ =
-        shared_frames_[0]->world_from_cam_groundtruth_->rotation();  // align rotation
+    shared_frames_.front()->world_from_cam_initial_guess_ =
+        shared_frames_.front()
+            ->world_from_cam_groundtruth_->rotation();  // align rotation with groundtruth
 
     Backend::populate_rotation_estimate(shared_frames_);
 
     // add first cam as prior
     Eigen::Vector3d cam0_in_w = *shared_frames_.front()->cam_in_world_interpolated_guess_;
-    const gtsam::Pose3 w_from_cam0_init_estimate(*shared_frames_[0]->world_from_cam_initial_guess_,
-                                                 gtsam::Point3());
+    const gtsam::Pose3 w_from_cam0_init_estimate(
+        *shared_frames_.front()->world_from_cam_initial_guess_, gtsam::Point3{0, 0, 0});
     const gtsam::Symbol symbol_cam0(symbol_char_pose, 0);
     graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
         symbol_cam0, w_from_cam0_init_estimate,
-        noise_tight_prior));  // currently assuming that we have groundtruth on the first pose
+        noise_tight_prior_));  // currently assuming that we have groundtruth on the first pose
     initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate);
 
-    // add gps factors
+    // add gps factors and populate initial pose values
     for (size_t i = 1; i < shared_frames_.size(); i++) {
         const Frame& frame = *shared_frames_[i];
         gtsam::Pose3 world_from_cam_estimate(
             frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_
                                                 : gtsam::Rot3::Identity(),
             *shared_frames_[i]->cam_in_world_interpolated_guess_ - cam0_in_w);
-        world_from_cam_initial_estimates_[frame.id_] = world_from_cam_estimate;
-        const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_);
-        initial_estimate_.insert(cam_symbol, world_from_cam_estimate);
         world_from_cam_initial_estimates_.emplace(frame.id_, world_from_cam_estimate);
+
+        const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_);
+        initial_estimate_.insert(cam_symbol, world_from_cam_initial_estimates_[frame.id_]);
         if (frame.cam_in_world_initial_guess_) {
             gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
                 frame.translation_covariance_in_cam_
                     ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
                                      std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
                                      std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
-                    : gps_sigmas_fallback);
+                    : gps_sigmas_fallback_);
+            std::cout << "gps noise " << i << gps_noise->sigmas() << std::endl;
             graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
         }
     }
@@ -338,7 +375,8 @@ void Backend::populate_graph(const FeatureTracks& feature_tracks) {
             for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) {
                 graph_.add(gtsam::GenericProjectionFactor(
                     gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise_,
-                    gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol, shared_frames_[0]->K_));
+                    gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol,
+                    shared_frames_[0]->K_));  // all K are the same for now...
             }
             initial_estimate_.insert(lmk_symbol, *landmark_estimate);
             lmk_initial_estimates_.emplace(i, *landmark_estimate);
@@ -346,11 +384,6 @@ void Backend::populate_graph(const FeatureTracks& feature_tracks) {
     }
 }
 
-void Backend::solve_graph() {
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_);
-    result_ = optimizer.optimize();
-}
-
 void Backend::solve_graph(const int num_epochs,
                           std::optional<Backend::graph_step_debug_func> iter_debug_func) {
     gtsam::LevenbergMarquardtParams params;
@@ -359,18 +392,36 @@ void Backend::solve_graph(const int num_epochs,
     gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
 
     double prev_error = optimizer.error();
+    double lowest_error = prev_error;
+    gtsam::Values best_values = optimizer.values();
     for (int i = 0; i < num_epochs; i++) {
         optimizer.iterate();
         double curr_error = optimizer.error();
+        if (curr_error < lowest_error) {
+            lowest_error = curr_error;
+            best_values = optimizer.values();
+        }
 
         if (iter_debug_func) {
             (*iter_debug_func)(optimizer.values(), i);
         }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
-        }
+        // if (std::abs(prev_error - curr_error) < 1e-6) {
+        //     std::cout << "Converged at iteration " << i << "\n";
+        //     break;
+        // }
     }
-    result_ = optimizer.values();
+    result_ = best_values;
+}
+
+void Backend::clear() {
+    shared_frames_.clear();
+    shared_frames_.shrink_to_fit();
+
+    initial_estimate_ = gtsam::Values();
+    result_ = gtsam::Values();
+    graph_ = gtsam::NonlinearFactorGraph();
+
+    std::unordered_map<size_t, gtsam::Pose3>().swap(world_from_cam_initial_estimates_);
+    std::unordered_map<size_t, gtsam::Point3>().swap(lmk_initial_estimates_);
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
index 50a934699..5b5dc66ed 100644
--- a/experimental/learn_descriptors/backend.hh
+++ b/experimental/learn_descriptors/backend.hh
@@ -48,14 +48,13 @@ class Backend {
         shared_frames_.reserve(shared_frames_.size() + shared_frames.size());
         shared_frames_.insert(shared_frames_.end(), shared_frames.begin(), shared_frames.end());
     };
-    void calculate_initial_values();
+    void calculate_initial_values(bool interpolate_gps = true);
     void populate_graph(const FeatureTracks &feature_tracks);
-    void solve_graph();
     typedef int epoch;
     using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
     void solve_graph(const int num_epochs,
                      std::optional<graph_step_debug_func> iter_debug_func = std::nullopt);
-    // void imshow_keypoints(const size_t img_id, bool viz = true, bool viz_all = false);
+    void clear();
 
     const std::vector<SharedFrame> &shared_frames() const { return shared_frames_; };
     const gtsam::Values &current_initial_values() const { return initial_estimate_; };
@@ -71,14 +70,15 @@ class Backend {
     std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_estimates_;
     std::unordered_map<size_t, gtsam::Point3> lmk_initial_estimates_;
 
-    gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas(
-        (gtsam::Vector(6) << 0, 0, 0,  // rotation stdev in radians
-         1e-6, 1e-6, 1e-6              // translation stdev in meters
-         )
-            .finished());
-    gtsam::Vector3 gps_sigmas_fallback{0.5, 0.5, 0.5};
+    gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior_ =
+        gtsam::noiseModel::Diagonal::Sigmas((gtsam::Vector(6) << 1e-3, 1e-3,
+                                             1e-3,             // rotation stdev in radians
+                                             1e-3, 1e-3, 1e-3  // translation stdev in meters
+                                             )
+                                                .finished());
+    gtsam::Vector3 gps_sigmas_fallback_{0.5, 0.5, 0.5};
     gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+        gtsam::noiseModel::Isotropic::Sigma(2, 10.0);
 };
 using Landmark = gtsam::Point3;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index c6dffd66b..3a3b6af92 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -168,6 +168,9 @@ Frontend::Frontend(FrontendParams params_) : params_(params_) {
 }
 
 void Frontend::populate_frames(bool verbose) {
+    shared_frames_.clear();
+    shared_frames_.shrink_to_fit();
+    shared_frames_.reserve(images_and_points_.size());
     FrameId id = shared_frames_.size();
     for (const ImageAndPoint &img_and_pt : images_and_points_) {
         const std::shared_ptr<ImagePoint> &shared_img_pt = img_and_pt.shared_img_pt;
@@ -205,11 +208,6 @@ void Frontend::populate_frames(bool verbose) {
         if (maybe_translation_covariance_in_cam) {
             frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam;
         }
-        if (id == 0) {
-            std::cout << (frame.world_from_cam_groundtruth_ ? "first frame has grnd"
-                                                            : "first frame doesn't have grnd")
-                      << std::endl;
-        }
         shared_frames_.push_back(std::make_shared<Frame>(frame));
         id++;
     }
diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc
index 1d5643abd..7c2829e94 100644
--- a/experimental/learn_descriptors/image_point_four_seasons.cc
+++ b/experimental/learn_descriptors/image_point_four_seasons.cc
@@ -46,7 +46,6 @@ std::optional<Eigen::Matrix3d> ImagePointFourSeasons::translation_covariance_in_
 std::optional<Eigen::Matrix3d> ImagePointFourSeasons::gps_covariance_in_world() const {
     if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt;
     const Eigen::Matrix3d ENU_covariance(gps_gcs->uncertainty->to_ENU_covariance());
-    std::cout << "image point " << id << " ENU_covariance: " << ENU_covariance << std::endl;
     const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix());
     const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose());
     return w_covariance;
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index a749b6098..8afcba047 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -43,23 +43,16 @@ void StructureFromMotion::solve_structure(
                 "Assuming that our first frame has groundtruth for now.",
                 *frontend_.frames().front()->world_from_cam_groundtruth_);
     // align rotation via the initial guess
-    std::cout << "sfm heartbeat" << std::endl;
     frontend_.frames().front()->world_from_cam_initial_guess_ =
         frontend_.frames().front()->world_from_cam_groundtruth_->rotation();
-    std::cout << "sfm heartbeat 2" << std::endl;
-    std::cout << "sfm heartbeat 3" << std::endl;
-    std::cout << "sfm heartbeat 4" << std::endl;
 
     // BACKEND
+    backend_.clear();
     backend_.add_frames(frontend_.frames());
-    std::cout << "backend_.shared_frames.size(): " << backend_.shared_frames().size() << std::endl;
-    std::cout << "sfm heartbeat 5" << std::endl;
     backend_.calculate_initial_values();
-    std::cout << "backend.initial_values.size(): " << backend_.current_initial_values().size()
-              << std::endl;
-    std::cout << "sfm heartbeat 6" << std::endl;
+    backend_.populate_graph(frontend_.feature_tracks());
+    graph_values(backend_.current_initial_values(), "viz init values");
     backend_.solve_graph(10, iter_debug_func);
-    std::cout << "sfm heartbeat 7" << std::endl;
 }
 
 void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name,
@@ -69,13 +62,21 @@ void StructureFromMotion::graph_values(const gtsam::Values &values, const std::s
     for (const auto &key_value : values) {
         gtsam::Key key = key_value.key;
         gtsam::Symbol symbol(key);
-        if (values.exists<gtsam::Pose3>(key)) {
+        try {
             const gtsam::Pose3 &pose = values.at<gtsam::Pose3>(key);
             final_poses.emplace_back(Eigen::Isometry3d(pose.matrix()), symbol.string());
-        } else if (values.exists<gtsam::Point3>(key)) {
+            continue;
+        } catch (const gtsam::ValuesIncorrectType &) {
+        }
+
+        try {
             const gtsam::Point3 &pt = values.at<gtsam::Point3>(key);
             final_lmks.emplace_back(pt, symbol.string());
+            continue;
+        } catch (const gtsam::ValuesIncorrectType &) {
         }
+
+        std::cerr << "Key " << symbol << " is neither Pose3 nor Point3.\n";
     }
     robot::visualization::viz_scene(final_poses, final_lmks, color, true, true, window_name);
 }
diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc
index 2be249af3..95c45a650 100644
--- a/experimental/learn_descriptors/structure_from_motion_example.cc
+++ b/experimental/learn_descriptors/structure_from_motion_example.cc
@@ -53,13 +53,16 @@ int main(int argc, const char **argv) {
                                    FrontendParams::MatcherType::KNN, true, false};
     StructureFromMotion sfm(frontend_params);
 
-    for (size_t i = 633; i < 646; i += 2) {
+    // for (size_t i = 633; i < 1000; i += 1) {
+    //     const ImagePointFourSeasons img_pt = parser.get_image_point(i);
+    //     sfm.add_image_point(parser.load_image(i),
+    //     std::make_shared<ImagePointFourSeasons>(img_pt));
+    // }
+    for (size_t i = 835; i < 900; i += 4) {
         const ImagePointFourSeasons img_pt = parser.get_image_point(i);
         sfm.add_image_point(parser.load_image(i), std::make_shared<ImagePointFourSeasons>(img_pt));
-        std::cout << img_pt.to_string() << std::endl;
     }
 
-    std::cout << "heartbeat" << std::endl;
     using epoch = size_t;
     Backend::graph_step_debug_func graph_itr_debug_func = [&](const gtsam::Values &vals,
                                                               const epoch iter) {
@@ -67,9 +70,8 @@ int main(int argc, const char **argv) {
         std::string window_name = "Iteration_" + std::to_string(iter);
         StructureFromMotion::graph_values(vals, window_name);
     };
+    // passing graph_itr_debug_func will execute the function after each optimization epoch
     // sfm.solve_structure(10, graph_itr_debug_func);
     sfm.solve_structure(10);
-    std::cout << "heartbeat 2" << std::endl;
     StructureFromMotion::graph_values(sfm.result());
-    std::cout << "heartbeat 3" << std::endl;
 }
\ No newline at end of file

From 672efcc529891650d61f76b0e8b4e26c00acd19f Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 24 Jul 2025 11:50:37 -0400
Subject: [PATCH 40/45] clean up

---
 experimental/learn_descriptors/BUILD          |  84 --
 experimental/learn_descriptors/backend.cc     |  85 --
 experimental/learn_descriptors/backend.hh     |   7 +-
 .../get_colmap_groundtruth.cc                 |  82 --
 .../learn_descriptors/incremental_sfm_mvp.cc  | 793 ------------------
 experimental/learn_descriptors/map.hh         |   4 -
 experimental/learn_descriptors/map.proto      |  15 -
 .../learn_descriptors/refactor_sfm_mvp.cc     | 389 ---------
 experimental/learn_descriptors/sfm_mvp.cc     | 685 ---------------
 .../structure_from_motion_example.cc          |  10 +-
 10 files changed, 5 insertions(+), 2149 deletions(-)
 delete mode 100644 experimental/learn_descriptors/get_colmap_groundtruth.cc
 delete mode 100644 experimental/learn_descriptors/incremental_sfm_mvp.cc
 delete mode 100644 experimental/learn_descriptors/map.hh
 delete mode 100644 experimental/learn_descriptors/map.proto
 delete mode 100644 experimental/learn_descriptors/refactor_sfm_mvp.cc
 delete mode 100644 experimental/learn_descriptors/sfm_mvp.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 63a162d08..4a1fd3419 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -251,27 +251,6 @@ cc_library(
     ],
 )
 
-cc_test(
-    name = "sfm_mvp",
-    srcs = ["sfm_mvp.cc"],
-    copts = [
-        "-Wno-error=unused-parameter",
-        "-Wno-error=unused-variable",
-    ],
-    deps = [
-        ":frame",
-        ":frontend",
-        ":spatial_test_scene_cube",
-        ":structure_from_motion_types",
-        ":symphony_lake_parser",
-        "//visualization/opencv:opencv_viz",
-        "@com_google_googletest//:gtest_main",
-        "@eigen",
-        "@gtsam",
-        "@opencv",
-    ],
-)
-
 cc_library(
     name = "feature_set",
     hdrs = ["feature_set.hh"],
@@ -385,42 +364,6 @@ cc_test(
     ],
 )
 
-cc_binary(
-    name = "get_colmap_groundtruth",
-    srcs = ["get_colmap_groundtruth.cc"],
-    copts = [
-        "-Wno-error=unused-parameter",
-    ],
-    deps = [
-        ":symphony_lake_parser",
-    ],
-)
-
-cc_binary(
-    name = "incremental_sfm_mvp",
-    srcs = ["incremental_sfm_mvp.cc"],
-    copts = [
-        "-Wno-error=unused-parameter",
-        "-Wno-error=unused-function",
-    ],
-    deps = [
-        ":four_seasons_parser",
-        ":frame",
-        ":frontend",
-        ":spatial_test_scene_cube",
-        ":structure_from_motion_types",
-        "//common/geometry:camera",
-        "//common/gps:frame_translation",
-        "//visualization/opencv:opencv_viz",
-        "@boost//:smart_ptr",
-        "@cxxopts",
-        "@eigen",
-        "@gtsam",
-        "@opencv",
-        ":camera_calibration"
-    ],
-)
-
 cc_library(
     name = "camera_calibration",
     hdrs = ["camera_calibration.hh"],
@@ -430,33 +373,6 @@ cc_library(
     ]
 )
 
-cc_binary(
-    name = "refactor_sfm_mvp",
-    srcs = ["refactor_sfm_mvp.cc"],
-    copts = [
-        "-Wno-error=unused-parameter",
-        "-Wno-error=unused-function",
-    ],
-    deps = [
-        ":backend",
-        ":four_seasons_parser",
-        ":frame",
-        ":frontend",
-        ":image_point",
-        ":image_point_four_seasons",
-        ":spatial_test_scene_cube",
-        ":structure_from_motion_types",
-        "//common/geometry:camera",
-        "//visualization/opencv:opencv_viz",
-        "@boost//:smart_ptr",
-        "@cxxopts",
-        "@eigen",
-        "@gtsam",
-        "@opencv",
-        ":camera_calibration"
-    ],
-)
-
 cc_binary(
     name = "structure_from_motion_example",
     srcs = ["structure_from_motion_example.cc"],
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
index 225aadda5..37a9c7a4d 100644
--- a/experimental/learn_descriptors/backend.cc
+++ b/experimental/learn_descriptors/backend.cc
@@ -109,91 +109,6 @@ gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3>& rotations, int max
     return mean;
 }
 
-void Backend::populate_rotation_estimate(std::vector<Frame>& frames) {
-    struct FrameVertex;
-    using SharedFrameVertex = std::shared_ptr<FrameVertex>;
-    struct FrameVertex {
-        // std::unique_ptr<Frame> frame;
-        Frame& frame;
-        std::optional<gtsam::Rot3> w_from_this;
-        std::unordered_map<FrameId, gtsam::Rot3>
-            neighbor_id_to_rot;  // rot is frame_from_neighbor_frame
-        std::unordered_set<SharedFrameVertex> parents;
-        std::unordered_set<SharedFrameVertex> children;
-    };
-    std::unordered_map<FrameId, SharedFrameVertex> frame_tree;
-    for (Frame& frame : frames) {
-        frame_tree.emplace(frame.id_, std::make_shared<FrameVertex>(frame));
-    }
-    // populate all frames and add children to neighbors
-    std::queue<size_t> q;  // indices of Frame in frames
-    std::unordered_set<FrameId> seen_frame_id;
-    q.push(frames.front().id_);
-
-    gtsam::NonlinearFactorGraph graph;
-    gtsam::Values initial;
-    // TODO: somehow get more informative/more robust noise values
-    // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength
-    gtsam::noiseModel::Diagonal::shared_ptr noise_rotation =
-        gtsam::noiseModel::Isotropic::Sigma(3, 0.1);  // radians
-
-    std::vector<gtsam::Symbol> symbols_frames;
-    while (!q.empty()) {
-        Frame frame = frames[q.front()];
-        q.pop();
-        seen_frame_id.insert(frame.id_);
-        SharedFrameVertex frame_vertex = frame_tree.at(frame.id_);
-        if (seen_frame_id.size() == 1) {  // make the first frame rotation identity unless the
-                                          // initial guess is set otherwise
-            frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_
-                                            ? *frame_vertex->frame.world_from_cam_initial_guess_
-                                            : gtsam::Rot3::Identity();
-            graph.add(gtsam::PriorFactor<gtsam::Rot3>(
-                gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_),
-                *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3)));
-        }
-        frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this;
-        std::cout << frame.to_string() << std::endl;
-
-        const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_);
-        symbols_frames.push_back(symbol_frame);
-        initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_);
-        for (const auto& [other_frame_id, frame_from_other_frame] :
-             frame.frame_from_other_frames_) {
-            if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) {
-                q.push(other_frame_id);
-                seen_frame_id.insert(other_frame_id);
-            }
-            frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame);
-            SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id);
-            frame_vertex->children.insert(other_frame_vertex);
-            other_frame_vertex->parents.insert(frame_vertex);
-            other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_,
-                                                           frame_from_other_frame.inverse());
-            if (!other_frame_vertex->w_from_this) {
-                other_frame_vertex->w_from_this =
-                    *frame_vertex->w_from_this * frame_from_other_frame;
-                other_frame_vertex->frame.world_from_cam_initial_guess_ =
-                    other_frame_vertex->w_from_this;
-            }
-
-            graph.add(gtsam::BetweenFactor<gtsam::Rot3>(
-                symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id),
-                frame_from_other_frame.inverse(), noise_rotation));
-        }
-        frame_tree.emplace(frame.id_, frame_vertex);
-    }
-
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
-    gtsam::Values result = optimizer.optimize();
-
-    // Output results and assign results to frames
-    for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
-        gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
-        frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame;
-    }
-}
-
 void Backend::populate_rotation_estimate(std::vector<SharedFrame>& shared_frames) {
     struct FrameVertex;
     using SharedFrameVertex = std::shared_ptr<FrameVertex>;
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
index 5b5dc66ed..6f9a8c0c7 100644
--- a/experimental/learn_descriptors/backend.hh
+++ b/experimental/learn_descriptors/backend.hh
@@ -36,12 +36,7 @@ class Backend {
     static gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3> &rotations,
                                          int max_iter = 10);
     // use rotation averaging to set the rotation initial guess for each frame in the world frame.
-    // the world frame will be the first frame in the vector. deadreckon_incrementally won't
-    // optimize if true
-    static void populate_rotation_estimate(std::vector<Frame> &frames);
-    // use rotation averaging to set the rotation initial guess for each frame in the world frame.
-    // the world frame will be the first frame in the vector. deadreckon_incrementally won't
-    // optimize if true
+    // the world frame will be the first frame in the vector
     static void populate_rotation_estimate(std::vector<SharedFrame> &shared_frames);
 
     void add_frames(std::vector<SharedFrame> &shared_frames) {
diff --git a/experimental/learn_descriptors/get_colmap_groundtruth.cc b/experimental/learn_descriptors/get_colmap_groundtruth.cc
deleted file mode 100644
index 7115daf0d..000000000
--- a/experimental/learn_descriptors/get_colmap_groundtruth.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "experimental/learn_descriptors/symphony_lake_parser.hh"
-
-namespace ld = robot::experimental::learn_descriptors;
-
-struct ImagePose {
-    std::string filename;
-    double X;
-    double Y;
-    double Z;
-    double heading;
-};
-
-void writeCsv(const std::string& path, const std::vector<ImagePose>& rows) {
-    std::ofstream out(path);
-    if (!out) {
-        throw std::runtime_error("Failed to open " + path);
-    }
-
-    out << "filename,X,Y,Z,heading\n";
-
-    out << std::fixed << std::setprecision(6);
-    for (const auto& r : rows) {
-        out << r.filename << ',' << r.X << ',' << r.Y << ',' << r.Z << ',' << r.heading << '\n';
-    }
-}
-
-void writeTxt(const std::string& path, const std::vector<ImagePose>& rows) {
-    std::ofstream out(path);
-    if (!out) {
-        throw std::runtime_error("Failed to open " + path);
-    }
-
-    out << std::fixed << std::setprecision(6);
-    for (const auto& r : rows) {
-        out << r.filename << ' ' << r.X << ' ' << r.Y << ' ' << r.Z << ' ' << r.heading << '\n';
-    }
-}
-
-int main() {
-    ld::DataParser data_parser("/home/pizzaroll04/Documents/datasets",
-                               std::vector<std::string>{"symphony_lake_building_test"});
-
-    const symphony_lake_dataset::SurveyVector& survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey& survey = survey_vector.get(0);
-
-    std::vector<ImagePose> poses;
-    for (int i = 0; i < 718; i++) {
-        symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(i);
-        const Eigen::Isometry3d T_wrld_cam =
-            data_parser.get_world_from_boat(img_pt) * data_parser.get_boat_from_camera(img_pt);
-        ImagePose img_pose;
-
-        std::ostringstream oss;
-        oss << std::setw(4) << std::setfill('0') << i;
-        img_pose.filename = oss.str() + ".jpg";
-        img_pose.X = T_wrld_cam.translation().x();
-        img_pose.Y = T_wrld_cam.translation().y();
-        img_pose.Z = T_wrld_cam.translation().z();
-        Eigen::Matrix3d R = T_wrld_cam.linear();
-        img_pose.heading = std::atan2(R(1, 0), R(0, 0));
-
-        poses.push_back(img_pose);
-    }
-
-    try {
-        writeTxt("/home/pizzaroll04/Documents/datasets/symphony_lake_building_test/ref_images.txt",
-                 poses);
-        std::cout << "Wrote " << poses.size() << " rows to ref_images.csv\n";
-    } catch (const std::exception& e) {
-        std::cerr << "Error: " << e.what() << "\n";
-        return 1;
-    }
-
-    return 0;
-}
diff --git a/experimental/learn_descriptors/incremental_sfm_mvp.cc b/experimental/learn_descriptors/incremental_sfm_mvp.cc
deleted file mode 100644
index 4906f0ff1..000000000
--- a/experimental/learn_descriptors/incremental_sfm_mvp.cc
+++ /dev/null
@@ -1,793 +0,0 @@
-#include <cmath>
-#include <functional>
-#include <opencv2/calib3d.hpp>
-#include <optional>
-#include <sstream>
-#include <vector>
-
-#include "Eigen/Core"
-#include "Eigen/Geometry"
-#include "common/geometry/camera.hh"
-#include "common/gps/frame_translation.hh"
-#include "cxxopts.hpp"
-#include "experimental/learn_descriptors/camera_calibration.hh"
-#include "experimental/learn_descriptors/four_seasons_parser.hh"
-#include "experimental/learn_descriptors/frame.hh"
-#include "experimental/learn_descriptors/frontend.hh"
-#include "experimental/learn_descriptors/frontend_definitions.hh"
-#include "experimental/learn_descriptors/structure_from_motion_types.hh"
-#include "gtsam/geometry/Cal3_S2.h"
-#include "gtsam/geometry/Point2.h"
-#include "gtsam/geometry/Point3.h"
-#include "gtsam/geometry/Pose3.h"
-#include "gtsam/geometry/Rot3.h"
-#include "gtsam/geometry/triangulation.h"
-#include "gtsam/inference/Symbol.h"
-#include "gtsam/linear/NoiseModel.h"
-#include "gtsam/navigation/GPSFactor.h"
-#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
-#include "gtsam/nonlinear/NonlinearFactorGraph.h"
-#include "gtsam/nonlinear/Values.h"
-#include "gtsam/slam/BetweenFactor.h"
-#include "gtsam/slam/PriorFactor.h"
-#include "gtsam/slam/ProjectionFactor.h"
-#include "visualization/opencv/opencv_viz.hh"
-
-namespace std {
-template <>
-struct hash<gtsam::Symbol> {
-    size_t operator()(const gtsam::Symbol &s) const { return hash<uint64_t>()(s.key()); }
-};
-}  // namespace std
-
-namespace detail_sfm {
-
-std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-                                                 const std::vector<gtsam::Point2> &cam_obs,
-                                                 gtsam::Cal3_S2::shared_ptr K,
-                                                 const double max_reproj_error = 2.0,
-                                                 const bool verbose = true) {
-    gtsam::Point3 p_lmk_in_world;
-    if (cam_poses.size() >= 2) {
-        try {
-            // Attempt triangulation using DLT (or the GTSAM provided method)
-            p_lmk_in_world = gtsam::triangulatePoint3(
-                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
-
-        } catch (const gtsam::TriangulationCheiralityException &e) {
-            // Handle the exception gracefully by logging and retaining the previous
-            // estimate.
-            if (verbose)
-                std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
-                          << e.what() << ". Discarding involved keypoints." << std::endl;
-        }
-    }
-    // Optional: perform an explicit cheirality check
-    for (const auto &pose : cam_poses) {
-        // Transform point to the camera coordinate system.
-        // transformTo() converts a world point to the camera frame.
-        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
-        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
-            return std::nullopt;
-        }
-    }
-
-    // Cheirality & reprojection checks
-    for (size_t i = 0; i < cam_poses.size(); ++i) {
-        const auto &pose = cam_poses[i];
-        // Cheirality
-        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
-        if (p_cam.z() <= 0) {
-            if (verbose) {
-                std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z()
-                          << ")\n";
-            }
-            return std::nullopt;
-        }
-        // Reprojection error
-        if (max_reproj_error > 0) {
-            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
-            const auto reproj = cam.project(p_lmk_in_world);
-            const double err = (reproj - cam_obs[i]).norm();
-            if (err > max_reproj_error) {
-                if (verbose) {
-                    std::cerr << "[triangulate] reprojection error too large on view " << i << " ("
-                              << err << " px)\n";
-                }
-                return std::nullopt;
-            }
-        }
-    }
-
-    return p_lmk_in_world;
-}
-
-// bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-//                          const std::vector<gtsam::Point2> &cam_obs,
-//                          gtsam::Cal3_S2::shared_ptr K, gtsam::Point3 &out_p_world_landmark,
-//                          const bool verbose = true) {
-//     bool valid = true;
-//     if (cam_poses.size() >= 2) {
-//         try {
-//             // Attempt triangulation using DLT (or the GTSAM provided method)
-//             gtsam::Point3 p_lmk_in_world = gtsam::triangulatePoint3(
-//                 cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
-
-//             out_p_world_landmark = p_lmk_in_world;
-
-//             // Optional: perform an explicit cheirality check
-//             for (const auto &pose : cam_poses) {
-//                 // Transform point to the camera coordinate system.
-//                 // transformTo() converts a world point to the camera frame.
-//                 gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
-//                 if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
-//                     valid = false;
-//                     break;
-//                 }
-//             }
-//         } catch (const gtsam::TriangulationCheiralityException &e) {
-//             // Handle the exception gracefully by logging and retaining the previous
-//             // estimate.
-//             if (verbose)
-//                 std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
-//                           << e.what() << ". Keeping initial landmark estimate." << std::endl;
-//         }
-//     } else {
-//         valid = false;
-//     }
-//     return valid;
-// }
-
-void graph_values(const gtsam::Values &values, const std::string &window_name,
-                  const std::vector<gtsam::Symbol> &symbols_pose,
-                  const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<robot::visualization::VizPose> final_poses;
-    std::vector<robot::visualization::VizPoint> final_lmks;
-    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
-        final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
-                                 symbol_pose.string());
-    }
-    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
-        if (!values.exists(symbol_lmk)) {
-            std::cout << "WTF " << symbol_lmk << std::endl;
-        }
-        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
-    }
-    std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
-    robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
-                                    window_name);
-}
-
-gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
-                             const std::vector<gtsam::Symbol> &symbols_pose,
-                             const std::vector<gtsam::Symbol> &symbols_landmarks,
-                             const int num_epochs = 5, bool viz_itr = false) {
-    gtsam::LevenbergMarquardtParams params;
-    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-    params.maxIterations = 1;
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
-
-    double prev_error = optimizer.error();
-    typedef int epoch;
-    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
-                       const std::vector<gtsam::Symbol> &)>
-        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
-                                   const std::vector<gtsam::Symbol> &symbols_pose,
-                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
-            std::cout << "iteration " << iter << " complete!";
-            std::string window_name = "Iteration_" + std::to_string(iter);
-            graph_values(vals, window_name, symbols_pose, symbols_landmarks);
-        };
-
-    for (int i = 0; i < num_epochs; i++) {
-        optimizer.iterate();
-        double curr_error = optimizer.error();
-
-        if (viz_itr) {
-            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
-        }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
-        }
-    }
-    return optimizer.values();
-}
-
-gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
-    if (poses.empty()) throw std::runtime_error("Empty pose vector");
-
-    gtsam::Pose3 mean = poses[0];
-
-    for (int iter = 0; iter < maxIter; ++iter) {
-        gtsam::Vector6 total = gtsam::Vector6::Zero();
-
-        for (const auto &pose : poses) {
-            gtsam::Pose3 delta = mean.between(pose);
-            total += gtsam::Pose3::Logmap(delta);
-        }
-
-        total /= poses.size();
-        mean = mean.compose(gtsam::Pose3::Expmap(total));
-    }
-
-    return mean;
-}
-
-gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
-    if (points.empty()) throw std::runtime_error("Empty point vector");
-    gtsam::Point3 sum(0, 0, 0);
-    for (const auto &pt : points) sum += pt;
-    return sum / points.size();
-}
-
-gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
-    const gtsam::Point3 mean = averagePoints(points);
-    gtsam::Point3 var(0, 0, 0);
-    for (const gtsam::Point3 &pt : points) {
-        var += (mean - pt).array().square().matrix();
-    }
-    return var / points.size();
-}
-
-double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) {
-    Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation();
-
-    // 2. Compute trace and clamp to [-1,1] for numerical safety
-    double tr = R_err.trace();
-    double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0));
-
-    // 3. Recover angle (in radians)
-    return std::acos(cos_theta);
-}
-
-// const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &)
-// {}
-};  // namespace detail_sfm
-
-int main(int argc, const char **argv) {
-    using namespace robot::experimental::learn_descriptors;
-    // clang-format off
-    cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser");
-    options.add_options()
-    ("data_dir", "Path to dataset root directory", cxxopts::value<std::string>())
-    ("calibration_dir", "Path to dataset calibration directory", cxxopts::value<std::string>())
-    ("help", "Print usage");
-    // clang-format on
-
-    auto args = options.parse(argc, argv);
-
-    const auto check_required = [&](const std::string &opt) {
-        if (args.count(opt) == 0) {
-            std::cout << "Missing " << opt << " argument" << std::endl;
-            std::cout << options.help() << std::endl;
-            std::exit(1);
-        }
-    };
-
-    if (args.count("help")) {
-        std::cout << options.help() << std::endl;
-        return 0;
-    }
-
-    check_required("data_dir");
-    check_required("calibration_dir");
-
-    const std::filesystem::path path_data = args["data_dir"].as<std::string>();
-    const std::filesystem::path path_calibration = args["calibration_dir"].as<std::string>();
-    FourSeasonsParser parser(path_data, path_calibration);
-
-    std::cout << "incremental_sfm_mvp!" << std::endl;
-    // const std::vector<int> indices{581, 609,
-    //                                633};  // indices with all data fields on neighborhood_5_train
-    // const std::vector<int> indices{581, 593, 609, 621, 633, 636, 639, 642};  // indices with
-    // get indices with fully populated img_pt containing full gps data and reference
-    const std::vector<int> indices = [&parser]() -> std::vector<int> {
-        std::vector<int> tmp;
-        for (size_t i = 0; i < parser.num_images(); i++) {
-            const ImagePointFourSeasons img_pt = parser.get_image_point(i);
-            if (img_pt.AS_w_from_gnss_cam && img_pt.gps_gcs && img_pt.gps_gcs &&
-                img_pt.gps_gcs->uncertainty && img_pt.gps_gcs->altitude)
-                tmp.push_back(i);
-        }
-        return tmp;
-    }();
-    constexpr bool visualize = false;
-    // all data fields on neighborhood_5_train const std::vector<int> indices = []() {
-    //     std::vector<int> tmp;
-    //     for (int i = 660; i < 681; i += 10) {
-    //         tmp.push_back(i);
-    //     }
-    //     return tmp;
-    // }();
-    // FourSeasonsParser parser()
-    // const size_t img_width = img_pt_first.width, img_height =
-    // img_pt_first.height;
-    const std::shared_ptr<CameraCalibrationFisheye> cal_parser_left_cam =
-        parser.camera_calibration();
-    gtsam::Cal3_S2::shared_ptr K =
-        boost::make_shared<gtsam::Cal3_S2>(cal_parser_left_cam->fx, cal_parser_left_cam->fy, 0,
-                                           cal_parser_left_cam->cx, cal_parser_left_cam->cy);
-    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
-    cv::Mat D_mat = (cv::Mat_<double>(4, 1) << cal_parser_left_cam->k1, cal_parser_left_cam->k2,
-                     cal_parser_left_cam->k3, cal_parser_left_cam->k4);
-
-    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-    //                         gtsam::Pose3(T_world_camera0.matrix()));
-    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
-                          true, false};
-    Frontend frontend(params);
-
-    gtsam::Values initial_estimate_;
-    gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    gtsam::Vector6 prior_sigmas;
-    prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
-    gtsam::Vector3 gps_sigmas;
-    gps_sigmas << 2.1, 2.1, 0.1;
-    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
-    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
-
-    // populate frames and cam_poses
-    std::vector<Frame> frames;
-    FrameId id = 0;
-    std::unordered_map<FrameId, gtsam::Pose3>
-        id_to_initial_world_from_cam;      // these are for initial guesses!!
-    Eigen::Vector3d t_first_cam_in_world;  // to create our own world
-    std::vector<Eigen::Isometry3d> references_world_from_cam;
-    std::vector<robot::visualization::VizPose> viz_references_world_from_cam;
-    Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
-    scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
-        parser.gnss_scale();
-    std::vector<std::vector<cv::KeyPoint>> kpt_list;
-    for (const int &idx : indices) {
-        const ImagePointFourSeasons img_pt = parser.get_image_point(idx);
-        std::cout << img_pt.to_string() << std::endl;
-        //
-        Eigen::Isometry3d world_from_cam;
-        if (id == 0) {
-            // for now we will use an informative prior based on reference
-            world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
-                                                img_pt.AS_w_from_gnss_cam->matrix())
-                                                   .matrix());
-            t_first_cam_in_world = world_from_cam.translation();
-        } else if (img_pt.gps_gcs) {
-            const Eigen::Vector3d gps_gcs(
-                img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
-                img_pt.gps_gcs->altitude ? *(img_pt.gps_gcs->altitude) : 0);
-            const Eigen::Vector3d p_gps_in_ECEF = robot::gps::ecef_from_lla(gps_gcs);
-            const Eigen::Vector4d p_gps_in_ECEF_hom(p_gps_in_ECEF.x(), p_gps_in_ECEF.y(),
-                                                    p_gps_in_ECEF.z(), 1.0);
-            world_from_cam = Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
-                                                img_pt.AS_w_from_gnss_cam->matrix())
-                                                   .matrix());
-            // std::cout << "ok" << std::endl;
-            // world_from_cam.translation() =
-            //     ((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix() *
-            //      p_gps_in_ECEF_hom)
-            //         .head<3>();
-
-            // Eigen::
-        }
-        world_from_cam.translation() = world_from_cam.translation() - t_first_cam_in_world;
-        id_to_initial_world_from_cam.emplace(id, world_from_cam.matrix());
-
-        // populate frame
-        cv::Mat img = parser.load_image(idx);
-        cv::Mat img_undistorted;
-        cv::fisheye::undistortImage(img, img_undistorted, K_mat, D_mat, K_mat, img.size());
-
-        // std::stringstream ss;
-        // ss << "image " << idx << " of size "
-        //    << "(" << img_undistorted.cols << ", " << img_undistorted.rows << ")";
-        // cv::imshow(ss.str(), img_undistorted);
-        // cv::waitKey(0);
-        // cv::destroyWindow(ss.str());
-
-        std::optional<Frame> frame;
-        if (img_pt.AS_w_from_gnss_cam) {
-            Eigen::Isometry3d world_from_cam_reference =
-                Eigen::Isometry3d((parser.S_from_AS().matrix() * scale_mat_reference *
-                                   img_pt.AS_w_from_gnss_cam->matrix()));
-            world_from_cam_reference.translation() =
-                world_from_cam_reference.translation() - t_first_cam_in_world;
-            viz_references_world_from_cam.emplace_back(world_from_cam_reference,
-                                                       "img " + std::to_string(idx));
-            references_world_from_cam.push_back(world_from_cam_reference);
-            frame.emplace(id, img_undistorted, K, gtsam::Pose3(world_from_cam_reference.matrix()));
-        } else {
-            frame.emplace(id, img_undistorted, K);
-            std::clog << "Warning: no reference data at img_pt " << idx << std::endl;
-        }
-
-        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.extract_features(img_undistorted);
-        std::cout << "kpts_descs.size(): " << kpts_descs.first.size() << std::endl;
-        kpt_list.push_back(kpts_descs.first);
-        KeypointsCV kpts;
-        for (const cv::KeyPoint &kpt : kpts_descs.first) {
-            kpts.push_back(kpt.pt);
-        }
-        // std::cout << "idx " << idx << " has " << kpts.size() << " kpts" << std::endl;
-        frame->add_keypoints(kpts);
-        frame->assign_descriptors(kpts_descs.second);
-
-        frames.push_back(*frame);
-
-        id++;
-    }
-    if (visualize)
-        robot::visualization::viz_scene(viz_references_world_from_cam,
-                                        std::vector<robot::visualization::VizPoint>(),
-                                        cv::viz::Color::brown(), true, true, "References");
-
-    // populate feature_tracks and lmk_id_map
-    // TODO: "smart" matching, using initial poses to filter which pairs make sense to compute
-    // matches
-    FeatureTracks feature_tracks;
-    FrameLandmarkIdMap lmk_id_map;
-    LandmarkId lmk_id = 0;
-    constexpr bool exhaustive = true;
-    if (exhaustive) {
-        for (size_t i = 0; i < indices.size() - 1; i += 4) {
-            // std::cout << "i: " << i << std::endl;
-            for (size_t j = i + 1; j < indices.size(); j++) {
-                // std::cout << "j: " << j << std::endl;
-                std::vector<cv::DMatch> matches =
-                    frontend.compute_matches(frames[i].descriptors(), frames[j].descriptors());
-                // DIAL TO MESS WITH
-                frontend.enforce_bijective_buffer_matches(matches);
-
-                if (matches.size() < 5) {
-                    continue;
-                }
-
-                std::vector<cv::KeyPoint> cv_kpts_1;
-                std::vector<cv::KeyPoint> cv_kpts_2;
-                for (const KeypointCV &kpt : frames[i].keypoint()) {
-                    cv::KeyPoint cv_kpt;
-                    cv_kpt.pt = kpt;
-                    cv_kpts_1.push_back(cv_kpt);
-                }
-                for (const KeypointCV &kpt : frames[j].keypoint()) {
-                    cv::KeyPoint cv_kpt;
-                    cv_kpt.pt = kpt;
-                    cv_kpts_2.push_back(cv_kpt);
-                }
-                Eigen::Isometry3d world_from_camj(id_to_initial_world_from_cam.at(j).matrix());
-                // std::cout << "heartbeat" << std::endl;
-                std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
-                    robot::geometry::estimate_cam0_from_cam1(cv_kpts_1, cv_kpts_2, matches, K_mat);
-                if (!scale_cam0_from_cam1) {
-                    continue;
-                }
-                world_from_camj.linear() =
-                    (Eigen::Isometry3d(id_to_initial_world_from_cam.at(i).matrix()) *
-                     *scale_cam0_from_cam1)
-                        .linear();
-                // std::cout << "heartbeat 2" << std::endl;
-                id_to_initial_world_from_cam.at(j) = gtsam::Pose3(world_from_camj.matrix());
-                for (const cv::DMatch match : matches) {
-                    const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
-                    const KeypointCV kpt_cam1 = frames[j].keypoint()[match.trainIdx];
-
-                    auto key = std::make_pair(frames[i].id_, kpt_cam0);
-                    if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                        auto id = lmk_id_map.at(key);
-                        feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-                        feature_tracks.at(id).obs_.emplace_back(frames[j].id_, kpt_cam1);
-                    } else {
-                        FeatureTrack feature_track(i, kpt_cam0);
-                        feature_track.obs_.emplace_back(frames[j].id_, kpt_cam1);
-                        feature_tracks.emplace(lmk_id, feature_track);
-                        lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-                        lmk_id++;
-                    }
-                }
-            }
-        }
-        std::cout << "done processing matches" << std::endl;
-    } else {  // successive only
-        for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::vector<cv::DMatch> matches =
-                frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
-            frontend.enforce_bijective_buffer_matches(matches);
-            for (const cv::DMatch match : matches) {
-                const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
-
-                auto key = std::make_pair(frames[i].id_, kpt_cam0);
-                if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                    auto id = lmk_id_map.at(key);
-                    feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-                    feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                } else {
-                    FeatureTrack feature_track(i, kpt_cam0);
-                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                    feature_tracks.emplace(lmk_id, feature_track);
-                    lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-                    lmk_id++;
-                }
-            }
-        }
-    }
-    std::vector<robot::visualization::VizPose> viz_world_from_cam_init;
-    for (const auto &[id, world_from_cam] : id_to_initial_world_from_cam) {
-        viz_world_from_cam_init.emplace_back(Eigen::Isometry3d(world_from_cam.matrix()),
-                                             std::to_string(id));
-    }
-
-    // std::cout << "pre heartbeat" << std::endl;
-    if (visualize)
-        robot::visualization::viz_scene(
-            viz_world_from_cam_init, std::vector<robot::visualization::VizPoint>(),
-            cv::viz::Color::brown(), true, true, "world_from_cam_estimates");
-    // std::cout << "heartbeat" << std::endl;
-
-    // TRIANGULATE all of the points (for initial guess)
-    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;  // points are kpt_in_world
-    std::vector<robot::visualization::VizPoint> triangulated_lmks_viz;
-    std::vector<gtsam::Point3> triangulated_lmks;
-    for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
-        std::vector<gtsam::Pose3> world_from_cams;
-        std::vector<gtsam::Point2> feat_kpts;
-        const LandmarkId lmk_id = lmk_feat.first;
-        const FeatureTrack feature_track = lmk_feat.second;
-        // gtsam::Point3 kpt_in_world(0, 0, 0);
-        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            world_from_cams.push_back(id_to_initial_world_from_cam[feat_track.first]);
-            feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
-        }
-        std::optional<gtsam::Point3> kpt_in_world =
-            detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, false);
-        // bool triangulate_success =
-        // detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world, false);
-        if (kpt_in_world) {
-            lmk_triangulated_map.emplace(lmk_id, *kpt_in_world);
-            triangulated_lmks.push_back(*kpt_in_world);
-            triangulated_lmks_viz.emplace_back(*kpt_in_world, "lmk " + std::to_string(lmk_id));
-        } else {
-            std::cout << "triangulation not success, leaving it out." << std::endl;
-            continue;
-        }
-    }
-    std::cout << "there are " << triangulated_lmks.size() << " triangulated lmks" << std::endl;
-    if (visualize)
-        robot::visualization::viz_scene(viz_references_world_from_cam, triangulated_lmks_viz,
-                                        cv::viz::Color::brown(), true, true,
-                                        "Unfiltered, triangulated points");
-
-    // filter points via variance
-    // TODO: filter based on quality and/or quantity of matches
-    std::cout << "triangulated_lmks.size(): " << triangulated_lmks.size() << std::endl;
-    const gtsam::Point3 variance_pts = detail_sfm::get_variance(triangulated_lmks);
-    std::cout << "heart beat 2" << std::endl;
-    const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
-    const gtsam::Point3 mean_pts = detail_sfm::averagePoints(triangulated_lmks);
-    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map_filtered;
-    std::vector<gtsam::Point3> filtered_points;
-    std::cout << "original var " << variance_pts << std::endl;
-    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map) {
-        const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix();
-        if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) {
-            lmk_triangulated_map_filtered.emplace(lmk_id_pt);
-            filtered_points.push_back(lmk_id_pt.second);
-        }
-    }
-    std::cout << "filtered variance " << detail_sfm::get_variance(filtered_points) << std::endl;
-    std::cout << "number of filtered points: " << lmk_triangulated_map_filtered.size() << std::endl;
-    if (visualize)
-        robot::visualization::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points,
-                                        cv::viz::Color::brown(), true, true,
-                                        "Filtered, triangulated points");
-
-    // ############# BACKEND ###############
-
-    // add filtered points to graph
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
-    std::vector<gtsam::Symbol> symbols_pose;
-    std::vector<gtsam::Symbol> symbols_landmarks;
-    for (const auto &[lmk_id, kpt_in_world] : lmk_triangulated_map_filtered) {
-        // LandmarkId lmk_id = lmk_id_pt.first;
-        // const gtsam::Point3 kpt_in_world = lmk_id_pt.second;
-        FeatureTrack feature_track = feature_tracks.at(lmk_id);
-        const gtsam::Symbol symbol_lmk('l', lmk_id);
-        for (const auto &[frame_id, obs] : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, kpt_in_world);
-            symbols_landmarks.push_back(symbol_lmk);
-            symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                  std::vector<gtsam::Point3>{kpt_in_world});
-            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                gtsam::Point2(obs.x, obs.y), landmark_noise, gtsam::Symbol('x', frame_id),
-                symbol_lmk, K);
-        }
-    }
-
-    // add gps factors
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-        gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0), prior_pose_noise);
-    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), id_to_initial_world_from_cam.at(0));
-    symbols_pose.push_back(gtsam::Symbol('x', 0));
-    symbols_poses_values_iter.emplace(
-        gtsam::Symbol('x', 0), std::vector<gtsam::Pose3>{id_to_initial_world_from_cam.at(0)});
-    for (const auto &[frame_id, estimate_world_from_cam] : id_to_initial_world_from_cam) {
-        if (frame_id == 0) {
-            continue;
-        }
-        const gtsam::Symbol key_cam('x', frame_id);
-        symbols_pose.push_back(key_cam);
-        gtsam::Point3 p_cam_in_world = estimate_world_from_cam.translation();
-        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_cam_in_world, gps_noise);
-        initial_estimate_.insert_or_assign(key_cam, estimate_world_from_cam);
-        symbols_poses_values_iter.emplace(key_cam,
-                                          std::vector<gtsam::Pose3>{estimate_world_from_cam});
-        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_cam_in_world));
-    }
-
-    // detail_sfm::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
-    std::cout << "heart beat 3" << std::endl;
-
-    constexpr bool local_optimizations = false;
-    if (local_optimizations) {
-        // TODO: do local optimizations between groups of any n nubmer of cameras with >= x number
-        // of matches
-        // do local optimizations and add to iter cache
-        // const int window = 2;
-        std::cout << "length of cam_poses: " << id_to_initial_world_from_cam.size() << std::endl;
-        for (const auto &[frame_id, world_from_cam] : id_to_initial_world_from_cam) {
-            std::cout << "id: " << frame_id << "\tpose: " << world_from_cam << std::endl;
-        }
-        for (size_t i = 0; i < indices.size() - 1; i++) {
-            std::cout << "Local optimization " << i << std::endl;
-            gtsam::Values local_estimate_;
-            gtsam::NonlinearFactorGraph local_graph_;
-
-            std::vector<gtsam::Symbol> symbols_poses{gtsam::Symbol('x', i),
-                                                     gtsam::Symbol('x', i + 1)};
-            std::vector<gtsam::Symbol> symbols_lmks;
-
-            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-                gtsam::PriorFactor<gtsam::Pose3>(
-                    symbols_poses[0], id_to_initial_world_from_cam.at(i), prior_pose_noise));
-            std::cout << "fuck" << std::endl;
-            local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-                gtsam::PriorFactor<gtsam::Pose3>(
-                    symbols_poses[1], id_to_initial_world_from_cam.at(i + 1), prior_pose_noise));
-            std::cout << "mega fuck" << std::endl;
-            local_estimate_.insert_or_assign(symbols_poses[0], id_to_initial_world_from_cam.at(i));
-            local_estimate_.insert_or_assign(symbols_poses[1],
-                                             id_to_initial_world_from_cam.at(i + 1));
-
-            std::vector<cv::DMatch> matches =
-                frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
-            // DIAL TO MESS WITH
-            frontend.enforce_bijective_matches(matches);
-            std::vector<gtsam::Pose3> world_from_cams{id_to_initial_world_from_cam.at(i),
-                                                      id_to_initial_world_from_cam.at(i + 1)};
-
-            std::vector<robot::visualization::VizPose> viz_world_from_cams{
-                {Eigen::Isometry3d(world_from_cams[0].matrix()), "cam 0"},
-                {Eigen::Isometry3d(world_from_cams[1].matrix()), "cam 1"}};
-            std::vector<robot::visualization::VizPoint> viz_lmks;
-            for (const cv::DMatch match : matches) {
-                std::vector<gtsam::Point2> feat_kpts;
-                const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
-                const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
-                feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
-                feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
-
-                const std::pair<FrameId, KeypointCV> key_lmk_id =
-                    std::make_pair(frames[i].id_, kpt_cam0);
-                if (lmk_id_map.find(key_lmk_id) != lmk_id_map.end()) {
-                    LandmarkId match_lmk_id = lmk_id_map.at(key_lmk_id);
-                    if (lmk_triangulated_map_filtered.find(match_lmk_id) ==
-                        lmk_triangulated_map_filtered.end()) {
-                        continue;
-                    }
-                    std::cout << "good" << std::endl;
-                    // do nothing
-                    // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-                    // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-                } else {
-                    std::cerr << "ERROR: this shouldn't happen right?" << std::endl;
-                    FeatureTrack feature_track(frames[i].id_, kpt_cam0);
-                    feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                    feature_tracks.push_back(feature_track);
-                    lmk_id_map.emplace(key_lmk_id, lmk_id);
-                    lmk_id++;
-                }
-                std::cout << "oog" << std::endl;
-                const gtsam::Symbol symbol_lmk = gtsam::Symbol('l', lmk_id_map.at(key_lmk_id));
-                // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
-                //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
-                //     std::cerr << "UH OH" << std::endl;
-                // } else {
-                //     std::cout << "cool" << std::endl;
-                // }
-                symbols_lmks.push_back(symbol_lmk);
-                local_graph_
-                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                        feat_kpts[0], landmark_noise, symbols_poses[0], symbol_lmk, K);
-                local_graph_
-                    .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                        feat_kpts[1], landmark_noise, symbols_poses[1], symbol_lmk, K);
-
-                std::optional<gtsam::Point3> kpt_in_world =
-                    detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K);
-                // gtsam::Point3 kpt_in_world;
-                // bool triangulate_success =
-                //     detail_sfm::attempt_triangulate(world_from_cams, feat_kpts, K, kpt_in_world);
-                if (kpt_in_world) {
-                    local_estimate_.insert_or_assign(symbol_lmk, *kpt_in_world);
-                    viz_lmks.emplace_back(*kpt_in_world);
-                    if (symbols_landmarks_values_iter.find(symbol_lmk) !=
-                        symbols_landmarks_values_iter.end()) {
-                        symbols_landmarks_values_iter[symbol_lmk].push_back(*kpt_in_world);
-                    } else {
-                        symbols_landmarks_values_iter.emplace(
-                            symbol_lmk, std::vector<gtsam::Point3>{*kpt_in_world});
-                    }
-                }
-            }
-            std::cout << "setup complete!" << std::endl;
-            if (visualize)
-                robot::visualization::viz_scene(viz_world_from_cams, viz_lmks,
-                                                cv::viz::Color::brown(), true, true,
-                                                "Local Optimization " + std::to_string(i));
-
-            const gtsam::Values symbols_result_local = detail_sfm::optimize_graph(
-                local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
-
-            for (const gtsam::Symbol &symbol_pose : symbols_poses) {
-                const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
-                symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
-            }
-            for (const gtsam::Symbol &symbol_lmk : symbols_lmks) {
-                const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
-                symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
-            }
-        }
-
-        std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
-
-        for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose :
-             symbols_poses_values_iter) {
-            initial_estimate_.insert_or_assign(sym_pose.first,
-                                               detail_sfm::averagePoses(sym_pose.second));
-        }
-        for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
-             symbols_landmarks_values_iter) {
-            initial_estimate_.insert_or_assign(sym_lmk.first,
-                                               detail_sfm::averagePoints(sym_lmk.second));
-        }
-    }
-
-    // do global optimization
-    const gtsam::Values result =
-        detail_sfm::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks, 0);
-
-    // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
-    double sum_traj_error = 0;
-    double sum_rot_error = 0;
-    for (size_t i = 0; i < symbols_pose.size(); i++) {
-        const gtsam::Pose3 traj_pose = result.at<gtsam::Pose3>(symbols_pose[i]);
-        sum_traj_error += std::pow(
-            (references_world_from_cam[i].translation() - traj_pose.translation()).norm(), 2);
-        sum_rot_error += detail_sfm::rotation_error(references_world_from_cam[i],
-                                                    Eigen::Isometry3d(traj_pose.matrix()));
-    }
-    std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
-    double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
-    double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
-    std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
-
-    std::cout << "about to visualize result" << std::endl;
-    result.print();
-    detail_sfm::graph_values(result, "Result", symbols_pose, std::vector<gtsam::Symbol>());
-    // detail_sfm::graph_values(result, "Result", symbols_pose, symbols_landmarks);
-}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/map.hh b/experimental/learn_descriptors/map.hh
deleted file mode 100644
index a22c1e5c7..000000000
--- a/experimental/learn_descriptors/map.hh
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-// #include "Eigen/"
-
-namespace robot::experimental::learn_descriptors {}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/map.proto b/experimental/learn_descriptors/map.proto
deleted file mode 100644
index 67d0c5db0..000000000
--- a/experimental/learn_descriptors/map.proto
+++ /dev/null
@@ -1,15 +0,0 @@
-
-syntax = "proto3";
-
-package robot.experimental.learn_descriptors;
-
-message Map {
-  message Quaternion {
-    // Represents a quaternion as a + bi + cj + dk
-    double a = 1;
-    double b = 2;
-    double c = 3;
-    double d = 4;
-  }
-  Quaternion quaternion = 1;
-}
diff --git a/experimental/learn_descriptors/refactor_sfm_mvp.cc b/experimental/learn_descriptors/refactor_sfm_mvp.cc
deleted file mode 100644
index 054c8a197..000000000
--- a/experimental/learn_descriptors/refactor_sfm_mvp.cc
+++ /dev/null
@@ -1,389 +0,0 @@
-#include <cmath>
-#include <exception>
-#include <functional>
-#include <iostream>
-#include <memory>
-#include <opencv2/calib3d.hpp>
-#include <optional>
-#include <sstream>
-#include <unordered_map>
-#include <vector>
-
-#include "Eigen/Core"
-#include "Eigen/Geometry"
-#include "common/geometry/camera.hh"
-#include "cxxopts.hpp"
-#include "experimental/learn_descriptors/backend.hh"
-#include "experimental/learn_descriptors/camera_calibration.hh"
-#include "experimental/learn_descriptors/four_seasons_parser.hh"
-#include "experimental/learn_descriptors/frame.hh"
-#include "experimental/learn_descriptors/frontend.hh"
-#include "experimental/learn_descriptors/frontend_definitions.hh"
-#include "experimental/learn_descriptors/image_point_four_seasons.hh"
-#include "experimental/learn_descriptors/structure_from_motion_types.hh"
-#include "gtsam/geometry/Cal3_S2.h"
-#include "gtsam/geometry/Point2.h"
-#include "gtsam/geometry/Point3.h"
-#include "gtsam/geometry/Pose3.h"
-#include "gtsam/geometry/Rot3.h"
-#include "gtsam/geometry/triangulation.h"
-#include "gtsam/inference/Symbol.h"
-#include "gtsam/linear/NoiseModel.h"
-#include "gtsam/navigation/GPSFactor.h"
-#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
-#include "gtsam/nonlinear/NonlinearFactorGraph.h"
-#include "gtsam/nonlinear/Values.h"
-#include "gtsam/slam/BetweenFactor.h"
-#include "gtsam/slam/PriorFactor.h"
-#include "gtsam/slam/ProjectionFactor.h"
-#include "opencv2/opencv.hpp"
-#include "visualization/opencv/opencv_viz.hh"
-
-std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-                                                 const std::vector<gtsam::Point2> &cam_obs,
-                                                 gtsam::Cal3_S2::shared_ptr K,
-                                                 const double max_reproj_error = 2.0,
-                                                 const bool verbose = true) {
-    gtsam::Point3 p_lmk_in_world;
-    if (cam_poses.size() > 2) {
-        try {
-            // Attempt triangulation using DLT (or the GTSAM provided method)
-            p_lmk_in_world = gtsam::triangulatePoint3(
-                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
-
-        } catch (const std::exception &e) {
-            // Handle the exception gracefully by logging and retaining the previous
-            // estimate or discard
-            if (verbose) {
-                std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: "
-                          << e.what() << ". Discarding involved keypoints." << std::endl;
-            }
-            return std::nullopt;
-        }
-    } else {
-        return std::nullopt;
-    }
-    // Optional: perform an explicit cheirality check
-    for (const auto &pose : cam_poses) {
-        // Transform point to the camera coordinate system.
-        // transformTo() converts a world point to the camera frame.
-        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
-        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
-            return std::nullopt;
-        }
-    }
-
-    // Cheirality & reprojection checks
-    for (size_t i = 0; i < cam_poses.size(); ++i) {
-        const auto &pose = cam_poses[i];
-        // Cheirality
-        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
-        if (p_cam.z() <= 0) {
-            if (verbose) {
-                std::cerr << "[attempt_triangulate] point behind camera " << i
-                          << " (z=" << p_cam.z() << ")\n";
-            }
-            return std::nullopt;
-        }
-        // Reprojection error
-        if (max_reproj_error > 0) {
-            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
-            const auto reproj = cam.project(p_lmk_in_world);
-            const double err = (reproj - cam_obs[i]).norm();
-            if (err > max_reproj_error) {
-                if (verbose) {
-                    std::cerr << "[attempt_triangulate] reprojection error too large on view " << i
-                              << " (" << err << " px)\n";
-                }
-                return std::nullopt;
-            }
-        }
-    }
-    return p_lmk_in_world;
-}
-
-void graph_values(const gtsam::Values &values, const std::string &window_name,
-                  const std::vector<gtsam::Symbol> &symbols_pose,
-                  const std::vector<gtsam::Symbol> &symbols_landmarks) {
-    std::vector<robot::visualization::VizPose> final_poses;
-    std::vector<robot::visualization::VizPoint> final_lmks;
-    for (const gtsam::Symbol &symbol_pose : symbols_pose) {
-        final_poses.emplace_back(Eigen::Isometry3d(values.at<gtsam::Pose3>(symbol_pose).matrix()),
-                                 symbol_pose.string());
-    }
-    for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
-        if (!values.exists(symbol_lmk)) {
-            std::cout << "WTF " << symbol_lmk << std::endl;
-        }
-        final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk), symbol_lmk.string());
-    }
-    std::cout << "About to viz gtsam::Values with " << values.size() << " variables." << std::endl;
-    robot::visualization::viz_scene(final_poses, final_lmks, cv::viz::Color::brown(), true, true,
-                                    window_name);
-}
-
-gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph, const gtsam::Values &values,
-                             const std::vector<gtsam::Symbol> &symbols_pose,
-                             const std::vector<gtsam::Symbol> &symbols_landmarks,
-                             const int num_epochs = 5, bool viz_itr = false) {
-    gtsam::LevenbergMarquardtParams params;
-    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-    params.maxIterations = 1;
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
-
-    double prev_error = optimizer.error();
-    typedef int epoch;
-    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
-                       const std::vector<gtsam::Symbol> &)>
-        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
-                                   const std::vector<gtsam::Symbol> &symbols_pose,
-                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
-            std::cout << "iteration " << iter << " complete!";
-            std::string window_name = "Iteration_" + std::to_string(iter);
-            graph_values(vals, window_name, symbols_pose, symbols_landmarks);
-        };
-
-    for (int i = 0; i < num_epochs; i++) {
-        optimizer.iterate();
-        double curr_error = optimizer.error();
-
-        if (viz_itr) {
-            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
-        }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
-        }
-    }
-    return optimizer.values();
-}
-
-double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) {
-    Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation();
-
-    // 2. Compute trace and clamp to [-1,1] for numerical safety
-    double tr = R_err.trace();
-    double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0));
-
-    // 3. Recover angle (in radians)
-    return std::acos(cos_theta);
-}
-
-int main(int argc, const char **argv) {
-    using namespace robot::experimental::learn_descriptors;
-    // clang-format off
-    cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser");
-    options.add_options()
-    ("data_dir", "Path to dataset root directory", cxxopts::value<std::string>())
-    ("calibration_dir", "Path to dataset calibration directory", cxxopts::value<std::string>())
-    ("help", "Print usage");
-    // clang-format on
-
-    auto args = options.parse(argc, argv);
-
-    const auto check_required = [&](const std::string &opt) {
-        if (args.count(opt) == 0) {
-            std::cout << "Missing " << opt << " argument" << std::endl;
-            std::cout << options.help() << std::endl;
-            std::exit(1);
-        }
-    };
-
-    if (args.count("help")) {
-        std::cout << options.help() << std::endl;
-        return 0;
-    }
-
-    check_required("data_dir");
-    check_required("calibration_dir");
-
-    const std::filesystem::path path_data = args["data_dir"].as<std::string>();
-    const std::filesystem::path path_calibration = args["calibration_dir"].as<std::string>();
-    FourSeasonsParser parser(path_data, path_calibration);
-
-    std::cout << "refactor_sfm_mvp!" << std::endl;
-
-    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
-                          true, false};
-    Frontend frontend(params);
-
-    for (size_t i = 633; i < 646; i += 2) {
-        frontend.add_image(
-            ImageAndPoint{parser.load_image(i),
-                          std::make_shared<ImagePointFourSeasons>(parser.get_image_point(i))});
-        const ImagePointFourSeasons img_pt = parser.get_image_point(i);
-        std::cout << img_pt.to_string() << std::endl;
-    }
-    frontend.populate_frames();
-    frontend.match_frames_and_build_tracks();
-
-    // visualization
-    std::vector<robot::visualization::VizPose> viz_poses_init;
-    std::optional<Eigen::Isometry3d> first_grnd_trth;
-    for (const Frame &frame : frontend.frames()) {
-        if (frame.world_from_cam_groundtruth_) {
-            std::cout << "adding a ground truth frame to viz!" << std::endl;
-            Eigen::Isometry3d w_from_cam_grnd_trth(frame.world_from_cam_groundtruth_->matrix());
-            if (!first_grnd_trth) first_grnd_trth = w_from_cam_grnd_trth;
-            w_from_cam_grnd_trth.translation() -= first_grnd_trth->translation();
-            viz_poses_init.emplace_back(w_from_cam_grnd_trth,
-                                        "x_grnd_" + std::to_string(frame.id_));
-        };
-    }
-
-    std::cout << "visualizing " << viz_poses_init.size() << " poses" << std::endl;
-    robot::visualization::viz_scene(viz_poses_init, std::vector<robot::visualization::VizPoint>(),
-                                    cv::viz::Color::brown(), true, true,
-                                    "Frontend initial guesses");
-
-    // ############# BACKEND ###############
-    constexpr bool use_grndtrth_only = false;
-    gtsam::Values initial_estimate_;
-    gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise = gtsam::noiseModel::Isotropic::Sigma(
-        2, 1.0);  // should probably mess with this, could also use essential matrix values to guide
-                  // this potentially
-    gtsam::Vector6 prior_sigmas;
-    prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
-    gtsam::Vector3 gps_sigmas_fallback;
-    gps_sigmas_fallback << 1.5, 1.5, 1.5;
-    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
-    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas_fallback);
-
-    // add gps factors
-    std::vector<Frame> &frames = frontend.frames();
-    frames[0].world_from_cam_initial_guess_ =
-        frames[0].world_from_cam_groundtruth_->rotation();  // make sure first idx has grnd trth
-
-    Backend::populate_rotation_estimate(frames);
-    std::vector<gtsam::Symbol> symbols_pose;
-    std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_guess;
-    std::optional<std::vector<Eigen::Vector3d>> interpolated_cam_in_w =
-        frontend.interpolated_initial_translations();
-    ROBOT_CHECK(interpolated_cam_in_w, interpolated_cam_in_w);
-
-    Eigen::Vector3d cam0_in_w = interpolated_cam_in_w->front();
-    std::optional<Eigen::Vector3d> grndtrth_cam0_in_w;
-    std::vector<size_t> idx_grndtrth_frame;
-    std::vector<robot::visualization::VizPose> viz_pose;
-    auto noise_tight_prior = gtsam::noiseModel::Diagonal::Sigmas(
-        (gtsam::Vector(6) << 0, 0, 0,  // rotation stdev in radians
-         1e-6, 1e-6, 1e-6              // translation stdev in meters
-         )
-            .finished());
-    const gtsam::Pose3 w_from_cam0_init_estimate(*frames[0].world_from_cam_initial_guess_,
-                                                 gtsam::Point3());
-    const gtsam::Symbol symbol_cam0(Backend::symbol_char_pose, 0);
-    graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
-        symbol_cam0, w_from_cam0_init_estimate,
-        noise_tight_prior));  // currently assuming that we have groundtruth on the first pose
-    initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate);
-    std::string str_pose0 =
-        std::string("x_") + (frames[0].world_from_cam_groundtruth_ ? "g_" : "") +
-        (!frames[0].world_from_cam_initial_guess_ ? "norot_" : "") + std::to_string(frames[0].id_);
-    viz_pose.emplace_back(Eigen::Isometry3d(w_from_cam0_init_estimate.matrix()), str_pose0);
-    symbols_pose.push_back(symbol_cam0);
-    for (size_t i = 1; i < frames.size(); i++) {
-        const Frame &frame = frames[i];
-        std::optional<gtsam::Pose3> world_from_cam_estimate;
-        if (use_grndtrth_only && !frame.world_from_cam_groundtruth_) {
-            continue;
-        } else if (use_grndtrth_only && frame.world_from_cam_groundtruth_) {
-            if (!grndtrth_cam0_in_w)
-                grndtrth_cam0_in_w = frame.world_from_cam_groundtruth_->translation();
-            world_from_cam_estimate =
-                gtsam::Pose3(frame.world_from_cam_groundtruth_->rotation(),
-                             gtsam::Point3(frame.world_from_cam_groundtruth_->translation() -
-                                           *grndtrth_cam0_in_w));
-            idx_grndtrth_frame.push_back(i);
-            std::cout << "populating groundtruth pose: " << world_from_cam_estimate->translation()
-                      << std::endl;
-        } else {
-            world_from_cam_estimate = gtsam::Pose3(frame.world_from_cam_initial_guess_
-                                                       ? *frame.world_from_cam_initial_guess_
-                                                       : gtsam::Rot3::Identity(),
-                                                   (*interpolated_cam_in_w)[i] - cam0_in_w);
-        }
-        std::cout << "world_from_cam_estimate_" << i << ": " << *world_from_cam_estimate
-                  << std::endl;
-        world_from_cam_initial_guess[frame.id_] = *world_from_cam_estimate;
-        const gtsam::Symbol cam_symbol(Backend::symbol_char_pose, frame.id_);
-        initial_estimate_.insert(cam_symbol, *world_from_cam_estimate);
-        if (frame.cam_in_world_initial_guess_) {
-            if (frame.translation_covariance_in_cam_) {
-                std::cout << "translation covariance: " << *frame.translation_covariance_in_cam_
-                          << std::endl;
-            }
-            gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
-                frame.translation_covariance_in_cam_
-                    ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
-                                     std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
-                                     std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
-                    : gps_sigmas_fallback);
-            graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
-            std::cout << "adding gps factor!" << std::endl;
-        }
-        std::string str_pose = std::string("x_") + (frame.world_from_cam_groundtruth_ ? "g_" : "") +
-                               (!frame.world_from_cam_initial_guess_ ? "norot_" : "") +
-                               std::to_string(frame.id_);
-        viz_pose.emplace_back(Eigen::Isometry3d(world_from_cam_estimate->matrix()), str_pose);
-        symbols_pose.push_back(cam_symbol);
-    }
-
-    // add landmarks to graph
-    std::vector<gtsam::Symbol> symbols_lmks;
-    std::vector<robot::visualization::VizPoint> viz_pts;
-    const FeatureTracks feature_tracks = frontend.feature_tracks();
-    for (size_t i = 0; i < feature_tracks.size(); i++) {
-        std::vector<gtsam::Pose3> world_from_lmk_cams;
-        std::vector<gtsam::Point2> lmk_observations;
-        for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
-            world_from_lmk_cams.push_back(world_from_cam_initial_guess[frame_id]);
-            lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y);
-        }
-        std::optional<gtsam::Point3> landmark_estimate =
-            attempt_triangulate(world_from_lmk_cams, lmk_observations, frames[0].K_, 2.0,
-                                false);  // all K are the same for now...
-        if (landmark_estimate) {
-            const gtsam::Symbol lmk_symbol(Backend::symbol_char_landmark, i);
-            for (const auto &[frame_id, keypoint_cv] : feature_tracks[i].obs_) {
-                graph_.add(gtsam::GenericProjectionFactor(
-                    gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise,
-                    gtsam::Symbol(Backend::symbol_char_pose, frame_id), lmk_symbol, frames[0].K_));
-            }
-            initial_estimate_.insert(lmk_symbol, *landmark_estimate);
-            symbols_lmks.push_back(lmk_symbol);
-            viz_pts.emplace_back(*landmark_estimate, "lmk_" + std::to_string(i));
-        }
-    }
-    robot::visualization::viz_scene(viz_pose, viz_pts, cv::viz::Color::brown(), true, true,
-                                    "Initial guesses in backend");
-
-    // do global optimization
-    const gtsam::Values result =
-        optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_lmks, 10);
-
-    // calculate ATE (Absolute Trajectory Error) average (RMSE) to reference
-    double sum_traj_error = 0;
-    double sum_rot_error = 0;
-    for (const size_t i_grndtrth_frame : idx_grndtrth_frame) {
-        const Frame &frame = frames[i_grndtrth_frame];
-        const gtsam::Pose3 traj_pose =
-            result.at<gtsam::Pose3>(gtsam::Symbol(Backend::symbol_char_pose, frame.id_));
-        sum_traj_error += std::pow(
-            (frame.world_from_cam_groundtruth_->translation() - traj_pose.translation()).norm(), 2);
-        sum_rot_error +=
-            rotation_error(Eigen::Isometry3d(frame.world_from_cam_groundtruth_->matrix()),
-                           Eigen::Isometry3d(traj_pose.matrix()));
-    }
-    std::cout << "sum_rot_error: " << sum_rot_error << std::endl;
-    double rmse_ate = std::sqrt(sum_traj_error / symbols_pose.size());
-    double rmse_rot = std::sqrt(sum_rot_error / symbols_pose.size());
-    std::cout << "\n\nRMSE_ATE:\t" << rmse_ate << "\nRMSE_ROT:\t" << rmse_rot << std::endl;
-
-    std::cout << "about to visualize result" << std::endl;
-    graph_values(result, "Result", symbols_pose, symbols_lmks);
-}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/sfm_mvp.cc b/experimental/learn_descriptors/sfm_mvp.cc
deleted file mode 100644
index 9771f2a29..000000000
--- a/experimental/learn_descriptors/sfm_mvp.cc
+++ /dev/null
@@ -1,685 +0,0 @@
-#include <memory>
-#include <utility>
-
-#include "Eigen/Dense"
-#include "boost/make_shared.hpp"
-#include "experimental/learn_descriptors/frame.hh"
-#include "experimental/learn_descriptors/frontend.hh"
-#include "experimental/learn_descriptors/frontend_definitions.hh"
-#include "experimental/learn_descriptors/structure_from_motion_types.hh"
-#include "experimental/learn_descriptors/symphony_lake_parser.hh"
-#include "gtest/gtest.h"
-#include "gtsam/geometry/Cal3_S2.h"
-#include "gtsam/geometry/Point2.h"
-#include "gtsam/geometry/Point3.h"
-#include "gtsam/geometry/Pose3.h"
-#include "gtsam/geometry/Rot3.h"
-#include "gtsam/geometry/triangulation.h"
-#include "gtsam/inference/Symbol.h"
-#include "gtsam/linear/NoiseModel.h"
-#include "gtsam/navigation/GPSFactor.h"
-#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
-#include "gtsam/nonlinear/NonlinearFactorGraph.h"
-#include "gtsam/nonlinear/Values.h"
-#include "gtsam/slam/BetweenFactor.h"
-#include "gtsam/slam/PriorFactor.h"
-#include "gtsam/slam/ProjectionFactor.h"
-#include "opencv2/opencv.hpp"
-#include "visualization/opencv/opencv_viz.hh"
-
-namespace std {
-template <>
-struct hash<gtsam::Symbol> {
-    size_t operator()(const gtsam::Symbol &s) const { return hash<uint64_t>()(s.key()); }
-};
-}  // namespace std
-
-namespace robot::experimental::learn_descriptors {
-class SFMMvpHelper {
-   public:
-    // struct SymbolHasher {
-    //     size_t operator()(const gtsam::Symbol &s) const { return std::hash<uint64_t>()(s.key());
-    //     }
-    // };
-
-    static bool attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
-                                    const std::vector<gtsam::Point2> &cam_obs,
-                                    gtsam::Cal3_S2::shared_ptr K,
-                                    gtsam::Point3 &out_p_world_landmark) {
-        bool valid = true;
-        if (cam_poses.size() >= 2) {
-            try {
-                // Attempt triangulation using DLT (or the GTSAM provided method)
-                gtsam::Point3 p_world_lmk = gtsam::triangulatePoint3(
-                    cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
-
-                out_p_world_landmark = p_world_lmk;
-
-                // Optional: perform an explicit cheirality check
-                for (const auto &pose : cam_poses) {
-                    // Transform point to the camera coordinate system.
-                    // transformTo() converts a world point to the camera frame.
-                    gtsam::Point3 p_cam_lmk = pose.transformTo(p_world_lmk);
-                    if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
-                        valid = false;
-                        break;
-                    }
-                }
-            } catch (const gtsam::TriangulationCheiralityException &e) {
-                // Handle the exception gracefully by logging and retaining the previous
-                // estimate.
-                std::cerr << "Triangulation Cheirality Exception: " << e.what()
-                          << ". Keeping initial landmark estimate." << std::endl;
-            }
-        } else {
-            valid = false;
-        }
-        return valid;
-    }
-
-    static void graph_values(const gtsam::Values &values, const std::string &window_name,
-                             const std::vector<gtsam::Symbol> &symbols_pose,
-                             const std::vector<gtsam::Symbol> &symbols_landmarks) {
-        std::vector<Eigen::Isometry3d> final_poses;
-        std::vector<Eigen::Vector3d> final_lmks;
-        for (const gtsam::Symbol &symbol_pose : symbols_pose) {
-            final_poses.emplace_back(values.at<gtsam::Pose3>(symbol_pose).matrix());
-        }
-        for (const gtsam::Symbol &symbol_lmk : symbols_landmarks) {
-            if (!values.exists(symbol_lmk)) {
-                std::cout << "WTF " << symbol_lmk << std::endl;
-            }
-            final_lmks.emplace_back(values.at<gtsam::Point3>(symbol_lmk));
-        }
-        geometry::viz_scene(final_poses, final_lmks, cv::viz::Color::black(), true, true,
-                            window_name);
-    }
-
-    static gtsam::Values optimize_graph(const gtsam::NonlinearFactorGraph &graph,
-                                        const gtsam::Values &values,
-                                        const std::vector<gtsam::Symbol> &symbols_pose,
-                                        const std::vector<gtsam::Symbol> &symbols_landmarks,
-                                        bool viz_itr = false) {
-        gtsam::LevenbergMarquardtParams params;
-        params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-        params.maxIterations = 1;
-        gtsam::LevenbergMarquardtOptimizer optimizer(graph, values, params);
-
-        double prev_error = optimizer.error();
-        const int num_steps = 5;
-        typedef int epoch;
-        std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
-                           const std::vector<gtsam::Symbol> &)>
-            graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
-                                       const std::vector<gtsam::Symbol> &symbols_pose,
-                                       const std::vector<gtsam::Symbol> &symbols_landmarks) {
-                std::cout << "iteration " << iter << " complete!";
-                std::string window_name = "Iteration_" + std::to_string(iter);
-                SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks);
-            };
-
-        for (int i = 0; i < num_steps; i++) {
-            optimizer.iterate();
-            double curr_error = optimizer.error();
-
-            if (viz_itr) {
-                graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
-            }
-            if (std::abs(prev_error - curr_error) < 1e-6) {
-                std::cout << "Converged at iteration " << i << "\n";
-                break;
-            }
-        }
-        return optimizer.values();
-    }
-
-    static gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
-        if (poses.empty()) throw std::runtime_error("Empty pose vector");
-
-        gtsam::Pose3 mean = poses[0];
-
-        for (int iter = 0; iter < maxIter; ++iter) {
-            gtsam::Vector6 total = gtsam::Vector6::Zero();
-
-            for (const auto &pose : poses) {
-                gtsam::Pose3 delta = mean.between(pose);
-                total += gtsam::Pose3::Logmap(delta);
-            }
-
-            total /= poses.size();
-            mean = mean.compose(gtsam::Pose3::Expmap(total));
-        }
-
-        return mean;
-    }
-
-    static gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
-        if (points.empty()) throw std::runtime_error("Empty point vector");
-        gtsam::Point3 sum(0, 0, 0);
-        for (const auto &pt : points) sum += pt;
-        return sum / points.size();
-    }
-
-    static gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
-        const gtsam::Point3 mean = SFMMvpHelper::averagePoints(points);
-        gtsam::Point3 var(0, 0, 0);
-        for (const gtsam::Point3 &pt : points) {
-            var += (mean - pt).array().square().matrix();
-        }
-        return var / points.size();
-    }
-};
-
-TEST(SFMMvp, sfm_building_manual_global) {
-    const std::vector<int> indices{60, 80, 100, 120, 140};
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
-
-    // const size_t img_width = img_pt_first.width, img_height =
-    // img_pt_first.height;
-    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-    gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
-    Eigen::Matrix<double, 5, 1> D =
-        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-            .finished();
-    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
-    cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
-
-    // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0
-    // * boat_from_camera
-    Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first);
-    Eigen::Isometry3d world_from_boat0;
-    world_from_boat0.linear() = earth_from_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 =
-        world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first);
-    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-    //                         gtsam::Pose3(T_world_camera0.matrix()));
-    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
-
-    gtsam::Values initial_estimate_;
-    gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    gtsam::Vector6 prior_sigmas;
-    prior_sigmas << 0.04, 0.04, 0.09, 0.01, 0.01, 0.01;
-    gtsam::Vector3 gps_sigmas;
-    gps_sigmas << 2.1, 2.1, 0.1;
-    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
-    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
-
-    // populate frames and cam_poses
-    std::vector<Frame> frames;
-    FrameId id = 0;
-    std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
-    std::vector<Eigen::Isometry3d> cam_isometries;
-    for (const int &idx : indices) {
-        const cv::Mat img = survey.loadImageByImageIndex(idx);
-        cv::Mat img_undistorted;
-        cv::undistort(img, img_undistorted, K_mat, D_mat);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-
-        Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt);
-        world_from_boat.translation() -= earth_from_boat0.translation();
-        Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt);
-        gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
-        cam_pose.emplace(id, T_world_cam_gtsam);
-        cam_isometries.push_back(T_world_cam);
-
-        // graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', id),
-        //                                                         T_world_cam_gtsam, pose_noise);
-
-        Frame frame{id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix())};
-
-        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.extract_features(img_undistorted);
-        KeypointsCV kpts;
-        for (const cv::KeyPoint &kpt : kpts_descs.first) {
-            kpts.push_back(kpt.pt);
-        }
-        frame.add_keypoints(kpts);
-        frame.assign_descriptors(kpts_descs.second);
-
-        frames.push_back(frame);
-
-        id++;
-    }
-
-    geometry::viz_scene(cam_isometries, std::vector<Eigen::Vector3d>());
-
-    // populate feature_tracks and lmk_id_map
-    FeatureTracks feature_tracks;
-    FrameLandmarkIdMap lmk_id_map;
-    LandmarkId lmk_id = 0;
-    for (size_t i = 0; i < indices.size() - 1; i++) {
-        std::vector<cv::DMatch> matches =
-            frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
-        // frontend.enforce_bijective_matches(matches);
-        frontend.enforce_bijective_buffer_matches(matches);
-        for (const cv::DMatch match : matches) {
-            const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
-
-            auto key = std::make_pair(i, kpt_cam0);
-            if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                auto id = lmk_id_map.at(key);
-                feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-                feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-            } else {
-                FeatureTrack feature_track(i, kpt_cam0);
-                feature_track.obs_.emplace_back(i + 1, kpt_cam1);
-                feature_tracks.emplace(lmk_id, feature_track);
-                lmk_id_map.emplace(std::make_pair(i, kpt_cam0), lmk_id);
-                lmk_id++;
-            }
-        }
-    }
-
-    // populate graph
-    std::vector<gtsam::Symbol> symbols_pose;
-    std::vector<gtsam::Symbol> symbols_landmarks;
-    for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
-        std::vector<gtsam::Pose3> feat_cam_poses;
-        std::vector<gtsam::Point2> feat_kpts;
-        LandmarkId lmk_id = lmk_feat.first;
-        FeatureTrack feature_track = lmk_feat.second;
-        const gtsam::Symbol symbol_lmk('l', lmk_id);
-        symbols_landmarks.push_back(symbol_lmk);
-        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, gtsam::Point3(0, 0, 0));
-            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
-                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
-
-            feat_cam_poses.push_back(cam_pose[feat_track.first]);
-            feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
-        }
-        gtsam::Point3 p_wrld_kpt;
-        bool triangulate_success =
-            SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-        if (triangulate_success) {
-            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-        }
-    }
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), cam_pose.at(0),
-                                                            prior_pose_noise);
-    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
-    symbols_pose.push_back(gtsam::Symbol('x', 0));
-    for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
-        if (cam_id_pose.first == 0) {
-            continue;
-        }
-        const gtsam::Symbol key_cam('x', cam_id_pose.first);
-        symbols_pose.push_back(key_cam);
-        gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
-        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
-        initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
-        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
-    }
-
-    // initial_estimate_.print("huh");
-    SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose, symbols_landmarks);
-
-    gtsam::LevenbergMarquardtParams params;
-    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
-    params.maxIterations = 1;          // We'll manually step it
-    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
-
-    double prev_error = optimizer.error();
-    const int num_steps = 5;
-    const bool viz_itr = true;
-    typedef int epoch;
-    std::function<void(const gtsam::Values &, const epoch, const std::vector<gtsam::Symbol> &,
-                       const std::vector<gtsam::Symbol> &)>
-        graph_itr_debug_func = [&](const gtsam::Values &vals, const epoch iter,
-                                   const std::vector<gtsam::Symbol> &symbols_pose,
-                                   const std::vector<gtsam::Symbol> &symbols_landmarks) {
-            std::cout << "iteration " << iter << " complete!";
-            std::string window_name = "Iteration_" + std::to_string(iter);
-            SFMMvpHelper::graph_values(vals, window_name, symbols_pose, symbols_landmarks);
-        };
-
-    SFMMvpHelper::graph_values(initial_estimate_, "Initial Graph", symbols_pose, symbols_landmarks);
-    for (int i = 0; i < num_steps; i++) {
-        optimizer.iterate();
-        double curr_error = optimizer.error();
-
-        if (viz_itr) {
-            graph_itr_debug_func(optimizer.values(), i, symbols_pose, symbols_landmarks);
-        }
-        if (std::abs(prev_error - curr_error) < 1e-6) {
-            std::cout << "Converged at iteration " << i << "\n";
-            break;
-        }
-    }
-    const gtsam::Values result = optimizer.values();
-}
-
-TEST(SFMMvp, sfm_building_manual_incremental) {
-    const std::vector<int> indices{60, 80, 100, 120, 140};
-    // const std::vector<int> indices = []() {
-    //     std::vector<int> tmp;
-    //     for (int i = 0; i < 200; i += 10) {
-    //         tmp.push_back(i);
-    //     }
-    //     return tmp;
-    // }();
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
-
-    // const size_t img_width = img_pt_first.width, img_height =
-    // img_pt_first.height;
-    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-    gtsam::Cal3_S2::shared_ptr K = boost::make_shared<gtsam::Cal3_S2>(fx, fy, 0, cx, cy);
-    Eigen::Matrix<double, 5, 1> D =
-        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-            .finished();
-    cv::Mat K_mat = (cv::Mat_<double>(3, 3) << K->fx(), 0, K->px(), 0, K->fy(), K->py(), 0, 0, 1);
-    cv::Mat D_mat = (cv::Mat_<double>(5, 1) << D(0, 0), D(1, 0), D(2, 0), D(3, 0), D(4, 0));
-
-    // let world be the first boat base recorded. T_world_camera0 = earth_from_boat0
-    Eigen::Isometry3d earth_from_boat0 = DataParser::get_world_from_boat(img_pt_first);
-    Eigen::Isometry3d world_from_boat0;
-    world_from_boat0.linear() = earth_from_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 =
-        world_from_boat0 * DataParser::get_boat_from_camera(img_pt_first);
-    // StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-    //                         gtsam::Pose3(T_world_camera0.matrix()));
-    Frontend frontend(Frontend::ExtractorType::SIFT, Frontend::MatcherType::KNN);
-
-    gtsam::Values initial_estimate_;
-    gtsam::NonlinearFactorGraph graph_;
-
-    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise =
-        gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
-    gtsam::Vector6 prior_sigmas;
-    prior_sigmas << 0.04, 0.04, 0.09, 2.1, 2.1, 0.1;
-    gtsam::Vector3 gps_sigmas;
-    gps_sigmas << 2.1, 2.1, 0.1;
-    gtsam::noiseModel::Diagonal::shared_ptr prior_pose_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(prior_sigmas);
-    gtsam::noiseModel::Diagonal::shared_ptr gps_noise =
-        gtsam::noiseModel::Diagonal::Sigmas(gps_sigmas);
-
-    // populate frames and cam_poses
-    std::vector<Frame> frames;
-    FrameId id = 0;
-    std::unordered_map<FrameId, gtsam::Pose3> cam_pose;
-    std::vector<Eigen::Isometry3d> cam_isometries;
-    for (const int &idx : indices) {
-        const cv::Mat img = survey.loadImageByImageIndex(idx);
-        cv::Mat img_undistorted;
-        cv::undistort(img, img_undistorted, K_mat, D_mat);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-
-        Eigen::Isometry3d world_from_boat = DataParser::get_world_from_boat(img_pt);
-        world_from_boat.translation() -= earth_from_boat0.translation();
-        Eigen::Isometry3d T_world_cam = world_from_boat * DataParser::get_boat_from_camera(img_pt);
-        gtsam::Pose3 T_world_cam_gtsam(T_world_cam.matrix());
-        cam_pose.emplace(id, T_world_cam_gtsam);
-        cam_isometries.push_back(T_world_cam);
-
-        Frame frame(id, img_undistorted, K, gtsam::Pose3(T_world_cam.matrix()));
-
-        std::pair<std::vector<cv::KeyPoint>, cv::Mat> kpts_descs =
-            frontend.extract_features(img_undistorted);
-        KeypointsCV kpts;
-        for (const cv::KeyPoint &kpt : kpts_descs.first) {
-            kpts.push_back(kpt.pt);
-        }
-        frame.add_keypoints(kpts);
-        frame.assign_descriptors(kpts_descs.second);
-
-        frames.push_back(frame);
-
-        id++;
-    }
-
-    geometry::viz_scene(cam_isometries, std::vector<Eigen::Vector3d>());
-
-    // populate feature_tracks and lmk_id_map
-    FeatureTracks feature_tracks;
-    FrameLandmarkIdMap lmk_id_map;
-    LandmarkId lmk_id = 0;
-    for (size_t i = 0; i < indices.size() - 1; i++) {
-        std::vector<cv::DMatch> matches =
-            frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
-        frontend.enforce_bijective_buffer_matches(matches);
-        for (const cv::DMatch match : matches) {
-            const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
-
-            auto key = std::make_pair(frames[i].id_, kpt_cam0);
-            if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                auto id = lmk_id_map.at(key);
-                feature_tracks.at(id).obs_.emplace_back(frames[i].id_, kpt_cam0);
-                feature_tracks.at(id).obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-            } else {
-                FeatureTrack feature_track(i, kpt_cam0);
-                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                feature_tracks.emplace(lmk_id, feature_track);
-                lmk_id_map.emplace(std::make_pair(frames[i].id_, kpt_cam0), lmk_id);
-                lmk_id++;
-            }
-        }
-    }
-
-    // triangulate all of the points
-
-    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map;
-    std::vector<gtsam::Point3> triangulated_lmks;
-    for (const std::pair<LandmarkId, FeatureTrack> lmk_feat : feature_tracks) {
-        std::vector<gtsam::Pose3> feat_cam_poses;
-        std::vector<gtsam::Point2> feat_kpts;
-        LandmarkId lmk_id = lmk_feat.first;
-        FeatureTrack feature_track = lmk_feat.second;
-        gtsam::Point3 p_wrld_kpt(0, 0, 0);
-        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            feat_cam_poses.push_back(cam_pose[feat_track.first]);
-            feat_kpts.push_back(gtsam::Point2(feat_track.second.x, feat_track.second.y));
-        }
-        bool triangulate_success =
-            SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-        if (triangulate_success) {
-            lmk_triangulated_map.emplace(lmk_id, p_wrld_kpt);
-            triangulated_lmks.push_back(p_wrld_kpt);
-        } else {
-            continue;
-        }
-    }
-    // filter points
-    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), triangulated_lmks,
-                        cv::viz::Color::black(), true, true, "Unfiltered points");
-    const gtsam::Point3 variance_pts = SFMMvpHelper::get_variance(triangulated_lmks);
-    const gtsam::Point3 std_dev_pts = variance_pts.array().sqrt().matrix();
-    const gtsam::Point3 mean_pts = SFMMvpHelper::averagePoints(triangulated_lmks);
-    std::unordered_map<LandmarkId, gtsam::Point3> lmk_triangulated_map_filtered;
-    std::vector<gtsam::Point3> filtered_points;
-    std::cout << "original var " << variance_pts << std::endl;
-    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map) {
-        const gtsam::Point3 dist_mean = (lmk_id_pt.second - mean_pts).array().abs().matrix();
-        if ((dist_mean.array() <= (3 * std_dev_pts).array()).all()) {
-            lmk_triangulated_map_filtered.emplace(lmk_id_pt);
-            filtered_points.push_back(lmk_id_pt.second);
-        }
-    }
-    std::cout << "filtered variance " << SFMMvpHelper::get_variance(filtered_points) << std::endl;
-    geometry::viz_scene(std::vector<Eigen::Isometry3d>(), filtered_points, cv::viz::Color::black(),
-                        true, true, "Unfiltered points");
-
-    // add filtered points to graph
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Pose3>> symbols_poses_values_iter;
-    std::unordered_map<gtsam::Symbol, std::vector<gtsam::Point3>> symbols_landmarks_values_iter;
-    std::vector<gtsam::Symbol> symbols_pose;
-    std::vector<gtsam::Symbol> symbols_landmarks;
-    for (const std::pair<LandmarkId, gtsam::Point3> lmk_id_pt : lmk_triangulated_map_filtered) {
-        LandmarkId lmk_id = lmk_id_pt.first;
-        const gtsam::Point3 p_wrld_kpt = lmk_id_pt.second;
-        FeatureTrack feature_track = feature_tracks.at(lmk_id);
-        const gtsam::Symbol symbol_lmk('l', lmk_id);
-        for (const std::pair<FrameId, KeypointCV> &feat_track : feature_track.obs_) {
-            initial_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-            symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                  std::vector<gtsam::Point3>{p_wrld_kpt});
-            graph_.emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                gtsam::Point2(feat_track.second.x, feat_track.second.y), landmark_noise,
-                gtsam::Symbol('x', feat_track.first), symbol_lmk, K);
-        }
-    }
-    graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), cam_pose.at(0),
-                                                            prior_pose_noise);
-    initial_estimate_.insert_or_assign(gtsam::Symbol('x', 0), cam_pose.at(0));
-    symbols_pose.push_back(gtsam::Symbol('x', 0));
-    symbols_poses_values_iter.emplace(gtsam::Symbol('x', 0),
-                                      std::vector<gtsam::Pose3>{cam_pose.at(0)});
-    for (const std::pair<FrameId, gtsam::Pose3> cam_id_pose : cam_pose) {
-        if (cam_id_pose.first == 0) {
-            continue;
-        }
-        const gtsam::Symbol key_cam('x', cam_id_pose.first);
-        symbols_pose.push_back(key_cam);
-        gtsam::Point3 p_wrld_cam = cam_id_pose.second.translation();
-        graph_.emplace_shared<gtsam::GPSFactor>(key_cam, p_wrld_cam, gps_noise);
-        initial_estimate_.insert_or_assign(key_cam, cam_id_pose.second);
-        symbols_poses_values_iter.emplace(key_cam, std::vector<gtsam::Pose3>{cam_id_pose.second});
-        // initial_estimate_.insert_or_assign(key_cam, gtsam::Pose3(gtsam::Rot3(), p_wrld_cam));
-    }
-
-    // SFMMvpHelper::graph_values(initial_estimate_, "Confirmation", symbols_pose,
-    // symbols_landmarks);
-
-    // do local optimizations and add to iter cache
-    // const int window = 2;
-    std::cout << "length of cam_poses: " << cam_pose.size() << std::endl;
-    for (const auto &id_pose : cam_pose) {
-        std::cout << "id: " << id_pose.first << "\tpose: " << id_pose.second << std::endl;
-    }
-    for (size_t i = 0; i < indices.size() - 1; i++) {
-        std::cout << "Local optimization " << i << std::endl;
-        gtsam::Values local_estimate_;
-        gtsam::NonlinearFactorGraph local_graph_;
-
-        std::vector<gtsam::Symbol> poses{gtsam::Symbol('x', i), gtsam::Symbol('x', i + 1)};
-        std::vector<gtsam::Symbol> lmks;
-
-        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-            gtsam::PriorFactor<gtsam::Pose3>(poses[0], cam_pose.at(i), prior_pose_noise));
-        std::cout << "fuck" << std::endl;
-        local_graph_.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(
-            gtsam::PriorFactor<gtsam::Pose3>(poses[1], cam_pose.at(i + 1), prior_pose_noise));
-        std::cout << "mega fuck" << std::endl;
-        local_estimate_.insert_or_assign(poses[0], cam_pose.at(i));
-        local_estimate_.insert_or_assign(poses[1], cam_pose.at(i + 1));
-
-        std::vector<cv::DMatch> matches =
-            frontend.compute_matches(frames[i].descriptors(), frames[i + 1].descriptors());
-        frontend.enforce_bijective_matches(matches);
-        std::vector<gtsam::Pose3> feat_cam_poses{cam_pose.at(i), cam_pose.at(i + 1)};
-
-        std::vector<Eigen::Isometry3d> viz_poses{Eigen::Isometry3d(feat_cam_poses[0].matrix()),
-                                                 Eigen::Isometry3d(feat_cam_poses[1].matrix())};
-        std::vector<Eigen::Vector3d> viz_lmks;
-        for (const cv::DMatch match : matches) {
-            std::vector<gtsam::Point2> feat_kpts;
-            const KeypointCV kpt_cam0 = frames[i].keypoint()[match.queryIdx];
-            const KeypointCV kpt_cam1 = frames[i + 1].keypoint()[match.trainIdx];
-            feat_kpts.emplace_back(kpt_cam0.x, kpt_cam0.y);
-            feat_kpts.emplace_back(kpt_cam1.x, kpt_cam1.y);
-
-            auto key = std::make_pair(frames[i].id_, kpt_cam0);
-            if (lmk_id_map.find(key) != lmk_id_map.end()) {
-                auto id = lmk_id_map.at(key);
-                if (lmk_triangulated_map_filtered.find(id) == lmk_triangulated_map_filtered.end()) {
-                    continue;
-                }
-                std::cout << "good" << std::endl;
-                // do nothing
-                // feature_tracks.at(id).obs_.emplace_back(i, kpt_cam0);
-                // feature_tracks.at(id).obs_.emplace_back(i + 1, kpt_cam1);
-            } else {
-                std::cerr << "this shouldn't happen right?" << std::endl;
-                FeatureTrack feature_track(frames[i].id_, kpt_cam0);
-                feature_track.obs_.emplace_back(frames[i + 1].id_, kpt_cam1);
-                feature_tracks.emplace(lmk_id, feature_track);
-                lmk_id_map.emplace(key, lmk_id);
-                lmk_id++;
-            }
-            std::cout << "oog" << std::endl;
-            const gtsam::Symbol symbol_lmk =
-                gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0)));
-            // if (gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam0))) !=
-            //     gtsam::Symbol('l', lmk_id_map.at(std::make_pair(frames[i].id_, kpt_cam1)))) {
-            //     std::cerr << "UH OH" << std::endl;
-            // } else {
-            //     std::cout << "cool" << std::endl;
-            // }
-            lmks.push_back(symbol_lmk);
-            local_graph_
-                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                    feat_kpts[0], landmark_noise, poses[0], symbol_lmk, K);
-            local_graph_
-                .emplace_shared<gtsam::GenericProjectionFactor<gtsam::Pose3, gtsam::Point3>>(
-                    feat_kpts[1], landmark_noise, poses[1], symbol_lmk, K);
-
-            gtsam::Point3 p_wrld_kpt;
-            bool triangulate_success =
-                SFMMvpHelper::attempt_triangulate(feat_cam_poses, feat_kpts, K, p_wrld_kpt);
-            if (triangulate_success) {
-                local_estimate_.insert_or_assign(symbol_lmk, p_wrld_kpt);
-                viz_lmks.push_back(p_wrld_kpt);
-            }
-            if (symbols_landmarks_values_iter.find(symbol_lmk) !=
-                symbols_landmarks_values_iter.end()) {
-                symbols_landmarks_values_iter[symbol_lmk].push_back(p_wrld_kpt);
-            } else {
-                symbols_landmarks_values_iter.emplace(symbol_lmk,
-                                                      std::vector<gtsam::Point3>{p_wrld_kpt});
-            }
-        }
-        std::cout << "setup complete!" << std::endl;
-        // geometry::viz_scene(viz_poses, viz_lmks, true, true,
-        //                     "Local Optimization " + std::to_string(i));
-
-        const gtsam::Values symbols_result_local = SFMMvpHelper::optimize_graph(
-            local_graph_, local_estimate_, symbols_pose, symbols_landmarks, false);
-
-        for (const gtsam::Symbol &symbol_pose : poses) {
-            const gtsam::Pose3 T_wrld_cam = symbols_result_local.at<gtsam::Pose3>(symbol_pose);
-            symbols_poses_values_iter.at(symbol_pose).push_back(T_wrld_cam);
-        }
-        for (const gtsam::Symbol &symbol_lmk : lmks) {
-            const gtsam::Point3 p_wrld_lmk = symbols_result_local.at<gtsam::Point3>(symbol_lmk);
-            symbols_landmarks_values_iter.at(symbol_lmk).push_back(p_wrld_lmk);
-        }
-    }
-
-    std::cout << "\nLocal Optimizations Complete!\n" << std::endl;
-
-    for (std::pair<gtsam::Symbol, std::vector<gtsam::Pose3>> sym_pose : symbols_poses_values_iter) {
-        initial_estimate_.insert_or_assign(sym_pose.first,
-                                           SFMMvpHelper::averagePoses(sym_pose.second));
-    }
-    for (std::pair<gtsam::Symbol, std::vector<gtsam::Point3>> sym_lmk :
-         symbols_landmarks_values_iter) {
-        initial_estimate_.insert_or_assign(sym_lmk.first,
-                                           SFMMvpHelper::averagePoints(sym_lmk.second));
-    }
-
-    // do global optimization
-    const gtsam::Values result =
-        SFMMvpHelper::optimize_graph(graph_, initial_estimate_, symbols_pose, symbols_landmarks);
-    std::cout << "about to visualize result" << std::endl;
-    SFMMvpHelper::graph_values(result, "Result", symbols_pose, symbols_landmarks);
-}
-
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc
index 95c45a650..a6989ca95 100644
--- a/experimental/learn_descriptors/structure_from_motion_example.cc
+++ b/experimental/learn_descriptors/structure_from_motion_example.cc
@@ -53,12 +53,10 @@ int main(int argc, const char **argv) {
                                    FrontendParams::MatcherType::KNN, true, false};
     StructureFromMotion sfm(frontend_params);
 
-    // for (size_t i = 633; i < 1000; i += 1) {
-    //     const ImagePointFourSeasons img_pt = parser.get_image_point(i);
-    //     sfm.add_image_point(parser.load_image(i),
-    //     std::make_shared<ImagePointFourSeasons>(img_pt));
-    // }
-    for (size_t i = 835; i < 900; i += 4) {
+    // short sequences (maybe about a doezen points that have decent overlap and aren't spaced too
+    // far apart on the order of maybe a meter) perform ok. many improvements needed for the overall
+    // system
+    for (size_t i = 835; i < 855; i += 2) {
         const ImagePointFourSeasons img_pt = parser.get_image_point(i);
         sfm.add_image_point(parser.load_image(i), std::make_shared<ImagePointFourSeasons>(img_pt));
     }

From a17767f081be4775566b5d508fafa77e01c8df49 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 24 Jul 2025 12:47:13 -0400
Subject: [PATCH 41/45] bug fixes

---
 experimental/learn_descriptors/BUILD          |  55 +------
 .../learn_descriptors/backend_test.cc         |  56 -------
 .../learn_descriptors/four_seasons_parser.hh  |   2 +-
 .../four_seasons_parser_example_viz.cc        |   9 +-
 .../learn_descriptors/frontend_test.cc        |  43 +++---
 .../structure_from_motion_test.cc             | 140 ------------------
 6 files changed, 30 insertions(+), 275 deletions(-)
 delete mode 100644 experimental/learn_descriptors/backend_test.cc
 delete mode 100644 experimental/learn_descriptors/structure_from_motion_test.cc

diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index 6f710c081..3f1229b82 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -37,22 +37,6 @@ cc_library(
     ],
 )
 
-cc_test(
-    name = "structure_from_motion_test",
-    srcs = ["structure_from_motion_test.cc"],
-    copts = [
-        "-Wno-error=unused-parameter",
-    ],
-    deps = [
-        ":frame",
-        ":structure_from_motion",
-        ":symphony_lake_parser",
-        "//common/geometry:translate_types",
-        "//visualization/opencv:opencv_viz",
-        "@com_google_googletest//:gtest_main",
-    ],
-)
-
 cc_library(
     name = "symphony_lake_parser",
     srcs = ["symphony_lake_parser.cc"],
@@ -166,20 +150,8 @@ cc_library(
     deps = [
         ":camera_calibration",
         ":gps_data",
-        "//common/liegroups:se3",
         "@com_google_absl//absl/strings:str_format",
-        "@geographiclib",
         "@nmea",
-    ],
-      "@eigen",
-      "@opencv//:opencv",
-      "//common/liegroups:se3",
-      "//common/time:robot_time",
-      "@com_google_absl//absl/strings:str_format",
-      ":image_point",
-      "@nmea",
-      "@geographiclib",
-      "//common:check"
     ]
 )
 
@@ -262,16 +234,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "feature_set",
-    hdrs = ["feature_set.hh"],
-    visibility = ["//visibility:public"],
-    deps = [
-        "@gtsam",
-        "@opencv",
-    ],
-)
-
 cc_library(
     name = "spatial_test_scene",
     srcs = ["spatial_test_scene.cc"],
@@ -332,6 +294,7 @@ cc_test(
     name = "frontend_test",
     srcs = ["frontend_test.cc"],
     deps = [
+        ":frame",
         ":frontend",
         ":image_point",
         "@com_google_googletest//:gtest_main",
@@ -359,22 +322,6 @@ cc_library(
     ],
 )
 
-cc_test(
-    name = "backend_test",
-    srcs = ["backend_test.cc"],
-    copts = [
-        "-Wno-error=unused-parameter",
-    ],
-    deps = [
-        ":backend",
-        ":feature_manager",
-        ":spatial_test_scene_cube",
-        "//visualization/opencv:opencv_viz",
-        "@com_google_googletest//:gtest_main",
-        "@opencv",
-    ],
-)
-
 cc_library(
     name = "camera_calibration",
     hdrs = ["camera_calibration.hh"],
diff --git a/experimental/learn_descriptors/backend_test.cc b/experimental/learn_descriptors/backend_test.cc
deleted file mode 100644
index 48fe2c450..000000000
--- a/experimental/learn_descriptors/backend_test.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-#include "experimental/learn_descriptors/backend.hh"
-
-#include <cstddef>
-
-#include "experimental/learn_descriptors/feature_manager.hh"
-#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
-#include "gtest/gtest.h"
-#include "visualization/opencv/opencv_viz.hh"
-
-namespace robot::experimental::learn_descriptors {
-TEST(BackendTest, cube) {
-    // SpatialTestSceneCube test_scene(1.0f);
-
-    const size_t img_width = 640;
-    const size_t img_height = 480;
-    const cv::Size img_size(640, 480);
-    const double fx = 500.0;
-    const double fy = fx;
-    const double cx = img_width / 2.0;
-    const double cy = img_height / 2.0;
-    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
-
-    // Backend backend(std::make_shared<FeatureManager>(), *K);
-    // Backend backend(K);
-    // test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, *K);
-
-    // std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.cameras()();
-    // std::vector<Eigen::Isometry3d> camera_poses;
-    // camera_poses.reserve(cameras.size());
-    // for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &T_world_cam : cameras) {
-    //     camera_poses.emplace_back(T_world_cam.pose().matrix());
-    // }
-
-    // backend.add_prior_factor(gtsam::Symbol(backend.symbol_char_cam_cal, 0), camera_poses.front(),
-    //                          backend.get_pose_noise());
-    // for (size_t i = 1; i < camera_poses.size(); i++) {
-    //     backend.add_factor_GPS(gtsam::Symbol(backend.symbol_char_cam_cal, i),
-    //                            camera_poses[i].translation(), backend.get_gps_noise());
-
-    //     std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size> cam_and_img_sizes[2] = {
-    //         std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>(cameras[i - 1], img_size),
-    //         std::pair<gtsam::PinholeCamera<gtsam::Cal3_S2>, cv::Size>(cameras[i], img_size)};
-    // std::vector<std::pair<SpatialTestScene::ProjectedPoint,
-    // SpatialTestScene::ProjectedPoint>>
-    //     lmks = test_scene.get_corresponding_pixels(cam_and_img_sizes);
-    // for (const &std::pair < SpatialTestScene::ProjectedPoint,
-    //      SpatialTestScene::ProjectedPoint >> lmk : lmks) {
-    //     //     // Backend::Landmark()
-    // }
-    // }
-
-    // geometry::viz_scene(camera_poses, test_scene.points());
-}
-
-TEST(BackendTest, test_rotation_estimation) {}
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh
index 2d188f1b3..4458215c8 100644
--- a/experimental/learn_descriptors/four_seasons_parser.hh
+++ b/experimental/learn_descriptors/four_seasons_parser.hh
@@ -17,7 +17,7 @@ class FourSeasonsParser : std::enable_shared_from_this<FourSeasonsParser> {
    public:
     static constexpr double CAM_HZ = 30.0;
     static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ;
-  
+
     FourSeasonsParser(const std::filesystem::path& root_dir,
                       const std::filesystem::path& calibration_dir);
     std::shared_ptr<FourSeasonsParser> get_shared() { return shared_from_this(); }
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index 771ab7d77..ae9036342 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -106,7 +106,7 @@ int main(int argc, const char** argv) {
                                           Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()));
         viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
         Eigen::Isometry3d frontend_w_from_cam_groundtruth(
-            frontend.frames()[i].world_from_cam_groundtruth_->matrix());
+            frontend.frames()[i]->world_from_cam_groundtruth_->matrix());
         ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()),
                     frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix());
         // Eigen::Isometry3d frontend_w_from_cam_groundtruth(*img_pt.world_from_cam_ground_truth());
@@ -139,8 +139,9 @@ int main(int argc, const char** argv) {
                 altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude);
             }
             if (!first_gps_to_cam) {
-                first_gps_to_cam = frontend.frames()[i].world_from_cam_groundtruth_->translation() -
-                                   *frontend.frames()[i].cam_in_world_initial_guess_;
+                first_gps_to_cam =
+                    frontend.frames()[i]->world_from_cam_groundtruth_->translation() -
+                    *frontend.frames()[i]->cam_in_world_initial_guess_;
             }
             gcs_coordinate.z() += *altitude_gps_from_gnss_cam;
             std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs
@@ -158,7 +159,7 @@ int main(int argc, const char** argv) {
             Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() - cam_from_gps.translation();
             viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i));
             viz_points.emplace_back(
-                *frontend.frames()[i].cam_in_world_initial_guess_ + *first_gps_to_cam,
+                *frontend.frames()[i]->cam_in_world_initial_guess_ + *first_gps_to_cam,
                 "x_frontend_cam_gps" + std::to_string(i));
             const Eigen::Vector3d gps_from_ref_in_world =
                 gps_in_w.head<3>() - w_from_gnss_cam.translation();
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
index a04a443ca..c5046fd1f 100644
--- a/experimental/learn_descriptors/frontend_test.cc
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -7,6 +7,8 @@
 #include <vector>
 
 #include "Eigen/Core"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
 #include "experimental/learn_descriptors/image_point.hh"
 #include "gtest/gtest.h"
 #include "opencv2/opencv.hpp"
@@ -51,10 +53,10 @@ TEST(FrontendTest, pipeline_sweep) {
     cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
     cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
 
-    cv::Mat img_test_disp;
-    cv::hconcat(image_1, image_2, img_test_disp);
-    cv::imshow("Test", img_test_disp);
-    cv::waitKey(1000);
+    // cv::Mat img_test_disp;
+    // cv::hconcat(image_1, image_2, img_test_disp);
+    // cv::imshow("Test", img_test_disp);
+    // cv::waitKey(1000);
 
     FrontendParams::ExtractorType extractor_types[2] = {FrontendParams::ExtractorType::SIFT,
                                                         FrontendParams::ExtractorType::ORB};
@@ -70,7 +72,7 @@ TEST(FrontendTest, pipeline_sweep) {
     std::vector<cv::DMatch> matches;
     cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
         img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
-    cv::Mat img_display_test;
+    // cv::Mat img_display_test;
     for (FrontendParams::ExtractorType extractor_type : extractor_types) {
         for (FrontendParams::MatcherType matcher_type : matcher_types) {
             printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
@@ -93,15 +95,15 @@ TEST(FrontendTest, pipeline_sweep) {
                                     img_keypoints_out_2);
             frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
                                   keypoints_descriptors_pair_2.first, matches, img_matches_out);
-            cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-            cv::vconcat(img_display_test, img_matches_out, img_display_test);
-            std::stringstream text;
-            text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
-                 << static_cast<int>(matcher_type);
-            cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
-                        cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
-            cv::imshow("Keypoints and Matches Output.", img_display_test);
-            std::cout << "Hold spacebar to pause." << std::endl;
+            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
+            // std::stringstream text;
+            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
+            //      << static_cast<int>(matcher_type);
+            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
+            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
+            // cv::imshow("Keypoints and Matches Output.", img_display_test);
+            // std::cout << "Hold spacebar to pause." << std::endl;
             while (cv::waitKey(1000) == 32) {
             }
             printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
@@ -182,14 +184,15 @@ TEST(FrontendTest, interpolate_frames) {
         time += dt;
     }
     frontend.populate_frames();
-    std::optional<std::vector<Eigen::Vector3d>> interpolated_pts =
-        frontend.interpolated_initial_translations();
-    ROBOT_CHECK(interpolated_pts);
-    ROBOT_CHECK(interpolated_pts->size() == img_pts.size());
+    frontend.interpolate_frames();
+
+    const std::vector<SharedFrame> &shared_frames = frontend.frames();
     constexpr double TOL = 1e-5;
     for (size_t i = 0; i < pts_in_world.size(); i++) {
-        ROBOT_CHECK(pts_in_world[i].isApprox((*interpolated_pts)[i], TOL), i, pts_in_world[i],
-                    (*interpolated_pts)[i]);
+        const Frame &frame = *shared_frames[i];
+        ROBOT_CHECK(frame.cam_in_world_interpolated_guess_);
+        ROBOT_CHECK(pts_in_world[i].isApprox(*frame.cam_in_world_interpolated_guess_, TOL), i,
+                    pts_in_world[i], *frame.cam_in_world_interpolated_guess_);
     }
 }
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_test.cc b/experimental/learn_descriptors/structure_from_motion_test.cc
deleted file mode 100644
index 3063c2234..000000000
--- a/experimental/learn_descriptors/structure_from_motion_test.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-#include "experimental/learn_descriptors/structure_from_motion.hh"
-
-#include <iostream>
-#include <sstream>
-#include <thread>
-
-#include "Eigen/Geometry"
-#include "common/geometry/translate_types.hh"
-#include "experimental/learn_descriptors/frame.hh"
-#include "experimental/learn_descriptors/symphony_lake_parser.hh"
-#include "gtest/gtest.h"
-#include "visualization/opencv/opencv_viz.hh"
-
-class GtsamTestHelper {
-   public:
-    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
-        return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height;
-    }
-};
-
-namespace robot::experimental::learn_descriptors {
-TEST(StructureFromMotiontest, sfm_snippet_small) {
-    const std::vector<int> indices{120, 130};  // 0-199
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
-
-    // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
-    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-    Eigen::Matrix<double, 5, 1> D =
-        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-            .finished();
-
-    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
-    // T_earth_boat0 =
-    Eigen::Isometry3d T_earth_world = DataParser::get_world_from_boat(img_pt_first);
-    Eigen::Isometry3d T_world_camera0 = DataParser::get_boat_from_camera(img_pt_first);
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-                            gtsam::Pose3(T_world_camera0.matrix()));
-
-    std::vector<cv::Mat> img_vector;
-    for (const int &idx : indices) {
-        const cv::Mat img = survey.loadImageByImageIndex(idx);
-        img_vector.push_back(img);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-        Eigen::Isometry3d T_earth_boat = DataParser::get_world_from_boat(img_pt);
-        Eigen::Isometry3d T_world_boat = T_earth_world.inverse() * T_earth_boat;
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt);
-
-        sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
-    }
-
-    const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
-
-    sfm.graph_values(initial_values, "initial_values");
-
-    std::cout << "Solving for structure!" << std::endl;
-
-    sfm.solve_structure();
-
-    std::cout << "Solution complete." << std::endl;
-
-    const gtsam::Values result_values = sfm.get_structure_result();
-    sfm.graph_values(result_values, "result_values");
-}
-
-TEST(StructureFromMotiontest, sfm_building) {
-    // indices 0-199
-    const std::vector<int> indices = []() {
-        std::vector<int> tmp;
-        for (int i = 0; i < 200; i += 10) {
-            tmp.push_back(i);
-        }
-        return tmp;
-    }();
-    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
-    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
-    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
-    const symphony_lake_dataset::ImagePoint img_pt_first = survey.getImagePoint(indices.front());
-
-    // const size_t img_width = img_pt_first.width, img_height = img_pt_first.height;
-    const double fx = img_pt_first.fx, fy = img_pt_first.fy;
-    const double cx = img_pt_first.cx, cy = img_pt_first.cy;
-    gtsam::Cal3_S2 K(fx, fy, 0, cx, cy);
-    Eigen::Matrix<double, 5, 1> D =
-        (Eigen::Matrix<double, 5, 1>() << SymphonyLakeCamParams::k1, SymphonyLakeCamParams::k2,
-         SymphonyLakeCamParams::p1, SymphonyLakeCamParams::p2, SymphonyLakeCamParams::k3)
-            .finished();
-
-    // let world be the first boat base recorded. T_world_camera0 = T_earth_boat0 * T_boat_camera
-    Eigen::Isometry3d T_earth_boat0 = DataParser::get_world_from_boat(img_pt_first);
-    Eigen::Isometry3d T_world_boat0;
-    T_world_boat0.linear() = T_earth_boat0.linear();
-    Eigen::Isometry3d T_world_camera0 =
-        T_world_boat0 * DataParser::get_boat_from_camera(img_pt_first);
-    StructureFromMotion sfm(Frontend::ExtractorType::SIFT, K, D,
-                            gtsam::Pose3(T_world_camera0.matrix()));
-
-    for (const int &idx : indices) {
-        const cv::Mat img = survey.loadImageByImageIndex(idx);
-        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(idx);
-
-        Eigen::Isometry3d T_world_boat = DataParser::get_world_from_boat(img_pt);
-        T_world_boat.translation() -= T_earth_boat0.translation();
-
-        Eigen::Isometry3d T_world_cam = T_world_boat * DataParser::get_boat_from_camera(img_pt);
-
-        sfm.add_image(img, gtsam::Pose3(T_world_cam.matrix()));
-    }
-
-    const gtsam::Values initial_values = sfm.get_backend().get_current_initial_values();
-    sfm.graph_values(initial_values, "initial values");
-
-    std::cout << "Solving for structure!" << std::endl;
-
-    Backend::graph_step_debug_func solve_iter_debug_func = [&sfm](const gtsam::Values &vals,
-                                                                  const Backend::epoch iter) {
-        std::cout << "iteration " << iter << " complete!";
-        std::string window_name = "Iteration_" + std::to_string(iter);
-        sfm.graph_values(vals, window_name);
-    };
-    sfm.solve_structure(5, solve_iter_debug_func);
-
-    std::cout << "Solution complete." << std::endl;
-
-    const gtsam::Values result_values = sfm.get_structure_result();
-    sfm.graph_values(result_values, "optimized values");
-}
-
-TEST(StructureFromMotiontest, random_test) {
-    std::cout << DataParser::T_boat_gps.matrix() << std::endl;
-    geometry::viz_scene(
-        std::vector<Eigen::Isometry3d>{DataParser::T_boat_imu, DataParser::T_boat_gps},
-        std::vector<Eigen::Vector3d>{DataParser::t_boat_cam});
-}
-}  // namespace robot::experimental::learn_descriptors

From 04fc7bce78c7ee65b8c68f93a9ade7b4a13789a3 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 24 Jul 2025 13:53:42 -0400
Subject: [PATCH 42/45] fix test errors

---
 common/geometry/BUILD                    | 10 ----------
 common/geometry/camera_test.cc           |  6 ------
 common/geometry/trajectory_evaluation.hh | 17 -----------------
 3 files changed, 33 deletions(-)
 delete mode 100644 common/geometry/trajectory_evaluation.hh

diff --git a/common/geometry/BUILD b/common/geometry/BUILD
index 6b11a2b02..2c750e8b0 100644
--- a/common/geometry/BUILD
+++ b/common/geometry/BUILD
@@ -66,13 +66,3 @@ cc_test(
     ":translate_types"
   ]
 )
-
-cc_library(
-  name = "trajectory_evaluation",
-  hdrs = ["trajectory_evaluation.hh"],
-  srcs = ["trajectory_evaluation.cc"],
-  visibility = ["//visibility:public"],
-  deps = [
-    "@eigen",     
-  ]
-)
\ No newline at end of file
diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc
index 4d8b810fa..04c7411f2 100644
--- a/common/geometry/camera_test.cc
+++ b/common/geometry/camera_test.cc
@@ -76,7 +76,6 @@ TEST(CameraTest, test_estimate_pose) {
     cv::Mat K = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
 
     Eigen::Matrix3d K_eig = cv_to_eigen_mat(K);
-    std::vector<Eigen::Isometry3d> poses;
 
     Eigen::Matrix3d R_world_from_cam0(
         Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
@@ -84,7 +83,6 @@ TEST(CameraTest, test_estimate_pose) {
     Eigen::Isometry3d world_from_cam0 = Eigen::Isometry3d::Identity();
     world_from_cam0.linear() = R_world_from_cam0;
     world_from_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2);
-    poses.push_back(world_from_cam0);
 
     Eigen::Matrix3d R_world_45deg(
         Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix());
@@ -92,7 +90,6 @@ TEST(CameraTest, test_estimate_pose) {
     world_from_cam1.linear() = R_world_45deg * world_from_cam0.linear();
     world_from_cam1.translation() =
         R_world_45deg * (world_from_cam0.translation() - p_world_cube_center) + p_world_cube_center;
-    poses.push_back(world_from_cam1);
 
     std::vector<cv::KeyPoint> kpts0;
     std::vector<cv::KeyPoint> kpts1;
@@ -127,9 +124,6 @@ TEST(CameraTest, test_estimate_pose) {
         EXPECT_TRUE(cam0_from_cam1_estimate->translation().isApprox(cam0_from_cam1.translation(),
                                                                     0.000001));
         EXPECT_TRUE(cam0_from_cam1_estimate->linear().isApprox(cam0_from_cam1.linear(), 0.001));
-
-        poses.emplace_back(world_from_cam0 * *cam0_from_cam1_estimate);
-        viz_scene(poses, p_W_cube);
     }
 }
 }  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/trajectory_evaluation.hh b/common/geometry/trajectory_evaluation.hh
deleted file mode 100644
index c3832359a..000000000
--- a/common/geometry/trajectory_evaluation.hh
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-#include <cstddef>
-#include <vector>
-
-#include "Eigen/Core"
-
-namespace robot::geometry {
-const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &traj1,
-                                                        const std::vector<Eigen::Vector3d> &traj2) {
-    assert(traj1.size() == traj2.size());
-    std::vector<double> result;
-    for (size_t i = 0; i < traj2.size(); i++) {
-        result.push_back((traj2[i] - traj1[i]).norm());
-    }
-    return result;
-}
-}  // namespace robot::geometry
\ No newline at end of file

From 1fc67df1b7052238653df01e610a018996ac7122 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 24 Jul 2025 14:32:02 -0400
Subject: [PATCH 43/45] clean up includes

---
 .../learn_descriptors/four_seasons_parser.cc      | 14 ++------------
 .../four_seasons_parser_detail.cc                 |  6 ------
 .../four_seasons_parser_example_viz.cc            | 15 ---------------
 experimental/learn_descriptors/frontend.cc        |  3 ---
 .../learn_descriptors/frontend_definitions.hh     |  1 +
 experimental/learn_descriptors/image_point.hh     |  5 +++--
 .../learn_descriptors/learn_descriptors.cc        |  3 ++-
 .../learn_descriptors/spatial_test_scene_cube.hh  |  1 -
 .../learn_descriptors/structure_from_motion.cc    |  1 +
 .../structure_from_motion_example.cc              |  2 +-
 10 files changed, 10 insertions(+), 41 deletions(-)

diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index 50d52e5f5..61ef35e18 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -1,15 +1,11 @@
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 
 #include <cstddef>
-#include <cstdlib>
-#include <exception>
 #include <fstream>
 #include <iostream>
-#include <limits>
 #include <memory>
 #include <sstream>
 #include <string>
-#include <type_traits>
 #include <unordered_map>
 #include <utility>
 
@@ -72,10 +68,7 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                 std::stod(
                     parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Z)]));
             img_pt.AS_w_from_gnss_cam = liegroups::SE3(R_gps_cam_from_AS_w, t_gps_cam_from_AS_w);
-        }  // else {
-        //     std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id
-        //               << std::endl;
-        // }
+        }
         if (vio_poses_time_map.find(time_key) != vio_poses_time_map.end()) {
             const std::vector<std::string>& parsed_line_vio = vio_poses_time_map.at(time_key);
             Eigen::Vector3d t_AS_w_from_vio_cam(
@@ -90,10 +83,7 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                 std::stod(
                     parsed_line_vio[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Z)]));
             img_pt.AS_w_from_vio_cam = liegroups::SE3(R_AS_w_from_vio_cam, t_AS_w_from_vio_cam);
-        }  // else {
-        //     std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id
-        //               << std::endl;
-        // }
+        }
 
         img_pt_vector_.push_back(img_pt);
         id++;
diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc
index 65dac2893..b41e71113 100644
--- a/experimental/learn_descriptors/four_seasons_parser_detail.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc
@@ -17,12 +17,6 @@
 #include "nmea/sentence.hpp"
 
 namespace robot::experimental::learn_descriptors::detail::four_seasons_parser {
-
-// template <typename T>
-// size_t abs_diff(const T& a, const T& b) {
-//     return a > b ? a - b : b - a;
-// }
-
 namespace txt_parser_help {
 
 std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim) {
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index ae9036342..4e52440d2 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -58,7 +58,6 @@ int main(int argc, const char** argv) {
     (void)NUM_IMAGES;
     auto start = std::chrono::high_resolution_clock::now();
     for (size_t i = 0; i < NUM_IMAGES; i += 1) {
-        // for (const size_t i : std::vector<size_t>{74, 82, 92}) {
         const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         frontend.add_image(lrn_desc::ImageAndPoint{
             parser.load_image(i), std::make_shared<lrn_desc::ImagePointFourSeasons>(img_pt)});
@@ -72,13 +71,6 @@ int main(int argc, const char** argv) {
         std::chrono::high_resolution_clock::now() - start);
     std::cout << "done populating frames! took " << duration.count() << " milliseconds"
               << std::endl;
-    // frontend.match_frames_and_build_tracks();
-    // start = std::chrono::high_resolution_clock::now();
-    // duration = std::chrono::duration_cast<std::chrono::milliseconds>(
-    //     std::chrono::high_resolution_clock::now() - start);
-    // std::cout << "done matching and building tracks! took " << duration.count() << "
-    // milliseconds"
-    //           << std::endl;
 
     ROBOT_CHECK(parser.num_images() != 0);
 
@@ -93,9 +85,7 @@ int main(int argc, const char** argv) {
     std::optional<Eigen::Vector3d> first_gps_to_cam;
     std::optional<double> altitude_gps_from_gnss_cam;
     std::vector<double> gps_ns_delta_from_shutter;
-    // double ate_cam = 0.0, ate_gps = 0.0;
     for (size_t i = 0; i < NUM_IMAGES; i += 1) {
-        // for (const size_t i : std::vector<size_t>{74, 82, 92}) {
         const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         std::cout << img_pt.to_string() << std::endl;
         if (!img_pt.AS_w_from_gnss_cam) {
@@ -109,12 +99,8 @@ int main(int argc, const char** argv) {
             frontend.frames()[i]->world_from_cam_groundtruth_->matrix());
         ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()),
                     frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix());
-        // Eigen::Isometry3d frontend_w_from_cam_groundtruth(*img_pt.world_from_cam_ground_truth());
         viz_frames.emplace_back(frontend_w_from_cam_groundtruth,
                                 "x_frontend_ref_" + std::to_string(i));
-        // std::cout << "frontend_w_from_cam_groundtruth: " <<
-        // frontend_w_from_cam_groundtruth.matrix()
-        //           << std::endl;
         if (img_pt.gps_gcs) {
             if (img_pt.gps_gcs->seq > img_pt.seq) {
                 gps_ns_delta_from_shutter.push_back(
@@ -166,7 +152,6 @@ int main(int argc, const char** argv) {
             std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl;
         }
     }
-    // std::cout << "ate_cam: " << ate_cam << "\tate_gps: " << ate_gps << std::endl;
 
     const double sum =
         std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0);
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
index 3a3b6af92..32af22642 100644
--- a/experimental/learn_descriptors/frontend.cc
+++ b/experimental/learn_descriptors/frontend.cc
@@ -129,9 +129,6 @@ double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T
     // 3. Recover angle (in radians)
     return std::acos(cos_theta);
 }
-
-// const std::vector<double> get_absolute_trajectory_error(const std::vector<Eigen::Vector3d> &)
-// {}
 }  // namespace detail_sfm
 
 namespace robot::experimental::learn_descriptors {
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
index 1a96aa6bb..6204af219 100644
--- a/experimental/learn_descriptors/frontend_definitions.hh
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -2,6 +2,7 @@
 
 #include <ostream>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include "experimental/learn_descriptors/structure_from_motion_types.hh"
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index b9fae431c..6e9273eed 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -17,8 +17,9 @@ namespace robot::experimental::learn_descriptors {
 struct ImagePoint {
     virtual ~ImagePoint() = default;
 
-    size_t id;
-    size_t seq;  // for time. TODO: make a time struct
+    size_t id;   // idx for DB
+    size_t seq;  // time in nanoseconds of image capture (also could be name of image; this is also
+                 // time since unix epoch)
     std::shared_ptr<CameraCalibrationFisheye> K;
     void set_cam_in_world(const Eigen::Vector3d& cam_in_world) { cam_in_world_ = cam_in_world; };
     virtual std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const {
diff --git a/experimental/learn_descriptors/learn_descriptors.cc b/experimental/learn_descriptors/learn_descriptors.cc
index 94ba9833a..ee8ac0055 100644
--- a/experimental/learn_descriptors/learn_descriptors.cc
+++ b/experimental/learn_descriptors/learn_descriptors.cc
@@ -1,6 +1,7 @@
 #include "experimental/learn_descriptors/learn_descriptors.hh"
 
-#include <iostream>
+#include <ostream>
+#include <string>
 
 namespace robot::experimental::learn_descriptors {
 void hello_world(const std::string &in) { std::cout << in << std::endl; }
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh
index 1dd08b190..0d74262ac 100644
--- a/experimental/learn_descriptors/spatial_test_scene_cube.hh
+++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh
@@ -1,5 +1,4 @@
 #pragma once
-#include <iostream>
 
 #include "Eigen/Core"
 #include "experimental/learn_descriptors/spatial_test_scene.hh"
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
index 8afcba047..ae46c2966 100644
--- a/experimental/learn_descriptors/structure_from_motion.cc
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -2,6 +2,7 @@
 
 #include <filesystem>
 #include <memory>
+#include <ostream>
 #include <sstream>
 #include <stdexcept>
 
diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc
index a6989ca95..2dce0047e 100644
--- a/experimental/learn_descriptors/structure_from_motion_example.cc
+++ b/experimental/learn_descriptors/structure_from_motion_example.cc
@@ -1,7 +1,7 @@
 #include <cstddef>
 #include <functional>
-#include <iostream>
 #include <memory>
+#include <ostream>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"

From 02bf477af97a892a5ec604196f8e8cbc90367076 Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 24 Jul 2025 14:49:04 -0400
Subject: [PATCH 44/45] fix include

---
 experimental/learn_descriptors/learn_descriptors.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experimental/learn_descriptors/learn_descriptors.cc b/experimental/learn_descriptors/learn_descriptors.cc
index ee8ac0055..f2938510f 100644
--- a/experimental/learn_descriptors/learn_descriptors.cc
+++ b/experimental/learn_descriptors/learn_descriptors.cc
@@ -1,6 +1,6 @@
 #include "experimental/learn_descriptors/learn_descriptors.hh"
 
-#include <ostream>
+#include <iostream>
 #include <string>
 
 namespace robot::experimental::learn_descriptors {

From f5fbfc0893933d0318117460564b20b6ab566afb Mon Sep 17 00:00:00 2001
From: Nicolaniello Buono <mason.cod02@gmail.com>
Date: Thu, 24 Jul 2025 15:37:24 -0400
Subject: [PATCH 45/45] fixed exception handle type

---
 common/geometry/camera.cc | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
index 38733d6f4..46ddb340a 100644
--- a/common/geometry/camera.cc
+++ b/common/geometry/camera.cc
@@ -16,7 +16,6 @@ Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic) {
 
 Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point) {
     return K * p_cam_point;
-    // return (K * p_cam_point).head<2>();
 }
 
 Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog) {
@@ -41,24 +40,21 @@ std::optional<Eigen::Isometry3d> estimate_cam0_from_cam1(const std::vector<cv::K
         pts2.push_back(kpts1[match.trainIdx].pt);
     }
     ROBOT_CHECK(pts1.size() == pts2.size() && pts1.size() >= 5);
-    // std::cout << "heartbeat 3" << std::endl;
     try {
         cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0);
         cv::Mat R_c1_c0, t_c1_c0;
-        // std::cout << "heartbest 4" << std::endl;
         ROBOT_CHECK(!E.empty(), "Essential matrix is empty.");
         // TOOD: handle multiple returned candidate E matrices better (they are stacked on top of
         // each other in E)
         if (E.rows > 3) {
             E = E.rowRange(0, 3);
         }
-        // std::cout << E << std::endl;
         cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0);
         result.linear() = cv_to_eigen_mat(R_c1_c0);
         result.translation() = cv_to_eigen_mat(t_c1_c0);
         result = result.inverse();
         return result;
-    } catch (const std::exception e) {
+    } catch (const std::exception &e) {
         std::cerr << "Failed to estimate pose up to scale cam0_from_cam1.\n"
                   << e.what() << std::endl;
         return std::nullopt;