diff --git a/common/geometry/BUILD b/common/geometry/BUILD
index 0b95ff7df..2c750e8b0 100644
--- a/common/geometry/BUILD
+++ b/common/geometry/BUILD
@@ -38,4 +38,31 @@ cc_test(
     ":translate_types",
     "@com_google_googletest//:gtest_main"
   ]
-)
\ No newline at end of file
+)
+
+
+cc_library(
+  name = "camera",
+  hdrs = ["camera.hh"],
+  srcs = ["camera.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@eigen",     
+    "@opencv",
+    ":translate_types",
+    "@gtsam",
+    "//common:check"
+  ]
+)
+
+cc_test(
+  name = "camera_test", 
+  srcs = ["camera_test.cc"],
+  visibility = ["//visibility:public"],
+  deps = [
+    ":camera",
+    "@com_google_googletest//:gtest_main",
+    "//visualization/opencv:opencv_viz",
+    ":translate_types"
+  ]
+)
diff --git a/common/geometry/camera.cc b/common/geometry/camera.cc
new file mode 100644
index 000000000..46ddb340a
--- /dev/null
+++ b/common/geometry/camera.cc
@@ -0,0 +1,63 @@
+#include "common/geometry/camera.hh"
+
+#include <exception>
+#include <optional>
+
+#include "common/check.hh"
+#include "common/geometry/translate_types.hh"
+
+namespace robot::geometry {
+Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic) {
+    Eigen::Matrix3d K;
+    K << intrinsic.fx(), intrinsic.skew(), intrinsic.px(), 0, intrinsic.fy(), intrinsic.py(), 0, 0,
+        1;
+    return K;
+}
+
+Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point) {
+    return K * p_cam_point;
+}
+
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog) {
+    return K.inverse() * pixel_homog;
+}
+
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
+                          const double depth) {
+    return depth * K.inverse() * Eigen::Vector3d(pixel_inhomog(0), pixel_inhomog(1), 1.);
+}
+
+std::optional<Eigen::Isometry3d> estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                                         const std::vector<cv::KeyPoint> &kpts1,
+                                                         const std::vector<cv::DMatch> &matches,
+                                                         const cv::Mat &K) {
+    ROBOT_CHECK(matches.size() >= 5 && kpts1.size() >= 5 && kpts0.size() >= 5);
+    Eigen::Isometry3d result;
+    std::vector<cv::Point2f> pts1;
+    std::vector<cv::Point2f> pts2;
+    for (const cv::DMatch &match : matches) {
+        pts1.push_back(kpts0[match.queryIdx].pt);
+        pts2.push_back(kpts1[match.trainIdx].pt);
+    }
+    ROBOT_CHECK(pts1.size() == pts2.size() && pts1.size() >= 5);
+    try {
+        cv::Mat E = cv::findEssentialMat(pts1, pts2, K, cv::RANSAC, 0.999, 1.0);
+        cv::Mat R_c1_c0, t_c1_c0;
+        ROBOT_CHECK(!E.empty(), "Essential matrix is empty.");
+        // TOOD: handle multiple returned candidate E matrices better (they are stacked on top of
+        // each other in E)
+        if (E.rows > 3) {
+            E = E.rowRange(0, 3);
+        }
+        cv::recoverPose(E, pts1, pts2, K, R_c1_c0, t_c1_c0);
+        result.linear() = cv_to_eigen_mat(R_c1_c0);
+        result.translation() = cv_to_eigen_mat(t_c1_c0);
+        result = result.inverse();
+        return result;
+    } catch (const std::exception &e) {
+        std::cerr << "Failed to estimate pose up to scale cam0_from_cam1.\n"
+                  << e.what() << std::endl;
+        return std::nullopt;
+    }
+}
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera.hh b/common/geometry/camera.hh
new file mode 100644
index 000000000..8048bac87
--- /dev/null
+++ b/common/geometry/camera.hh
@@ -0,0 +1,30 @@
+#pragma once
+#include <optional>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::geometry {
+Eigen::Matrix3d get_intrinsic_matrix(const gtsam::Cal3_S2 &intrinsic);
+
+/// @return Homogeneous pixel projection coordinates
+Eigen::Vector3d project(const Eigen::Matrix3d &K, const Eigen::Vector3d &p_cam_point);
+/**
+ * @param pixel_homog homogeneous pixel cordinates 3x1
+ * @return 3d coordinate in camera frame
+ */
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector3d &pixel_homog);
+/**
+ * @param pixel_inhomog inhomogeneous pixel cordinates 2x1
+ * @return 3d coordinate in camera frame
+ */
+Eigen::Vector3d deproject(const Eigen::Matrix3d &K, const Eigen::Vector2d &pixel_inhomog,
+                          const double depth);
+
+std::optional<Eigen::Isometry3d> estimate_cam0_from_cam1(const std::vector<cv::KeyPoint> &kpts0,
+                                                         const std::vector<cv::KeyPoint> &kpts1,
+                                                         const std::vector<cv::DMatch> &matches,
+                                                         const cv::Mat &K);
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/geometry/camera_test.cc b/common/geometry/camera_test.cc
new file mode 100644
index 000000000..04c7411f2
--- /dev/null
+++ b/common/geometry/camera_test.cc
@@ -0,0 +1,129 @@
+#include "common/geometry/camera.hh"
+
+#include <optional>
+
+#include "common/geometry/translate_types.hh"
+#include "gtest/gtest.h"
+#include "visualization/opencv/opencv_viz.hh"
+
+class CameraTestHelper {
+   public:
+    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
+        return pixel[0] > 0 && pixel[0] < img_width && pixel[1] > 0 && pixel[1] < img_height;
+    }
+};
+
+namespace robot::geometry {
+TEST(CameraTest, test_get_intrinsics) {
+    const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5;
+    const gtsam::Cal3_S2 K(fx, fy, s, px, py);
+    const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K);
+
+    EXPECT_FLOAT_EQ(fx, intrinsic_mat(0, 0));
+    EXPECT_FLOAT_EQ(fy, intrinsic_mat(1, 1));
+    EXPECT_FLOAT_EQ(s, intrinsic_mat(0, 1));
+    EXPECT_FLOAT_EQ(px, intrinsic_mat(0, 2));
+    EXPECT_FLOAT_EQ(py, intrinsic_mat(1, 2));
+}
+
+TEST(CameraTest, test_projection) {
+    const double fx = 1., fy = 1., s = 0, px = 0.5, py = 0.5;
+    const gtsam::Cal3_S2 K(fx, fy, s, px, py);
+    const Eigen::Matrix3d intrinsic_mat = get_intrinsic_matrix(K);
+
+    const Eigen::Vector3d p_cam_lmk1(0.1, 0.15, 1.2);
+    const Eigen::Vector3d pxl_homog_cam_lmk1 = project(intrinsic_mat, p_cam_lmk1);
+    const Eigen::Vector2d pxl_inhomog_cam_lmk1 =
+        pxl_homog_cam_lmk1.head<2>() / pxl_homog_cam_lmk1(2);
+    const Eigen::Vector3d p_cam_lmk1_deproject = deproject(intrinsic_mat, pxl_homog_cam_lmk1);
+    const Eigen::Vector3d p_cam_lmk1_deproject_inhomog =
+        deproject(intrinsic_mat, pxl_inhomog_cam_lmk1, p_cam_lmk1(2));
+
+    EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject));
+    EXPECT_TRUE(p_cam_lmk1.isApprox(p_cam_lmk1_deproject_inhomog));
+}
+
+TEST(CameraTest, test_estimate_pose) {
+    std::vector<Eigen::Vector3d> p_W_cube;
+    float cube_size = 1.0f;
+    p_W_cube.push_back(Eigen::Vector3d(0, 0, 0));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, 0));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, 0));
+    p_W_cube.push_back(Eigen::Vector3d(0, cube_size, 0));
+    p_W_cube.push_back(Eigen::Vector3d(0, 0, cube_size));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, 0, cube_size));
+    p_W_cube.push_back(Eigen::Vector3d(cube_size, cube_size, cube_size));
+    p_W_cube.push_back(Eigen::Vector3d(0, cube_size, cube_size));
+
+    Eigen::Matrix3d R_new_points(
+        Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+
+    // const int initial_size = p_W_cube.size();
+    const Eigen::Vector3d p_world_cube_center(cube_size / 2, cube_size / 2, cube_size / 2);
+    for (const Eigen::Vector3d &point_W_cube : p_W_cube) {
+        p_W_cube.emplace_back(R_new_points * (point_W_cube - p_world_cube_center) +
+                              p_world_cube_center);
+    }
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    cv::Mat K = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
+
+    Eigen::Matrix3d K_eig = cv_to_eigen_mat(K);
+
+    Eigen::Matrix3d R_world_from_cam0(
+        Eigen::AngleAxisd(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxisd(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    Eigen::Isometry3d world_from_cam0 = Eigen::Isometry3d::Identity();
+    world_from_cam0.linear() = R_world_from_cam0;
+    world_from_cam0.translation() = Eigen::Vector3d(4, cube_size / 2, cube_size / 2);
+
+    Eigen::Matrix3d R_world_45deg(
+        Eigen::AngleAxisd(M_PI / 4, Eigen::Vector3d(0, 0, 1)).toRotationMatrix());
+    Eigen::Isometry3d world_from_cam1;
+    world_from_cam1.linear() = R_world_45deg * world_from_cam0.linear();
+    world_from_cam1.translation() =
+        R_world_45deg * (world_from_cam0.translation() - p_world_cube_center) + p_world_cube_center;
+
+    std::vector<cv::KeyPoint> kpts0;
+    std::vector<cv::KeyPoint> kpts1;
+    std::vector<cv::DMatch> matches;
+
+    for (const Eigen::Vector3d &point_W_cube : p_W_cube) {
+        Eigen::Vector3d p_cube_points_in_cam0 =
+            (world_from_cam0.inverse() *
+             Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.))
+                .head<3>();
+        Eigen::Vector3d p_cube_points_in_cam1 =
+            (world_from_cam1.inverse() *
+             Eigen::Vector4d(point_W_cube(0), point_W_cube(1), point_W_cube(2), 1.))
+                .head<3>();
+        Eigen::Vector3d pxl_c0_pcube_homog = K_eig * p_cube_points_in_cam0;
+        Eigen::Vector3d pxl_c1_pcube_homog = K_eig * p_cube_points_in_cam1;
+        Eigen::Vector2d pxl_c0_pcube = pxl_c0_pcube_homog.head<2>() / pxl_c0_pcube_homog(2);
+        Eigen::Vector2d pxl_c1_pcube = pxl_c1_pcube_homog.head<2>() / pxl_c1_pcube_homog(2);
+        if (CameraTestHelper::pixel_in_range(pxl_c0_pcube, img_width, img_height) &&
+            CameraTestHelper::pixel_in_range(pxl_c1_pcube, img_width, img_height)) {
+            kpts0.emplace_back(pxl_c0_pcube[0], pxl_c0_pcube[1], 3);
+            kpts1.emplace_back(pxl_c1_pcube[0], pxl_c1_pcube[1], 3);
+            matches.emplace_back(kpts0.size() - 1, kpts1.size() - 1, 0);
+        }
+    }
+
+    std::optional<Eigen::Isometry3d> cam0_from_cam1_estimate =
+        estimate_cam0_from_cam1(kpts0, kpts1, matches, K);
+    if (cam0_from_cam1_estimate) {
+        Eigen::Isometry3d cam0_from_cam1 = world_from_cam0.inverse() * world_from_cam1;
+        cam0_from_cam1.translation() /= cam0_from_cam1.translation().norm();
+        EXPECT_TRUE(cam0_from_cam1_estimate->translation().isApprox(cam0_from_cam1.translation(),
+                                                                    0.000001));
+        EXPECT_TRUE(cam0_from_cam1_estimate->linear().isApprox(cam0_from_cam1.linear(), 0.001));
+    }
+}
+}  // namespace robot::geometry
\ No newline at end of file
diff --git a/common/gps/BUILD b/common/gps/BUILD
index 43907c146..4c1877509 100644
--- a/common/gps/BUILD
+++ b/common/gps/BUILD
@@ -19,6 +19,29 @@ cc_test(
   ]
 )
 
+cc_library(
+  name = "frame_translation",
+  srcs = ["frame_translation.cc"],
+  hdrs = ["frame_translation.hh"],
+  visibility = ["//visibility:public"],
+  deps = [
+    "@com_google_googletest//:gtest_main",
+    "@geographiclib//:geographiclib",
+    "@eigen"
+  ]
+)
+
+cc_test(
+  name = "frame_translation_test",
+  srcs = ["frame_translation_test.cc"],
+  deps = [
+    "@com_google_googletest//:gtest_main",
+    ":frame_translation",
+    "@eigen",
+    "//common:check"
+  ]
+)
+
 py_library(
   name = "web_mercator",
   srcs = ["web_mercator.py"],
diff --git a/common/gps/frame_translation.cc b/common/gps/frame_translation.cc
new file mode 100644
index 000000000..28c0875ea
--- /dev/null
+++ b/common/gps/frame_translation.cc
@@ -0,0 +1,32 @@
+#include "common/gps/frame_translation.hh"
+
+#include "GeographicLib/Constants.hpp"
+#include "GeographicLib/Geocentric.hpp"
+#include "GeographicLib/Geodesic.hpp"
+#include "GeographicLib/LocalCartesian.hpp"
+
+namespace robot::gps {
+Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF) {
+    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
+
+    double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z();
+
+    double lat_deg, lon_deg, alt_m;
+    earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m);
+
+    return Eigen::Vector3d(lat_deg, lon_deg, alt_m);
+}
+
+Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate) {
+    double lat_deg = gcs_coordinate.x();
+    double lon_deg = gcs_coordinate.y();
+    double alt_m = gcs_coordinate.z();
+
+    double x, y, z;
+
+    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
+    earth.Forward(lat_deg, lon_deg, alt_m, x, y, z);
+
+    return Eigen::Vector3d(x, y, z);
+}
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/common/gps/frame_translation.hh b/common/gps/frame_translation.hh
new file mode 100644
index 000000000..60d03c8e9
--- /dev/null
+++ b/common/gps/frame_translation.hh
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "Eigen/Core"
+
+namespace robot::gps {
+Eigen::Vector3d lla_from_ecef(const Eigen::Vector3d& t_place_from_ECEF);
+Eigen::Vector3d ecef_from_lla(const Eigen::Vector3d& gcs_coordinate);
+}  // namespace robot::gps
diff --git a/common/gps/frame_translation_test.cc b/common/gps/frame_translation_test.cc
new file mode 100644
index 000000000..7e6afa47c
--- /dev/null
+++ b/common/gps/frame_translation_test.cc
@@ -0,0 +1,29 @@
+#include "common/gps/frame_translation.hh"
+
+#include "Eigen/Core"
+#include "common/check.hh"
+#include "gtest/gtest.h"
+
+namespace robot::gps {
+TEST(frame_translation_test, lla_from_ecef) {
+    const Eigen::Vector3d lla_easthampton_ma(42.2608, -72.6634, 51.82);  //
+    const Eigen::Vector3d ecef_easthampton_ma = ecef_from_lla(lla_easthampton_ma);
+
+    const Eigen::Vector3d ecef_expected_easthampton_ma(1408753.87, -4512832.60, 4267122.34);
+
+    constexpr double tol = 1e-9;
+    ROBOT_CHECK(ecef_easthampton_ma.isApprox(ecef_expected_easthampton_ma, tol),
+                ecef_easthampton_ma, ecef_expected_easthampton_ma);
+}
+
+TEST(frame_translation_test, ecef_from_lla) {
+    const Eigen::Vector3d ecef_palace_versailles(4205987.87, 155694.68, 4776399.15);
+    const Eigen::Vector3d lla_palace_versailles = lla_from_ecef(ecef_palace_versailles);
+
+    const Eigen::Vector3d expected_lla_palace_versailles(48.8049440, 2.1199720, 131.985);
+
+    constexpr double tol = 1e-4;
+    ROBOT_CHECK(lla_palace_versailles.isApprox(expected_lla_palace_versailles, tol),
+                lla_palace_versailles, expected_lla_palace_versailles);
+}
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/common/gps/geographiclib_test.cc b/common/gps/geographiclib_test.cc
index 5ef8a1fb3..19bc98a2b 100644
--- a/common/gps/geographiclib_test.cc
+++ b/common/gps/geographiclib_test.cc
@@ -8,7 +8,7 @@
 #include "GeographicLib/LocalCartesian.hpp"
 #include "gtest/gtest.h"
 
-namespace robot::experimental::learn_descriptors {
+namespace robot::gps {
 TEST(GeographiclibTest, airport_dist) {
     const GeographicLib::Geodesic& geod = GeographicLib::Geodesic::WGS84();
     // Distance from JFK to LHR
@@ -58,4 +58,4 @@ TEST(GeographiclibTest, local_cartestian) {
         EXPECT_NEAR(calais_height_meters, 264.915, 1e-3);
     }
 }
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/common/gps/nmea_test.cc b/common/gps/nmea_test.cc
index c1be0aca4..25b38b09f 100644
--- a/common/gps/nmea_test.cc
+++ b/common/gps/nmea_test.cc
@@ -2,7 +2,7 @@
 #include "nmea/message/gga.hpp"
 #include "nmea/sentence.hpp"
 
-namespace robot::experimental::learn_descriptors {
+namespace robot::gps {
 TEST(NmeaTest, parse) {
     // Read an NMEA string from your serial port
     std::string nmea_string =
@@ -44,4 +44,4 @@ TEST(NmeaTest, generate) {
     EXPECT_EQ(nmea_sentence.get_field(1), "abc");
     EXPECT_EQ(nmea_sentence.get_field(2), "");
 }
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
+}  // namespace robot::gps
\ No newline at end of file
diff --git a/experimental/learn_descriptors/BUILD b/experimental/learn_descriptors/BUILD
index e80062e51..3f1229b82 100644
--- a/experimental/learn_descriptors/BUILD
+++ b/experimental/learn_descriptors/BUILD
@@ -1,134 +1,358 @@
-package(features=["warning_compile_flags"])
+package(features = ["warning_compile_flags"])
 
 cc_library(
-  name = "learn_descriptors",
-  hdrs = ["learn_descriptors.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["learn_descriptors.cc"],
+    name = "learn_descriptors",
+    srcs = ["learn_descriptors.cc"],
+    hdrs = ["learn_descriptors.hh"],
+    visibility = ["//visibility:public"],
 )
 
 cc_test(
-  name = "learn_descriptors_test",
-  srcs = ["learn_descriptors_test.cc"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":learn_descriptors",
-  ]
+    name = "learn_descriptors_test",
+    srcs = ["learn_descriptors_test.cc"],
+    deps = [
+        ":learn_descriptors",
+        "@com_google_googletest//:gtest_main",
+    ],
 )
 
 cc_library(
-  name = "visual_odometry",
-  hdrs = ["visual_odometry.hh"],
-  visibility = ["//visibility:public"],
-  srcs = ["visual_odometry.cc"],
-  deps = [
-    "@opencv//:opencv"
-  ]
+    name = "structure_from_motion",
+    srcs = ["structure_from_motion.cc"],
+    hdrs = ["structure_from_motion.hh"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":backend",
+        ":frontend",
+        ":frame",
+        ":image_point",
+        "//common:check",
+        "//visualization/opencv:opencv_viz",
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+    ],
 )
 
 cc_library(
-  name = "symphony_lake_parser",
-  hdrs = ["symphony_lake_parser.hh"],
-  copts = ["-Wno-unused-parameter"],
-  visibility = ["//visibility:public"],
-  srcs = ["symphony_lake_parser.cc"],
-  deps = [
-    "@symphony_lake_parser",
-    "//common:check"
-  ]
+    name = "symphony_lake_parser",
+    srcs = ["symphony_lake_parser.cc"],
+    hdrs = ["symphony_lake_parser.hh"],
+    copts = [
+        "-Wno-unused-parameter",
+    ],
+    data = ["@symphony_lake_snippet//:files"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//common:check",
+        "@eigen",
+        "@symphony_lake_parser",
+    ],
 )
 
 cc_test(
-  name = "visual_odometry_test",
-  srcs = ["visual_odometry_test.cc"],  
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":visual_odometry"
-  ]
+    name = "symphony_lake_parser_test",
+    srcs = ["symphony_lake_parser_test.cc"],
+    copts = ["-Wno-unused-parameter"],
+    data = ["@symphony_lake_snippet//:files"],
+    deps = [
+        ":symphony_lake_parser",
+        "//visualization/opencv:opencv_viz",
+        "@com_google_googletest//:gtest_main",
+    ],
 )
 
 cc_test(
-  name = "symphony_lake_parser_test",
-  srcs = ["symphony_lake_parser_test.cc"],
-  copts = ["-Wno-unused-parameter"],
-  data = ["@symphony_lake_snippet//:files"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    ":symphony_lake_parser",
-  ]
+    name = "gtsam_test",
+    srcs = ["gtsam_test.cc"],
+    deps = [
+        "@com_google_googletest//:gtest_main",
+        "@gtsam",
+    ],
 )
 
-cc_test(
-  name = "gtsam_test",
-  srcs = ["gtsam_test.cc"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    "@gtsam//:gtsam",
-  ]
+cc_library(
+    name = "image_point",
+    hdrs = ["image_point.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//common/liegroups:se3",
+        "@eigen",
+    ],
 )
 
 cc_library(
-    name = "image_point", 
-    hdrs = ["image_point.hh"],
+    name = "four_seasons_transforms",
+    hdrs = ["four_seasons_transforms.hh"],
+    srcs = ["four_seasons_transforms.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":four_seasons_parser_detail",
+        "//common/liegroups:se3",
+        "//common:check",
+        "@eigen",
+    ],
+)
+
+cc_library(
+    name = "image_point_four_seasons",
+    hdrs = ["image_point_four_seasons.hh"],
+    srcs = ["image_point_four_seasons.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":image_point",
+        ":gps_data",
+        ":four_seasons_transforms",
+        "//common/liegroups:se3",
+        "//common/gps:frame_translation",
+        "//common:check",
+        "@eigen",
+        "@opencv",
+        "@geographiclib",
+    ]
+)
+
+cc_library(
+    name = "gps_data",
+    hdrs = ["gps_data.hh"],
+    visibility = ["//visibility:public"],
     deps = [
-      "//common/liegroups:se3",
-      "@opencv//:opencv",
-      "@eigen"
+        "@eigen",
     ]
 )
 
 cc_library(
-    name = "four_seasons_parser", 
+    name = "four_seasons_parser",
     hdrs = ["four_seasons_parser.hh"],
     srcs = ["four_seasons_parser.cc"],
+    visibility = ["//visibility:public"],
     deps = [
-      "@eigen",
-      "@opencv//:opencv",
-      "//common/liegroups:se3",
-      "//common/time:robot_time",
-      "@com_google_absl//absl/strings:str_format",
-      ":image_point",
-      "@nmea",
-      "@geographiclib",
-      "//common:check"
+
+        ":image_point_four_seasons",
+        ":four_seasons_transforms",
+        ":four_seasons_parser_detail",
+        "//common:check",
+        "//common/liegroups:se3",
+        "@eigen",
+        "@opencv",
+        ":camera_calibration"
+    ],
+)
+
+cc_library(
+    name = "four_seasons_parser_detail",
+    hdrs = ["four_seasons_parser_detail.hh"],
+    srcs = ["four_seasons_parser_detail.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":camera_calibration",
+        ":gps_data",
+        "@com_google_absl//absl/strings:str_format",
+        "@nmea",
     ]
 )
 
 cc_binary(
-  name = "four_seasons_parser_example",
-  srcs = ["four_seasons_parser_example.cc"],
-  deps = [
-    ":four_seasons_parser",
-    "@cxxopts//:cxxopts",
-    "//common:check",
-    "@geographiclib"
-  ]
+    name = "four_seasons_parser_example",
+    srcs = ["four_seasons_parser_example.cc"],
+    deps = [
+        ":four_seasons_parser",
+        ":gps_data",
+        "//common:check",
+        "@cxxopts",
+    ],
 )
 
 cc_binary(
-  name = "four_seasons_parser_example_viz",
-  srcs = ["four_seasons_parser_example_viz.cc"],
-  deps = [
-    ":four_seasons_parser",
-    "@cxxopts//:cxxopts",
-    "//common:check",
-    "//visualization/opencv:opencv_viz",
-    "@eigen",    
-  ]
+    name = "four_seasons_parser_example_viz",
+    srcs = ["four_seasons_parser_example_viz.cc"],
+    deps = [
+        ":four_seasons_parser",
+        ":frontend",
+        "//common:check",
+        "//common/gps:frame_translation",
+        "//visualization/opencv:opencv_viz",
+        "@cxxopts",
+        "@eigen",
+    ],
 )
 
 cc_test(
-  name = "four_seasons_parser_test",
-  srcs = ["four_seasons_parser_test.cc"],
-  data = ["@four_seasons_snippet//:files"],
-  deps = [
-    "@com_google_googletest//:gtest_main",
-    "//common/liegroups:se3",
-    "@com_google_absl//absl/strings:str_format",
-    "@eigen",
-    ":four_seasons_parser",
-    "@nmea",
-    "@geographiclib",
-    "//common:check"
-  ]
+    name = "four_seasons_parser_test",
+    srcs = ["four_seasons_parser_test.cc"],
+    data = ["@four_seasons_snippet//:files"],
+    deps = [
+        ":four_seasons_parser",
+        ":four_seasons_parser_detail",
+        ":image_point_four_seasons",
+        "//common:check",
+        "//common/gps:frame_translation",
+        "//common/liegroups:se3",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_googletest//:gtest_main",
+        "@eigen",
+        "@geographiclib",
+        "@nmea",
+        ":camera_calibration"
+    ],
 )
+
+cc_library(
+    name = "frontend_definitions",
+    hdrs = ["frontend_definitions.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":structure_from_motion_types",
+        "@gtsam",
+    ],
+)
+
+cc_library(
+    name = "structure_from_motion_types",
+    hdrs = ["structure_from_motion_types.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@gtsam",
+        "@opencv",
+    ],
+)
+
+cc_library(
+    name = "frame",
+    srcs = ["frame.cc"],
+    hdrs = ["frame.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":frontend_definitions",
+        "//common:check",
+        "@gtsam",
+        "@opencv",
+        "@eigen"
+    ],
+)
+
+cc_library(
+    name = "spatial_test_scene",
+    srcs = ["spatial_test_scene.cc"],
+    hdrs = ["spatial_test_scene.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@eigen",
+        "@gtsam",
+        "@opencv",
+        "//common:check"
+    ],
+)
+
+cc_library(
+    name = "spatial_test_scene_cube",
+    hdrs = ["spatial_test_scene_cube.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":spatial_test_scene",
+        "@eigen",
+    ],
+)
+
+cc_binary(
+    name = "spatial_test_scene_cube_example",
+    srcs = ["spatial_test_scene_cube_example.cc"],
+    deps = [
+        ":spatial_test_scene_cube",
+        "//visualization/opencv:opencv_viz",
+        "//common:check",
+        "@eigen",
+        "@gtsam"
+    ],
+)
+
+cc_library(
+    name = "frontend",
+    srcs = ["frontend.cc"],
+    hdrs = ["frontend.hh"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":frame",
+        ":frontend_definitions",
+        ":image_point_four_seasons",
+        ":structure_from_motion_types",
+        "//common:check",
+        "//common/geometry:camera",
+        "@opencv",
+        "@gtsam",
+        "@eigen",
+    ],
+)
+
+cc_test(
+    name = "frontend_test",
+    srcs = ["frontend_test.cc"],
+    deps = [
+        ":frame",
+        ":frontend",
+        ":image_point",
+        "@com_google_googletest//:gtest_main",
+        "@opencv",
+        "@eigen"
+    ],
+)
+
+cc_library(
+    name = "backend",
+    srcs = ["backend.cc"],
+    hdrs = ["backend.hh"],
+    copts = [
+        "-Wno-error=unused-parameter",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":structure_from_motion_types",
+        ":frame",
+        "//common:check",
+        "//common/geometry:camera",
+        "@boost//:smart_ptr",
+        "@gtsam",
+        "@opencv",
+    ],
+)
+
+cc_library(
+    name = "camera_calibration",
+    hdrs = ["camera_calibration.hh"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@opencv"
+    ]
+)
+
+cc_binary(
+    name = "structure_from_motion_example",
+    srcs = ["structure_from_motion_example.cc"],
+    copts = [
+        "-Wno-error=unused-parameter",
+        "-Wno-error=unused-function",
+    ],
+    deps = [
+        ":structure_from_motion",
+        ":frontend",
+        ":backend",
+        ":four_seasons_parser",
+        ":frame",
+        ":image_point",
+        ":image_point_four_seasons",
+        ":spatial_test_scene_cube",
+        ":structure_from_motion_types",
+        ":camera_calibration",
+        "//visualization/opencv:opencv_viz",
+        "@boost//:smart_ptr",
+        "@cxxopts",
+        "@eigen",
+        "@opencv",
+    ],
+)
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.cc b/experimental/learn_descriptors/backend.cc
new file mode 100644
index 000000000..37a9c7a4d
--- /dev/null
+++ b/experimental/learn_descriptors/backend.cc
@@ -0,0 +1,342 @@
+#include "experimental/learn_descriptors/backend.hh"
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <optional>
+#include <queue>
+#include <unordered_set>
+
+#include "boost/make_shared.hpp"
+#include "common/check.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Point2.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "gtsam/geometry/triangulation.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/linear/NoiseModel.h"
+#include "gtsam/navigation/GPSFactor.h"
+#include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
+#include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/BetweenFactor.h"
+#include "gtsam/slam/PriorFactor.h"
+#include "gtsam/slam/ProjectionFactor.h"
+
+namespace robot::experimental::learn_descriptors {
+
+std::optional<gtsam::Point3> Backend::attempt_triangulate(
+    const std::vector<gtsam::Pose3>& cam_poses, const std::vector<gtsam::Point2>& cam_obs,
+    gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error, const bool verbose) {
+    gtsam::Point3 p_lmk_in_world;
+    if (cam_poses.size() > 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_lmk_in_world = gtsam::triangulatePoint3(
+                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+        } catch (const std::exception& e) {
+            // Handle the exception gracefully by logging and retaining the previous
+            // estimate or discard
+            if (verbose) {
+                std::cerr << "[attempt_triangulate] failed. Likely cheirality exception: "
+                          << e.what() << ". Discarding involved keypoints." << std::endl;
+            }
+            return std::nullopt;
+        }
+    } else {
+        return std::nullopt;
+    }
+    // Optional: perform an explicit cheirality check
+    for (const auto& pose : cam_poses) {
+        // Transform point to the camera coordinate system.
+        // transformTo() converts a world point to the camera frame.
+        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
+        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            return std::nullopt;
+        }
+    }
+
+    // Cheirality & reprojection checks
+    for (size_t i = 0; i < cam_poses.size(); ++i) {
+        const auto& pose = cam_poses[i];
+        // Cheirality
+        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
+        if (p_cam.z() <= 0) {
+            if (verbose) {
+                std::cerr << "[attempt_triangulate] point behind camera " << i
+                          << " (z=" << p_cam.z() << ")\n";
+            }
+            return std::nullopt;
+        }
+        // Reprojection error
+        if (max_reproj_error > 0) {
+            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
+            const auto reproj = cam.project(p_lmk_in_world);
+            const double err = (reproj - cam_obs[i]).norm();
+            if (err > max_reproj_error) {
+                if (verbose) {
+                    std::cerr << "[attempt_triangulate] reprojection error too large on view " << i
+                              << " (" << err << " px)\n";
+                }
+                return std::nullopt;
+            }
+        }
+    }
+    return p_lmk_in_world;
+}
+
+gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3>& rotations, int max_iter = 10) {
+    if (rotations.empty()) throw std::runtime_error("No rotations to average");
+    if (rotations.size() == 1) return rotations.front();
+
+    gtsam::Rot3 mean = rotations[0];
+
+    for (int iter = 0; iter < max_iter; ++iter) {
+        Eigen::Vector3d total = Eigen::Vector3d::Zero();
+        for (const gtsam::Rot3& R : rotations) {
+            gtsam::Rot3 delta = mean.between(R);  // R_mean^T * R
+            total += gtsam::Rot3::Logmap(delta);  // Lie algebra element in R³
+        }
+        total /= static_cast<double>(rotations.size());
+        mean = mean.compose(gtsam::Rot3::Expmap(total));  // update estimate
+    }
+
+    return mean;
+}
+
+void Backend::populate_rotation_estimate(std::vector<SharedFrame>& shared_frames) {
+    struct FrameVertex;
+    using SharedFrameVertex = std::shared_ptr<FrameVertex>;
+    struct FrameVertex {
+        Frame& frame;
+        std::optional<gtsam::Rot3> w_from_this;
+        std::unordered_map<FrameId, gtsam::Rot3>
+            neighbor_id_to_rot;  // rot is frame_from_neighbor_frame
+        std::unordered_set<SharedFrameVertex> parents;
+        std::unordered_set<SharedFrameVertex> children;
+    };
+    std::unordered_map<FrameId, SharedFrameVertex> frame_tree;
+    for (SharedFrame& shared_frame : shared_frames) {
+        frame_tree.emplace(shared_frame->id_, std::make_shared<FrameVertex>(*shared_frame));
+    }
+    // populate all frames and add children to neighbors
+    std::queue<size_t> q;  // indices of Frame in frames
+    std::unordered_set<FrameId> seen_frame_id;
+    q.push(shared_frames.front()->id_);
+
+    gtsam::NonlinearFactorGraph graph;
+    gtsam::Values initial;
+    // TODO: somehow get more informative/more robust noise values
+    // maybe: monte carlo sampling + reprojection, use essential matrix to guide covariance strength
+    gtsam::noiseModel::Diagonal::shared_ptr noise_rotation =
+        gtsam::noiseModel::Isotropic::Sigma(3, 0.1);  // radians
+
+    std::vector<gtsam::Symbol> symbols_frames;
+    while (!q.empty()) {
+        Frame frame = *shared_frames[q.front()];
+        q.pop();
+        seen_frame_id.insert(frame.id_);
+        SharedFrameVertex frame_vertex = frame_tree.at(frame.id_);
+        if (seen_frame_id.size() == 1) {  // make the first frame rotation identity unless the
+                                          // initial guess is set otherwise
+            frame_vertex->w_from_this = frame_vertex->frame.world_from_cam_initial_guess_
+                                            ? *frame_vertex->frame.world_from_cam_initial_guess_
+                                            : gtsam::Rot3::Identity();
+            graph.add(gtsam::PriorFactor<gtsam::Rot3>(
+                gtsam::Symbol(symbol_char_rotation, frame_vertex->frame.id_),
+                *frame_vertex->w_from_this, gtsam::noiseModel::Constrained::All(3)));
+        }
+        frame_vertex->frame.world_from_cam_initial_guess_ = frame_vertex->w_from_this;
+        std::cout << frame.to_string() << std::endl;
+
+        const gtsam::Symbol symbol_frame(symbol_char_rotation, frame.id_);
+        symbols_frames.push_back(symbol_frame);
+        initial.insert(symbol_frame, *frame.world_from_cam_initial_guess_);
+        for (const auto& [other_frame_id, frame_from_other_frame] :
+             frame.frame_from_other_frames_) {
+            if (seen_frame_id.find(other_frame_id) == seen_frame_id.end()) {
+                q.push(other_frame_id);
+                seen_frame_id.insert(other_frame_id);
+            }
+            frame_vertex->neighbor_id_to_rot.emplace(other_frame_id, frame_from_other_frame);
+            SharedFrameVertex other_frame_vertex = frame_tree.at(other_frame_id);
+            frame_vertex->children.insert(other_frame_vertex);
+            other_frame_vertex->parents.insert(frame_vertex);
+            other_frame_vertex->neighbor_id_to_rot.emplace(frame.id_,
+                                                           frame_from_other_frame.inverse());
+            if (!other_frame_vertex->w_from_this) {
+                other_frame_vertex->w_from_this =
+                    *frame_vertex->w_from_this * frame_from_other_frame;
+                other_frame_vertex->frame.world_from_cam_initial_guess_ =
+                    other_frame_vertex->w_from_this;
+            }
+
+            graph.add(gtsam::BetweenFactor<gtsam::Rot3>(
+                symbol_frame, gtsam::Symbol(symbol_char_rotation, other_frame_id),
+                frame_from_other_frame.inverse(), noise_rotation));
+        }
+        frame_tree.emplace(frame.id_, frame_vertex);
+    }
+
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    gtsam::Values result = optimizer.optimize();
+
+    // Output results and assign results to frames
+    for (const gtsam::Symbol& symbol_rotation : symbols_frames) {
+        gtsam::Rot3 w_from_frame = result.at<gtsam::Rot3>(symbol_rotation);
+        frame_tree.at(symbol_rotation.index())->frame.world_from_cam_initial_guess_ = w_from_frame;
+    }
+}
+
+void Backend::calculate_initial_values(bool interpolate_gps) {
+    populate_rotation_estimate(shared_frames_);
+    if (interpolate_gps) {
+        std::vector<SharedFrame> frames_with_gps;
+        for (SharedFrame& shared_frame : shared_frames_) {  // this assumes that frames are ordered
+                                                            // by seq (time), later on may have to
+            // think about this more if trying to run pipeline on unordered images or sets
+            // whose capture times are on different days
+            if (shared_frame->cam_in_world_initial_guess_) {
+                frames_with_gps.push_back(shared_frame);
+            }
+        }
+        if (frames_with_gps.size() < 2) return;
+        size_t idx_gps = 0;
+        for (SharedFrame& shared_frame : shared_frames_) {
+            // advance to next guess window if needed
+            while (idx_gps + 1 < frames_with_gps.size() - 1 &&
+                   shared_frame->seq_ > frames_with_gps[idx_gps + 1]->seq_) {
+                idx_gps++;
+            }
+
+            if (shared_frame->cam_in_world_initial_guess_) {
+                continue;
+            }
+            ROBOT_CHECK(
+                shared_frame->cam_in_world_interpolated_guess_,
+                "Currently assuming that the frontend populated interpolated initial guesses");
+            Eigen::Matrix3d interpolated_covariance(
+                *shared_frame->translation_covariance_in_cam_ *
+                100.0);  // very jank for now, not true interpolation
+            shared_frame->translation_covariance_in_cam_ = interpolated_covariance;
+            shared_frame->cam_in_world_initial_guess_ =
+                shared_frame->cam_in_world_interpolated_guess_;
+        }
+    }
+}
+
+void Backend::populate_graph(const FeatureTracks& feature_tracks) {
+    ROBOT_CHECK(shared_frames_.size() > 3);
+    ROBOT_CHECK(shared_frames_.front()->world_from_cam_groundtruth_,
+                "We're assuming the first camera has groundtruth for now");
+
+    shared_frames_.front()->world_from_cam_initial_guess_ =
+        shared_frames_.front()
+            ->world_from_cam_groundtruth_->rotation();  // align rotation with groundtruth
+
+    Backend::populate_rotation_estimate(shared_frames_);
+
+    // add first cam as prior
+    Eigen::Vector3d cam0_in_w = *shared_frames_.front()->cam_in_world_interpolated_guess_;
+    const gtsam::Pose3 w_from_cam0_init_estimate(
+        *shared_frames_.front()->world_from_cam_initial_guess_, gtsam::Point3{0, 0, 0});
+    const gtsam::Symbol symbol_cam0(symbol_char_pose, 0);
+    graph_.add(gtsam::PriorFactor<gtsam::Pose3>(
+        symbol_cam0, w_from_cam0_init_estimate,
+        noise_tight_prior_));  // currently assuming that we have groundtruth on the first pose
+    initial_estimate_.insert(symbol_cam0, w_from_cam0_init_estimate);
+
+    // add gps factors and populate initial pose values
+    for (size_t i = 1; i < shared_frames_.size(); i++) {
+        const Frame& frame = *shared_frames_[i];
+        gtsam::Pose3 world_from_cam_estimate(
+            frame.world_from_cam_initial_guess_ ? *frame.world_from_cam_initial_guess_
+                                                : gtsam::Rot3::Identity(),
+            *shared_frames_[i]->cam_in_world_interpolated_guess_ - cam0_in_w);
+        world_from_cam_initial_estimates_.emplace(frame.id_, world_from_cam_estimate);
+
+        const gtsam::Symbol cam_symbol(symbol_char_pose, frame.id_);
+        initial_estimate_.insert(cam_symbol, world_from_cam_initial_estimates_[frame.id_]);
+        if (frame.cam_in_world_initial_guess_) {
+            gtsam::noiseModel::Diagonal::shared_ptr gps_noise = gtsam::noiseModel::Diagonal::Sigmas(
+                frame.translation_covariance_in_cam_
+                    ? gtsam::Vector3(std::sqrt((*frame.translation_covariance_in_cam_)(0, 0)),
+                                     std::sqrt((*frame.translation_covariance_in_cam_)(1, 1)),
+                                     std::sqrt((*frame.translation_covariance_in_cam_)(2, 2)))
+                    : gps_sigmas_fallback_);
+            std::cout << "gps noise " << i << gps_noise->sigmas() << std::endl;
+            graph_.add(gtsam::GPSFactor(cam_symbol, *frame.cam_in_world_initial_guess_, gps_noise));
+        }
+    }
+
+    // add landmarks
+    for (size_t i = 0; i < feature_tracks.size(); i++) {
+        std::vector<gtsam::Pose3> world_from_lmk_cams;
+        std::vector<gtsam::Point2> lmk_observations;
+        for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) {
+            world_from_lmk_cams.push_back(world_from_cam_initial_estimates_[frame_id]);
+            lmk_observations.emplace_back(keypoint_cv.x, keypoint_cv.y);
+        }
+        std::optional<gtsam::Point3> landmark_estimate =
+            attempt_triangulate(world_from_lmk_cams, lmk_observations, shared_frames_[0]->K_, 2.0,
+                                false);  // all K are the same for now...
+        if (landmark_estimate) {
+            const gtsam::Symbol lmk_symbol(symbol_char_landmark, i);
+            for (const auto& [frame_id, keypoint_cv] : feature_tracks[i].obs_) {
+                graph_.add(gtsam::GenericProjectionFactor(
+                    gtsam::Point2(keypoint_cv.x, keypoint_cv.y), landmark_noise_,
+                    gtsam::Symbol(symbol_char_pose, frame_id), lmk_symbol,
+                    shared_frames_[0]->K_));  // all K are the same for now...
+            }
+            initial_estimate_.insert(lmk_symbol, *landmark_estimate);
+            lmk_initial_estimates_.emplace(i, *landmark_estimate);
+        }
+    }
+}
+
+void Backend::solve_graph(const int num_epochs,
+                          std::optional<Backend::graph_step_debug_func> iter_debug_func) {
+    gtsam::LevenbergMarquardtParams params;
+    params.setVerbosityLM("SUMMARY");  // or "TERMINATION", "TRYLAMBDA", etc.
+    params.maxIterations = 1;          // We'll manually step it
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph_, initial_estimate_, params);
+
+    double prev_error = optimizer.error();
+    double lowest_error = prev_error;
+    gtsam::Values best_values = optimizer.values();
+    for (int i = 0; i < num_epochs; i++) {
+        optimizer.iterate();
+        double curr_error = optimizer.error();
+        if (curr_error < lowest_error) {
+            lowest_error = curr_error;
+            best_values = optimizer.values();
+        }
+
+        if (iter_debug_func) {
+            (*iter_debug_func)(optimizer.values(), i);
+        }
+        // if (std::abs(prev_error - curr_error) < 1e-6) {
+        //     std::cout << "Converged at iteration " << i << "\n";
+        //     break;
+        // }
+    }
+    result_ = best_values;
+}
+
+void Backend::clear() {
+    shared_frames_.clear();
+    shared_frames_.shrink_to_fit();
+
+    initial_estimate_ = gtsam::Values();
+    result_ = gtsam::Values();
+    graph_ = gtsam::NonlinearFactorGraph();
+
+    std::unordered_map<size_t, gtsam::Pose3>().swap(world_from_cam_initial_estimates_);
+    std::unordered_map<size_t, gtsam::Point3>().swap(lmk_initial_estimates_);
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/backend.hh b/experimental/learn_descriptors/backend.hh
new file mode 100644
index 000000000..6f9a8c0c7
--- /dev/null
+++ b/experimental/learn_descriptors/backend.hh
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/geometry/camera.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Point2.h"
+#include "gtsam/geometry/Point3.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/linear/NoiseModel.h"
+#include "gtsam/nonlinear/NonlinearFactorGraph.h"
+#include "gtsam/nonlinear/Values.h"
+
+namespace robot::experimental::learn_descriptors {
+class Backend {
+   public:
+    static constexpr char symbol_char_pose = 'x';
+    static constexpr char symbol_char_rotation = 'r';
+    static constexpr char symbol_char_translation = 't';
+    static constexpr char symbol_char_bearing = 'b';
+    static constexpr char symbol_char_landmark = 'l';
+    static constexpr char symbol_char_cam_cal = 'k';
+
+    static std::optional<gtsam::Point3> attempt_triangulate(
+        const std::vector<gtsam::Pose3> &cam_poses, const std::vector<gtsam::Point2> &cam_obs,
+        gtsam::Cal3_S2::shared_ptr K, const double max_reproj_error = 2.0,
+        const bool verbose = true);
+    static gtsam::Rot3 average_rotations(const std::vector<gtsam::Rot3> &rotations,
+                                         int max_iter = 10);
+    // use rotation averaging to set the rotation initial guess for each frame in the world frame.
+    // the world frame will be the first frame in the vector
+    static void populate_rotation_estimate(std::vector<SharedFrame> &shared_frames);
+
+    void add_frames(std::vector<SharedFrame> &shared_frames) {
+        shared_frames_.reserve(shared_frames_.size() + shared_frames.size());
+        shared_frames_.insert(shared_frames_.end(), shared_frames.begin(), shared_frames.end());
+    };
+    void calculate_initial_values(bool interpolate_gps = true);
+    void populate_graph(const FeatureTracks &feature_tracks);
+    typedef int epoch;
+    using graph_step_debug_func = std::function<void(const gtsam::Values &, const epoch)>;
+    void solve_graph(const int num_epochs,
+                     std::optional<graph_step_debug_func> iter_debug_func = std::nullopt);
+    void clear();
+
+    const std::vector<SharedFrame> &shared_frames() const { return shared_frames_; };
+    const gtsam::Values &current_initial_values() const { return initial_estimate_; };
+    const gtsam::Values &result() const { return result_; };
+
+   private:
+    std::vector<SharedFrame> shared_frames_;
+
+    gtsam::Values initial_estimate_;
+    gtsam::Values result_;
+    gtsam::NonlinearFactorGraph graph_;
+
+    std::unordered_map<size_t, gtsam::Pose3> world_from_cam_initial_estimates_;
+    std::unordered_map<size_t, gtsam::Point3> lmk_initial_estimates_;
+
+    gtsam::noiseModel::Diagonal::shared_ptr noise_tight_prior_ =
+        gtsam::noiseModel::Diagonal::Sigmas((gtsam::Vector(6) << 1e-3, 1e-3,
+                                             1e-3,             // rotation stdev in radians
+                                             1e-3, 1e-3, 1e-3  // translation stdev in meters
+                                             )
+                                                .finished());
+    gtsam::Vector3 gps_sigmas_fallback_{0.5, 0.5, 0.5};
+    gtsam::noiseModel::Isotropic::shared_ptr landmark_noise_ =
+        gtsam::noiseModel::Isotropic::Sigma(2, 10.0);
+};
+using Landmark = gtsam::Point3;
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/camera_calibration.hh b/experimental/learn_descriptors/camera_calibration.hh
new file mode 100644
index 000000000..610dd2d8d
--- /dev/null
+++ b/experimental/learn_descriptors/camera_calibration.hh
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+struct CameraCalibrationFisheye {
+    CameraCalibrationFisheye(double fx, double fy, double cx, double cy, double k1, double k2,
+                             double k3, double k4)
+        : fx(fx), fy(fy), cx(cx), cy(cy), k1(k1), k2(k2), k3(k3), k4(k4) {}
+    double fx, fy, cx, cy, k1, k2, k3, k4;
+
+    cv::Mat k_mat() {
+        cv::Mat K_mat = (cv::Mat_<double>(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 1);
+        return K_mat;
+    }
+
+    cv::Mat d_mat() {
+        cv::Mat D_mat = (cv::Mat_<double>(4, 1) << k1, k2, k3, k4);
+        return D_mat;
+    }
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.cc b/experimental/learn_descriptors/four_seasons_parser.cc
index fdf76afde..61ef35e18 100644
--- a/experimental/learn_descriptors/four_seasons_parser.cc
+++ b/experimental/learn_descriptors/four_seasons_parser.cc
@@ -1,39 +1,29 @@
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
 
-#include <cmath>
 #include <cstddef>
-#include <cstdlib>
-#include <exception>
 #include <fstream>
 #include <iostream>
-#include <limits>
+#include <memory>
 #include <sstream>
 #include <string>
-#include <type_traits>
 #include <unordered_map>
 #include <utility>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
-#include "GeographicLib/Geocentric.hpp"
-#include "GeographicLib/LocalCartesian.hpp"
-#include "absl/strings/str_split.h"
-#include "absl/strings/strip.h"
-#include "common/check.hh"
 #include "common/liegroups/se3.hh"
-#include "common/time/robot_time.hh"
-#include "nmea/message/gga.hpp"
-#include "nmea/message/rmc.hpp"
-#include "nmea/sentence.hpp"
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
 
 namespace robot::experimental::learn_descriptors {
-using namespace detail;
+using namespace detail::four_seasons_parser;
 FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                                      const std::filesystem::path& calibration_dir)
     : root_dir_(root_dir),
       img_dir_(root_dir / "distorted_images" / "cam0"),
       cal_(txt_parser_help::load_camera_calibration(calibration_dir)),
-      transforms_(root_dir / "Transformations.txt") {
+      shared_transforms_(std::make_shared<FourSeasonsTransforms::StaticTransforms>(
+          root_dir / "Transformations.txt")) {
     const std::filesystem::path path_img = root_dir_ / "times.txt";
     const std::filesystem::path path_gnss = root_dir_ / "GNSSPoses.txt";
     const std::filesystem::path path_vio = root_dir_ / "result.txt";
@@ -51,8 +41,10 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
     size_t id = 0;
     for (const std::pair<size_t, std::vector<std::string>>& pair_time_data : img_time_list) {
         const size_t time_key = pair_time_data.first;
-        ImagePoint img_pt;
+        ImagePointFourSeasons img_pt;
         img_pt.id = id;
+        img_pt.K = cal_;
+        img_pt.shared_static_transforms = shared_transforms_;
         img_pt.seq = std::stoull(
             pair_time_data.second[static_cast<size_t>(txt_parser_help::ImgIdx::TIME_NS)]);
         if (gnss_poses_time_map.find(time_key) != gnss_poses_time_map.end()) {
@@ -76,9 +68,6 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                 std::stod(
                     parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Z)]));
             img_pt.AS_w_from_gnss_cam = liegroups::SE3(R_gps_cam_from_AS_w, t_gps_cam_from_AS_w);
-        } else {
-            std::clog << "There is no AS_w_from_gnss_cam data at img_pt with id: " << id
-                      << std::endl;
         }
         if (vio_poses_time_map.find(time_key) != vio_poses_time_map.end()) {
             const std::vector<std::string>& parsed_line_vio = vio_poses_time_map.at(time_key);
@@ -94,29 +83,27 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
                 std::stod(
                     parsed_line_vio[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Z)]));
             img_pt.AS_w_from_vio_cam = liegroups::SE3(R_AS_w_from_vio_cam, t_AS_w_from_vio_cam);
-        } else {
-            std::clog << "There is no AS_w_from_vio_cam data at img_pt with id: " << id
-                      << std::endl;
         }
+
         img_pt_vector_.push_back(img_pt);
         id++;
     }
     // popoulate gps to nearest img time
     // TODO: could be linear time... but good enough
     for (const auto& [time_unix_ns, gps_data] : gps_time_list) {
-        auto it = std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns,
-                                   [](const ImagePoint& img_pt, const size_t query_unix_time) {
-                                       return img_pt.seq < query_unix_time;
-                                   });
+        auto it =
+            std::lower_bound(img_pt_vector_.begin(), img_pt_vector_.end(), time_unix_ns,
+                             [](const ImagePointFourSeasons& img_pt, const size_t query_unix_time) {
+                                 return img_pt.seq < query_unix_time;
+                             });
         size_t insert_idx = std::distance(img_pt_vector_.begin(), it);
         if (it != img_pt_vector_.begin() &&
-            detail::abs_diff(it->seq, time_unix_ns) >
-                detail::abs_diff(std::prev(it)->seq, time_unix_ns)) {
+            abs_diff(it->seq, time_unix_ns) > abs_diff(std::prev(it)->seq, time_unix_ns)) {
             insert_idx--;
         }
         // NOTE: in future, could perhaps use gps data that isn't associated with an img_pt in some
         // way. maybe to help with interpolation, estimate velocity
-        if (detail::abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) <
+        if (abs_diff(img_pt_vector_[insert_idx].seq, time_unix_ns) <
             FourSeasonsParser::CAM_CAP_DELTA_NS) {
             img_pt_vector_[insert_idx].gps_gcs = gps_data;
         }
@@ -126,284 +113,4 @@ FourSeasonsParser::FourSeasonsParser(const std::filesystem::path& root_dir,
 cv::Mat FourSeasonsParser::load_image(const size_t m) const {
     return get_image_point(m).load_image(img_dir_);
 }
-
-FourSeasonsParser::FourSeasonsTransforms::FourSeasonsTransforms(
-    const std::filesystem::path& path_transforms) {
-    std::ifstream file_transforms(path_transforms);
-    std::string line;
-    while (std::getline(file_transforms, line)) {
-        if (line.find("transform_S_AS") != std::string::npos) {
-            std::getline(file_transforms, line);
-            S_from_AS = get_transform_from_line(line);
-        } else if (line.find("TS_cam_imu") != std::string::npos) {
-            std::getline(file_transforms, line);
-            cam_from_imu = get_transform_from_line(line);
-        } else if (line.find("transform_w_gpsw") != std::string::npos) {
-            std::getline(file_transforms, line);
-            w_from_gpsw = get_transform_from_line(line);
-        } else if (line.find("transform_gps_imu") != std::string::npos) {
-            std::getline(file_transforms, line);
-            gps_from_imu = get_transform_from_line(line);
-        } else if (line.find("transform_e_gpsw") != std::string::npos) {
-            std::getline(file_transforms, line);
-            e_from_gpsw = get_transform_from_line(line);
-        } else if (line.find("GNSS scale") != std::string::npos) {
-            std::getline(file_transforms, line);
-            gnss_scale = std::stod(line);
-        }
-    }
-}
-
-liegroups::SE3 FourSeasonsParser::FourSeasonsTransforms::get_transform_from_line(
-    const std::string& line) {
-    enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W };
-    std::vector<std::string> parsed_transform_line = txt_parser_help::parse_line_adv(line, ",");
-    if (parsed_transform_line.size() < 7) {
-        std::stringstream error_stream;
-        error_stream << "parsed_transform_line doesn't have sufficient entries for "
-                        "transform! parsed_transform_line.size(): "
-                     << parsed_transform_line.size() << std::endl;
-        throw std::runtime_error(error_stream.str());
-    }
-    std::vector<double> transform_nums;
-    for (const std::string& num : parsed_transform_line) {
-        transform_nums.push_back(static_cast<double>(std::stod(num)));
-    }
-    Eigen::Vector3d translation(transform_nums[TransformEntry::T_X],
-                                transform_nums[TransformEntry::T_Y],
-                                transform_nums[TransformEntry::T_Z]);
-    Eigen::Quaterniond rotation(
-        transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X],
-        transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]);
-    return liegroups::SE3(rotation, translation);
-}
-
-Eigen::Vector3d FourSeasonsParser::gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF) {
-    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
-
-    double x = t_place_from_ECEF.x(), y = t_place_from_ECEF.y(), z = t_place_from_ECEF.z();
-
-    double lat_deg, lon_deg, alt_m;
-    earth.Reverse(x, y, z, lat_deg, lon_deg, alt_m);
-
-    return Eigen::Vector3d(lat_deg, lon_deg, alt_m);
-}
-
-Eigen::Vector3d FourSeasonsParser::ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate) {
-    double lat_deg = gcs_coordinate.x();
-    double lon_deg = gcs_coordinate.y();
-    double alt_m = gcs_coordinate.z();
-
-    double x, y, z;
-
-    static const GeographicLib::Geocentric earth = GeographicLib::Geocentric::WGS84();
-    earth.Forward(lat_deg, lon_deg, alt_m, x, y, z);
-
-    return Eigen::Vector3d(x, y, z);
-}
-
-namespace detail {
-namespace txt_parser_help {
-
-std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim) {
-    if (delim == " ") {
-        return std::vector<std::string>(
-            absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace()));
-    }
-    return std::vector<std::string>(absl::StrSplit(line, delim, absl::SkipWhitespace()));
-}
-
-FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration(
-    const std::filesystem::path& calibration_dir) {
-    std::ifstream file_calibration(calibration_dir / "calib_0.txt");
-    std::string line_calibration;
-    std::getline(file_calibration, line_calibration);
-    std::vector<std::string> parsed_calib_line = parse_line_adv(line_calibration, " ");
-    return FourSeasonsParser::CameraCalibrationFisheye{
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FX)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FY)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CX)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CY)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K1)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K2)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K3)]),
-        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K4)])};
-}
-
-// minimum sig figs of time in seconds (of type double) for all entries of results.txt
-size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) {
-    std::ifstream file_vio(path_vio);
-    std::string line;
-    int min_figs = INT_MAX;
-    while (std::getline(file_vio, line)) {
-        const std::vector<std::string> parsed_line = parse_line_adv(line, " ");
-        const std::string str_time_seconds = parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)];
-        const int figs =
-            str_time_seconds.size() +
-            (str_time_seconds.find('.') != std::string::npos ? -1 : 0);  // '.' is not a fig
-        min_figs = std::min(min_figs, figs);
-    }
-    return min_figs;
-}
-
-const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
-                                             const size_t time_sig_figs) {
-    TimeDataList time_map_img;
-    std::ifstream file_img(path_img);
-    std::string line;
-    while (std::getline(file_img, line)) {
-        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
-        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(ImgIdx::TIME_NS)]);
-        time_map_img.push_back(
-            std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line));
-    }
-    return time_map_img;
-}
-
-const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
-                                                  const size_t time_sig_figs) {
-    TimeDataMap time_map_gnss_poses;
-    std::ifstream file_gnss_poses(path_gnss);
-    std::string line;
-    std::getline(file_gnss_poses,
-                 line);  // advance a line to get past the top comment in GNSSPoses.txt
-    while (std::getline(file_gnss_poses, line)) {
-        std::vector<std::string> parsed_line = parse_line_adv(line, ",");
-        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(GPSIdx::TIME_NS)]);
-        time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
-    }
-    return time_map_gnss_poses;
-}
-
-const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
-                                           const size_t time_sig_figs) {
-    TimeDataMap time_map_vio;
-    std::ifstream file_vio(path_vio);
-    std::string line;
-    while (std::getline(file_vio, line)) {
-        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
-        double time_seconds = std::stod(parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)]);
-        size_t time_ns = time_seconds * 1e9;
-        time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
-    }
-    return time_map_vio;
-}
-}  // namespace txt_parser_help
-
-namespace gps_parser_help {
-size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) {
-    std::chrono::sys_days date = std::chrono::year_month_day{
-        std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month},
-        std::chrono::day{utc_date.day}};
-    std::chrono::nanoseconds utc_time_day_ns =
-        std::chrono::nanoseconds{static_cast<size_t>(utc_time_day_seconds * 1e9)};
-    std::chrono::sys_time<std::chrono::nanoseconds> timestamp = date + utc_time_day_ns;
-    return timestamp.time_since_epoch().count();
-}
-TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) {
-    TimeGPSList time_list_gps;
-    std::ifstream file_gps(path_gps);
-    std::string line;
-    std::optional<nmea::sentence> nmea_sentence;
-    nmea::date date_last;
-    date_last.day = 255;
-    date_last.month = 255;
-    date_last.year = 255;
-    double time_of_day_last = -1.0;
-    while (std::getline(file_gps, line) && !line.empty()) {
-        try {
-            nmea_sentence = nmea::sentence(line);
-        } catch (const std::exception& e) {
-            std::cerr << "failed to parse line as nmea sentence: " << e.what()
-                      << "\ncontinuing to next line\n";
-            continue;
-        }
-        if (nmea_sentence->type() == "GGA") {
-            nmea::gga gga(*nmea_sentence);
-            if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) {
-                if (std::abs(gga.utc.get() - time_of_day_last) <
-                    1e-3) {  // GGA messages for this dataset come second
-                    ImagePoint::GPSData gps_data;
-                    gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get());
-                    gps_data.latitude = gga.latitude.get();
-                    gps_data.longitude = gga.longitude.get();
-                    if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
-                    time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data));
-                }
-            }
-        } else if (nmea_sentence->type() == "RMC") {
-            nmea::rmc rmc(*nmea_sentence);
-            if (rmc.utc.exists() && rmc.date.exists()) {
-                date_last = rmc.date.get();
-                time_of_day_last = rmc.utc.get();
-            }
-        } else if (nmea_sentence->type() == "GST") {
-            std::optional<GSTData> gst_data = parse_gpgst(nmea_sentence->nmea_string());
-            if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) <
-                                1e-3) {  // GST message for this dataset come third
-                size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns);
-                if (time_list_gps.back().first == unix_time_ns) {
-                    time_list_gps.back().second.uncertainty.emplace(
-                        gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m,
-                        gst_data->error_orientation_deg, gst_data->rms_range_error_m);
-                }
-            }
-        }
-    }
-    return time_list_gps;
-}
-
-std::vector<std::string> split_nmea_sentence(const std::string& sentence) {
-    std::vector<std::string> fields;
-    std::string field;
-    std::stringstream ss(sentence);
-
-    while (std::getline(ss, field, ',')) {
-        // remove checksum from the last field if present
-        auto asterisk = field.find('*');
-        if (asterisk != std::string::npos) field = field.substr(0, asterisk);
-        fields.push_back(field);
-    }
-
-    return fields;
-}
-double time_of_day_seconds(const double utc_time_hhmmss) {
-    int hours = static_cast<int>(utc_time_hhmmss / 10000);
-    int minutes = static_cast<int>((utc_time_hhmmss - hours * 10000) / 100);
-    double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100;
-    return hours * 3600 + minutes * 60 + seconds;
-}
-std::optional<GSTData> parse_gpgst(const std::string& sentence) {
-    if (sentence.substr(0, 6) != "$GPGST") {
-        return std::nullopt;
-    }
-
-    std::vector<std::string> fields = split_nmea_sentence(sentence);
-    if (fields.size() != 9) {
-        return std::nullopt;
-    }
-    for (size_t i = 0; i < fields.size(); i++) {
-        if (fields[i].empty()) return std::nullopt;
-    }
-
-    GSTData gst;
-    gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1]));
-    gst.rms_range_error_m = std::stod(fields[2]);
-    gst.error_semi_major_m = std::stod(fields[3]);
-    gst.error_semi_minor_m = std::stod(fields[4]);
-    gst.error_orientation_deg = std::stod(fields[5]);
-    gst.sigma_lat_m = std::stod(fields[6]);
-    gst.sigma_lon_m = std::stod(fields[7]);
-    gst.sigma_alt_m = std::stod(fields[8]);
-
-    return gst;
-}
-
-}  // namespace gps_parser_help
-
-template <typename T>
-size_t abs_diff(const T& a, const T& b) {
-    return a > b ? a - b : b - a;
-}
-}  // namespace detail
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser.hh b/experimental/learn_descriptors/four_seasons_parser.hh
index 9709bc396..4458215c8 100644
--- a/experimental/learn_descriptors/four_seasons_parser.hh
+++ b/experimental/learn_descriptors/four_seasons_parser.hh
@@ -1,144 +1,56 @@
 #pragma once
 
-#include <stdio.h>
-
 #include <cstddef>
 #include <filesystem>
+#include <memory>
 #include <opencv2/opencv.hpp>
 #include <string>
 #include <vector>
 
 #include "common/liegroups/se3.hh"
-#include "experimental/learn_descriptors/image_point.hh"
-#include "nmea/message/gga.hpp"
-#include "nmea/message/rmc.hpp"
-#include "nmea/sentence.hpp"
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
 
 namespace robot::experimental::learn_descriptors {
-class FourSeasonsParser {
+class FourSeasonsParser : std::enable_shared_from_this<FourSeasonsParser> {
    public:
     static constexpr double CAM_HZ = 30.0;
     static constexpr double CAM_CAP_DELTA_NS = 1e9 / CAM_HZ;
-    struct CameraCalibrationFisheye {
-        double fx, fy, cx, cy, k1, k2, k3, k4;
-    };
+
     FourSeasonsParser(const std::filesystem::path& root_dir,
                       const std::filesystem::path& calibration_dir);
+    std::shared_ptr<FourSeasonsParser> get_shared() { return shared_from_this(); }
     cv::Mat load_image(const size_t idx) const;
-    const ImagePoint& get_image_point(const size_t idx) const { return img_pt_vector_[idx]; };
+    const ImagePointFourSeasons& get_image_point(const size_t idx) const {
+        return img_pt_vector_[idx];
+    };
     size_t num_images() const { return img_pt_vector_.size(); };
-    const CameraCalibrationFisheye& camera_calibration() const { return cal_; };
+    const std::shared_ptr<CameraCalibrationFisheye>& camera_calibration() const { return cal_; };
     const liegroups::SE3& S_from_AS() const {
-        return transforms_.S_from_AS;
+        return shared_transforms_->S_from_AS;
     };  // metric scale from arbitrary (internal slam) scale
-    const liegroups::SE3& cam_from_imu() const { return transforms_.cam_from_imu; };
+    const liegroups::SE3& cam_from_imu() const { return shared_transforms_->cam_from_imu; };
     const liegroups::SE3& w_from_gpsw() const {
-        return transforms_.w_from_gpsw;
+        return shared_transforms_->w_from_gpsw;
     };  // visual world from local gps (ENU)
     const liegroups::SE3& gps_from_imu() const {
-        return transforms_.gps_from_imu;
+        return shared_transforms_->gps_from_imu;
     };  // phsyical onboard gps from physical onboard imu
     const liegroups::SE3& e_from_gpsw() const {
-        return transforms_.e_from_gpsw;
+        return shared_transforms_->e_from_gpsw;
     };  // ECEF from local gps (ENU)
     double gnss_scale() const {
-        return transforms_.gnss_scale;
+        return shared_transforms_->gnss_scale;
     };  // scale from vio frame to gnss frame. WARNING: will require retooling if the scales per
         // keyframe (pose) are not all one value. See more here:
         // https://github.com/pmwenzel/4seasons-dataset
 
-    static Eigen::Vector3d gcs_from_ECEF(const Eigen::Vector3d& t_place_from_ECEF);
-    static Eigen::Vector3d ECEF_from_gcs(const Eigen::Vector3d& gcs_coordinate);
-
    protected:
-    struct FourSeasonsTransforms {
-        liegroups::SE3 S_from_AS;
-        liegroups::SE3 cam_from_imu;
-        liegroups::SE3 w_from_gpsw;
-        liegroups::SE3 gps_from_imu;
-        liegroups::SE3 e_from_gpsw;
-        double gnss_scale;
-
-        FourSeasonsTransforms(const std::filesystem::path& path_transforms);
-
-       private:
-        static liegroups::SE3 get_transform_from_line(const std::string& line);
-    };
     const std::filesystem::path root_dir_;
     const std::filesystem::path img_dir_;
-    const CameraCalibrationFisheye cal_;
-    const FourSeasonsTransforms transforms_;
-    ImagePointVector img_pt_vector_;
-};
-
-namespace detail {
-template <typename T>
-std::size_t abs_diff(const T& a, const T& b);
-namespace txt_parser_help {
-using TimeDataMap = std::unordered_map<size_t, std::vector<std::string>>;
-using TimeDataList = std::vector<std::pair<size_t, std::vector<std::string>>>;
-enum class GPSIdx {
-    TIME_NS = 0,
-    TRAN_X = 1,
-    TRAN_Y = 2,
-    TRAN_Z = 3,
-    QUAT_X = 4,
-    QUAT_Y = 5,
-    QUAT_Z = 6,
-    QUAT_W = 7,
-};
-enum class ImgIdx {
-    TIME_NS = 0,
-    TIME_SEC = 1,
-    EXPOSURE_TIME = 2  // I'm not completely sure if this is correct - Nico
-};
-enum class ResultIdx {
-    TIME_SEC = 0,
-    TRAN_X = 1,
-    TRAN_Y = 2,
-    TRAN_Z = 3,
-    QUAT_X = 4,
-    QUAT_Y = 5,
-    QUAT_Z = 6,
-    QUAT_W = 7,
-};
-enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 };
-std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim = " ");
-FourSeasonsParser::CameraCalibrationFisheye load_camera_calibration(
-    const std::filesystem::path& calibration_dir);
-template <typename T>
-T round_to_sig_figs(T val, int n) {
-    if (val == 0) return 0;
-    double d = static_cast<double>(val);
-    int exponent = static_cast<int>(std::floor(std::log10(std::abs(d))));
-    double multiplier = std::pow(10.0, n - exponent - 1);
-    return static_cast<T>(std::round(d * multiplier) / multiplier);
-}
-size_t min_sig_figs_result_time(const std::filesystem::path& path_vio);
-const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
-                                             const size_t time_sig_figs);
-const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
-                                                  const size_t time_sig_figs);
-const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
-                                           const size_t time_sig_figs);
-}  // namespace txt_parser_help
-namespace gps_parser_help {
-using TimeGPSList = std::vector<std::pair<size_t, ImagePoint::GPSData>>;
-struct GSTData {
-    double utc_time_ns;
-    double rms_range_error_m;
-    double error_semi_major_m;
-    double error_semi_minor_m;
-    double error_orientation_deg;
-    double sigma_lat_m;
-    double sigma_lon_m;
-    double sigma_alt_m;
+    const std::shared_ptr<CameraCalibrationFisheye> cal_;
+    const std::shared_ptr<FourSeasonsTransforms::StaticTransforms> shared_transforms_;
+    ImagePointFourSeasonsVector img_pt_vector_;
 };
-std::vector<std::string> split_nmea_sentence(const std::string& nmea_sentence);
-double time_of_day_seconds(const double utc_time_hhmmss);
-std::optional<GSTData> parse_gpgst(const std::string& nmea_sentence);
-size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds);
-TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps);
-}  // namespace gps_parser_help
-}  // namespace detail
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.cc b/experimental/learn_descriptors/four_seasons_parser_detail.cc
new file mode 100644
index 000000000..b41e71113
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_parser_detail.cc
@@ -0,0 +1,220 @@
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
+
+#include <chrono>
+#include <cmath>
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_split.h"
+#include "absl/strings/strip.h"
+#include "nmea/message/gga.hpp"
+#include "nmea/message/rmc.hpp"
+#include "nmea/sentence.hpp"
+
+namespace robot::experimental::learn_descriptors::detail::four_seasons_parser {
+namespace txt_parser_help {
+
+std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim) {
+    if (delim == " ") {
+        return std::vector<std::string>(
+            absl::StrSplit(line, absl::ByAnyChar(" \t\n\r"), absl::SkipWhitespace()));
+    }
+    return std::vector<std::string>(absl::StrSplit(line, delim, absl::SkipWhitespace()));
+}
+
+std::shared_ptr<CameraCalibrationFisheye> load_camera_calibration(
+    const std::filesystem::path& calibration_dir) {
+    std::ifstream file_calibration(calibration_dir / "calib_0.txt");
+    std::string line_calibration;
+    std::getline(file_calibration, line_calibration);
+    std::vector<std::string> parsed_calib_line = parse_line_adv(line_calibration, " ");
+    return std::make_shared<CameraCalibrationFisheye>(
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FX)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::FY)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CX)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::CY)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K1)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K2)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K3)]),
+        std::stod(parsed_calib_line[static_cast<size_t>(CalibIdx::K4)]));
+}
+
+// minimum sig figs of time in seconds (of type double) for all entries of results.txt
+size_t min_sig_figs_result_time(const std::filesystem::path& path_vio) {
+    std::ifstream file_vio(path_vio);
+    std::string line;
+    int min_figs = INT_MAX;
+    while (std::getline(file_vio, line)) {
+        const std::vector<std::string> parsed_line = parse_line_adv(line, " ");
+        const std::string str_time_seconds = parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)];
+        const int figs =
+            str_time_seconds.size() +
+            (str_time_seconds.find('.') != std::string::npos ? -1 : 0);  // '.' is not a fig
+        min_figs = std::min(min_figs, figs);
+    }
+    return min_figs;
+}
+
+const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
+                                             const size_t time_sig_figs) {
+    TimeDataList time_map_img;
+    std::ifstream file_img(path_img);
+    std::string line;
+    while (std::getline(file_img, line)) {
+        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
+        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(ImgIdx::TIME_NS)]);
+        time_map_img.push_back(
+            std::make_pair(round_to_sig_figs(time_ns, time_sig_figs), parsed_line));
+    }
+    return time_map_img;
+}
+
+const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
+                                                  const size_t time_sig_figs) {
+    TimeDataMap time_map_gnss_poses;
+    std::ifstream file_gnss_poses(path_gnss);
+    std::string line;
+    std::getline(file_gnss_poses,
+                 line);  // advance a line to get past the top comment in GNSSPoses.txt
+    while (std::getline(file_gnss_poses, line)) {
+        std::vector<std::string> parsed_line = parse_line_adv(line, ",");
+        size_t time_ns = std::stoull(parsed_line[static_cast<size_t>(GPSIdx::TIME_NS)]);
+        time_map_gnss_poses.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
+    }
+    return time_map_gnss_poses;
+}
+
+const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
+                                           const size_t time_sig_figs) {
+    TimeDataMap time_map_vio;
+    std::ifstream file_vio(path_vio);
+    std::string line;
+    while (std::getline(file_vio, line)) {
+        std::vector<std::string> parsed_line = parse_line_adv(line, " ");
+        double time_seconds = std::stod(parsed_line[static_cast<size_t>(ResultIdx::TIME_SEC)]);
+        size_t time_ns = time_seconds * 1e9;
+        time_map_vio.insert({round_to_sig_figs(time_ns, time_sig_figs), parsed_line});
+    }
+    return time_map_vio;
+}
+}  // namespace txt_parser_help
+
+namespace gps_parser_help {
+
+size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds) {
+    std::chrono::sys_days date = std::chrono::year_month_day{
+        std::chrono::year{utc_date.year + 2000}, std::chrono::month{utc_date.month},
+        std::chrono::day{utc_date.day}};
+    std::chrono::nanoseconds utc_time_day_ns =
+        std::chrono::nanoseconds{static_cast<size_t>(utc_time_day_seconds * 1e9)};
+    std::chrono::sys_time<std::chrono::nanoseconds> timestamp = date + utc_time_day_ns;
+    return timestamp.time_since_epoch().count();
+}
+
+TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps) {
+    TimeGPSList time_list_gps;
+    std::ifstream file_gps(path_gps);
+    std::string line;
+    std::optional<nmea::sentence> nmea_sentence;
+    nmea::date date_last;
+    date_last.day = 255;
+    date_last.month = 255;
+    date_last.year = 255;
+    double time_of_day_last = -1.0;
+    while (std::getline(file_gps, line) && !line.empty()) {
+        try {
+            nmea_sentence = nmea::sentence(line);
+        } catch (const std::exception& e) {
+            std::cerr << "failed to parse line as nmea sentence: " << e.what()
+                      << "\ncontinuing to next line\n";
+            continue;
+        }
+        if (nmea_sentence->type() == "GGA") {
+            nmea::gga gga(*nmea_sentence);
+            if (gga.utc.exists() && gga.latitude.exists() && gga.longitude.exists()) {
+                if (std::abs(gga.utc.get() - time_of_day_last) <
+                    1e-3) {  // GGA messages for this dataset come second
+                    GPSData gps_data;
+                    gps_data.seq = gps_utc_to_unix_time(date_last, gga.utc.get());
+                    gps_data.latitude = gga.latitude.get();
+                    gps_data.longitude = gga.longitude.get();
+                    if (gga.altitude.exists()) gps_data.altitude = gga.altitude.get();
+                    time_list_gps.push_back(std::make_pair(gps_data.seq, gps_data));
+                }
+            }
+        } else if (nmea_sentence->type() == "RMC") {
+            nmea::rmc rmc(*nmea_sentence);
+            if (rmc.utc.exists() && rmc.date.exists()) {
+                date_last = rmc.date.get();
+                time_of_day_last = rmc.utc.get();
+            }
+        } else if (nmea_sentence->type() == "GST") {
+            std::optional<GSTData> gst_data = parse_gpgst(nmea_sentence->nmea_string());
+            if (gst_data && std::abs(gst_data->utc_time_ns - time_of_day_last) <
+                                1e-3) {  // GST message for this dataset come third
+                size_t unix_time_ns = gps_utc_to_unix_time(date_last, gst_data->utc_time_ns);
+                if (time_list_gps.back().first == unix_time_ns) {
+                    time_list_gps.back().second.uncertainty.emplace(
+                        gst_data->sigma_lat_m, gst_data->sigma_lon_m, gst_data->sigma_alt_m,
+                        gst_data->error_orientation_deg, gst_data->rms_range_error_m);
+                }
+            }
+        }
+    }
+    return time_list_gps;
+}
+
+std::vector<std::string> split_nmea_sentence(const std::string& sentence) {
+    std::vector<std::string> fields;
+    std::string field;
+    std::stringstream ss(sentence);
+
+    while (std::getline(ss, field, ',')) {
+        // remove checksum from the last field if present
+        auto asterisk = field.find('*');
+        if (asterisk != std::string::npos) field = field.substr(0, asterisk);
+        fields.push_back(field);
+    }
+
+    return fields;
+}
+
+double time_of_day_seconds(const double utc_time_hhmmss) {
+    int hours = static_cast<int>(utc_time_hhmmss / 10000);
+    int minutes = static_cast<int>((utc_time_hhmmss - hours * 10000) / 100);
+    double seconds = utc_time_hhmmss - hours * 10000 - minutes * 100;
+    return hours * 3600 + minutes * 60 + seconds;
+}
+
+std::optional<GSTData> parse_gpgst(const std::string& sentence) {
+    if (sentence.substr(0, 6) != "$GPGST") {
+        return std::nullopt;
+    }
+
+    std::vector<std::string> fields = split_nmea_sentence(sentence);
+    if (fields.size() != 9) {
+        return std::nullopt;
+    }
+    for (size_t i = 0; i < fields.size(); i++) {
+        if (fields[i].empty()) return std::nullopt;
+    }
+
+    GSTData gst;
+    gst.utc_time_ns = time_of_day_seconds(std::stod(fields[1]));
+    gst.rms_range_error_m = std::stod(fields[2]);
+    gst.error_semi_major_m = std::stod(fields[3]);
+    gst.error_semi_minor_m = std::stod(fields[4]);
+    gst.error_orientation_deg = std::stod(fields[5]);
+    gst.sigma_lat_m = std::stod(fields[6]);
+    gst.sigma_lon_m = std::stod(fields[7]);
+    gst.sigma_alt_m = std::stod(fields[8]);
+
+    return gst;
+}
+}  // namespace gps_parser_help
+}  // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_parser_detail.hh b/experimental/learn_descriptors/four_seasons_parser_detail.hh
new file mode 100644
index 000000000..0f0c30ad5
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_parser_detail.hh
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <cstddef>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "experimental/learn_descriptors/camera_calibration.hh"
+#include "experimental/learn_descriptors/gps_data.hh"
+#include "nmea/object/date.hpp"
+
+namespace robot::experimental::learn_descriptors::detail::four_seasons_parser {
+template <typename T>
+std::size_t abs_diff(const T& a, const T& b) {
+    return a > b ? a - b : b - a;
+};
+namespace txt_parser_help {
+using TimeDataMap = std::unordered_map<size_t, std::vector<std::string>>;
+using TimeDataList = std::vector<std::pair<size_t, std::vector<std::string>>>;
+enum class GPSIdx {
+    TIME_NS = 0,
+    TRAN_X = 1,
+    TRAN_Y = 2,
+    TRAN_Z = 3,
+    QUAT_X = 4,
+    QUAT_Y = 5,
+    QUAT_Z = 6,
+    QUAT_W = 7,
+};
+enum class ImgIdx {
+    TIME_NS = 0,
+    TIME_SEC = 1,
+    EXPOSURE_TIME = 2  // I'm not completely sure if this is correct - Nico
+};
+enum class ResultIdx {
+    TIME_SEC = 0,
+    TRAN_X = 1,
+    TRAN_Y = 2,
+    TRAN_Z = 3,
+    QUAT_X = 4,
+    QUAT_Y = 5,
+    QUAT_Z = 6,
+    QUAT_W = 7,
+};
+enum class CalibIdx { FX = 1, FY = 2, CX = 3, CY = 4, K1 = 5, K2 = 6, K3 = 7, K4 = 8 };
+std::vector<std::string> parse_line_adv(const std::string& line, const std::string& delim = " ");
+std::shared_ptr<CameraCalibrationFisheye> load_camera_calibration(
+    const std::filesystem::path& calibration_dir);
+template <typename T>
+T round_to_sig_figs(T val, int n) {
+    if (val == 0) return 0;
+    double d = static_cast<double>(val);
+    int exponent = static_cast<int>(std::floor(std::log10(std::abs(d))));
+    double multiplier = std::pow(10.0, n - exponent - 1);
+    return static_cast<T>(std::round(d * multiplier) / multiplier);
+}
+size_t min_sig_figs_result_time(const std::filesystem::path& path_vio);
+const TimeDataList create_img_time_data_list(const std::filesystem::path& path_img,
+                                             const size_t time_sig_figs);
+const TimeDataMap create_gnss_poses_time_data_map(const std::filesystem::path& path_gnss,
+                                                  const size_t time_sig_figs);
+const TimeDataMap create_vio_time_data_map(const std::filesystem::path& path_vio,
+                                           const size_t time_sig_figs);
+}  // namespace txt_parser_help
+namespace gps_parser_help {
+using TimeGPSList = std::vector<std::pair<size_t, GPSData>>;
+struct GSTData {
+    double utc_time_ns;
+    double rms_range_error_m;
+    double error_semi_major_m;
+    double error_semi_minor_m;
+    double error_orientation_deg;
+    double sigma_lat_m;
+    double sigma_lon_m;
+    double sigma_alt_m;
+};
+std::vector<std::string> split_nmea_sentence(const std::string& nmea_sentence);
+double time_of_day_seconds(const double utc_time_hhmmss);
+std::optional<GSTData> parse_gpgst(const std::string& nmea_sentence);
+size_t gps_utc_to_unix_time(const nmea::date& utc_date, const double utc_time_day_seconds);
+TimeGPSList create_gps_time_data_list(const std::filesystem::path& path_gps);
+}  // namespace gps_parser_help
+}  // namespace robot::experimental::learn_descriptors::detail::four_seasons_parser
diff --git a/experimental/learn_descriptors/four_seasons_parser_example.cc b/experimental/learn_descriptors/four_seasons_parser_example.cc
index 064f404ef..e49467bfd 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example.cc
@@ -64,7 +64,7 @@ int main(int argc, const char** argv) {
         const std::string img_str = "img " + std::to_string(i);
         cv::putText(img, img_str, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5,
                     cv::Scalar(0, 255, 0), 2);
-        const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
+        const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         std::stringstream ss_AS_w_from_gnss_cam;
         ss_AS_w_from_gnss_cam << "AS_w_from_gnss_cam: ";
         if (img_pt.AS_w_from_gnss_cam) {
@@ -84,7 +84,7 @@ int main(int argc, const char** argv) {
         std::stringstream ss_gps;
         ss_gps << "gps_gcs: ";
         if (img_pt.gps_gcs) {
-            const lrn_desc::ImagePoint::GPSData& gps_data = *img_pt.gps_gcs;
+            const lrn_desc::GPSData& gps_data = *img_pt.gps_gcs;
             ss_gps << "long: " << gps_data.longitude << ", lat: " << gps_data.latitude;
             if (gps_data.altitude) {
                 ss_gps << ", alt: " << *gps_data.altitude;
diff --git a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
index bca7ec2c4..4e52440d2 100644
--- a/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_example_viz.cc
@@ -1,5 +1,7 @@
 #include <algorithm>
+#include <chrono>
 #include <cstddef>
+#include <cstdlib>
 #include <filesystem>
 #include <iostream>
 #include <numeric>
@@ -8,8 +10,10 @@
 #include <vector>
 
 #include "common/check.hh"
+#include "common/gps/frame_translation.hh"
 #include "cxxopts.hpp"
 #include "experimental/learn_descriptors/four_seasons_parser.hh"
+#include "experimental/learn_descriptors/frontend.hh"
 #include "opencv2/opencv.hpp"
 #include "visualization/opencv/opencv_viz.hh"
 
@@ -47,19 +51,42 @@ int main(int argc, const char** argv) {
 
     lrn_desc::FourSeasonsParser parser(path_data, path_calibration);
 
+    lrn_desc::FrontendParams params{lrn_desc::FrontendParams::ExtractorType::SIFT,
+                                    lrn_desc::FrontendParams::MatcherType::KNN, true, false};
+    lrn_desc::Frontend frontend(params);
+    constexpr size_t NUM_IMAGES = 100;
+    (void)NUM_IMAGES;
+    auto start = std::chrono::high_resolution_clock::now();
+    for (size_t i = 0; i < NUM_IMAGES; i += 1) {
+        const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
+        frontend.add_image(lrn_desc::ImageAndPoint{
+            parser.load_image(i), std::make_shared<lrn_desc::ImagePointFourSeasons>(img_pt)});
+    }
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+        std::chrono::high_resolution_clock::now() - start);
+    std::cout << "done adding! took " << duration.count() << " milliseconds" << std::endl;
+    frontend.populate_frames();
+    start = std::chrono::high_resolution_clock::now();
+    duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+        std::chrono::high_resolution_clock::now() - start);
+    std::cout << "done populating frames! took " << duration.count() << " milliseconds"
+              << std::endl;
+
     ROBOT_CHECK(parser.num_images() != 0);
 
-    std::vector<robot::geometry::VizPose> viz_frames;   // camera frames from visual world frame
-    std::vector<robot::geometry::VizPoint> viz_points;  // camera frames from visual world frame
+    std::vector<robot::visualization::VizPose> viz_frames;  // camera frames from visual world frame
+    std::vector<robot::visualization::VizPoint>
+        viz_points;  // camera frames from visual world frame
 
     Eigen::Isometry3d scale_mat = Eigen::Isometry3d::Identity();
     std::cout << "gnss scale: " << parser.gnss_scale() << std::endl;
     scale_mat.linear() *= parser.gnss_scale();
     std::cout << "scale mat: " << scale_mat.matrix() << std::endl;
+    std::optional<Eigen::Vector3d> first_gps_to_cam;
     std::optional<double> altitude_gps_from_gnss_cam;
     std::vector<double> gps_ns_delta_from_shutter;
-    for (size_t i = 0; i < parser.num_images(); i += 1) {
-        const lrn_desc::ImagePoint img_pt = parser.get_image_point(i);
+    for (size_t i = 0; i < NUM_IMAGES; i += 1) {
+        const lrn_desc::ImagePointFourSeasons img_pt = parser.get_image_point(i);
         std::cout << img_pt.to_string() << std::endl;
         if (!img_pt.AS_w_from_gnss_cam) {
             continue;
@@ -67,9 +94,13 @@ int main(int argc, const char** argv) {
         Eigen::Isometry3d w_from_gnss_cam(Eigen::Isometry3d(parser.S_from_AS().matrix()) *
                                           scale_mat *
                                           Eigen::Isometry3d(img_pt.AS_w_from_gnss_cam->matrix()));
-
-        viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
         viz_frames.emplace_back(w_from_gnss_cam, "x_ref_" + std::to_string(i));
+        Eigen::Isometry3d frontend_w_from_cam_groundtruth(
+            frontend.frames()[i]->world_from_cam_groundtruth_->matrix());
+        ROBOT_CHECK(frontend_w_from_cam_groundtruth.matrix().isApprox(w_from_gnss_cam.matrix()),
+                    frontend_w_from_cam_groundtruth.matrix(), w_from_gnss_cam.matrix());
+        viz_frames.emplace_back(frontend_w_from_cam_groundtruth,
+                                "x_frontend_ref_" + std::to_string(i));
         if (img_pt.gps_gcs) {
             if (img_pt.gps_gcs->seq > img_pt.seq) {
                 gps_ns_delta_from_shutter.push_back(
@@ -88,28 +119,40 @@ int main(int argc, const char** argv) {
                     (parser.e_from_gpsw() * parser.w_from_gpsw().inverse()).matrix()) *
                 w_from_gnss_cam;
             const Eigen::Vector3d gnss_cam_in_gcs =
-                parser.gcs_from_ECEF(ECEF_from_gnss_cam.translation());
+                robot::gps::lla_from_ecef(ECEF_from_gnss_cam.translation());
 
             if (!altitude_gps_from_gnss_cam) {
                 altitude_gps_from_gnss_cam = gnss_cam_in_gcs.z() - *(img_pt.gps_gcs->altitude);
             }
+            if (!first_gps_to_cam) {
+                first_gps_to_cam =
+                    frontend.frames()[i]->world_from_cam_groundtruth_->translation() -
+                    *frontend.frames()[i]->cam_in_world_initial_guess_;
+            }
             gcs_coordinate.z() += *altitude_gps_from_gnss_cam;
             std::cout << std::setprecision(20) << "gnss_cam_in_gcs: " << gnss_cam_in_gcs
                       << "\ngps_gcs: " << gcs_coordinate
                       << "\ngps-gnss_cam: " << (gcs_coordinate - gnss_cam_in_gcs) << std::endl;
 
-            const Eigen::Vector3d ECEF_from_gps = parser.ECEF_from_gcs(gcs_coordinate);
+            const Eigen::Vector3d ECEF_from_gps = robot::gps::ecef_from_lla(gcs_coordinate);
             const Eigen::Vector4d ECEF_from_gps_hom(ECEF_from_gps.x(), ECEF_from_gps.y(),
                                                     ECEF_from_gps.z(), 1);
             Eigen::Vector4d gps_in_w =
                 Eigen::Matrix4d((parser.w_from_gpsw() * parser.e_from_gpsw().inverse()).matrix()) *
                 ECEF_from_gps_hom;
-            viz_points.emplace_back(gps_in_w.head<3>(), "x_gps_" + std::to_string(i));
+            const Eigen::Isometry3d cam_from_gps(
+                (parser.cam_from_imu() * parser.gps_from_imu().inverse()).matrix());
+            Eigen::Vector3d cam_gps_in_w = gps_in_w.head<3>() - cam_from_gps.translation();
+            viz_points.emplace_back(cam_gps_in_w, "x_cam_gps" + std::to_string(i));
+            viz_points.emplace_back(
+                *frontend.frames()[i]->cam_in_world_initial_guess_ + *first_gps_to_cam,
+                "x_frontend_cam_gps" + std::to_string(i));
             const Eigen::Vector3d gps_from_ref_in_world =
                 gps_in_w.head<3>() - w_from_gnss_cam.translation();
             std::cout << "gps_from_ref_in_world: " << gps_from_ref_in_world << std::endl;
         }
     }
+
     const double sum =
         std::accumulate(gps_ns_delta_from_shutter.begin(), gps_ns_delta_from_shutter.end(), 0.0);
     double avg_ns_gps_delta = sum / gps_ns_delta_from_shutter.size();
@@ -136,6 +179,6 @@ int main(int argc, const char** argv) {
     std::cout << "max delta: " << max_delta << std::endl;
     std::cout << "\ngot " << viz_frames.size() << " poses" << std::endl;
     std::cout << "got " << viz_points.size() << " points" << std::endl;
-    robot::geometry::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true,
-                               "Viz Trajectory + GPS Points");
+    robot::visualization::viz_scene(viz_frames, viz_points, cv::viz::Color::brown(), true, true,
+                                    "Viz Trajectory + GPS Points");
 }
diff --git a/experimental/learn_descriptors/four_seasons_parser_test.cc b/experimental/learn_descriptors/four_seasons_parser_test.cc
index ff49d935d..1b1a940b9 100644
--- a/experimental/learn_descriptors/four_seasons_parser_test.cc
+++ b/experimental/learn_descriptors/four_seasons_parser_test.cc
@@ -10,14 +10,12 @@
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
-#include "GeographicLib/Constants.hpp"
-#include "GeographicLib/Geocentric.hpp"
-#include "GeographicLib/Geodesic.hpp"
-#include "GeographicLib/LocalCartesian.hpp"
 #include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
 #include "common/check.hh"
+#include "common/gps/frame_translation.hh"
 #include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
 #include "gtest/gtest.h"
 #include "nmea/message/gga.hpp"
 #include "nmea/message/rmc.hpp"
@@ -35,6 +33,7 @@ static bool images_equal(cv::Mat img1, cv::Mat img2) {
 }
 
 namespace robot::experimental::learn_descriptors {
+using namespace detail::four_seasons_parser;
 TEST(FourSeasonsParserTest, parser_test) {
     const std::filesystem::path dir_snippet =
         "external/four_seasons_snippet/recording_2020-04-07_11-33-45";
@@ -43,6 +42,7 @@ TEST(FourSeasonsParserTest, parser_test) {
     EXPECT_TRUE(std::filesystem::exists(dir_snippet));
     EXPECT_TRUE(std::filesystem::exists(dir_calibration));
 
+    std::cout << "heartbeat -1" << std::endl;
     FourSeasonsParser parser(dir_snippet, dir_calibration);
 
     // transformations test
@@ -65,17 +65,17 @@ TEST(FourSeasonsParserTest, parser_test) {
     EXPECT_DOUBLE_EQ(gnss_scale, parser.gnss_scale());
 
     // calibration test
-    const FourSeasonsParser::CameraCalibrationFisheye calibration_target =
-        detail::txt_parser_help::load_camera_calibration(dir_calibration);
-    const FourSeasonsParser::CameraCalibrationFisheye calibration = parser.camera_calibration();
-    EXPECT_DOUBLE_EQ(calibration_target.cx, calibration.cx);
-    EXPECT_DOUBLE_EQ(calibration_target.cy, calibration.cy);
-    EXPECT_DOUBLE_EQ(calibration_target.fx, calibration.fx);
-    EXPECT_DOUBLE_EQ(calibration_target.fy, calibration.fy);
-    EXPECT_DOUBLE_EQ(calibration_target.k1, calibration.k1);
-    EXPECT_DOUBLE_EQ(calibration_target.k2, calibration.k2);
-    EXPECT_DOUBLE_EQ(calibration_target.k3, calibration.k3);
-    EXPECT_DOUBLE_EQ(calibration_target.k4, calibration.k4);
+    const std::shared_ptr<CameraCalibrationFisheye> calibration_target =
+        txt_parser_help::load_camera_calibration(dir_calibration);
+    const std::shared_ptr<CameraCalibrationFisheye> calibration = parser.camera_calibration();
+    EXPECT_DOUBLE_EQ(calibration_target->cx, calibration->cx);
+    EXPECT_DOUBLE_EQ(calibration_target->cy, calibration->cy);
+    EXPECT_DOUBLE_EQ(calibration_target->fx, calibration->fx);
+    EXPECT_DOUBLE_EQ(calibration_target->fy, calibration->fy);
+    EXPECT_DOUBLE_EQ(calibration_target->k1, calibration->k1);
+    EXPECT_DOUBLE_EQ(calibration_target->k2, calibration->k2);
+    EXPECT_DOUBLE_EQ(calibration_target->k3, calibration->k3);
+    EXPECT_DOUBLE_EQ(calibration_target->k4, calibration->k4);
 
     EXPECT_NE(parser.num_images(), 0);
 
@@ -87,17 +87,16 @@ TEST(FourSeasonsParserTest, parser_test) {
     const std::filesystem::path path_img_time = dir_snippet / "times.txt";
     const std::filesystem::path path_gnss_poses = dir_snippet / "GNSSPoses.txt";
     const std::filesystem::path path_vio = dir_snippet / "result.txt";
-    const size_t min_sig_figs_time = detail::txt_parser_help::min_sig_figs_result_time(path_vio);
+    const size_t min_sig_figs_time = txt_parser_help::min_sig_figs_result_time(path_vio);
 
     // fetch data for all the fields (AS_w_from_gnss_cam, img times, result)
-    const detail::txt_parser_help::TimeDataList img_time_list =
-        detail::txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time);
+    const txt_parser_help::TimeDataList img_time_list =
+        txt_parser_help::create_img_time_data_list(path_img_time, min_sig_figs_time);
     EXPECT_EQ(img_time_list.size(), parser.num_images());
-    const detail::txt_parser_help::TimeDataMap gnss_poses_time_map =
-        detail::txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses,
-                                                                 min_sig_figs_time);
-    const detail::txt_parser_help::TimeDataMap vio_time_map =
-        detail::txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time);
+    const txt_parser_help::TimeDataMap gnss_poses_time_map =
+        txt_parser_help::create_gnss_poses_time_data_map(path_gnss_poses, min_sig_figs_time);
+    const txt_parser_help::TimeDataMap vio_time_map =
+        txt_parser_help::create_vio_time_data_map(path_vio, min_sig_figs_time);
 
     Eigen::Matrix4d scale_mat = Eigen::Matrix4d::Identity();
     scale_mat(0, 0) = scale_mat(1, 1) = scale_mat(2, 2) = parser.gnss_scale();
@@ -108,7 +107,7 @@ TEST(FourSeasonsParserTest, parser_test) {
     std::vector<double> gps_ns_delta_from_shutter;
 
     for (size_t i = 0; i < parser.num_images(); i++) {
-        const ImagePoint& img_pt = parser.get_image_point(i);
+        const ImagePointFourSeasons& img_pt = parser.get_image_point(i);
         const cv::Mat img = parser.load_image(i);
 
         // seq (time in nanoseconds) test
@@ -124,20 +123,22 @@ TEST(FourSeasonsParserTest, parser_test) {
             const std::vector<std::string>& parsed_line_gnss_poses =
                 gnss_poses_time_map.at(img_time_list[i].first);
 
-            Eigen::Vector3d gps_translation(std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                                detail::txt_parser_help::GPSIdx::TRAN_X)]),
-                                            std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                                detail::txt_parser_help::GPSIdx::TRAN_Y)]),
-                                            std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                                detail::txt_parser_help::GPSIdx::TRAN_Z)]));
-            Eigen::Quaterniond gps_quat(std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_W)]),
-                                        std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_X)]),
-                                        std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_Y)]),
-                                        std::stod(parsed_line_gnss_poses[static_cast<size_t>(
-                                            detail::txt_parser_help::GPSIdx::QUAT_Z)]));
+            Eigen::Vector3d gps_translation(
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::TRAN_X)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::TRAN_Y)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::TRAN_Z)]));
+            Eigen::Quaterniond gps_quat(
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_W)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_X)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Y)]),
+                std::stod(
+                    parsed_line_gnss_poses[static_cast<size_t>(txt_parser_help::GPSIdx::QUAT_Z)]));
             EXPECT_TRUE(liegroups::SE3(gps_quat, gps_translation)
                             .matrix()
                             .isApprox(img_pt.AS_w_from_gnss_cam->matrix()));
@@ -148,20 +149,16 @@ TEST(FourSeasonsParserTest, parser_test) {
             const std::vector<std::string>& parsed_line_gps =
                 vio_time_map.at(img_time_list[i].first);
             Eigen::Vector3d ground_truth_translation(
-                std::stod(parsed_line_gps[static_cast<size_t>(
-                    detail::txt_parser_help::ResultIdx::TRAN_X)]),
-                std::stod(parsed_line_gps[static_cast<size_t>(
-                    detail::txt_parser_help::ResultIdx::TRAN_Y)]),
-                std::stod(parsed_line_gps[static_cast<size_t>(
-                    detail::txt_parser_help::ResultIdx::TRAN_Z)]));
-            Eigen::Quaterniond ground_truth_quat(std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_W)]),
-                                                 std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_X)]),
-                                                 std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_Y)]),
-                                                 std::stod(parsed_line_gps[static_cast<size_t>(
-                                                     detail::txt_parser_help::ResultIdx::QUAT_Z)]));
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::TRAN_X)]),
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::TRAN_Y)]),
+                std::stod(
+                    parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::TRAN_Z)]));
+            Eigen::Quaterniond ground_truth_quat(
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_W)]),
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_X)]),
+                std::stod(parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Y)]),
+                std::stod(
+                    parsed_line_gps[static_cast<size_t>(txt_parser_help::ResultIdx::QUAT_Z)]));
             EXPECT_TRUE(liegroups::SE3(ground_truth_quat, ground_truth_translation)
                             .matrix()
                             .isApprox(img_pt.AS_w_from_vio_cam->matrix()));
@@ -177,8 +174,7 @@ TEST(FourSeasonsParserTest, parser_test) {
                 ECEF_from_gnss_cam * gps_from_cam.inverse();
 
             const Eigen::Vector3d& t_ECEF_from_gnss_gps = ECEF_from_gnss_gps.translation();
-            const Eigen::Vector3d gnss_gps_gcs =
-                FourSeasonsParser::gcs_from_ECEF(t_ECEF_from_gnss_gps);
+            const Eigen::Vector3d gnss_gps_gcs = gps::lla_from_ecef(t_ECEF_from_gnss_gps);
 
             const Eigen::Vector3d raw_gps_gcs(
                 img_pt.gps_gcs->latitude, img_pt.gps_gcs->longitude,
diff --git a/experimental/learn_descriptors/four_seasons_transforms.cc b/experimental/learn_descriptors/four_seasons_transforms.cc
new file mode 100644
index 000000000..c6dc25ac4
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_transforms.cc
@@ -0,0 +1,67 @@
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_parser_detail.hh"
+
+namespace robot::experimental::learn_descriptors {
+FourSeasonsTransforms::StaticTransforms::StaticTransforms(
+    const std::filesystem::path& path_transforms) {
+    std::ifstream file_transforms(path_transforms);
+    ROBOT_CHECK(file_transforms, path_transforms);
+    std::string line;
+    while (std::getline(file_transforms, line)) {
+        if (line.find("transform_S_AS") != std::string::npos) {
+            std::getline(file_transforms, line);
+            S_from_AS = get_transform_from_line(line);
+        } else if (line.find("TS_cam_imu") != std::string::npos) {
+            std::getline(file_transforms, line);
+            cam_from_imu = get_transform_from_line(line);
+        } else if (line.find("transform_w_gpsw") != std::string::npos) {
+            std::getline(file_transforms, line);
+            w_from_gpsw = get_transform_from_line(line);
+        } else if (line.find("transform_gps_imu") != std::string::npos) {
+            std::getline(file_transforms, line);
+            gps_from_imu = get_transform_from_line(line);
+        } else if (line.find("transform_e_gpsw") != std::string::npos) {
+            std::getline(file_transforms, line);
+            e_from_gpsw = get_transform_from_line(line);
+        } else if (line.find("GNSS scale") != std::string::npos) {
+            std::getline(file_transforms, line);
+            gnss_scale = std::stod(line);
+        }
+    }
+}
+
+liegroups::SE3 FourSeasonsTransforms::StaticTransforms::get_transform_from_line(
+    const std::string& line) {
+    enum TransformEntry { T_X, T_Y, T_Z, Q_X, Q_Y, Q_Z, Q_W };
+    std::vector<std::string> parsed_transform_line =
+        detail::four_seasons_parser::txt_parser_help::parse_line_adv(line, ",");
+    if (parsed_transform_line.size() < 7) {
+        std::stringstream error_stream;
+        error_stream << "parsed_transform_line doesn't have sufficient entries for "
+                        "transform! parsed_transform_line.size(): "
+                     << parsed_transform_line.size() << std::endl;
+        throw std::runtime_error(error_stream.str());
+    }
+    std::vector<double> transform_nums;
+    for (const std::string& num : parsed_transform_line) {
+        transform_nums.push_back(static_cast<double>(std::stod(num)));
+    }
+    Eigen::Vector3d translation(transform_nums[TransformEntry::T_X],
+                                transform_nums[TransformEntry::T_Y],
+                                transform_nums[TransformEntry::T_Z]);
+    Eigen::Quaterniond rotation(
+        transform_nums[TransformEntry::Q_W], transform_nums[TransformEntry::Q_X],
+        transform_nums[TransformEntry::Q_Y], transform_nums[TransformEntry::Q_Z]);
+    return liegroups::SE3(rotation, translation);
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/four_seasons_transforms.hh b/experimental/learn_descriptors/four_seasons_transforms.hh
new file mode 100644
index 000000000..9e8ab1bde
--- /dev/null
+++ b/experimental/learn_descriptors/four_seasons_transforms.hh
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <filesystem>
+#include <string>
+
+#include "common/liegroups/se3.hh"
+
+namespace robot::experimental::learn_descriptors {
+struct FourSeasonsTransforms {
+    struct StaticTransforms {
+        liegroups::SE3 S_from_AS;
+        liegroups::SE3 cam_from_imu;
+        liegroups::SE3 w_from_gpsw;
+        liegroups::SE3 gps_from_imu;
+        liegroups::SE3 e_from_gpsw;
+        double gnss_scale;  // scale from vio frame to gnss frame. WARNING: will require retooling
+                            // if the scales per
+        // keyframe (pose) are not all one value. See more here:
+        // https://github.com/pmwenzel/4seasons-dataset
+
+        StaticTransforms(const std::filesystem::path& path_transforms);
+
+       private:
+        static liegroups::SE3 get_transform_from_line(const std::string& line);
+    };
+    // might consider defining a nested "DynamicTransforms" or "PerImageTransforms" at some point -
+    // Nico
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frame.cc b/experimental/learn_descriptors/frame.cc
new file mode 100644
index 000000000..139c27858
--- /dev/null
+++ b/experimental/learn_descriptors/frame.cc
@@ -0,0 +1,9 @@
+#include "experimental/learn_descriptors/frame.hh"
+
+namespace robot::experimental::learn_descriptors {
+void Frame::add_keypoints(const KeypointsCV& kpts) {
+    kpts_.insert(kpts_.end(), kpts.begin(), kpts.end());
+};
+
+void Frame::assign_descriptors(const cv::Mat& descriptors) { descriptors_ = descriptors; };
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frame.hh b/experimental/learn_descriptors/frame.hh
new file mode 100644
index 000000000..0c5535899
--- /dev/null
+++ b/experimental/learn_descriptors/frame.hh
@@ -0,0 +1,121 @@
+#pragma once
+#include <memory>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+
+#include "Eigen/Core"
+#include "common/check.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "gtsam/base/Matrix.h"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class Frame {
+   public:
+    Frame(FrameId id, size_t seq, const cv::Mat img, gtsam::Cal3_S2::shared_ptr K,
+          const KeypointsCV kpts, const cv::Mat descriptors)
+        : id_(id),
+          seq_(seq),
+          undistorted_img_(img),
+          K_(K),
+          kpts_(kpts),
+          descriptors_(descriptors) {}
+    Frame(const Frame& other)
+        : id_(other.id_),
+          seq_(other.seq_),
+          undistorted_img_(other.undistorted_img_.clone()),
+          K_(other.K_),
+          kpts_(other.kpts_),
+          descriptors_(other.descriptors_.clone()),
+          frame_from_other_frames_(other.frame_from_other_frames_),
+          world_from_cam_groundtruth_(other.world_from_cam_groundtruth_),
+          cam_in_world_initial_guess_(other.cam_in_world_initial_guess_),
+          cam_in_world_interpolated_guess_(other.cam_in_world_interpolated_guess_),
+          world_from_cam_initial_guess_(other.world_from_cam_initial_guess_),
+          translation_covariance_in_cam_(other.translation_covariance_in_cam_) {}
+
+    std::optional<Eigen::Vector3d> velocity_to(const Frame& other_frame) const {
+        ROBOT_CHECK(other_frame.seq_ >= seq_);
+        if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_)
+            return std::nullopt;
+        double dt_seconds = static_cast<double>(other_frame.seq_ - seq_) * 1e-9;
+        return (*other_frame.cam_in_world_initial_guess_ - *cam_in_world_initial_guess_) /
+               dt_seconds;
+    };
+    std::optional<Eigen::Vector3d> velocity_from(const Frame& other_frame) const {
+        ROBOT_CHECK(seq_ >= other_frame.seq_);
+        if (!other_frame.cam_in_world_initial_guess_ || !cam_in_world_initial_guess_)
+            return std::nullopt;
+        double dt_seconds = static_cast<double>(seq_ - other_frame.seq_) * 1e-9;
+        return (*cam_in_world_initial_guess_ - *other_frame.cam_in_world_initial_guess_) /
+               dt_seconds;
+    };
+
+    void add_keypoints(const KeypointsCV& kpts);
+    void assign_descriptors(const cv::Mat& descriptors);
+
+    const KeypointsCV& keypoint() { return kpts_; };
+    const cv::Mat& descriptors() { return descriptors_; };
+
+    std::string to_string() const {
+        std::stringstream ss;
+        ss << "Frame " << id_ << ":\n";
+        ss << "\tseq: " << seq_ << "\n";
+        ss << "\tK: ";
+        if (K_) {
+            ss << "fx: " << K_->fx() << ", fy: " << K_->fy() << ", s: " << K_->skew()
+               << ", px: " << K_->px() << ", py: " << K_->py();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tkpts_.size(): " << kpts_.size();
+        ss << "\n\tframe_from_other_frames_.size(): " << frame_from_other_frames_.size();
+        ss << "\n\tworld_from_cam_groundtruth_: ";
+        if (world_from_cam_groundtruth_) {
+            ss << "\n" << world_from_cam_groundtruth_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tcam_in_world_initial_guess_: ";
+        if (cam_in_world_initial_guess_) {
+            ss << "\n" << cam_in_world_initial_guess_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tworld_from_cam_initial_guess_: ";
+        if (world_from_cam_initial_guess_) {
+            ss << "\n" << world_from_cam_initial_guess_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\ttranslation_covariance_in_cam_: ";
+        if (translation_covariance_in_cam_) {
+            ss << "\n" << translation_covariance_in_cam_->matrix();
+        } else {
+            ss << "N/A";
+        }
+        return ss.str();
+    }
+
+    FrameId id_;
+    size_t seq_;
+    cv::Mat undistorted_img_;
+    gtsam::Cal3_S2::shared_ptr K_;
+    KeypointsCV kpts_;
+    cv::Mat descriptors_;
+    std::unordered_map<FrameId, gtsam::Rot3>
+        frame_from_other_frames_;  // map of relative rotation for this_frame_from_frame_[FrameId]
+    std::optional<gtsam::Pose3> world_from_cam_groundtruth_;
+    std::optional<gtsam::Point3>
+        cam_in_world_initial_guess_;  // intended for data from sensors (GPS)
+    std::optional<gtsam::Point3> cam_in_world_interpolated_guess_;
+    std::optional<gtsam::Rot3> world_from_cam_initial_guess_;
+    std::optional<gtsam::Matrix3> translation_covariance_in_cam_;
+};
+using SharedFrame = std::shared_ptr<Frame>;
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.cc b/experimental/learn_descriptors/frontend.cc
new file mode 100644
index 000000000..32af22642
--- /dev/null
+++ b/experimental/learn_descriptors/frontend.cc
@@ -0,0 +1,525 @@
+#include "experimental/learn_descriptors/frontend.hh"
+
+#include <cmath>
+#include <cstddef>
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "common/geometry/camera.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/triangulation.h"
+#include "opencv2/opencv.hpp"
+
+namespace detail_sfm {
+
+std::optional<gtsam::Point3> attempt_triangulate(const std::vector<gtsam::Pose3> &cam_poses,
+                                                 const std::vector<gtsam::Point2> &cam_obs,
+                                                 gtsam::Cal3_S2::shared_ptr K,
+                                                 const double max_reproj_error = 2.0,
+                                                 const bool verbose = true) {
+    gtsam::Point3 p_lmk_in_world;
+    if (cam_poses.size() >= 2) {
+        try {
+            // Attempt triangulation using DLT (or the GTSAM provided method)
+            p_lmk_in_world = gtsam::triangulatePoint3(
+                cam_poses, K, gtsam::Point2Vector(cam_obs.begin(), cam_obs.end()));
+
+        } catch (const gtsam::TriangulationCheiralityException &e) {
+            // Handle the exception gracefully by logging and retaining the previous
+            // estimate.
+            if (verbose)
+                std::cerr << "attempt_triangulation failed. Likely cheirality exception: "
+                          << e.what() << ". Discarding involved keypoints." << std::endl;
+        }
+    }
+    // Optional: perform an explicit cheirality check
+    for (const auto &pose : cam_poses) {
+        // Transform point to the camera coordinate system.
+        // transformTo() converts a world point to the camera frame.
+        gtsam::Point3 p_cam_lmk = pose.transformTo(p_lmk_in_world);
+        if (p_cam_lmk.z() <= 0) {  // Check that the depth is positive
+            return std::nullopt;
+        }
+    }
+
+    // Cheirality & reprojection checks
+    for (size_t i = 0; i < cam_poses.size(); ++i) {
+        const auto &pose = cam_poses[i];
+        // Cheirality
+        gtsam::Point3 p_cam = pose.transformTo(p_lmk_in_world);
+        if (p_cam.z() <= 0) {
+            if (verbose) {
+                std::cerr << "[triangulate] point behind camera " << i << " (z=" << p_cam.z()
+                          << ")\n";
+            }
+            return std::nullopt;
+        }
+        // Reprojection error
+        if (max_reproj_error > 0) {
+            gtsam::PinholeCamera<gtsam::Cal3_S2> cam(pose, *K);
+            const auto reproj = cam.project(p_lmk_in_world);
+            const double err = (reproj - cam_obs[i]).norm();
+            if (err > max_reproj_error) {
+                if (verbose) {
+                    std::cerr << "[triangulate] reprojection error too large on view " << i << " ("
+                              << err << " px)\n";
+                }
+                return std::nullopt;
+            }
+        }
+    }
+
+    return p_lmk_in_world;
+}
+
+gtsam::Pose3 averagePoses(const std::vector<gtsam::Pose3> &poses, int maxIter = 10) {
+    if (poses.empty()) throw std::runtime_error("Empty pose vector");
+
+    gtsam::Pose3 mean = poses[0];
+
+    for (int iter = 0; iter < maxIter; ++iter) {
+        gtsam::Vector6 total = gtsam::Vector6::Zero();
+
+        for (const auto &pose : poses) {
+            gtsam::Pose3 delta = mean.between(pose);
+            total += gtsam::Pose3::Logmap(delta);
+        }
+
+        total /= poses.size();
+        mean = mean.compose(gtsam::Pose3::Expmap(total));
+    }
+
+    return mean;
+}
+
+gtsam::Point3 averagePoints(const std::vector<gtsam::Point3> &points) {
+    if (points.empty()) throw std::runtime_error("Empty point vector");
+    gtsam::Point3 sum(0, 0, 0);
+    for (const auto &pt : points) sum += pt;
+    return sum / points.size();
+}
+
+gtsam::Point3 get_variance(const std::vector<gtsam::Point3> &points) {
+    const gtsam::Point3 mean = averagePoints(points);
+    gtsam::Point3 var(0, 0, 0);
+    for (const gtsam::Point3 &pt : points) {
+        var += (mean - pt).array().square().matrix();
+    }
+    return var / points.size();
+}
+
+double rotation_error(const Eigen::Isometry3d &T_est, const Eigen::Isometry3d &T_gt) {
+    Eigen::Matrix3d R_err = T_gt.rotation().transpose() * T_est.rotation();
+
+    // 2. Compute trace and clamp to [-1,1] for numerical safety
+    double tr = R_err.trace();
+    double cos_theta = std::min(1.0, std::max(-1.0, (tr - 1.0) / 2.0));
+
+    // 3. Recover angle (in radians)
+    return std::acos(cos_theta);
+}
+}  // namespace detail_sfm
+
+namespace robot::experimental::learn_descriptors {
+
+Frontend::Frontend(FrontendParams params_) : params_(params_) {
+    switch (params_.extractor_type) {
+        case FrontendParams::ExtractorType::SIFT:
+            feature_extractor_ = cv::SIFT::create();
+            break;
+        case FrontendParams::ExtractorType::ORB:
+            feature_extractor_ = cv::ORB::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+    switch (params_.matcher_type) {
+        case FrontendParams::MatcherType::BRUTE_FORCE:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case FrontendParams::MatcherType::KNN:
+            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
+            break;
+        case FrontendParams::MatcherType::FLANN:
+            if (params_.extractor_type == FrontendParams::ExtractorType::ORB) {
+                throw std::invalid_argument("FLANN can not be used with ORB.");
+            }
+            descriptor_matcher_ = cv::FlannBasedMatcher::create();
+            break;
+        default:
+            // Error handling needed?
+            break;
+    }
+}
+
+void Frontend::populate_frames(bool verbose) {
+    shared_frames_.clear();
+    shared_frames_.shrink_to_fit();
+    shared_frames_.reserve(images_and_points_.size());
+    FrameId id = shared_frames_.size();
+    for (const ImageAndPoint &img_and_pt : images_and_points_) {
+        const std::shared_ptr<ImagePoint> &shared_img_pt = img_and_pt.shared_img_pt;
+        if (verbose) {
+            std::cout << shared_img_pt->to_string() << std::endl;
+        }
+        cv::Mat img_undistorted;
+        cv::fisheye::undistortImage(img_and_pt.image_distorted, img_undistorted,
+                                    shared_img_pt->K->k_mat(), shared_img_pt->K->d_mat(),
+                                    shared_img_pt->K->k_mat(), img_and_pt.image_distorted.size());
+        auto [kpts, descriptors] = extract_features(img_undistorted);
+        KeypointsCV kpts_cv;
+        for (const cv::KeyPoint &kpt : kpts) {
+            kpts_cv.push_back(kpt.pt);
+        }
+
+        gtsam::Cal3_S2::shared_ptr K =
+            boost::make_shared<gtsam::Cal3_S2>(shared_img_pt->K->fx, shared_img_pt->K->fy, 0,
+                                               shared_img_pt->K->cx, shared_img_pt->K->cy);
+
+        Frame frame(id, shared_img_pt->seq, img_undistorted, K, kpts_cv, descriptors);
+        const std::optional<Eigen::Isometry3d> maybe_world_from_cam_grnd_trth =
+            shared_img_pt->world_from_cam_ground_truth();
+        if (maybe_world_from_cam_grnd_trth) {
+            frame.world_from_cam_groundtruth_ =
+                gtsam::Pose3(maybe_world_from_cam_grnd_trth->matrix());
+        }
+        const std::optional<Eigen::Vector3d> maybe_cam_in_world = shared_img_pt->cam_in_world();
+        if (maybe_cam_in_world) {
+            frame.cam_in_world_initial_guess_ = *maybe_cam_in_world;
+        }
+        if (id == 0) frame.world_from_cam_initial_guess_ = gtsam::Rot3::Identity();
+        const std::optional<gtsam::Matrix3> maybe_translation_covariance_in_cam =
+            shared_img_pt->translation_covariance_in_cam();
+        if (maybe_translation_covariance_in_cam) {
+            frame.translation_covariance_in_cam_ = *maybe_translation_covariance_in_cam;
+        }
+        shared_frames_.push_back(std::make_shared<Frame>(frame));
+        id++;
+    }
+    interpolate_frames();
+}
+
+void Frontend::match_frames_and_build_tracks() {
+    if (params_.exhaustive) {
+        for (size_t i = 0; i < shared_frames_.size() - 1; i++) {
+            for (size_t j = i + 1; j < shared_frames_.size(); j++) {
+                std::vector<cv::DMatch> matches = compute_matches(shared_frames_[i]->descriptors(),
+                                                                  shared_frames_[j]->descriptors());
+                enforce_bijective_buffer_matches(matches);
+                if (matches.size() < 5) {
+                    continue;
+                }
+
+                std::vector<cv::KeyPoint> cv_kpts_1;
+                std::vector<cv::KeyPoint> cv_kpts_2;
+                for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) {
+                    cv::KeyPoint cv_kpt;
+                    cv_kpt.pt = kpt;
+                    cv_kpts_1.push_back(cv_kpt);
+                }
+                for (const KeypointCV &kpt : shared_frames_[j]->keypoint()) {
+                    cv::KeyPoint cv_kpt;
+                    cv_kpt.pt = kpt;
+                    cv_kpts_2.push_back(cv_kpt);
+                }
+                std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
+                    robot::geometry::estimate_cam0_from_cam1(
+                        cv_kpts_1, cv_kpts_2, matches,
+                        images_and_points_[i]
+                            .shared_img_pt->K
+                            ->k_mat());  // fine for now since all cameras have the same K,
+                                         // how to reconcile later though...?
+
+                if (!scale_cam0_from_cam1) {
+                    continue;
+                }
+
+                shared_frames_[i]->frame_from_other_frames_.emplace(
+                    j, gtsam::Rot3(scale_cam0_from_cam1->linear().matrix()));
+                for (const cv::DMatch match : matches) {
+                    const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx];
+                    const KeypointCV kpt_cam1 = shared_frames_[j]->keypoint()[match.trainIdx];
+
+                    auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0);
+                    if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
+                        const size_t id = lmk_id_map_.at(key);
+                        ROBOT_CHECK(id < feature_tracks_.size(),
+                                    "lmk_id_map_ id's shuold not exceed feature_tracks_ size!");
+                        feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0);
+                        feature_tracks_[id].obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1);
+                    } else {
+                        FeatureTrack feature_track(i, kpt_cam0);
+                        feature_track.obs_.emplace_back(shared_frames_[j]->id_, kpt_cam1);
+                        feature_tracks_.push_back(feature_track);
+                        lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0),
+                                            feature_tracks_.size() - 1);
+                    }
+                }
+            }
+        }
+    } else {  // successive only
+        for (size_t i = 0; i < shared_frames_.size() - 1; i++) {
+            std::vector<cv::DMatch> matches = compute_matches(shared_frames_[i]->descriptors(),
+                                                              shared_frames_[i + 1]->descriptors());
+            enforce_bijective_buffer_matches(matches);
+            if (matches.size() < 5) {
+                continue;
+            }
+
+            std::vector<cv::KeyPoint> cv_kpts_1;
+            std::vector<cv::KeyPoint> cv_kpts_2;
+            for (const KeypointCV &kpt : shared_frames_[i]->keypoint()) {
+                cv::KeyPoint cv_kpt;
+                cv_kpt.pt = kpt;
+                cv_kpts_1.push_back(cv_kpt);
+            }
+            for (const KeypointCV &kpt : shared_frames_[i + 1]->keypoint()) {
+                cv::KeyPoint cv_kpt;
+                cv_kpt.pt = kpt;
+                cv_kpts_2.push_back(cv_kpt);
+            }
+            std::optional<Eigen::Isometry3d> scale_cam0_from_cam1 =
+                robot::geometry::estimate_cam0_from_cam1(
+                    cv_kpts_1, cv_kpts_2, matches, images_and_points_[i].shared_img_pt->K->k_mat());
+            if (!scale_cam0_from_cam1) {
+                continue;
+            }
+            ROBOT_CHECK(shared_frames_[i]->world_from_cam_initial_guess_,
+                        "This rotation should be populated.");
+            shared_frames_[i + 1]->world_from_cam_initial_guess_.emplace(
+                shared_frames_[i]->world_from_cam_initial_guess_->matrix() *
+                scale_cam0_from_cam1->linear().matrix());
+
+            for (const cv::DMatch match : matches) {
+                const KeypointCV kpt_cam0 = shared_frames_[i]->keypoint()[match.queryIdx];
+                const KeypointCV kpt_cam1 = shared_frames_[i + 1]->keypoint()[match.trainIdx];
+
+                auto key = std::make_pair(shared_frames_[i]->id_, kpt_cam0);
+                if (lmk_id_map_.find(key) != lmk_id_map_.end()) {
+                    const size_t id = lmk_id_map_.at(key);
+                    ROBOT_CHECK(id < feature_tracks_.size(),
+                                "lmk_id_map_ id's shuold not exceed feature_tracks_ size!");
+                    feature_tracks_[id].obs_.emplace_back(shared_frames_[i]->id_, kpt_cam0);
+                    feature_tracks_[id].obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1);
+                } else {
+                    FeatureTrack feature_track(i, kpt_cam0);
+                    feature_track.obs_.emplace_back(shared_frames_[i + 1]->id_, kpt_cam1);
+                    feature_tracks_.push_back(feature_track);
+                    lmk_id_map_.emplace(std::make_pair(shared_frames_[i]->id_, kpt_cam0),
+                                        feature_tracks_.size() - 1);
+                }
+            }
+        }
+    }
+    std::cout << "done processing matches" << std::endl;
+}
+
+void Frontend::interpolate_frames() {
+    std::vector<Frame *> frames_with_guess;
+    for (SharedFrame &shared_frame : shared_frames_) {  // this assumes that frames are ordered by
+                                                        // seq (time), later on may have to
+        // think about this more if trying to run pipeline on unordered images or sets
+        // whose capture times are on different days
+        if (shared_frame->cam_in_world_initial_guess_) {
+            frames_with_guess.push_back(&(*shared_frame));
+        }
+    }
+    if (frames_with_guess.size() < 2) return;
+    size_t idx_guess = 0;
+    for (SharedFrame &shared_frame : shared_frames_) {
+        Eigen::Vector3d interpolated;
+
+        // advance to next guess window if needed
+        while (idx_guess + 1 < frames_with_guess.size() - 1 &&
+               shared_frame->seq_ > frames_with_guess[idx_guess + 1]->seq_) {
+            idx_guess++;
+        }
+
+        if (shared_frame->cam_in_world_initial_guess_) {
+            interpolated = *shared_frame->cam_in_world_initial_guess_;
+        } else {
+            Eigen::Vector3d v_a_to_b =
+                *frames_with_guess[idx_guess]->velocity_to(*frames_with_guess[idx_guess + 1]);
+            double dt = (shared_frame->seq_ < frames_with_guess[idx_guess]->seq_ ? -1.0 : 1.0) *
+                        1e-9 *
+                        (frames_with_guess[idx_guess]->seq_ > shared_frame->seq_
+                             ? frames_with_guess[idx_guess]->seq_ - shared_frame->seq_
+                             : shared_frame->seq_ - frames_with_guess[idx_guess]->seq_);
+            interpolated = *frames_with_guess[idx_guess]->cam_in_world_initial_guess_ +
+                           v_a_to_b * dt;  // add sign back to dt
+        }
+        shared_frame->cam_in_world_interpolated_guess_ = interpolated;
+    }
+}
+
+std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::extract_features(const cv::Mat &img) const {
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+    switch (params_.extractor_type) {
+        default:  // the opencv extractors have the same function signature
+            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
+            break;
+    }
+    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
+}
+
+std::vector<cv::DMatch> Frontend::compute_matches(const cv::Mat &descriptors1,
+                                                  const cv::Mat &descriptors2) const {
+    std::vector<cv::DMatch> matches;
+    switch (params_.matcher_type) {
+        case FrontendParams::MatcherType::BRUTE_FORCE:
+            compute_brute_matches(descriptors1, descriptors2, matches);
+            break;
+        case FrontendParams::MatcherType::KNN:
+            compute_KNN_matches(descriptors1, descriptors2, matches);
+            break;
+        case FrontendParams::MatcherType::FLANN:
+            compute_FLANN_matches(descriptors1, descriptors2, matches);
+            break;
+        default:
+            break;
+    }
+    std::sort(matches.begin(), matches.end());
+    return matches;
+}
+
+void Frontend::threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold) {
+    matches.erase(std::remove_if(matches.begin(), matches.end(),
+                                 [dist_threshhold](const cv::DMatch &match) {
+                                     return match.distance > dist_threshhold;
+                                 }),
+                  matches.end());
+}
+
+void Frontend::enforce_bijective_matches(std::vector<cv::DMatch> &matches) {
+    std::unordered_map<int, cv::DMatch> bestQueryMatch;
+    std::unordered_map<int, cv::DMatch> bestTrainMatch;
+
+    for (const auto &match : matches) {
+        int queryIdx = match.queryIdx;
+        int trainIdx = match.trainIdx;
+
+        if (bestQueryMatch.find(queryIdx) == bestQueryMatch.end() ||
+            match.distance < bestQueryMatch[queryIdx].distance) {
+            bestQueryMatch[queryIdx] = match;
+        }
+
+        if (bestTrainMatch.find(trainIdx) == bestTrainMatch.end() ||
+            match.distance < bestTrainMatch[trainIdx].distance) {
+            bestTrainMatch[trainIdx] = match;
+        }
+    }
+
+    matches.erase(std::remove_if(matches.begin(), matches.end(),
+                                 [&bestQueryMatch, &bestTrainMatch](const cv::DMatch &match) {
+                                     int queryIdx = match.queryIdx;
+                                     int trainIdx = match.trainIdx;
+
+                                     return bestQueryMatch[queryIdx].trainIdx != trainIdx ||
+                                            bestTrainMatch[trainIdx].queryIdx != queryIdx;
+                                 }),
+                  matches.end());
+}
+
+void Frontend::enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches) {
+    // Store best and second-best matches per query_idx
+    std::unordered_map<int, std::pair<cv::DMatch, float>> best_two_query_matches;
+    for (const auto &match : matches) {
+        int query_idx = match.queryIdx;
+        float dist = match.distance;
+
+        if (!best_two_query_matches.count(query_idx)) {
+            best_two_query_matches[query_idx] = {match, std::numeric_limits<float>::max()};
+        } else {
+            auto &[best_match, second_best_dist] = best_two_query_matches[query_idx];
+            if (dist < best_match.distance) {
+                second_best_dist = best_match.distance;
+                best_match = match;
+            } else if (dist < second_best_dist) {
+                second_best_dist = dist;
+            }
+        }
+    }
+
+    // Keep matches where best < 0.5 * second_best
+    std::vector<cv::DMatch> filtered;
+    for (const auto &[query_idx, pair] : best_two_query_matches) {
+        const auto &[best_match, second_best_dist] = pair;
+        if (best_match.distance < 0.5f * second_best_dist) {
+            filtered.push_back(best_match);
+        }
+    }
+
+    // Enforce bijection: each train_idx should also be best for only one query_idx
+    std::unordered_map<int, cv::DMatch> best_train_match;
+    for (const auto &match : filtered) {
+        int train_idx = match.trainIdx;
+        if (!best_train_match.count(train_idx) ||
+            match.distance < best_train_match[train_idx].distance) {
+            best_train_match[train_idx] = match;
+        }
+    }
+
+    matches.clear();
+    for (const auto &[train_idx, match] : best_train_match) {
+        matches.push_back(match);
+    }
+}
+
+bool Frontend::compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                     std::vector<cv::DMatch> &matches_out) const {
+    if (params_.matcher_type != FrontendParams::MatcherType::BRUTE_FORCE) {
+        return false;
+    }
+    matches_out.clear();
+    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
+    return true;
+}
+
+bool Frontend::compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                   std::vector<cv::DMatch> &matches_out) const {
+    if (params_.matcher_type != FrontendParams::MatcherType::KNN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    // Lowe's Ratio Test
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+
+bool Frontend::compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                                     std::vector<cv::DMatch> &matches_out) const {
+    if (params_.matcher_type != FrontendParams::MatcherType::FLANN) {
+        return false;
+    }
+    std::vector<std::vector<cv::DMatch>> knn_matches;
+    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
+    const float ratio_thresh = 0.7f;
+    matches_out.clear();
+    for (size_t i = 0; i < knn_matches.size(); i++) {
+        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
+            matches_out.push_back(knn_matches[i][0]);
+        }
+    }
+    return true;
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend.hh b/experimental/learn_descriptors/frontend.hh
new file mode 100644
index 000000000..e381d8089
--- /dev/null
+++ b/experimental/learn_descriptors/frontend.hh
@@ -0,0 +1,86 @@
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include "Eigen/Core"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+struct FrontendParams {
+    enum class ExtractorType { SIFT, ORB };
+    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
+    ExtractorType extractor_type = ExtractorType::SIFT;
+    MatcherType matcher_type = MatcherType::KNN;
+    bool exhaustive = true;
+    bool incremental = false;  // a.k.a. images are successive
+};
+struct ImageAndPoint {
+    cv::Mat image_distorted;
+    std::shared_ptr<ImagePoint> shared_img_pt;
+};
+class Frontend {
+   public:
+    static constexpr char symbol_pose_char = 'x';
+    static constexpr char symbol_lmk_char = 'l';
+    explicit Frontend(FrontendParams params);
+    ~Frontend(){};
+
+    void populate_frames(bool verbose = false);
+    void match_frames_and_build_tracks();
+    void interpolate_frames();
+
+    void add_image(const ImageAndPoint &img_and_pt) { images_and_points_.push_back(img_and_pt); };
+    void add_images(const std::vector<ImageAndPoint> &img_and_pts) {
+        for (const ImageAndPoint &img_and_pt : img_and_pts) {
+            images_and_points_.push_back(img_and_pt);
+        }
+    };
+
+    const FrontendParams::ExtractorType &extractor_type() const { return params_.extractor_type; };
+    const FrontendParams::MatcherType &matcher_type() const { return params_.matcher_type; };
+    const FeatureTracks &feature_tracks() const { return feature_tracks_; };
+    const FrameLandmarkIdMap &frame_landmark_id_map() const { return lmk_id_map_; };
+    std::vector<SharedFrame> &frames() { return shared_frames_; };
+
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> extract_features(const cv::Mat &img) const;
+    std::vector<cv::DMatch> compute_matches(const cv::Mat &descriptors1,
+                                            const cv::Mat &descriptors2) const;
+    static void threshold_matches(std::vector<cv::DMatch> &matches, float dist_threshhold);
+    static void enforce_bijective_matches(std::vector<cv::DMatch> &matches);
+    static void enforce_bijective_buffer_matches(std::vector<cv::DMatch> &matches);
+    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
+                               cv::Mat &img_keypoints_out) {
+        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
+                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+    }
+    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
+                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
+                             std::vector<cv::DMatch> matches, cv::Mat &img_matches_out) {
+        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
+    }
+
+   private:
+    bool compute_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const;
+    bool compute_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                             std::vector<cv::DMatch> &matches_out) const;
+    bool compute_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
+                               std::vector<cv::DMatch> &matches_out) const;
+    FrontendParams params_;
+
+    cv::Ptr<cv::Feature2D> feature_extractor_;
+    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
+
+    std::vector<ImageAndPoint> images_and_points_;
+    FeatureTracks feature_tracks_;
+    FrameLandmarkIdMap lmk_id_map_;
+    std::vector<SharedFrame> shared_frames_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/frontend_definitions.hh b/experimental/learn_descriptors/frontend_definitions.hh
new file mode 100644
index 000000000..6204af219
--- /dev/null
+++ b/experimental/learn_descriptors/frontend_definitions.hh
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <ostream>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "gtsam/geometry/Point3.h"
+#include "opencv2/opencv.hpp"
+
+namespace std {
+template <>
+struct hash<std::pair<FrameId, KeypointCV>> {
+    size_t operator()(const std::pair<FrameId, KeypointCV> &p) const {
+        size_t h1 = std::hash<FrameId>()(p.first);
+        size_t h2 = std::hash<float>()(p.second.x) ^ (std::hash<float>()(p.second.y) << 1);
+        return h1 ^ (h2 << 1);  // combine hashes
+    }
+};
+}  // namespace std
+
+namespace robot::experimental::learn_descriptors {
+class FeatureTrack {
+   public:
+    std::vector<std::pair<FrameId, KeypointCV>> obs_;
+
+    bool in_ba_graph_;
+
+    FeatureTrack(FrameId frame_id, const KeypointCV &px) { obs_.emplace_back(frame_id, px); }
+
+    void print() const {
+        std::cout << "Feature track with cameras: ";
+        for (size_t i = 0; i < obs_.size(); i++) {
+            std::cout << " " << obs_[i].first << " ";
+        }
+        std::cout << std::endl;
+    }
+};
+
+using FeatureTracks = std::vector<FeatureTrack>;  // each idx is the FeatureTracks's LandmarkId
+using FrameLandmarkIdMap = std::unordered_map<std::pair<FrameId, KeypointCV>, LandmarkId>;
+}  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/frontend_test.cc b/experimental/learn_descriptors/frontend_test.cc
new file mode 100644
index 000000000..c5046fd1f
--- /dev/null
+++ b/experimental/learn_descriptors/frontend_test.cc
@@ -0,0 +1,198 @@
+#include "experimental/learn_descriptors/frontend.hh"
+
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+#include "Eigen/Core"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "gtest/gtest.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+TEST(FrontendTest, pipeline_sweep) {
+    const size_t width = 640;
+    const size_t height = 480;
+
+    const size_t pixel_shift_x = 20;
+    const size_t PIXEL_COMP_TOL = 20;
+
+    cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3);
+    cv::Mat image_2;
+
+    cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
+    cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
+    float com_x = 0.0f, com_y = 0.0f;
+    for (const cv::Point &rect_point : rect_points) {
+        com_x += rect_point.x;
+        com_y += rect_point.y;
+    }
+    com_x *= 0.25;
+    com_y *= 0.25;
+    cv::Point rotation_center(com_x, com_y);
+    cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0);
+
+    const size_t line_spacing = 100;
+    for (size_t i = 0; i <= width / line_spacing; i++) {
+        size_t x = i * line_spacing + (width % line_spacing) / 2;
+        size_t b = i * 255.0 / (width / line_spacing);
+        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
+        cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2);
+    }
+    for (size_t i = 0; i <= height / line_spacing; i++) {
+        size_t y = i * line_spacing + (height % line_spacing) / 2;
+        size_t b = i * 255.0 / (width / line_spacing);
+        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
+        cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2);
+    }
+
+    cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
+    cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
+
+    // cv::Mat img_test_disp;
+    // cv::hconcat(image_1, image_2, img_test_disp);
+    // cv::imshow("Test", img_test_disp);
+    // cv::waitKey(1000);
+
+    FrontendParams::ExtractorType extractor_types[2] = {FrontendParams::ExtractorType::SIFT,
+                                                        FrontendParams::ExtractorType::ORB};
+    FrontendParams::MatcherType matcher_types[3] = {FrontendParams::MatcherType::BRUTE_FORCE,
+                                                    FrontendParams::MatcherType::FLANN,
+                                                    FrontendParams::MatcherType::KNN};
+
+    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
+                          true, false};
+    Frontend frontend(params);
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
+    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
+    std::vector<cv::DMatch> matches;
+    cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
+        img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
+    // cv::Mat img_display_test;
+    for (FrontendParams::ExtractorType extractor_type : extractor_types) {
+        for (FrontendParams::MatcherType matcher_type : matcher_types) {
+            printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+                   static_cast<int>(matcher_type));
+            FrontendParams params{extractor_type, matcher_type, true, false};
+            try {
+                frontend = Frontend(params);
+            } catch (const std::invalid_argument &e) {
+                ROBOT_CHECK(std::string(e.what()) ==
+                            "FLANN can not be used with ORB.");  // very jank...
+                continue;
+            }
+            keypoints_descriptors_pair_1 = frontend.extract_features(image_1);
+            keypoints_descriptors_pair_2 = frontend.extract_features(image_2);
+            matches = frontend.compute_matches(keypoints_descriptors_pair_1.second,
+                                               keypoints_descriptors_pair_2.second);
+            frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
+                                    img_keypoints_out_1);
+            frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
+                                    img_keypoints_out_2);
+            frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
+                                  keypoints_descriptors_pair_2.first, matches, img_matches_out);
+            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
+            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
+            // std::stringstream text;
+            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
+            //      << static_cast<int>(matcher_type);
+            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
+            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
+            // cv::imshow("Keypoints and Matches Output.", img_display_test);
+            // std::cout << "Hold spacebar to pause." << std::endl;
+            while (cv::waitKey(1000) == 32) {
+            }
+            printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
+                   static_cast<int>(matcher_type));
+            if (extractor_type != FrontendParams::ExtractorType::ORB) {  // don't check ORB for now
+                for (const cv::DMatch match : matches) {
+                    EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
+                                    keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
+                                pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL);
+                    EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y -
+                                    keypoints_descriptors_pair_1.first[match.queryIdx].pt.y,
+                                0, PIXEL_COMP_TOL);
+                }
+            }
+        }
+    }
+}
+
+TEST(FrontendTest, interpolate_frames) {
+    constexpr size_t width = 800;
+    constexpr size_t height = 400;
+    constexpr double fx = 500.0;
+    constexpr double fy = fx;
+    constexpr double cx = static_cast<double>(width) / 2.0;
+    constexpr double cy = static_cast<double>(height) / 2.0;
+    // gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+    std::shared_ptr<CameraCalibrationFisheye> shared_K =
+        std::make_shared<CameraCalibrationFisheye>(fx, fy, cx, cy, 1, 1, 1, 1);
+    cv::Mat white_image(height, width, CV_8UC3, cv::Scalar(255, 255, 255));
+
+    FrontendParams params{FrontendParams::ExtractorType::SIFT, FrontendParams::MatcherType::KNN,
+                          true, false};
+    Frontend frontend(params);
+
+    struct VelocityPoint {
+        double time;               // time in seconds
+        Eigen::Vector3d velocity;  // m/s
+    };
+    std::vector<VelocityPoint> velocity_points{
+        // basically a piecewise constant function
+        VelocityPoint{1.0, Eigen::Vector3d(1, 0, 0)},  // go in x dir from seconds 0 - 1
+        VelocityPoint{2.0, Eigen::Vector3d(0, 1, 0)},  // go in y dir from seconds 1 - 2
+        VelocityPoint{
+            3.0, Eigen::Vector3d(1, 1, 0)},  // go diagonal in first quadrant from seconds 2 - 3
+    };
+    size_t idx_vel_pt = 0;
+    const Eigen::Vector3d pt0_in_world = Eigen::Vector3d::Zero();
+    std::vector<Eigen::Vector3d> pts_in_world{pt0_in_world};
+    double time = 0.0;
+    std::vector<std::shared_ptr<ImagePoint>> img_pts;
+    ImagePoint img_pt_first;
+    img_pt_first.id = 0;
+    img_pt_first.seq = static_cast<size_t>(time * 1e9);
+    img_pt_first.K = shared_K;
+    img_pt_first.set_cam_in_world(pt0_in_world);
+    img_pts.push_back(std::make_shared<ImagePoint>(img_pt_first));
+    frontend.add_image(ImageAndPoint{white_image, img_pts.back()});
+    constexpr double sample_hz = 6.0;
+    constexpr size_t num_samples_skip = 2;
+    ROBOT_CHECK(static_cast<size_t>(sample_hz) % (num_samples_skip + 1) ==
+                0);  // needed so that the interpolation isn't lossy
+    constexpr double dt = 1 / sample_hz;
+    time += dt;
+    while (time < velocity_points.back().time) {
+        while (idx_vel_pt < velocity_points.size() && time > velocity_points[idx_vel_pt].time) {
+            idx_vel_pt++;
+        }
+        pts_in_world.emplace_back(pts_in_world.back() + dt * velocity_points[idx_vel_pt].velocity);
+        ImagePoint img_pt;
+        img_pt.id = img_pts.size();
+        img_pt.seq = static_cast<size_t>(time * 1e9);
+        img_pt.K = shared_K;
+        if (img_pts.size() % (num_samples_skip + 1) == 0) {
+            img_pt.set_cam_in_world(pts_in_world.back());
+        }
+        img_pts.emplace_back(std::make_shared<ImagePoint>(img_pt));
+        frontend.add_image(ImageAndPoint{white_image, img_pts.back()});
+        time += dt;
+    }
+    frontend.populate_frames();
+    frontend.interpolate_frames();
+
+    const std::vector<SharedFrame> &shared_frames = frontend.frames();
+    constexpr double TOL = 1e-5;
+    for (size_t i = 0; i < pts_in_world.size(); i++) {
+        const Frame &frame = *shared_frames[i];
+        ROBOT_CHECK(frame.cam_in_world_interpolated_guess_);
+        ROBOT_CHECK(pts_in_world[i].isApprox(*frame.cam_in_world_interpolated_guess_, TOL), i,
+                    pts_in_world[i], *frame.cam_in_world_interpolated_guess_);
+    }
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/gps_data.hh b/experimental/learn_descriptors/gps_data.hh
new file mode 100644
index 000000000..52d900be1
--- /dev/null
+++ b/experimental/learn_descriptors/gps_data.hh
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <optional>
+#include <sstream>
+#include <string>
+
+#include "Eigen/Core"
+
+namespace robot::experimental::learn_descriptors {
+struct GPSData {
+    struct Uncertainty {
+        double sigma_latitude_m;
+        double sigma_longitude_m;
+        double sigma_altitude_m;
+        double orientation_deg;
+        double rms_range_error_m;
+
+        // diagonal latitude, longitude, altitude covariance in meters squared
+        Eigen::Matrix3d to_LLA_covariance() const {
+            Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
+            cov(0, 0) = sigma_latitude_m * sigma_latitude_m;
+            cov(1, 1) = sigma_longitude_m * sigma_longitude_m;
+            cov(2, 2) = sigma_altitude_m * sigma_altitude_m;
+            return cov;
+        }
+
+        // Convert lat/lon covariance to ENU
+        Eigen::Matrix3d to_ENU_covariance() const {
+            static constexpr double DEG2RAD = M_PI / 180.0;
+            static constexpr double ORIENT_OFFSET_DEG = 90.0;  // CW‑from‑North → CCW‑from‑East
+
+            // 1) build the 2×2 variance ellipse in metres²
+            Eigen::Matrix2d D;
+            D << sigma_latitude_m * sigma_latitude_m, 0.0, 0.0,
+                sigma_longitude_m * sigma_longitude_m;
+
+            // 2) rotate that into ENU axes
+            double theta = (ORIENT_OFFSET_DEG - orientation_deg) * DEG2RAD;
+            Eigen::Matrix2d R;
+            R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
+
+            Eigen::Matrix2d horz = R * D * R.transpose();
+
+            // 3) assemble full 3×3 ENU covariance
+            Eigen::Matrix3d enu = Eigen::Matrix3d::Zero();
+            enu.topLeftCorner<2, 2>() = horz;
+            enu(2, 2) = sigma_altitude_m * sigma_altitude_m;
+            return enu;
+        }
+
+        std::string to_string() const {
+            std::stringstream ss;
+            ss << "Uncertainty:";
+            ss << "\n\tsigma_lat_deg: " << sigma_latitude_m;
+            ss << "\n\tsigma_lon_deg: " << sigma_longitude_m;
+            ss << "\n\tsigma_altitude_m: " << sigma_altitude_m;
+            ss << "\n\torientation_deg: " << orientation_deg;
+            ss << "\n\trms_range_error_m: " << rms_range_error_m;
+            return ss.str();
+        }
+    };
+    size_t seq;  // time in nanoseconds, may differ from image capturetime
+    double latitude;
+    double longitude;
+    std::optional<double> altitude;  // meters above sea level
+    std::optional<Uncertainty> uncertainty;
+    std::string to_string() const {
+        std::stringstream ss;
+        ss << "GPS Data: ";
+        ss << "\n\tseq: " << seq;
+        ss << "\n\tlla: " << latitude << ", " << longitude << ", "
+           << (altitude ? std::to_string(*altitude) : std::string("N/A"));
+        ss << "\n" << (uncertainty ? uncertainty->to_string() : "Uncertainty: N/A");
+        return ss.str();
+    }
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/gtsam_test.cc b/experimental/learn_descriptors/gtsam_test.cc
index 53ee67b49..019d0c7c4 100644
--- a/experimental/learn_descriptors/gtsam_test.cc
+++ b/experimental/learn_descriptors/gtsam_test.cc
@@ -1,13 +1,18 @@
+
 #include <iostream>
 #include <vector>
 
 #include "Eigen/Core"
 #include "gtest/gtest.h"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
 #include "gtsam/geometry/Point3.h"
 #include "gtsam/geometry/Pose3.h"
+#include "gtsam/geometry/Rot3.h"
 #include "gtsam/inference/Symbol.h"
 #include "gtsam/nonlinear/LevenbergMarquardtOptimizer.h"
 #include "gtsam/nonlinear/Values.h"
+#include "gtsam/slam/BetweenFactor.h"
 #include "gtsam/slam/GeneralSFMFactor.h"
 #include "gtsam/slam/PriorFactor.h"
 #include "gtsam/slam/ProjectionFactor.h"
@@ -87,7 +92,91 @@ TEST(GtsamTesting, gtsam_simple_cube) {
     gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
     gtsam::Values result = optimizer.optimize();
 
-    // result.print("Optimized Results:\n");
+    result.print("Optimized Results:\n");
+
+    constexpr double TOL = 1e-3;
+    for (size_t i = 0; i < poses.size(); i++) {
+        gtsam::Pose3 result_pose = result.at<gtsam::Pose3>(gtsam::Symbol('x', i));
+        EXPECT_NEAR(poses[i].translation()[0], result_pose.translation()[0], TOL);
+        EXPECT_NEAR(poses[i].translation()[1], result_pose.translation()[1], TOL);
+        EXPECT_NEAR(poses[i].translation()[2], result_pose.translation()[2], TOL);
+        EXPECT_TRUE(poses[i].rotation().matrix().isApprox(result_pose.rotation().matrix(), TOL));
+    }
+    for (size_t i = 0; i < cube_W.size(); i++) {
+        gtsam::Point3 result_point = result.at<gtsam::Point3>(gtsam::Symbol('L', i));
+        EXPECT_NEAR(cube_W[i][0], result_point[0], TOL);
+        EXPECT_NEAR(cube_W[i][1], result_point[1], TOL);
+        EXPECT_NEAR(cube_W[i][2], result_point[2], TOL);
+    }
+}
+TEST(GtsamTesting, gtsam_simple_cube_2) {
+    std::vector<gtsam::Point3> cube_W;
+    float cube_size = 1.0f;
+    cube_W.push_back(gtsam::Point3(0, 0, 0));
+    cube_W.push_back(gtsam::Point3(cube_size, 0, 0));
+    cube_W.push_back(gtsam::Point3(cube_size, cube_size, 0));
+    cube_W.push_back(gtsam::Point3(0, cube_size, 0));
+    cube_W.push_back(gtsam::Point3(0, 0, cube_size));
+    cube_W.push_back(gtsam::Point3(cube_size, 0, cube_size));
+    cube_W.push_back(gtsam::Point3(cube_size, cube_size, cube_size));
+    cube_W.push_back(gtsam::Point3(0, cube_size, cube_size));
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = img_width / 2.0;
+    const double cy = img_height / 2.0;
+
+    gtsam::Values initial;
+    gtsam::NonlinearFactorGraph graph;
+
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+    initial.insert(gtsam::Symbol('K', 0), *K);
+
+    auto poseNoise = gtsam::noiseModel::Diagonal::Sigmas(gtsam::Vector6::Constant(0.1));
+    auto measurementNoise = gtsam::noiseModel::Isotropic::Sigma(2, 1.0);
+    std::vector<gtsam::Pose3> poses;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras;
+
+    Eigen::Matrix3d rotation0(
+        Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+        Eigen::AngleAxis(-M_PI / 2, Eigen::Vector3d(1, 0, 0)).toRotationMatrix());
+    gtsam::Pose3 pose0(gtsam::Rot3(rotation0), gtsam::Point3(4, 0, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera0(pose0, *K);
+    graph.emplace_shared<gtsam::PriorFactor<gtsam::Pose3>>(gtsam::Symbol('x', 0), pose0, poseNoise);
+    poses.push_back(pose0);
+    cameras.push_back(camera0);
+
+    gtsam::Pose3 pose1(
+        gtsam::Rot3(Eigen::AngleAxis(M_PI / 2, Eigen::Vector3d(0, 0, 1)).toRotationMatrix() *
+                    rotation0),
+        gtsam::Point3(0, 4, 0));
+    gtsam::PinholeCamera<gtsam::Cal3_S2> camera1(pose1, *K);
+    graph.emplace_shared<gtsam::BetweenFactor<gtsam::Pose3>>(
+        gtsam::Symbol('x', 0), gtsam::Symbol('x', 1), pose0.between(pose1), poseNoise);
+    poses.push_back(pose1);
+    cameras.push_back(camera1);
+
+    for (size_t i = 0; i < poses.size(); i++) {
+        initial.insert(gtsam::Symbol('x', i), poses[i]);
+    }
+    for (size_t i = 0; i < cube_W.size(); i++) {
+        initial.insert(gtsam::Symbol('L', i), gtsam::Point3(0, 0, 0));
+        for (size_t j = 0; j < poses.size(); j++) {
+            gtsam::Point2 pixel_p_cube_C = cameras[j].project(cube_W[i]);
+            if (GtsamTestHelper::pixel_in_range(pixel_p_cube_C, img_width, img_height)) {
+                graph.emplace_shared<gtsam::GeneralSFMFactor2<gtsam::Cal3_S2>>(
+                    pixel_p_cube_C, measurementNoise, gtsam::Symbol('x', j), gtsam::Symbol('L', i),
+                    gtsam::Symbol('K', 0));
+            }
+        }
+    }
+
+    gtsam::LevenbergMarquardtOptimizer optimizer(graph, initial);
+    gtsam::Values result = optimizer.optimize();
+
+    result.print("Optimized Results:\n");
 
     constexpr double TOL = 1e-3;
     for (size_t i = 0; i < poses.size(); i++) {
diff --git a/experimental/learn_descriptors/image_point.hh b/experimental/learn_descriptors/image_point.hh
index c46708d17..6e9273eed 100644
--- a/experimental/learn_descriptors/image_point.hh
+++ b/experimental/learn_descriptors/image_point.hh
@@ -2,132 +2,72 @@
 
 #include <cstddef>
 #include <filesystem>
-#include <opencv2/opencv.hpp>
+#include <memory>
 #include <optional>
 #include <sstream>
 #include <string>
 
 #include "Eigen/Core"
 #include "Eigen/Geometry"
+#include "camera_calibration.hh"
 #include "common/liegroups/se3.hh"
-#include "common/time/robot_time.hh"
+#include "experimental/learn_descriptors/camera_calibration.hh"
 
 namespace robot::experimental::learn_descriptors {
 struct ImagePoint {
-    struct GPSData {
-        struct Uncertainty {
-            double sigma_lat_mitude;
-            double sigma_longitude;
-            double sigma_altitude;
-            double orientation_deg;
-            double rms_range_error_m;
+    virtual ~ImagePoint() = default;
 
-            // diagonal latitude, longitude, altitude covariance in meters squared
-            Eigen::Matrix3d to_LLA_covariance() const {
-                Eigen::Matrix3d cov = Eigen::Matrix3d::Zero();
-                cov(0, 0) = sigma_lat_mitude * sigma_lat_mitude;
-                cov(1, 1) = sigma_longitude * sigma_longitude;
-                cov(2, 2) = sigma_altitude * sigma_altitude;
-                return cov;
-            }
-
-            // Convert lat/lon covariance to ENU at given latitude
-            Eigen::Matrix3d to_ENU_covariance(double lat_deg) const {
-                double lat_rad = lat_deg * M_PI / 180.0;
-
-                // meters per degree scaling
-                double kLat =
-                    111132.92 - 559.82 * std::cos(2 * lat_rad) + 1.175 * std::cos(4 * lat_rad);
-                double kLon = 111412.84 * std::cos(lat_rad) - 93.5 * std::cos(3 * lat_rad);
-
-                Eigen::Matrix3d lla_cov = to_LLA_covariance();
-
-                // rotate lat/lon covariance (upper-left 2x2) using orientation
-                double theta = orientation_deg * M_PI / 180.0;
-                Eigen::Matrix2d R;
-                R << std::cos(theta), -std::sin(theta), std::sin(theta), std::cos(theta);
-
-                Eigen::Matrix2d D = lla_cov.topLeftCorner<2, 2>();
-                Eigen::Matrix2d rotated_latlon_cov = R * D * R.transpose();
-
-                // apply linear scaling to convert degrees to meters in EN
-                Eigen::Matrix2d J;
-                J << kLat, 0, 0, kLon;
-
-                Eigen::Matrix2d en_cov = J * rotated_latlon_cov * J.transpose();
-
-                Eigen::Matrix3d enu_cov = Eigen::Matrix3d::Zero();
-                enu_cov.topLeftCorner<2, 2>() = en_cov;
-                enu_cov(2, 2) = lla_cov(2, 2);  // alt variance unchanged
-
-                return enu_cov;
-            }
-        };
-        size_t seq;  // time in nanoseconds, may differ from image capturetime
-        double latitude;
-        double longitude;
-        std::optional<double> altitude;  // meters above sea level
-        std::optional<Uncertainty> uncertainty;
-    };
     size_t id;   // idx for DB
-    size_t seq;  // time in nanoseconds of image capture (also the name of image)
-    std::optional<liegroups::SE3>
-        AS_w_from_gnss_cam;  // globally opimized arbitrary scale visual world (gnss + VIO + loop
-                             // closure + etc.) from cam
-    std::optional<liegroups::SE3> AS_w_from_vio_cam;  // arbitrary scale world from vio result cam
-    std::optional<GPSData> gps_gcs;  // raw gps measurement in gcs (global cordinate system)
-
-    const std::string to_string() const {
-        auto se3_to_str = [](const liegroups::SE3& se3) {
-            const Eigen::Vector3d& t = se3.translation();
-            const Eigen::Quaterniond& r = se3.so3().unit_quaternion();
+    size_t seq;  // time in nanoseconds of image capture (also could be name of image; this is also
+                 // time since unix epoch)
+    std::shared_ptr<CameraCalibrationFisheye> K;
+    void set_cam_in_world(const Eigen::Vector3d& cam_in_world) { cam_in_world_ = cam_in_world; };
+    virtual std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const {
+        return std::nullopt;
+    };
+    virtual std::optional<Eigen::Vector3d> cam_in_world() const { return cam_in_world_; };
+    virtual std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const {
+        return std::nullopt;
+    };
+    virtual std::string to_string() const {
+        auto vec3d_to_str = [](const Eigen::Vector3d& vec3d) {
             std::stringstream ss;
-            ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n";
+            ss << "[" << vec3d.x() << ", " << vec3d.y() << ", " << vec3d.z() << "]";
+            return ss.str();
+        };
+        auto isom3d_to_str = [&vec3d_to_str](const Eigen::Isometry3d& isom3d) {
+            const Eigen::Vector3d& t = isom3d.translation();
+            const Eigen::Quaterniond r(isom3d.rotation());
+            std::stringstream ss;
+            ss << "\t\tt: " << vec3d_to_str(t) << "\n";
             ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]";
             return ss.str();
         };
         std::stringstream ss;
         ss << "Image Point " << id << ":\n";
-        ss << "\tseq: " << seq << "\n";
-        ss << "\tAS_w_from_gnss_cam: ";
-        if (AS_w_from_gnss_cam) {
-            ss << "\n" << se3_to_str(*AS_w_from_gnss_cam);
+        ss << "\tworld_from_cam_ground_truth: ";
+        if (world_from_cam_ground_truth()) {
+            ss << "\n" << isom3d_to_str(*world_from_cam_ground_truth());
         } else {
             ss << "N/A";
         }
-        ss << "\n\tAS_w_from_vio_cam: ";
-        if (AS_w_from_vio_cam) {
-            ss << "\n" << se3_to_str(*AS_w_from_vio_cam);
+        ss << "\n\tcam_in_world: ";
+        if (cam_in_world()) {
+            ss << vec3d_to_str(*cam_in_world());
         } else {
             ss << "N/A";
         }
-        ss << "\n\tgps_gcs: ";
-        if (gps_gcs) {
-            ss << "\n\t\tseq: " << gps_gcs->seq;
-            ss << "\n\t\t" << gps_gcs->latitude << "\t" << gps_gcs->longitude << "\t";
-            if (gps_gcs->altitude) {
-                ss << *(gps_gcs->altitude);
-            } else {
-                ss << "alt N/A";
-            }
-            ss << "\n\t\tsigma: ";
-            if (gps_gcs->uncertainty) {
-                ss << gps_gcs->uncertainty->sigma_lat_mitude << "\t"
-                   << gps_gcs->uncertainty->sigma_longitude << "\t"
-                   << gps_gcs->uncertainty->sigma_altitude;
-            } else {
-                ss << "N/A";
-            }
+        ss << "\n\ttranslation_covariance_in_cam: ";
+        if (translation_covariance_in_cam()) {
+            ss << *translation_covariance_in_cam();
         } else {
             ss << "N/A";
         }
         return ss.str();
     }
 
-    cv::Mat load_image(const std::filesystem::path& img_dir) const {
-        const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png"));
-        return cv::imread(path);
-    }
+   private:
+    std::optional<Eigen::Vector3d> cam_in_world_;
 };
-typedef std::vector<ImagePoint> ImagePointVector;
+using SharedImagePoint = std::shared_ptr<ImagePoint>;
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point_four_seasons.cc b/experimental/learn_descriptors/image_point_four_seasons.cc
new file mode 100644
index 000000000..7c2829e94
--- /dev/null
+++ b/experimental/learn_descriptors/image_point_four_seasons.cc
@@ -0,0 +1,53 @@
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
+
+#include <optional>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/gps/frame_translation.hh"
+#include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+
+namespace robot::experimental::learn_descriptors {
+ImagePointFourSeasons::~ImagePointFourSeasons() = default;
+
+std::optional<Eigen::Isometry3d> ImagePointFourSeasons::world_from_cam_ground_truth() const {
+    if (!AS_w_from_gnss_cam) {
+        return std::nullopt;
+    }
+    Eigen::Matrix4d scale_mat_reference = Eigen::Matrix4d::Identity();
+    scale_mat_reference(0, 0) = scale_mat_reference(1, 1) = scale_mat_reference(2, 2) =
+        shared_static_transforms->gnss_scale;
+    Eigen::Isometry3d world_from_cam_ground_truth(
+        (shared_static_transforms->S_from_AS.matrix() * scale_mat_reference *
+         AS_w_from_gnss_cam->matrix())
+            .matrix());
+    return world_from_cam_ground_truth;
+}
+
+std::optional<Eigen::Vector3d> ImagePointFourSeasons::cam_in_world() const {
+    if (!gps_gcs) return std::nullopt;
+    const Eigen::Vector3d gps_in_ECEF(gps::ecef_from_lla(
+        Eigen::Vector3d(gps_gcs->latitude, gps_gcs->longitude, *gps_gcs->altitude)));
+    const Eigen::Vector3d gps_in_world(shared_static_transforms->w_from_gpsw *
+                                       shared_static_transforms->e_from_gpsw.inverse() *
+                                       gps_in_ECEF);
+    const Eigen::Isometry3d cam_from_gps(
+        (shared_static_transforms->cam_from_imu * shared_static_transforms->gps_from_imu.inverse())
+            .matrix());
+    const Eigen::Vector3d cam_in_world(gps_in_world + cam_from_gps.translation());
+    return cam_in_world;
+}
+
+std::optional<Eigen::Matrix3d> ImagePointFourSeasons::translation_covariance_in_cam() const {
+    return gps_covariance_in_world();
+}
+
+std::optional<Eigen::Matrix3d> ImagePointFourSeasons::gps_covariance_in_world() const {
+    if (!gps_gcs || !gps_gcs->uncertainty) return std::nullopt;
+    const Eigen::Matrix3d ENU_covariance(gps_gcs->uncertainty->to_ENU_covariance());
+    const Eigen::Matrix3d w_from_ENU(shared_static_transforms->w_from_gpsw.so3().matrix());
+    const Eigen::Matrix3d w_covariance(w_from_ENU * ENU_covariance * w_from_ENU.transpose());
+    return w_covariance;
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/image_point_four_seasons.hh b/experimental/learn_descriptors/image_point_four_seasons.hh
new file mode 100644
index 000000000..95401d885
--- /dev/null
+++ b/experimental/learn_descriptors/image_point_four_seasons.hh
@@ -0,0 +1,85 @@
+#pragma once
+
+#include <cstddef>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <sstream>
+#include <string>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "common/liegroups/se3.hh"
+#include "experimental/learn_descriptors/four_seasons_transforms.hh"
+#include "experimental/learn_descriptors/gps_data.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class FourSeasonsParser;
+struct ImagePointFourSeasons : ImagePoint {
+    virtual ~ImagePointFourSeasons();
+
+    // size_t id;   // idx for DB
+    // size_t seq;  // time in nanoseconds of image capture (also the name of image)
+    std::optional<liegroups::SE3>
+        AS_w_from_gnss_cam;  // globally opimized arbitrary scale visual world (gnss + VIO + loop
+                             // closure + etc.) from cam
+    std::optional<liegroups::SE3> AS_w_from_vio_cam;  // arbitrary scale world from vio result cam
+    std::optional<GPSData> gps_gcs;  // raw gps measurement in gcs (global cordinate system)
+    std::shared_ptr<FourSeasonsTransforms::StaticTransforms> shared_static_transforms;
+
+    std::optional<Eigen::Vector3d> velocity_to(const ImagePointFourSeasons& other_img_pt) const {
+        ROBOT_CHECK(other_img_pt.seq >= seq);
+        if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt;
+        double dt_seconds = static_cast<double>(other_img_pt.seq - seq) * 1e-9;
+        return (*other_img_pt.cam_in_world() - *cam_in_world()) / dt_seconds;
+    };
+    std::optional<Eigen::Vector3d> velocity_from(const ImagePointFourSeasons& other_img_pt) const {
+        ROBOT_CHECK(seq >= other_img_pt.seq);
+        if (!other_img_pt.cam_in_world() || !cam_in_world()) return std::nullopt;
+        double dt_seconds = static_cast<double>(seq - other_img_pt.seq) * 1e-9;
+        return (*cam_in_world() - *other_img_pt.cam_in_world()) / dt_seconds;
+    };
+
+    std::optional<Eigen::Isometry3d> world_from_cam_ground_truth() const override;
+    std::optional<Eigen::Vector3d> cam_in_world() const override;
+    std::optional<Eigen::Matrix3d> translation_covariance_in_cam() const override;
+    std::optional<Eigen::Matrix3d> gps_covariance_in_world() const;
+
+    std::string to_string() const override {
+        auto se3_to_str = [](const liegroups::SE3& se3) {
+            const Eigen::Vector3d& t = se3.translation();
+            const Eigen::Quaterniond& r = se3.so3().unit_quaternion();
+            std::stringstream ss;
+            ss << "\t\tt: [" << t.x() << ", " << t.y() << ", " << t.z() << "]\n";
+            ss << "\t\tq: [" << r.w() << ", " << r.x() << ", " << r.y() << ", " << r.z() << "]";
+            return ss.str();
+        };
+        std::stringstream ss;
+        ss << "Image Point " << id << ":\n";
+        ss << "\tseq: " << seq << "\n";
+        ss << "\tAS_w_from_gnss_cam: ";
+        if (AS_w_from_gnss_cam) {
+            ss << "\n" << se3_to_str(*AS_w_from_gnss_cam);
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n\tAS_w_from_vio_cam: ";
+        if (AS_w_from_vio_cam) {
+            ss << "\n" << se3_to_str(*AS_w_from_vio_cam);
+        } else {
+            ss << "N/A";
+        }
+        ss << "\n" << (gps_gcs ? gps_gcs->to_string() : "GPS Data: N/A");
+        return ss.str();
+    }
+
+    cv::Mat load_image(const std::filesystem::path& img_dir) const {
+        const std::filesystem::path path(img_dir / (std::to_string(seq) + ".png"));
+        return cv::imread(path);
+    }
+};
+typedef std::vector<ImagePointFourSeasons> ImagePointFourSeasonsVector;
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/learn_descriptors.cc b/experimental/learn_descriptors/learn_descriptors.cc
index 94ba9833a..f2938510f 100644
--- a/experimental/learn_descriptors/learn_descriptors.cc
+++ b/experimental/learn_descriptors/learn_descriptors.cc
@@ -1,6 +1,7 @@
 #include "experimental/learn_descriptors/learn_descriptors.hh"
 
 #include <iostream>
+#include <string>
 
 namespace robot::experimental::learn_descriptors {
 void hello_world(const std::string &in) { std::cout << in << std::endl; }
diff --git a/experimental/learn_descriptors/spatial_test_scene.cc b/experimental/learn_descriptors/spatial_test_scene.cc
new file mode 100644
index 000000000..93168e118
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene.cc
@@ -0,0 +1,147 @@
+#include "experimental/learn_descriptors/spatial_test_scene.hh"
+
+#include <random>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "common/check.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "opencv2/opencv.hpp"
+
+Eigen::Isometry3d isometry_from_vector(const Eigen::VectorXd &pose_vector) {
+    ROBOT_CHECK(pose_vector.size() == 6);
+    Eigen::Isometry3d T = Eigen::Isometry3d::Identity();
+    if (pose_vector.tail<3>().norm() > 1e-6) {
+        T.linear() = Eigen::AngleAxisd(pose_vector.tail<3>().norm(), pose_vector.tail<3>())
+                         .toRotationMatrix();
+    }
+    T.translation() = pose_vector.head<3>();
+    return T;
+}
+
+namespace robot::experimental::learn_descriptors {
+template <typename T>
+std::vector<SpatialTestScene::ProjectedPoint> SpatialTestScene::get_projected_pixels(
+    const gtsam::PinholeCamera<T> &camera, const cv::Size &img_size) {
+    std::vector<ProjectedPoint> pixels;
+    for (size_t i = 0; i < points_.size(); i++) {
+        const Eigen::Vector3d pt = points_[i];
+        Eigen::Vector2d pixel = camera.project(pt);
+        if (ProjectionHelper::pixel_in_range(pixel, img_size.width, img_size.height)) {
+            ProjectedPoint projected_pt(i, pixel);
+            pixels.push_back(projected_pt);
+        }
+    }
+    return pixels;
+};
+
+template <typename T>
+std::vector<std::pair<SpatialTestScene::ProjectedPoint, SpatialTestScene::ProjectedPoint>>
+SpatialTestScene::get_corresponding_pixels(
+    const std::vector<std::pair<gtsam::PinholeCamera<T>, cv::Size>> &cam_and_img_size) {
+    std::vector<std::pair<ProjectedPoint, ProjectedPoint>> corresponding_pixels;
+    for (size_t i = 0; i < points_.size(); i++) {
+        const Eigen::Vector3d pt = points_[i];
+        Eigen::Vector2d pixel_cam0 = cam_and_img_size[0].first.project(pt);
+        Eigen::Vector2d pixel_cam1 = cam_and_img_size[1].first.project(pt);
+        if (ProjectionHelper::pixel_in_range(pixel_cam0, cam_and_img_size[0].second.width,
+                                             cam_and_img_size[0].second.height) &&
+            ProjectionHelper::pixel_in_range(pixel_cam1, cam_and_img_size[1].second.width,
+                                             cam_and_img_size[1].second.height)) {
+            corresponding_pixels.emplace_back(ProjectedPoint(i, pixel_cam0),
+                                              ProjectedPoint(i, pixel_cam1));
+        }
+    }
+    return corresponding_pixels;
+};
+
+void SpatialTestScene::add_point(Eigen::Vector3d point, std::optional<Noise> point_noise) {
+    points_groundtruth_.push_back(point);
+    if (point_noise) {
+        ROBOT_CHECK(point_noise->dim() == 3);
+        points_.push_back(point + point_noise->sample());
+    } else {
+        points_.push_back(point);
+    }
+};
+
+void SpatialTestScene::add_points(std::vector<Eigen::Vector3d> points,
+                                  std::optional<Noise> point_noise) {
+    for (const Eigen::Vector3d &point : points) {
+        points_groundtruth_.push_back(point);
+        if (point_noise) {
+            ROBOT_CHECK(point_noise->dim() == 3);
+            points_.push_back(point + point_noise->sample());
+        } else {
+            points_.push_back(point);
+        }
+    }
+}
+
+void SpatialTestScene::add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera,
+                                  std::optional<Noise> pose_noise) {
+    cameras_groundtruth_.push_back(camera);
+    if (pose_noise) {
+        cameras_.push_back(gtsam::PinholeCamera<gtsam::Cal3_S2>(
+            camera.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()),
+            camera.calibration()));
+    } else {
+        cameras_.push_back(camera);
+    }
+}
+
+void SpatialTestScene::add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras,
+                                   std::optional<Noise> pose_noise) {
+    for (const gtsam::PinholeCamera<gtsam::Cal3_S2> &cam : cameras) {
+        cameras_groundtruth_.push_back(cam);
+        if (pose_noise) {
+            cameras_.push_back(gtsam::PinholeCamera<gtsam::Cal3_S2>(
+                cam.pose() * gtsam::Pose3(isometry_from_vector(pose_noise->sample()).matrix()),
+                cam.calibration()));
+        } else {
+            cameras_.push_back(cam);
+        }
+    }
+}
+
+void SpatialTestScene::add_rand_cameras_face_origin(int num_cameras, double min_radius_origin,
+                                                    double max_radius_origin,
+                                                    std::optional<Noise> pose_noise,
+                                                    const gtsam::Cal3_S2 &K) {
+    std::random_device rd;
+    std::mt19937 gen(rd());
+
+    std::uniform_real_distribution<double> dist_radius(min_radius_origin, max_radius_origin);
+    std::uniform_real_distribution<double> dist_omega(0.0, 2 * M_PI);
+
+    const Eigen::Isometry3d world_from_cam_default =
+        Eigen::Translation3d(1.0, 0, 0) * Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitY()) *
+        Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitZ());
+    for (int i = 0; i < num_cameras; i++) {
+        double angle_z = dist_omega(gen);
+        Eigen::Isometry3d longitude_from_world = Eigen::Isometry3d::Identity();
+        longitude_from_world.linear() =
+            Eigen::AngleAxisd(angle_z, Eigen::Vector3d::UnitZ()).matrix();
+        double angle_x = dist_omega(gen);
+        Eigen::Isometry3d latitude_from_world = Eigen::Isometry3d::Identity();
+        latitude_from_world.linear() =
+            Eigen::AngleAxisd(angle_x, Eigen::Vector3d::UnitY()).matrix();
+
+        double radius = dist_radius(gen);
+        Eigen::Isometry3d world_from_cam(world_from_cam_default);
+        world_from_cam.translation() *= radius;
+        world_from_cam = longitude_from_world * latitude_from_world * world_from_cam;
+        cameras_groundtruth_.push_back(
+            gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K));
+        if (pose_noise) {
+            cameras_.push_back(gtsam::PinholeCamera(
+                gtsam::Pose3(
+                    (world_from_cam * isometry_from_vector(pose_noise->sample())).matrix()),
+                K));
+        } else {
+            cameras_.push_back(gtsam::PinholeCamera(gtsam::Pose3(world_from_cam.matrix()), K));
+        }
+    }
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_test_scene.hh b/experimental/learn_descriptors/spatial_test_scene.hh
new file mode 100644
index 000000000..4f542223a
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene.hh
@@ -0,0 +1,107 @@
+#pragma once
+
+#include <optional>
+#include <random>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "Eigen/Dense"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "opencv2/opencv.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class ProjectionHelper {
+   public:
+    static constexpr size_t img_width_default = 640;
+    static constexpr size_t img_height_default = 480;
+    static constexpr double fx_default = 500.0;
+    static constexpr double fy_default = fx_default;
+    static constexpr double cx_default = img_width_default / 2.0;
+    static constexpr double cy_default = img_height_default / 2.0;
+    static const gtsam::Cal3_S2::shared_ptr &K_default() {
+        static gtsam::Cal3_S2::shared_ptr K(
+            new gtsam::Cal3_S2(fx_default, fy_default, 0.0, cx_default, cy_default));
+        return K;
+    }
+    static bool pixel_in_range(Eigen::Vector2d pixel, size_t img_width, size_t img_height) {
+        return pixel[0] >= 0 && pixel[0] < img_width && pixel[1] >= 0 && pixel[1] < img_height;
+    }
+};
+class SpatialTestScene {
+   public:
+    struct Noise {
+        Noise(const std::vector<double> &sigmas, std::optional<unsigned int> seed = std::nullopt)
+            : sigmas_(sigmas) {
+            if (seed) {
+                gen_.seed(*seed);
+            } else {
+                std::random_device rd;
+                gen_.seed(rd());
+            }
+
+            for (double sigma : sigmas_) {
+                dists_.emplace_back(0.0, sigma);
+            }
+        }
+
+        static Noise constrained(size_t dim = 6) { return Noise(std::vector<double>(dim, 0.0)); }
+
+        Eigen::VectorXd sample() {
+            Eigen::VectorXd result(sigmas_.size());
+            for (size_t i = 0; i < sigmas_.size(); ++i) {
+                result(i) = dists_[i](gen_);
+            }
+            return result;
+        }
+
+        size_t dim() const { return sigmas_.size(); }
+
+       private:
+        std::vector<double> sigmas_;
+        std::vector<std::normal_distribution<double>> dists_;
+        std::mt19937 gen_;
+    };
+    struct ProjectedPoint {
+        ProjectedPoint(const size_t pt_idx, const Eigen::Vector2d &pixel)
+            : pt_idx(pt_idx), pixel(pixel){};
+        size_t pt_idx;
+        Eigen::Vector2d pixel;
+    };
+    SpatialTestScene() = default;
+    SpatialTestScene(std::vector<Eigen::Vector3d> points) : points_(points){};
+    template <typename T>
+    std::vector<ProjectedPoint> get_projected_pixels(const gtsam::PinholeCamera<T> &camera,
+                                                     const cv::Size &img_size);
+    template <typename T>
+    std::vector<std::pair<ProjectedPoint, ProjectedPoint>> get_corresponding_pixels(
+        const std::vector<std::pair<gtsam::PinholeCamera<T>, cv::Size>>
+            &cam_and_img_size);  // make this parameter of size 2
+
+    void add_point(Eigen::Vector3d point, std::optional<Noise> point_noise = std::nullopt);
+    void add_points(std::vector<Eigen::Vector3d> points,
+                    std::optional<Noise> point_noise = std::nullopt);
+    void add_camera(gtsam::PinholeCamera<gtsam::Cal3_S2> camera,
+                    std::optional<Noise> pose_noise = std::nullopt);
+    void add_cameras(std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras,
+                     std::optional<Noise> pose_noise = std::nullopt);
+    void add_rand_cameras_face_origin(int num_cameras, double min_radius_origin,
+                                      double max_radius_origin,
+                                      std::optional<Noise> pose_noise = std::nullopt,
+                                      const gtsam::Cal3_S2 &K = *ProjectionHelper::K_default());
+
+    const std::vector<Eigen::Vector3d> &points() { return points_; };
+    const std::vector<Eigen::Vector3d> &points_groundtruth() { return points_groundtruth_; };
+    const std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> &cameras() { return cameras_; };
+    const std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> &cameras_groundtruth() {
+        return cameras_groundtruth_;
+    };
+
+   protected:
+    std::vector<Eigen::Vector3d> points_;
+    std::vector<Eigen::Vector3d> points_groundtruth_;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_;
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_groundtruth_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube.hh b/experimental/learn_descriptors/spatial_test_scene_cube.hh
new file mode 100644
index 000000000..0d74262ac
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene_cube.hh
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "Eigen/Core"
+#include "experimental/learn_descriptors/spatial_test_scene.hh"
+
+namespace robot::experimental::learn_descriptors {
+class SpatialTestSceneCube : public SpatialTestScene {
+   public:
+    SpatialTestSceneCube(const float cube_size) : SpatialTestScene() {
+        add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, -cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, -cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(-cube_size / 2, cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, -cube_size / 2, cube_size / 2));
+        add_point(Eigen::Vector3d(cube_size / 2, cube_size / 2, cube_size / 2));
+    };
+};
+}  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/spatial_test_scene_cube_example.cc b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
new file mode 100644
index 000000000..5ba850db7
--- /dev/null
+++ b/experimental/learn_descriptors/spatial_test_scene_cube_example.cc
@@ -0,0 +1,46 @@
+#include <string>
+#include <vector>
+
+#include "Eigen/Dense"
+#include "common/check.hh"
+#include "experimental/learn_descriptors/spatial_test_scene_cube.hh"
+#include "gtsam/geometry/Cal3_S2.h"
+#include "gtsam/geometry/PinholeCamera.h"
+#include "visualization/opencv/opencv_viz.hh"
+
+using namespace robot::experimental::learn_descriptors;
+
+int main() {
+    SpatialTestSceneCube test_scene(1.0f);
+
+    const size_t img_width = 640;
+    const size_t img_height = 480;
+    const double fx = 500.0;
+    const double fy = fx;
+    const double cx = static_cast<double>(img_width) / 2.0;
+    const double cy = static_cast<double>(img_height) / 2.0;
+    gtsam::Cal3_S2::shared_ptr K(new gtsam::Cal3_S2(fx, fy, 0, cx, cy));
+
+    SpatialTestScene::Noise pose_noise({0.05, 0.05, 0.05, 0.15, 0.15, 0.15});
+    test_scene.add_rand_cameras_face_origin(7, 2.0, 6.0, pose_noise, *K);
+
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras = test_scene.cameras();
+    std::vector<gtsam::PinholeCamera<gtsam::Cal3_S2>> cameras_grndtrth =
+        test_scene.cameras_groundtruth();
+    ROBOT_CHECK(cameras.size() == cameras_grndtrth.size());
+    std::vector<robot::visualization::VizPose> viz_cam_poses;
+    viz_cam_poses.reserve(cameras.size() + cameras_grndtrth.size());
+    for (size_t i = 0; i < cameras.size(); i++) {
+        viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras[i].pose().matrix()),
+                                   "cam_" + std::to_string(i));
+        viz_cam_poses.emplace_back(Eigen::Isometry3d(cameras_grndtrth[i].pose().matrix()),
+                                   "cam_grnd_" + std::to_string(i));
+    }
+    ROBOT_CHECK(test_scene.points().size() == test_scene.points_groundtruth().size());
+    std::vector<robot::visualization::VizPoint> viz_points;
+    for (size_t i = 0; i < test_scene.points().size(); i++) {
+        viz_points.emplace_back(test_scene.points()[i], "pt_" + std::to_string(i));
+        viz_points.emplace_back(test_scene.points_groundtruth()[i], "pt_grnd_" + std::to_string(i));
+    }
+    robot::visualization::viz_scene(viz_cam_poses, viz_points);
+}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.cc b/experimental/learn_descriptors/structure_from_motion.cc
new file mode 100644
index 000000000..ae46c2966
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion.cc
@@ -0,0 +1,84 @@
+#include "experimental/learn_descriptors/structure_from_motion.hh"
+
+#include <filesystem>
+#include <memory>
+#include <ostream>
+#include <sstream>
+#include <stdexcept>
+
+#include "common/check.hh"
+#include "common/geometry/camera.hh"
+#include "common/geometry/translate_types.hh"
+#include "experimental/learn_descriptors/backend.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "gtsam/geometry/Point2.h"
+#include "opencv2/opencv.hpp"
+#include "opencv2/viz.hpp"
+#include "visualization/opencv/opencv_viz.hh"
+
+namespace robot::experimental::learn_descriptors {
+void StructureFromMotion::add_image_point(const cv::Mat &img_undistorted,
+                                          const SharedImagePoint &image_point) {
+    frontend_.add_image(ImageAndPoint{img_undistorted, image_point});
+}
+void StructureFromMotion::add_image_points(
+    const std::vector<cv::Mat> &imgs_undistorted,
+    const std::vector<SharedImagePoint> &shared_image_points) {
+    ROBOT_CHECK(imgs_undistorted.size() == shared_image_points.size());
+    std::vector<ImageAndPoint> img_and_pts;
+    img_and_pts.reserve(shared_image_points.size());
+    for (size_t i = 0; i < shared_image_points.size(); i++) {
+        img_and_pts.emplace_back(imgs_undistorted[i], shared_image_points[i]);
+    }
+    frontend_.add_images(img_and_pts);
+}
+
+void StructureFromMotion::solve_structure(
+    const int num_steps, std::optional<Backend::graph_step_debug_func> iter_debug_func) {
+    // FRONTEND
+    frontend_.populate_frames();
+    ROBOT_CHECK(frontend_.frames().size() > 2);
+    frontend_.match_frames_and_build_tracks();
+    ROBOT_CHECK(frontend_.frames().front()->world_from_cam_groundtruth_,
+                "Assuming that our first frame has groundtruth for now.",
+                *frontend_.frames().front()->world_from_cam_groundtruth_);
+    // align rotation via the initial guess
+    frontend_.frames().front()->world_from_cam_initial_guess_ =
+        frontend_.frames().front()->world_from_cam_groundtruth_->rotation();
+
+    // BACKEND
+    backend_.clear();
+    backend_.add_frames(frontend_.frames());
+    backend_.calculate_initial_values();
+    backend_.populate_graph(frontend_.feature_tracks());
+    graph_values(backend_.current_initial_values(), "viz init values");
+    backend_.solve_graph(10, iter_debug_func);
+}
+
+void StructureFromMotion::graph_values(const gtsam::Values &values, const std::string &window_name,
+                                       const cv::viz::Color color) {
+    std::vector<robot::visualization::VizPose> final_poses;
+    std::vector<robot::visualization::VizPoint> final_lmks;
+    for (const auto &key_value : values) {
+        gtsam::Key key = key_value.key;
+        gtsam::Symbol symbol(key);
+        try {
+            const gtsam::Pose3 &pose = values.at<gtsam::Pose3>(key);
+            final_poses.emplace_back(Eigen::Isometry3d(pose.matrix()), symbol.string());
+            continue;
+        } catch (const gtsam::ValuesIncorrectType &) {
+        }
+
+        try {
+            const gtsam::Point3 &pt = values.at<gtsam::Point3>(key);
+            final_lmks.emplace_back(pt, symbol.string());
+            continue;
+        } catch (const gtsam::ValuesIncorrectType &) {
+        }
+
+        std::cerr << "Key " << symbol << " is neither Pose3 nor Point3.\n";
+    }
+    robot::visualization::viz_scene(final_poses, final_lmks, color, true, true, window_name);
+}
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion.hh b/experimental/learn_descriptors/structure_from_motion.hh
new file mode 100644
index 000000000..93ffe0ddb
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion.hh
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <optional>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include "Eigen/Dense"
+#include "common/geometry/camera.hh"
+#include "experimental/learn_descriptors/backend.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/image_point.hh"
+#include "gtsam/geometry/Pose3.h"
+#include "gtsam/inference/Symbol.h"
+#include "gtsam/nonlinear/Values.h"
+#include "opencv2/opencv.hpp"
+#include "opencv2/viz.hpp"
+
+namespace robot::experimental::learn_descriptors {
+class StructureFromMotion {
+   public:
+    StructureFromMotion(FrontendParams frontend_params) : frontend_(Frontend(frontend_params)){};
+
+    static void graph_values(const gtsam::Values &values,
+                             const std::string &window_name = "Viz Structure",
+                             const cv::viz::Color color = cv::viz::Color::brown());
+
+    void add_image_point(const cv::Mat &img_undistorted, const SharedImagePoint &image_point);
+    void add_image_points(const std::vector<cv::Mat> &imgs_undistorted,
+                          const std::vector<SharedImagePoint> &image_points);
+    void solve_structure(
+        const int num_steps,
+        std::optional<Backend::graph_step_debug_func> iter_debug_func = std::nullopt);
+    const gtsam::Values &result() { return backend_.result(); };
+
+    Frontend frontend() { return frontend_; };
+    Backend backend() { return backend_; }
+
+   private:
+    Frontend frontend_;
+    Backend backend_;
+};
+}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_example.cc b/experimental/learn_descriptors/structure_from_motion_example.cc
new file mode 100644
index 000000000..2dce0047e
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion_example.cc
@@ -0,0 +1,75 @@
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <ostream>
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+#include "cxxopts.hpp"
+#include "experimental/learn_descriptors/backend.hh"
+#include "experimental/learn_descriptors/four_seasons_parser.hh"
+#include "experimental/learn_descriptors/frame.hh"
+#include "experimental/learn_descriptors/frontend.hh"
+#include "experimental/learn_descriptors/frontend_definitions.hh"
+#include "experimental/learn_descriptors/image_point_four_seasons.hh"
+#include "experimental/learn_descriptors/structure_from_motion.hh"
+#include "experimental/learn_descriptors/structure_from_motion_types.hh"
+#include "opencv2/opencv.hpp"
+#include "visualization/opencv/opencv_viz.hh"
+
+int main(int argc, const char **argv) {
+    using namespace robot::experimental::learn_descriptors;
+    // clang-format off
+    cxxopts::Options options("four_seasons_parser_example", "Demonstrate usage of four_seasons_parser");
+    options.add_options()
+    ("data_dir", "Path to dataset root directory", cxxopts::value<std::string>())
+    ("calibration_dir", "Path to dataset calibration directory", cxxopts::value<std::string>())
+    ("help", "Print usage");
+    // clang-format on
+
+    auto args = options.parse(argc, argv);
+
+    const auto check_required = [&](const std::string &opt) {
+        if (args.count(opt) == 0) {
+            std::cout << "Missing " << opt << " argument" << std::endl;
+            std::cout << options.help() << std::endl;
+            std::exit(1);
+        }
+    };
+
+    if (args.count("help")) {
+        std::cout << options.help() << std::endl;
+        return 0;
+    }
+
+    check_required("data_dir");
+    check_required("calibration_dir");
+
+    const std::filesystem::path path_data = args["data_dir"].as<std::string>();
+    const std::filesystem::path path_calibration = args["calibration_dir"].as<std::string>();
+    FourSeasonsParser parser(path_data, path_calibration);
+
+    FrontendParams frontend_params{FrontendParams::ExtractorType::SIFT,
+                                   FrontendParams::MatcherType::KNN, true, false};
+    StructureFromMotion sfm(frontend_params);
+
+    // short sequences (maybe about a doezen points that have decent overlap and aren't spaced too
+    // far apart on the order of maybe a meter) perform ok. many improvements needed for the overall
+    // system
+    for (size_t i = 835; i < 855; i += 2) {
+        const ImagePointFourSeasons img_pt = parser.get_image_point(i);
+        sfm.add_image_point(parser.load_image(i), std::make_shared<ImagePointFourSeasons>(img_pt));
+    }
+
+    using epoch = size_t;
+    Backend::graph_step_debug_func graph_itr_debug_func = [&](const gtsam::Values &vals,
+                                                              const epoch iter) {
+        std::cout << "iteration " << iter << " complete!";
+        std::string window_name = "Iteration_" + std::to_string(iter);
+        StructureFromMotion::graph_values(vals, window_name);
+    };
+    // passing graph_itr_debug_func will execute the function after each optimization epoch
+    // sfm.solve_structure(10, graph_itr_debug_func);
+    sfm.solve_structure(10);
+    StructureFromMotion::graph_values(sfm.result());
+}
\ No newline at end of file
diff --git a/experimental/learn_descriptors/structure_from_motion_types.hh b/experimental/learn_descriptors/structure_from_motion_types.hh
new file mode 100644
index 000000000..e8d0595bc
--- /dev/null
+++ b/experimental/learn_descriptors/structure_from_motion_types.hh
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "gtsam/geometry/Point3.h"
+#include "opencv2/opencv.hpp"
+
+using Timestamp = size_t;
+
+using FrameId = size_t;  // Frame id is used as the index of gtsam symbol
+                         // (not as a gtsam key).
+using LandmarkId = size_t;
+using LandmarkIds = std::vector<LandmarkId>;
+using Landmark = gtsam::Point3;
+using Landmarks = std::vector<Landmark, Eigen::aligned_allocator<Landmark>>;
+
+using KeypointCV = cv::Point2f;
+using KeypointsCV = std::vector<cv::Point2f>;
diff --git a/experimental/learn_descriptors/symphony_lake_parser.cc b/experimental/learn_descriptors/symphony_lake_parser.cc
index 5f93ab174..f06ebd116 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser.cc
@@ -5,11 +5,72 @@
 #include "common/check.hh"
 
 namespace robot::experimental::learn_descriptors {
+const Eigen::Vector3d DataParser::t_boat_cam(-0.430, 0, 0.255);
+
+const Eigen::Isometry3d DataParser::T_boat_gps(
+    (Eigen::Matrix4d() << 1, 0, 0, 0.080, 0, 1, 0, -0.1183, 0, 0, 1, 0.010, 0, 0, 0, 1).finished());
+
+const Eigen::Isometry3d DataParser::T_boat_imu = []() {
+    Eigen::Isometry3d T_boat_imu;
+    T_boat_imu.translation() = Eigen::Vector3d(-0.494, 0, -0.089);
+    T_boat_imu.linear() = Eigen::AngleAxisd(M_PI, Eigen::Vector3d::UnitX()).matrix();
+    return T_boat_imu;
+}();
+
 DataParser::DataParser(const std::filesystem::path &image_root_dir,
-                       const std::vector<std::string> &survey_list) {
+                       const std::vector<std::string> &survey_list)
+    : image_root_dir_(image_root_dir), survey_list_(survey_list) {
     ROBOT_CHECK(std::filesystem::exists(image_root_dir), "Image root dir does not exist!",
                 image_root_dir);
     surveys_.load(image_root_dir.string(), survey_list);
 }
 DataParser::~DataParser() {}
+
+const Eigen::Isometry3d DataParser::get_boat_from_camera(
+    const symphony_lake_dataset::ImagePoint &img_pt) {
+    return get_boat_from_camera(img_pt.pan, img_pt.tilt);
+}
+const Eigen::Isometry3d DataParser::get_boat_from_camera(double theta_pan, double theta_tilt) {
+    Eigen::AngleAxisd R_z(-theta_pan, Eigen::Vector3d::UnitZ());   // pan
+    Eigen::AngleAxisd R_y(-theta_tilt, Eigen::Vector3d::UnitY());  // tilt
+    Eigen::Matrix3d R_boat_gimbal(R_z * R_y);
+
+    Eigen::Matrix3d R_gimbal_camera = (Eigen::AngleAxisd(-M_PI_2, Eigen::Vector3d::UnitX()) *
+                                       Eigen::AngleAxisd(M_PI_2, Eigen::Vector3d::UnitY()))
+                                          .matrix();
+    Eigen::Matrix3d R_boat_camera(R_boat_gimbal * R_gimbal_camera);
+
+    Eigen::Isometry3d T_boat_cam;
+    T_boat_cam.translation() = t_boat_cam;
+    T_boat_cam.linear() = R_boat_camera;
+    return T_boat_cam;
+}
+
+const Eigen::Isometry3d DataParser::get_world_from_gps(
+    const symphony_lake_dataset::ImagePoint &img_pt) {
+    Eigen::Isometry3d world_from_gps;
+    // img_pt.theta is in the North East Down frame, not lattitude longitude coords. However,
+    // because of the alignment of the imu and gps, this rotation math holds.
+    world_from_gps.linear() = Eigen::AngleAxisd(img_pt.theta, Eigen::Vector3d::UnitZ()).matrix();
+    world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+    return world_from_gps;
+}
+
+const Eigen::Isometry3d DataParser::get_world_from_boat(
+    const symphony_lake_dataset::ImagePoint &img_pt) {
+    // Eigen::Isometry3d world_from_gps;
+
+    // Eigen::Matrix3d R_world_imu = get_R_world_imu(img_pt.theta);
+    // Eigen::Isometry3d T_imu_gps = T_boat_imu.inverse() * T_boat_gps;
+    // Eigen::Matrix3d R_world_gps = R_world_imu * T_imu_gps.linear();
+
+    // world_from_gps.linear() = R_world_gps;
+    // world_from_gps.translation() = Eigen::Vector3d(img_pt.x, img_pt.y, 0);
+
+    // Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse();
+
+    Eigen::Isometry3d world_from_gps = get_world_from_gps(img_pt);
+    Eigen::Isometry3d world_from_boat = world_from_gps * T_boat_gps.inverse();
+    return world_from_boat;
+}
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser.hh b/experimental/learn_descriptors/symphony_lake_parser.hh
index 021ebd3f2..536771d06 100644
--- a/experimental/learn_descriptors/symphony_lake_parser.hh
+++ b/experimental/learn_descriptors/symphony_lake_parser.hh
@@ -1,21 +1,75 @@
 #pragma once
-#pragma once
-#include <string.h>
-
+#include <coroutine>
 #include <filesystem>
+#include <iostream>
+#include <string>
+#include <vector>
 
+#include "Eigen/Dense"
+#include "symphony_lake_dataset/ImagePoint.h"
 #include "symphony_lake_dataset/SurveyVector.h"
 
 namespace robot::experimental::learn_descriptors {
+class SymphonyLakeCamParams {
+   public:
+    constexpr static double fx = 759.308, fy = 690.44;
+    constexpr static double px = 370.915, py = 250.909;
+
+    constexpr static double k1 = -0.302805, k2 = 0.171088, k3 = 0.0;
+    constexpr static double p1 = 0.001151, p2 = -0.00038;
+};
 class DataParser {
    public:
+    static const Eigen::Vector3d t_boat_cam;
+    static const Eigen::Isometry3d T_boat_gps;
+    static const Eigen::Isometry3d T_boat_imu;
+
     DataParser(const std::filesystem::path &image_root_dir,
                const std::vector<std::string> &survey_list);
     ~DataParser();
 
+    // Eigen::Affine3d get_T_world_camera(size_t survey_idx, size_t image_idx, bool use_gps = false,
+    //                                    bool use_compass = false);
+    static const Eigen::Isometry3d get_boat_from_camera(
+        const symphony_lake_dataset::ImagePoint &img_pt);
+    static const Eigen::Isometry3d get_boat_from_camera(double theta_pan, double theta_tilt);
+    static const Eigen::Isometry3d get_world_from_gps(
+        const symphony_lake_dataset::ImagePoint &img_pt);
+    /// @brief get_world_from_boat assuming z_axis_boat dot z_axis_world ~ -1
+    /// @param img_pt
+    /// @return world_from_boat
+    static const Eigen::Isometry3d get_world_from_boat(
+        const symphony_lake_dataset::ImagePoint &img_pt);
+
     const symphony_lake_dataset::SurveyVector &get_surveys() const { return surveys_; };
 
    private:
+    std::filesystem::path image_root_dir_;
+    std::vector<std::string> survey_list_;
     symphony_lake_dataset::SurveyVector surveys_;
 };
+class SymphonyLakeDatasetTestHelper {
+   public:
+    static constexpr const char *test_image_root_dir =
+        "external/symphony_lake_snippet/symphony_lake";
+    static constexpr std::array<const char *, 1> test_survey_list = {"140106_snippet"};
+    static bool images_equal(cv::Mat img1, cv::Mat img2) {
+        if (img1.size() != img2.size() || img1.type() != img2.type()) {
+            return false;
+        }
+        cv::Mat diff;
+        cv::absdiff(img1, img2, diff);
+        diff = diff.reshape(1);
+        return cv::countNonZero(diff) == 0;
+    };
+    static DataParser get_test_parser() {
+        return DataParser(get_test_iamge_root_dir(), get_test_survey_list());
+    };
+    static std::filesystem::path get_test_iamge_root_dir() {
+        return std::filesystem::path(test_image_root_dir);
+    };
+    static std::vector<std::string> get_test_survey_list() {
+        return std::vector<std::string>(test_survey_list.begin(), test_survey_list.end());
+    };
+};
 }  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/symphony_lake_parser_test.cc b/experimental/learn_descriptors/symphony_lake_parser_test.cc
index 93caa7a10..a8c309c4e 100644
--- a/experimental/learn_descriptors/symphony_lake_parser_test.cc
+++ b/experimental/learn_descriptors/symphony_lake_parser_test.cc
@@ -6,31 +6,17 @@
 #include <string>
 #include <vector>
 
+#include "common/check.hh"
 #include "gtest/gtest.h"
 #include "opencv2/opencv.hpp"
+#include "visualization/opencv/opencv_viz.hh"
 
 namespace robot::experimental::learn_descriptors {
 namespace {
 bool is_test() { return std::getenv("BAZEL_TEST") != nullptr; }
 }  // namespace
-
-class SymphonyLakeDatasetTestHelper {
-   public:
-    static bool images_equal(cv::Mat img1, cv::Mat img2) {
-        if (img1.size() != img2.size() || img1.type() != img2.type()) {
-            return false;
-        }
-        cv::Mat diff;
-        cv::absdiff(img1, img2, diff);
-        diff = diff.reshape(1);
-        return cv::countNonZero(diff) == 0;
-    }
-};
 TEST(SymphonyLakeParserTest, snippet_140106) {
-    const std::filesystem::path image_root_dir = "external/symphony_lake_snippet/symphony_lake";
-    const std::vector<std::string> survey_list{"140106_snippet"};
-
-    DataParser data_parser = DataParser(image_root_dir, survey_list);
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
 
     const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
 
@@ -43,6 +29,9 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
     for (int i = 0; i < static_cast<int>(survey_vector.getNumSurveys()); i++) {
         const symphony_lake_dataset::Survey &survey = survey_vector.get(i);
         for (int j = 0; j < static_cast<int>(survey.getNumImages()); j++) {
+            // img_point contains all the csv data entries
+            const symphony_lake_dataset::ImagePoint img_point = survey.getImagePoint(j);
+            (void)img_point;  // suppress unused variable
             image = survey.loadImageByImageIndex(j);
 
             // get the target image
@@ -52,15 +41,87 @@ TEST(SymphonyLakeParserTest, snippet_140106) {
             std::string target_img_name_str = target_img_name.str();
             target_img_name_str.replace(0, target_img_name_str.size() - target_img_name_length, "");
             std::filesystem::path target_img_dir =
-                image_root_dir / survey_list[i] / "0027" / target_img_name_str;
+                SymphonyLakeDatasetTestHelper::get_test_iamge_root_dir() /
+                SymphonyLakeDatasetTestHelper::get_test_survey_list()[i] / "0027" /
+                target_img_name_str;
             target_img = cv::imread(target_img_dir.string());
 
             EXPECT_TRUE(SymphonyLakeDatasetTestHelper::images_equal(image, target_img));
-            if (!is_test()) {
-                cv::imshow("Symphony Dataset Image", image);
-                cv::waitKey(2);
-            }
+            // if (!is_test()) {
+            //     cv::imshow("Symphony Dataset Image", image);
+            //     cv::waitKey(2);
+            // }
+            // cv::imshow("Symphony Dataset Image", image);
+            // cv::waitKey(200);
+        }
+    }
+}
+
+TEST(SymphonyLakeParserTest, test_cam_frames) {
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 0; i < 200; i += 10) {
+            tmp.push_back(i);
+        }
+        return tmp;
+    }();
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint image_point_first =
+        survey.getImagePoint(indices.front());
+
+    std::vector<Eigen::Isometry3d> cam_frames;
+
+    // NOTE: the world in these images is east, north, up centered at boat0 translation
+    Eigen::Vector3d t_world_boat0 =
+        DataParser::get_world_from_boat(image_point_first).translation();
+
+    for (size_t i = 0; i < indices.size(); i++) {
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
+        Eigen::Isometry3d T_world_boatidx = DataParser::get_world_from_boat(img_pt);
+        Eigen::Isometry3d T_boatidx_camidx =
+            DataParser::get_boat_from_camera(img_pt);  // current boat to current camera
+
+        Eigen::Isometry3d T_world_camidx = T_world_boatidx * T_boatidx_camidx;
+        T_world_camidx.translation() -= t_world_boat0;
+
+        cam_frames.push_back(T_world_camidx);
+    }
+
+    // geometry::viz_scene(cam_frames, std::vector<Eigen::Vector3d>(), cv::viz::Color::black(),
+    // true, true, "test_cam_frames");
+}
+
+TEST(SymphonyLakeParserTest, test_gps_frames) {
+    const std::vector<int> indices = []() {
+        std::vector<int> tmp;
+        for (int i = 0; i < 200; i += 10) {
+            tmp.push_back(i);
         }
+        return tmp;
+    }();
+    DataParser data_parser = SymphonyLakeDatasetTestHelper::get_test_parser();
+    const symphony_lake_dataset::SurveyVector &survey_vector = data_parser.get_surveys();
+    const symphony_lake_dataset::Survey &survey = survey_vector.get(0);
+    const symphony_lake_dataset::ImagePoint image_point_first =
+        survey.getImagePoint(indices.front());
+
+    std::vector<Eigen::Isometry3d> gps_frames;
+
+    // NOTE: the world in these images is east, north, up centered at boat0 translation
+    Eigen::Vector3d t_world_boat0 =
+        DataParser::get_world_from_boat(image_point_first).translation();
+    // Eigen::Vector3d t_world_gps0(image_point_first.x, image_point_first.y, 0);
+
+    for (size_t i = 0; i < indices.size(); i++) {
+        const symphony_lake_dataset::ImagePoint img_pt = survey.getImagePoint(indices[i]);
+        Eigen::Isometry3d T_world_gpsidx = DataParser::get_world_from_gps(img_pt);
+        T_world_gpsidx.translation() -= t_world_boat0;
+        gps_frames.push_back(T_world_gpsidx);
     }
+
+    // geometry::viz_scene(gps_frames, std::vector<Eigen::Vector3d>(), cv::viz::Color::black(),
+    // true, true, "test_gps_frames");
 }
 }  // namespace robot::experimental::learn_descriptors
diff --git a/experimental/learn_descriptors/visual_odometry.cc b/experimental/learn_descriptors/visual_odometry.cc
deleted file mode 100644
index 360473208..000000000
--- a/experimental/learn_descriptors/visual_odometry.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-#include "experimental/learn_descriptors/visual_odometry.hh"
-
-namespace robot::experimental::learn_descriptors {
-VisualOdometry::VisualOdometry(Frontend::ExtractorType frontend_extractor,
-                               Frontend::MatcherType frontend_matcher) {
-    frontend_ = Frontend(frontend_extractor, frontend_matcher);
-}
-
-Frontend::Frontend(ExtractorType frontend_algorithm, MatcherType frontend_matcher) {
-    extractor_type_ = frontend_algorithm;
-    matcher_type_ = frontend_matcher;
-
-    switch (extractor_type_) {
-        case ExtractorType::SIFT:
-            feature_extractor_ = cv::SIFT::create();
-            break;
-        case ExtractorType::ORB:
-            feature_extractor_ = cv::ORB::create();
-            break;
-        default:
-            // Error handling needed?
-            break;
-    }
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
-            break;
-        case MatcherType::KNN:
-            descriptor_matcher_ = cv::BFMatcher::create(cv::NORM_L2);
-            break;
-        case MatcherType::FLANN:
-            if (frontend_algorithm == ExtractorType::ORB) {
-                throw std::invalid_argument("FLANN can not be used with ORB.");
-            }
-            descriptor_matcher_ = cv::FlannBasedMatcher::create();
-            break;
-        default:
-            // Error handling needed?
-            break;
-    }
-}
-
-std::pair<std::vector<cv::KeyPoint>, cv::Mat> Frontend::get_keypoints_and_descriptors(
-    const cv::Mat &img) const {
-    std::vector<cv::KeyPoint> keypoints;
-    cv::Mat descriptors;
-    switch (extractor_type_) {
-        default:  // the opencv extractors have the same function signature
-            feature_extractor_->detectAndCompute(img, cv::noArray(), keypoints, descriptors);
-            break;
-    }
-    return std::pair<std::vector<cv::KeyPoint>, cv::Mat>(keypoints, descriptors);
-}
-
-std::vector<cv::DMatch> Frontend::get_matches(const cv::Mat &descriptors1,
-                                              const cv::Mat &descriptors2) const {
-    std::vector<cv::DMatch> matches;
-    switch (matcher_type_) {
-        case MatcherType::BRUTE_FORCE:
-            get_brute_matches(descriptors1, descriptors2, matches);
-            break;
-        case MatcherType::KNN:
-            get_KNN_matches(descriptors1, descriptors2, matches);
-            break;
-        case MatcherType::FLANN:
-            get_FLANN_matches(descriptors1, descriptors2, matches);
-            break;
-        default:
-            break;
-    }
-    std::sort(matches.begin(), matches.end());
-    return matches;
-}
-
-bool Frontend::get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::BRUTE_FORCE) {
-        return false;
-    }
-    matches_out.clear();
-    descriptor_matcher_->match(descriptors1, descriptors2, matches_out);
-    return true;
-}
-
-bool Frontend::get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                               std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::KNN) {
-        return false;
-    }
-    std::vector<std::vector<cv::DMatch>> knn_matches;
-    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
-    const float ratio_thresh = 0.7f;
-    matches_out.clear();
-    for (size_t i = 0; i < knn_matches.size(); i++) {
-        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
-            matches_out.push_back(knn_matches[i][0]);
-        }
-    }
-    return true;
-}
-
-bool Frontend::get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                                 std::vector<cv::DMatch> &matches_out) const {
-    if (matcher_type_ != MatcherType::FLANN) {
-        return false;
-    }
-    std::vector<std::vector<cv::DMatch>> knn_matches;
-    descriptor_matcher_->knnMatch(descriptors1, descriptors2, knn_matches, 2);
-    const float ratio_thresh = 0.7f;
-    matches_out.clear();
-    for (size_t i = 0; i < knn_matches.size(); i++) {
-        if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) {
-            matches_out.push_back(knn_matches[i][0]);
-        }
-    }
-    return true;
-}
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/visual_odometry.hh b/experimental/learn_descriptors/visual_odometry.hh
deleted file mode 100644
index b64434d2f..000000000
--- a/experimental/learn_descriptors/visual_odometry.hh
+++ /dev/null
@@ -1,58 +0,0 @@
-#pragma once
-
-#include <opencv2/opencv.hpp>
-
-namespace robot::experimental::learn_descriptors {
-class Frontend {
-   public:
-    enum class ExtractorType { SIFT, ORB };
-    enum class MatcherType { BRUTE_FORCE, KNN, FLANN };
-
-    Frontend(){};
-    Frontend(ExtractorType frontend_extractor, MatcherType frontend_matcher);
-    ~Frontend(){};
-
-    ExtractorType get_extractor_type() const { return extractor_type_; };
-    MatcherType get_matcher_type() const { return matcher_type_; };
-
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> get_keypoints_and_descriptors(
-        const cv::Mat &img) const;
-    std::vector<cv::DMatch> get_matches(const cv::Mat &descriptors1,
-                                        const cv::Mat &descriptors2) const;
-
-    static void draw_keypoints(const cv::Mat &img, std::vector<cv::KeyPoint> keypoints,
-                               cv::Mat img_keypoints_out) {
-        cv::drawKeypoints(img, keypoints, img_keypoints_out, cv::Scalar::all(-1),
-                          cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
-    }
-    static void draw_matches(const cv::Mat &img1, std::vector<cv::KeyPoint> keypoints1,
-                             const cv::Mat &img2, std::vector<cv::KeyPoint> keypoints2,
-                             std::vector<cv::DMatch> matches, cv::Mat img_matches_out) {
-        cv::drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches_out);
-    }
-
-   private:
-    bool get_brute_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    bool get_KNN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                         std::vector<cv::DMatch> &matches_out) const;
-    bool get_FLANN_matches(const cv::Mat &descriptors1, const cv::Mat &descriptors2,
-                           std::vector<cv::DMatch> &matches_out) const;
-    ExtractorType extractor_type_;
-    MatcherType matcher_type_;
-
-    cv::Ptr<cv::Feature2D> feature_extractor_;
-    cv::Ptr<cv::DescriptorMatcher> descriptor_matcher_;
-};
-class VisualOdometry {
-   public:
-    VisualOdometry(Frontend::ExtractorType frontend_extractor,
-                   Frontend::MatcherType frontend_matcher = Frontend::MatcherType::KNN);
-    ~VisualOdometry(){};
-
-   private:
-    cv::Mat prev_image_;
-
-    Frontend frontend_;
-};
-}  // namespace robot::experimental::learn_descriptors
\ No newline at end of file
diff --git a/experimental/learn_descriptors/visual_odometry_test.cc b/experimental/learn_descriptors/visual_odometry_test.cc
deleted file mode 100644
index b1803317e..000000000
--- a/experimental/learn_descriptors/visual_odometry_test.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-#include "experimental/learn_descriptors/visual_odometry.hh"
-
-#include <iostream>
-#include <sstream>
-
-#include "gtest/gtest.h"
-
-namespace robot::experimental::learn_descriptors {
-TEST(VIO_TEST, frontend_pipeline_sweep) {
-    const size_t width = 640;
-    const size_t height = 480;
-
-    const size_t pixel_shift_x = 20;
-    const size_t PIXEL_COMP_TOL = 20;
-
-    cv::Mat image_1 = cv::Mat::zeros(height, width, CV_8UC3);
-    cv::Mat image_2;
-
-    cv::Mat translation_mat = (cv::Mat_<double>(2, 3) << 1, 0, pixel_shift_x, 0, 1, 0);
-    cv::Point rect_points[4] = {{150, 200}, {350, 200}, {350, 300}, {150, 300}};
-    float com_x = 0.0f, com_y = 0.0f;
-    for (const cv::Point& rect_point : rect_points) {
-        com_x += rect_point.x;
-        com_y += rect_point.y;
-    }
-    com_x *= 0.25;
-    com_y *= 0.25;
-    cv::Point rotation_center(com_x, com_y);
-    cv::Mat rotation_matrix = cv::getRotationMatrix2D(rotation_center, 45, 1.0);
-
-    const size_t line_spacing = 100;
-    for (size_t i = 0; i <= width / line_spacing; i++) {
-        size_t x = i * line_spacing + (width % line_spacing) / 2;
-        size_t b = i * 255.0 / (width / line_spacing);
-        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
-        cv::line(image_1, cv::Point(x, 0), cv::Point(x, height - 1), cv::Scalar(b, g, 0), 2);
-    }
-    for (size_t i = 0; i <= height / line_spacing; i++) {
-        size_t y = i * line_spacing + (height % line_spacing) / 2;
-        size_t b = i * 255.0 / (width / line_spacing);
-        size_t g = 255.0 - i * 255.0 / (width / line_spacing);
-        cv::line(image_1, cv::Point(0, y), cv::Point(width - 1, y), cv::Scalar(b, g, 0), 2);
-    }
-
-    cv::warpAffine(image_1, image_1, rotation_matrix, image_1.size());
-    cv::warpAffine(image_1, image_2, translation_mat, image_1.size());
-
-    // cv::Mat img_test_disp;
-    // cv::hconcat(image_1, image_2, img_test_disp);
-    // cv::imshow("Test", img_test_disp);
-    // cv::waitKey(1000);
-
-    Frontend::ExtractorType extractor_types[2] = {Frontend::ExtractorType::SIFT,
-                                                  Frontend::ExtractorType::ORB};
-    Frontend::MatcherType matcher_types[3] = {Frontend::MatcherType::BRUTE_FORCE,
-                                              Frontend::MatcherType::FLANN,
-                                              Frontend::MatcherType::KNN};
-
-    Frontend frontend;
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_1;
-    std::pair<std::vector<cv::KeyPoint>, cv::Mat> keypoints_descriptors_pair_2;
-    std::vector<cv::DMatch> matches;
-    cv::Mat img_keypoints_out_1(height, width, CV_8UC3),
-        img_keypoints_out_2(height, width, CV_8UC3), img_matches_out(height, 2 * width, CV_8UC3);
-    // cv::Mat img_display_test;
-    for (Frontend::ExtractorType extractor_type : extractor_types) {
-        for (Frontend::MatcherType matcher_type : matcher_types) {
-            printf("started frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
-                   static_cast<int>(matcher_type));
-            try {
-                frontend = Frontend(extractor_type, matcher_type);
-            } catch (const std::invalid_argument& e) {
-                assert(std::string(e.what()) == "FLANN can not be used with ORB.");  // very jank...
-                continue;
-            }
-            keypoints_descriptors_pair_1 = frontend.get_keypoints_and_descriptors(image_1);
-            keypoints_descriptors_pair_2 = frontend.get_keypoints_and_descriptors(image_2);
-            matches = frontend.get_matches(keypoints_descriptors_pair_1.second,
-                                           keypoints_descriptors_pair_2.second);
-            frontend.draw_keypoints(image_1, keypoints_descriptors_pair_1.first,
-                                    img_keypoints_out_1);
-            frontend.draw_keypoints(image_2, keypoints_descriptors_pair_2.first,
-                                    img_keypoints_out_2);
-            frontend.draw_matches(image_1, keypoints_descriptors_pair_1.first, image_2,
-                                  keypoints_descriptors_pair_2.first, matches, img_matches_out);
-            // cv::hconcat(img_keypoints_out_1, img_keypoints_out_2, img_display_test);
-            // cv::vconcat(img_display_test, img_matches_out, img_display_test);
-            // std::stringstream text;
-            // text << "Extractor " << static_cast<int>(extractor_type) << ", matcher "
-            //      << static_cast<int>(matcher_type);
-            // cv::putText(img_display_test, text.str(), cv::Point(20, height - 50),
-            //             cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2, cv::LINE_AA);
-            // cv::imshow("Keypoints and Matches Output.", img_display_test);
-            // std::cout << "Press spacebar to pause." << std::endl;
-            // while (cv::waitKey(1000) == 32) {
-            // }
-            printf("completed frontend combination: (%d, %d)\n", static_cast<int>(extractor_type),
-                   static_cast<int>(matcher_type));
-            if (extractor_type != Frontend::ExtractorType::ORB) {  // don't check ORB for now
-                for (const cv::DMatch match : matches) {
-                    EXPECT_NEAR(keypoints_descriptors_pair_1.first[match.queryIdx].pt.x -
-                                    keypoints_descriptors_pair_2.first[match.trainIdx].pt.x,
-                                pixel_shift_x, pixel_shift_x + PIXEL_COMP_TOL);
-                    EXPECT_NEAR(keypoints_descriptors_pair_2.first[match.trainIdx].pt.y -
-                                    keypoints_descriptors_pair_1.first[match.queryIdx].pt.y,
-                                0, PIXEL_COMP_TOL);
-                }
-            }
-        }
-    }
-}
-}  // namespace robot::experimental::learn_descriptors
diff --git a/visualization/opencv/opencv_viz.cc b/visualization/opencv/opencv_viz.cc
index 406def4b2..06c46da1f 100644
--- a/visualization/opencv/opencv_viz.cc
+++ b/visualization/opencv/opencv_viz.cc
@@ -2,10 +2,13 @@
 
 #include <iostream>
 
+#include "Eigen/Core"
+#include "Eigen/Geometry"
 #include "common/geometry/translate_types.hh"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry {
+using namespace robot::geometry;
+namespace robot::visualization {
 cv::Vec3d rotation_matrix_to_axis_angle(const cv::Matx33d &R) {
     // Ensure R is a valid rotation matrix
     CV_Assert(cv::determinant(R) > 0.999 && cv::determinant(R) < 1.001);
@@ -109,4 +112,4 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
 
     window.spin();
 }
-}  // namespace robot::geometry
+}  // namespace robot::visualization
diff --git a/visualization/opencv/opencv_viz.hh b/visualization/opencv/opencv_viz.hh
index 28dbc197b..5f5956b43 100644
--- a/visualization/opencv/opencv_viz.hh
+++ b/visualization/opencv/opencv_viz.hh
@@ -7,7 +7,7 @@
 #include "Eigen/Geometry"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry {
+namespace robot::visualization {
 struct VizPose {
     const Eigen::Isometry3d world_from_pose;
     const std::optional<std::string> label;
@@ -27,4 +27,4 @@ void viz_scene(const std::vector<VizPose> &world_from_poses,
                const cv::viz::Color color_background = cv::viz::Color::black(),
                const bool show_grid = true, const bool show_origin = true,
                const std::string &window_name = "Viz Window", const double text_scale = 0.1);
-}  // namespace robot::geometry
\ No newline at end of file
+}  // namespace robot::visualization
\ No newline at end of file
diff --git a/visualization/opencv/opencv_viz_test.cc b/visualization/opencv/opencv_viz_test.cc
index 502f8e4f4..832ce2878 100644
--- a/visualization/opencv/opencv_viz_test.cc
+++ b/visualization/opencv/opencv_viz_test.cc
@@ -3,7 +3,7 @@
 #include "gtest/gtest.h"
 #include "opencv2/viz.hpp"
 
-namespace robot::geometry {
+namespace robot::visualization {
 
 TEST(OpencvVizTest, demo) {
     cv::viz::Viz3d window("My Window");
@@ -111,4 +111,4 @@ TEST(OpencvVizTest, cube_test_labeled) {
 
     viz_scene(world_from_cams, cube_points_in_world);
 }
-}  // namespace robot::geometry
\ No newline at end of file
+}  // namespace robot::visualization
\ No newline at end of file