diff --git a/CMakeLists.txt b/CMakeLists.txt
index 331ecddd4d..7fefce132a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -143,6 +143,9 @@ list(
   "${AVM_ROOT}/third_party/libyuv/source/scale_neon64.cc"
   "${AVM_ROOT}/third_party/libyuv/source/scale_win.cc")
 
+list(APPEND AVM_CJSON_SOURCES "${AVM_ROOT}/third_party/cJSON/cJSON.c"
+     "${AVM_ROOT}/third_party/cJSON/cJSON.h")
+
 list(
   APPEND
   AVM_SOURCES
@@ -403,8 +406,14 @@ endif()
 
 if(CONFIG_AV2_DECODER AND ENABLE_EXAMPLES)
   add_executable(
-    avmdec "${AVM_ROOT}/apps/avmdec.c" $<TARGET_OBJECTS:avm_common_app_util>
-           $<TARGET_OBJECTS:avm_decoder_app_util>)
+    avmdec
+    "${AVM_ROOT}/apps/avmdec.c"
+    $<TARGET_OBJECTS:avm_common_app_util>
+    $<TARGET_OBJECTS:avm_decoder_app_util>
+    "${AVM_ROOT}/common/xlayer_config.h"
+    "${AVM_ROOT}/common/xlayer_config_parse.c"
+    "${AVM_ROOT}/common/xlayer_config_parse.h"
+    $<TARGET_OBJECTS:avm_cjson>)
   target_sources(avmdec PRIVATE $<TARGET_OBJECTS:lanczos_resample>)
   add_executable(
     decode_to_md5
@@ -475,9 +484,18 @@ if(CONFIG_AV2_ENCODER)
   if(ENABLE_EXAMPLES)
     add_executable(
       avmenc
-      "${AVM_ROOT}/apps/avmenc.c" $<TARGET_OBJECTS:avm_common_app_util>
+      "${AVM_ROOT}/apps/avmenc.c"
+      "${AVM_ROOT}/apps/avmenc_xlayer.c"
+      "${AVM_ROOT}/apps/avmenc_xlayer.h"
+      "${AVM_ROOT}/common/xlayer_config.h"
+      "${AVM_ROOT}/common/xlayer_config_parse.c"
+      "${AVM_ROOT}/common/xlayer_config_parse.h"
+      "${AVM_ROOT}/common/tu_assembler.c"
+      "${AVM_ROOT}/common/tu_assembler.h"
+      $<TARGET_OBJECTS:avm_common_app_util>
       $<TARGET_OBJECTS:avm_encoder_app_util>
-      $<TARGET_OBJECTS:avm_encoder_stats>)
+      $<TARGET_OBJECTS:avm_encoder_stats>
+      $<TARGET_OBJECTS:avm_cjson>)
     add_executable(
       lossless_encoder
       "${AVM_ROOT}/examples/lossless_encoder.c"
@@ -947,6 +965,9 @@ if(ENABLE_EXAMPLES
       set_property(TARGET ${avm_app} PROPERTY LINKER_LANGUAGE CXX)
     endforeach()
   endif()
+
+  # cJSON object library for JSON xlayer-config parsing (used by avmenc/avmdec)
+  add_library(avm_cjson OBJECT ${AVM_CJSON_SOURCES})
 endif()
 
 if(ENABLE_TESTS)
diff --git a/apps/avmdec.c b/apps/avmdec.c
index 1d80f28558..ee1600a777 100644
--- a/apps/avmdec.c
+++ b/apps/avmdec.c
@@ -45,6 +45,8 @@
 
 #include "common/rawenc.h"
 #include "common/y4menc.h"
+#include "common/xlayer_config.h"
+#include "common/xlayer_config_parse.h"
 
 #if CONFIG_LIBYUV
 #include "third_party/libyuv/include/libyuv/scale.h"
@@ -52,6 +54,58 @@
 
 static const char *exec_name;
 
+// Buffered frame for flush reordering in interleaved output mode.
+typedef struct FlushFrame {
+  avm_image_t *img;         // Allocated deep copy of the decoded image
+  unsigned int order_hint;  // display_order_hint for sorting
+  int xlayer_id;            // Sort tie-breaker after order_hint
+  int mlayer_id;            // Sort tie-breaker after xlayer_id
+} FlushFrame;
+
+static int compare_flush_frames(const void *a, const void *b) {
+  // qsort comparator: ascending by (order_hint, xlayer_id, mlayer_id).
+  const FlushFrame *lhs = (const FlushFrame *)a;
+  const FlushFrame *rhs = (const FlushFrame *)b;
+  int cmp = (lhs->order_hint > rhs->order_hint) -
+            (lhs->order_hint < rhs->order_hint);
+  if (cmp) return cmp;
+  cmp = (lhs->xlayer_id > rhs->xlayer_id) - (lhs->xlayer_id < rhs->xlayer_id);
+  if (cmp) return cmp;
+  return (lhs->mlayer_id > rhs->mlayer_id) - (lhs->mlayer_id < rhs->mlayer_id);
+}
+
+// Return a newly allocated copy of src (pixel rows plus colour/layer fields).
+static avm_image_t *deep_copy_image(const avm_image_t *src) {
+  avm_image_t *copy = avm_img_alloc(NULL, src->fmt, src->d_w, src->d_h, 32);
+  if (copy == NULL) return NULL;
+  copy->display_order_hint = src->display_order_hint;
+  copy->stream_id = src->stream_id;
+  copy->xlayer_id = src->xlayer_id;
+  copy->mlayer_id = src->mlayer_id;
+  copy->tlayer_id = src->tlayer_id;
+  copy->mc = src->mc;
+  copy->tc = src->tc;
+  copy->cp = src->cp;
+  copy->range = src->range;
+  copy->csp = src->csp;
+  copy->monochrome = src->monochrome;
+  copy->bit_depth = src->bit_depth;
+  const int plane_count = src->monochrome ? 1 : 3;  // first plane only if mono
+  const int bps = (src->fmt & AVM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+  for (int plane = 0; plane < plane_count; ++plane) {
+    const int rows = avm_img_plane_height(src, plane);
+    const size_t row_bytes = (size_t)avm_img_plane_width(src, plane) * bps;
+    const unsigned char *from = src->planes[plane];
+    unsigned char *to = copy->planes[plane];
+    for (int r = 0; r < rows; ++r) {
+      memcpy(to, from, row_bytes);
+      from += src->stride[plane];
+      to += copy->stride[plane];
+    }
+  }
+  return copy;
+}
+
 #if CONFIG_PARAKIT_COLLECT_DATA
 #include "av2/common/entropy_sideinfo.h"
 #endif
@@ -136,6 +190,12 @@ static const arg_def_t bruoptmodearg =
     ARG_DEF(NULL, "bru-opt-mode", 0, "Use BRU optimized decode mode");
 static const arg_def_t icc_file =
     ARG_DEF(NULL, "icc", 1, "Output ICC profile file");
+static const arg_def_t xlayercfgarg = ARG_DEF(
+    NULL, "xlayer-config", 1,
+    "Multi-xlayer JSON config (provides atlas layout for --atlas-composite)");
+static const arg_def_t atlascompositearg = ARG_DEF(
+    NULL, "atlas-composite", 0,
+    "Composite decoded xlayers onto atlas canvas (requires --xlayer-config)");
 static const arg_def_t *all_args[] = { &help,
                                        &codecarg,
                                        &use_yv12,
@@ -169,6 +229,8 @@ static const arg_def_t *all_args[] = { &help,
                                        &randomaccess,
                                        &bruoptmodearg,
                                        &icc_file,
+                                       &xlayercfgarg,
+                                       &atlascompositearg,
                                        NULL };
 
 #if CONFIG_LANCZOS_RESAMPLE
@@ -643,6 +705,104 @@ static FILE *open_outfile(const char *name) {
   }
 }
 
+// Composite groups, built from per-xlayer properties (JSON config or LCR).
+// Each unique (layer_type, auxiliary_type, view_type) combination
+// produces a separate composite output. Layers within a group must
+// share the same chroma format; frames that differ are not composited.
+// The canvas takes the bit depth of the first frame placed in the group.
+typedef struct CompositeGroup {
+  int layer_type;      // TEXTURE_LAYER, AUX_LAYER, etc.
+  int auxiliary_type;  // only meaningful when layer_type == AUX_LAYER
+  int view_type;       // VIEW_UNSPECIFIED, VIEW_CENTER, VIEW_LEFT, etc.
+  int num_xlayers;     // how many xlayers belong to this group
+  int xlayer_ids[MAX_NUM_XLAYERS];      // xlayer_ids in this group
+  int xlayer_indices[MAX_NUM_XLAYERS];  // indices into xlayer_cfg.xlayers[]
+                                        // (-1 if from decoder query)
+  avm_image_t *canvas;  // NULL until the group's first frame is placed
+  FILE *outfile_cg;     // group output; may alias the main output file
+  int layers_placed;    // frames placed since the last composite; reset then
+  int frame_count;      // composites completed so far
+  int mixed_chroma;            // set once a chroma-format mismatch is seen
+  unsigned int max_bit_depth;  // highest frame bit depth seen so far
+  char label[128];             // label for stderr and output file names
+} CompositeGroup;
+
+static const char *comp_layer_type_names[] = { "texture", "auxiliary", "stereo",
+                                               "dependent" };
+static const char *comp_aux_type_names[] = { "alpha", "depth", "segmentation",
+                                             "gain_map" };
+static const char *comp_view_type_names[] = { "unspecified", "center", "left",
+                                              "right", "explicit" };
+
+// Partition xlayers into composite groups keyed by (layer_type,
+// auxiliary_type, view_type) and log them to stderr; auxiliary_type counts as
+// -1 unless layer_type == AUX_LAYER. xlayer_ids[], layer_types[], aux_types[],
+// view_types[] and the optional config_indices[] (NULL stores -1) have length
+// num_xlayers. Caller frees *out_groups. Exits if the allocation fails.
+static void build_composite_groups(int num_xlayers, const int *xlayer_ids,
+                                   const int *layer_types, const int *aux_types,
+                                   const int *view_types,
+                                   const int *config_indices,
+                                   CompositeGroup **out_groups,
+                                   int *out_num_groups) {
+  // calloc zero-fills each group; ask for >= 1 element so empty input works.
+  CompositeGroup *groups = (CompositeGroup *)calloc(
+      num_xlayers > 0 ? (size_t)num_xlayers : 1, sizeof(CompositeGroup));
+  if (!groups) {
+    fprintf(stderr, "Error: failed to allocate composite groups\n");
+    exit(EXIT_FAILURE);
+  }
+  int num_groups = 0;
+
+  for (int i = 0; i < num_xlayers; i++) {
+    const int lt = layer_types[i];
+    const int at = (lt == AUX_LAYER) ? aux_types[i] : -1;
+    const int vt = view_types[i];
+    // Find existing group or create new
+    int gidx = -1;
+    for (int g = 0; g < num_groups; g++) {
+      if (groups[g].layer_type == lt && groups[g].auxiliary_type == at &&
+          groups[g].view_type == vt) {
+        gidx = g;
+        break;
+      }
+    }
+    if (gidx < 0) {
+      gidx = num_groups++;
+      groups[gidx].layer_type = lt;
+      groups[gidx].auxiliary_type = at;
+      groups[gidx].view_type = vt;
+    }
+    const int k = groups[gidx].num_xlayers++;
+    groups[gidx].xlayer_ids[k] = xlayer_ids[i];
+    groups[gidx].xlayer_indices[k] = config_indices ? config_indices[i] : -1;
+  }
+
+  // Build labels and report
+  fprintf(stderr, "Atlas composite: %d output group(s)\n", num_groups);
+  for (int g = 0; g < num_groups; g++) {
+    CompositeGroup *cg = &groups[g];
+    const char *lt_name = (cg->layer_type >= 0 && cg->layer_type < 4)
+                              ? comp_layer_type_names[cg->layer_type]
+                              : "unknown";
+    const char *vt_name = (cg->view_type >= 0 && cg->view_type < 5)
+                              ? comp_view_type_names[cg->view_type]
+                              : "unknown";
+    if (cg->layer_type == AUX_LAYER && cg->auxiliary_type >= 0 &&
+        cg->auxiliary_type < 4) {
+      snprintf(cg->label, sizeof(cg->label), "%s_%s_%s",
+               comp_aux_type_names[cg->auxiliary_type], lt_name, vt_name);
+    } else {
+      snprintf(cg->label, sizeof(cg->label), "%s_%s", lt_name, vt_name);
+    }
+    fprintf(stderr, "  group %d [%s]: %d xlayer(s)\n", g, cg->label,
+            cg->num_xlayers);
+  }
+
+  *out_groups = groups;
+  *out_num_groups = num_groups;
+}
+
 static int main_loop(int argc, const char **argv_) {
   avm_codec_ctx_t decoder;
   char *fn = NULL;
@@ -679,6 +839,15 @@ static int main_loop(int argc, const char **argv_) {
   int num_local_ops_selections = 0;
   int output_all_layers = 0;
   int skip_film_grain = 0;
+  int atlas_composite = 0;
+  char xlayer_config_path[PATH_MAX] = { 0 };
+  MultiXLayerConfig xlayer_cfg;
+
+  CompositeGroup *comp_groups = NULL;
+  int num_comp_groups = 0;
+  int comp_groups_built = 0;
+  avm_atlas_info_t dec_atlas_info;
+  memset(&dec_atlas_info, 0, sizeof(dec_atlas_info));
   int random_access_point_index = 0;
   int bru_opt_mode = 0;
   avm_image_t *scaled_img = NULL;
@@ -686,6 +855,12 @@ static int main_loop(int argc, const char **argv_) {
   int frame_avail, got_data, flush_decoder = 0;
   int num_external_frame_buffers = 0;
   struct ExternalFrameBufferList ext_fb_list = { 0, NULL };
+  int is_monotonic_output = -1;  // -1 = unknown, 0/1 from bitstream
+
+  // Flush reordering buffer for interleaved single-file output
+  FlushFrame *flush_buf = NULL;
+  int flush_buf_count = 0;
+  int flush_buf_capacity = 0;
 
   const char *outfile_pattern = NULL;
   char outfile_name[PATH_MAX] = { 0 };
@@ -709,6 +884,7 @@ static int main_loop(int argc, const char **argv_) {
   FILE *outfile_substream[AVM_MAX_NUM_STREAMS] = { NULL };
 
   int substream_frame_out[AVM_MAX_NUM_STREAMS] = { 0 };
+  int total_decode_errors = 0;
   FILE *framestats_file = NULL;
 
   FILE *icc_f = NULL;
@@ -876,6 +1052,10 @@ static int main_loop(int argc, const char **argv_) {
       bru_opt_mode = 1;
     } else if (arg_match(&arg, &icc_file, argi)) {
       icc_f = fopen(arg.val, "wb");
+    } else if (arg_match(&arg, &xlayercfgarg, argi)) {
+      snprintf(xlayer_config_path, PATH_MAX, "%s", arg.val);
+    } else if (arg_match(&arg, &atlascompositearg, argi)) {
+      atlas_composite = 1;
     } else {
       argj++;
     }
@@ -889,6 +1069,38 @@ static int main_loop(int argc, const char **argv_) {
   /* Handle non-option arguments */
   fn = argv[0];
 
+  // Atlas composite setup
+  xlayer_config_init(&xlayer_cfg);
+  if (atlas_composite) {
+    output_all_layers = 1;  // implicitly enable all-layers output
+  }
+  // Default to keep-going mode for multi-xlayer decoding
+  if (output_all_layers && !keep_going) {
+    keep_going = 1;
+  }
+  if (xlayer_config_path[0] != '\0') {
+    if (parse_multi_xlayer_config(xlayer_config_path, &xlayer_cfg) != 0) {
+      die("Error: failed to parse xlayer config \"%s\"\n", xlayer_config_path);
+    }
+    // Build composite groups eagerly from JSON config
+    if (atlas_composite && xlayer_cfg.enable_atlas) {
+      int xlids[MAX_NUM_XLAYERS], lts[MAX_NUM_XLAYERS];
+      int ats[MAX_NUM_XLAYERS], vts[MAX_NUM_XLAYERS];
+      int idxs[MAX_NUM_XLAYERS];
+      for (int xi = 0; xi < xlayer_cfg.num_xlayers; xi++) {
+        const XLayerEncConfig *xl = &xlayer_cfg.xlayers[xi];
+        xlids[xi] = xl->xlayer_id;
+        lts[xi] = xl->layer_type;
+        ats[xi] = (xl->layer_type == AUX_LAYER) ? xl->auxiliary_type : -1;
+        vts[xi] = xl->view_type;
+        idxs[xi] = xi;
+      }
+      build_composite_groups(xlayer_cfg.num_xlayers, xlids, lts, ats, vts, idxs,
+                             &comp_groups, &num_comp_groups);
+      comp_groups_built = 1;
+    }
+  }
+
   if (!fn) {
     free(argv);
     fprintf(stderr, "No input file specified!\n");
@@ -955,6 +1167,28 @@ static int main_loop(int argc, const char **argv_) {
         outfile = open_outfile(outfile_name);
       }
     }
+    // Open per-group output files for atlas composite (JSON path only;
+    // decoder-query path opens files in the deferred block)
+    if (atlas_composite && comp_groups_built && num_comp_groups > 1) {
+      for (int g = 0; g < num_comp_groups; g++) {
+        char group_name[PATH_MAX + 128] = { 0 };
+        // Insert group label before extension
+        const char *dot = strrchr(outfile_name, '.');
+        if (dot) {
+          size_t base_len = (size_t)(dot - outfile_name);
+          snprintf(group_name, sizeof(group_name), "%.*s_%s%s", (int)base_len,
+                   outfile_name, comp_groups[g].label, dot);
+        } else {
+          snprintf(group_name, sizeof(group_name), "%s_%s", outfile_name,
+                   comp_groups[g].label);
+        }
+        comp_groups[g].outfile_cg = open_outfile(group_name);
+        fprintf(stderr, "  group %d output: %s\n", g, group_name);
+      }
+    } else if (atlas_composite && comp_groups_built && num_comp_groups == 1) {
+      // Single group: reuse the main outfile
+      comp_groups[0].outfile_cg = outfile;
+    }
   }
 
   if (use_y4m && !noblit) {
@@ -1102,6 +1336,7 @@ static int main_loop(int argc, const char **argv_) {
                avm_codec_error(&decoder));
 
           if (detail) warn("Additional information: %s", detail);
+          total_decode_errors++;
           if (!keep_going) goto fail;
         }
 
@@ -1136,6 +1371,68 @@ static int main_loop(int argc, const char **argv_) {
     dx_time += avm_usec_timer_elapsed(&timer);
 
     got_data = 0;
+
+    // Deferred composite group building from decoder LCR/Atlas info
+    if (atlas_composite && !comp_groups_built) {
+      avm_lcr_info_t lcr_info;
+      memset(&lcr_info, 0, sizeof(lcr_info));
+      memset(&dec_atlas_info, 0, sizeof(dec_atlas_info));
+
+      int have_lcr = !AVM_CODEC_CONTROL_TYPECHECKED(&decoder, AV2D_GET_LCR_INFO,
+                                                    &lcr_info);
+      int have_atlas = !AVM_CODEC_CONTROL_TYPECHECKED(
+          &decoder, AV2D_GET_ATLAS_INFO, &dec_atlas_info);
+
+      if (have_lcr && lcr_info.num_xlayers > 0) {
+        int xlids[31], lts[31], ats_arr[31], vts[31];
+        for (int li = 0; li < lcr_info.num_xlayers; li++) {
+          xlids[li] = lcr_info.xlayers[li].xlayer_id;
+          lts[li] = lcr_info.xlayers[li].layer_type;
+          ats_arr[li] = lcr_info.xlayers[li].auxiliary_type;
+          vts[li] = lcr_info.xlayers[li].view_type;
+        }
+        build_composite_groups(lcr_info.num_xlayers, xlids, lts, ats_arr, vts,
+                               NULL, &comp_groups, &num_comp_groups);
+        comp_groups_built = 1;
+
+        if (have_atlas && dec_atlas_info.num_segments > 0) {
+          fprintf(stderr,
+                  "Atlas info from bitstream: %dx%d canvas, %d segment(s)\n",
+                  dec_atlas_info.atlas_width, dec_atlas_info.atlas_height,
+                  dec_atlas_info.num_segments);
+        }
+
+        // Open per-group output files
+        if (!noblit && single_file && outfile_pattern) {
+          if (num_comp_groups > 1) {
+            for (int g = 0; g < num_comp_groups; g++) {
+              char group_name[PATH_MAX + 128] = { 0 };
+              const char *dot = strrchr(outfile_name, '.');
+              if (dot) {
+                size_t base_len = (size_t)(dot - outfile_name);
+                snprintf(group_name, sizeof(group_name), "%.*s_%s%s",
+                         (int)base_len, outfile_name, comp_groups[g].label,
+                         dot);
+              } else {
+                snprintf(group_name, sizeof(group_name), "%s_%s", outfile_name,
+                         comp_groups[g].label);
+              }
+              comp_groups[g].outfile_cg = open_outfile(group_name);
+              fprintf(stderr, "  group %d output: %s\n", g, group_name);
+            }
+          } else if (num_comp_groups == 1) {
+            comp_groups[0].outfile_cg = outfile;
+          }
+        }
+      } else {
+        // No LCR info available — atlas composite not possible
+        fprintf(stderr,
+                "Warning: no LCR info in bitstream, atlas composite disabled. "
+                "Falling back to per-layer output.\n");
+        atlas_composite = 0;
+      }
+    }
+
     while ((img = avm_codec_get_frame(&decoder, &iter))) {
       // frame_out does not include hidden frames.
       ++frame_out;
@@ -1144,6 +1441,17 @@ static int main_loop(int argc, const char **argv_) {
       }
       if (!flush_decoder) got_data = 1;
 
+      // Query monotonic_output_order_flag lazily on first output frame
+      if (is_monotonic_output < 0) {
+        unsigned int mono_flag = 0;
+        if (!AVM_CODEC_CONTROL_TYPECHECKED(
+                &decoder, AV2D_GET_MONOTONIC_OUTPUT_ORDER, &mono_flag)) {
+          is_monotonic_output = (int)mono_flag;
+        } else {
+          is_monotonic_output = 1;  // assume monotonic if unknown
+        }
+      }
+
       if (AVM_CODEC_CONTROL_TYPECHECKED(&decoder, AVMD_GET_FRAME_CORRUPTED,
                                         &corrupted)) {
         warn("Failed AVM_GET_FRAME_CORRUPTED: %s", avm_codec_error(&decoder));
@@ -1181,6 +1489,235 @@ static int main_loop(int argc, const char **argv_) {
         const int PLANES_YVU[] = { AVM_PLANE_Y, AVM_PLANE_V, AVM_PLANE_U };
         const int *planes = flipuv ? PLANES_YVU : PLANES_YUV;
 
+        // Buffer frames for interleaved single-file output so they can
+        // be sorted by display order before writing.  Non-monotonic
+        // output from the decoder can interleave xlayers out of display
+        // order even during normal decode (not just flush).
+        if (!is_monotonic_output && output_all_layers && num_streams == 1 &&
+            single_file && !do_md5 && !atlas_composite) {
+          if (flush_buf_count >= flush_buf_capacity) {
+            int new_cap = flush_buf_capacity ? flush_buf_capacity * 2 : 64;
+            FlushFrame *new_buf = (FlushFrame *)realloc(
+                flush_buf, (size_t)new_cap * sizeof(FlushFrame));
+            if (!new_buf) {
+              warn("Failed to allocate flush reorder buffer");
+              goto fail;
+            }
+            flush_buf = new_buf;
+            flush_buf_capacity = new_cap;
+          }
+          FlushFrame *ff = &flush_buf[flush_buf_count];
+          ff->img = deep_copy_image(img);
+          if (!ff->img) {
+            warn("Failed to copy flush frame");
+            goto fail;
+          }
+          ff->order_hint = img->display_order_hint;
+          ff->xlayer_id = img->xlayer_id;
+          ff->mlayer_id = img->mlayer_id;
+          flush_buf_count++;
+          continue;
+        }
+
+        // Atlas composite mode: place decoded xlayer into its group's canvas
+        if (atlas_composite && comp_groups_built) {
+          int xlid = img->xlayer_id;
+
+          // Find this xlayer's composite group by xlayer_id
+          int gidx = -1;
+          for (int g = 0; g < num_comp_groups; g++) {
+            for (int k = 0; k < comp_groups[g].num_xlayers; k++) {
+              if (comp_groups[g].xlayer_ids[k] == xlid) {
+                gidx = g;
+                break;
+              }
+            }
+            if (gidx >= 0) break;
+          }
+          if (gidx < 0) {
+            fprintf(stderr,
+                    "Warning: decoded xlayer_id %d not in any composite group, "
+                    "skipping\n",
+                    xlid);
+            continue;
+          }
+
+          CompositeGroup *cg = &comp_groups[gidx];
+
+          // Allocate this group's canvas on first use
+          if (!cg->canvas) {
+            unsigned int cw = img->d_w;
+            unsigned int ch = img->d_h;
+            // Prefer atlas info from decoder, then JSON config
+            if (dec_atlas_info.atlas_width > 0 &&
+                dec_atlas_info.atlas_height > 0) {
+              cw = (unsigned int)dec_atlas_info.atlas_width;
+              ch = (unsigned int)dec_atlas_info.atlas_height;
+            } else if (xlayer_cfg.atlas_width > 0 &&
+                       xlayer_cfg.atlas_height > 0) {
+              cw = (unsigned int)xlayer_cfg.atlas_width;
+              ch = (unsigned int)xlayer_cfg.atlas_height;
+            }
+            cg->max_bit_depth = img->bit_depth;
+            cg->canvas = avm_img_alloc(NULL, img->fmt, cw, ch, 32);
+            if (!cg->canvas) {
+              die("Error: failed to allocate composite canvas %ux%u for "
+                  "group %d [%s]\n",
+                  cw, ch, gidx, cg->label);
+            }
+            cg->canvas->bit_depth = img->bit_depth;
+            cg->canvas->monochrome = img->monochrome;
+            cg->canvas->csp = img->csp;
+            cg->canvas->range = img->range;
+            for (int p = 0; p < 3; p++) {
+              unsigned int ph = avm_img_plane_height(cg->canvas, p);
+              memset(cg->canvas->planes[p], 0,
+                     (size_t)cg->canvas->stride[p] * ph);
+            }
+          }
+
+          // Check chroma format compatibility
+          if (img->x_chroma_shift != cg->canvas->x_chroma_shift ||
+              img->y_chroma_shift != cg->canvas->y_chroma_shift) {
+            if (!cg->mixed_chroma) {
+              cg->mixed_chroma = 1;
+              fprintf(stderr,
+                      "Warning: group %d [%s] has mixed chroma formats — "
+                      "compositing disabled for this group. Use per-layer "
+                      "output (--all-layers --num-streams) instead.\n",
+                      gidx, cg->label);
+            }
+            // Fall through to normal output path (don't continue)
+          } else {
+            // Handle bit-depth mismatch: promote to highest
+            unsigned int canvas_bd = cg->canvas->bit_depth;
+            unsigned int frame_bd = img->bit_depth;
+            if (frame_bd > cg->max_bit_depth) cg->max_bit_depth = frame_bd;
+
+            // Get atlas position for this xlayer.
+            // Try decoder atlas info first, then JSON config fallback.
+            int pos_x = 0, pos_y = 0;
+            int found_pos = 0;
+            if (dec_atlas_info.num_segments > 0) {
+              for (int s = 0; s < dec_atlas_info.num_segments; s++) {
+                if (dec_atlas_info.segments[s].xlayer_id == xlid) {
+                  pos_x = dec_atlas_info.segments[s].pos_x;
+                  pos_y = dec_atlas_info.segments[s].pos_y;
+                  found_pos = 1;
+                  break;
+                }
+              }
+            }
+            if (!found_pos && xlayer_config_path[0] != '\0') {
+              for (int xi = 0; xi < xlayer_cfg.num_xlayers; xi++) {
+                if (xlayer_cfg.xlayers[xi].xlayer_id == xlid) {
+                  pos_x = xlayer_cfg.xlayers[xi].atlas_pos_x >= 0
+                              ? xlayer_cfg.xlayers[xi].atlas_pos_x
+                              : 0;
+                  pos_y = xlayer_cfg.xlayers[xi].atlas_pos_y >= 0
+                              ? xlayer_cfg.xlayers[xi].atlas_pos_y
+                              : 0;
+                  break;
+                }
+              }
+            }
+            int canvas_bps =
+                (cg->canvas->fmt & AVM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+            int frame_bps = (img->fmt & AVM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+            int shift = (int)canvas_bd - (int)frame_bd;
+
+            for (int p = 0; p < 3; p++) {
+              int px = pos_x, py = pos_y;
+              unsigned int pw = img->d_w, ph = img->d_h;
+              if (p > 0) {
+                px >>= (int)img->x_chroma_shift;
+                py >>= (int)img->y_chroma_shift;
+                pw >>= img->x_chroma_shift;
+                ph >>= img->y_chroma_shift;
+              }
+              // Clip the placement to the canvas plane: a frame or atlas
+              // position that does not fit must not write past the canvas.
+              const int cpw = avm_img_plane_width(cg->canvas, p);
+              const int cph = avm_img_plane_height(cg->canvas, p);
+              if (px < 0 || py < 0 || px >= cpw || py >= cph) continue;
+              if (pw > (unsigned int)(cpw - px)) pw = (unsigned int)(cpw - px);
+              if (ph > (unsigned int)(cph - py)) ph = (unsigned int)(cph - py);
+              const unsigned char *src_row = img->planes[p];
+              unsigned char *dst_row = cg->canvas->planes[p] +
+                                       py * cg->canvas->stride[p] +
+                                       px * canvas_bps;
+
+              if (canvas_bps == 2 && frame_bps == 2 && shift > 0) {
+                // Both 16-bit, canvas higher: shift up
+                for (unsigned int row = 0; row < ph; row++) {
+                  const uint16_t *s = (const uint16_t *)src_row;
+                  uint16_t *d = (uint16_t *)dst_row;
+                  for (unsigned int col = 0; col < pw; col++)
+                    d[col] = (uint16_t)(s[col] << shift);
+                  src_row += img->stride[p];
+                  dst_row += cg->canvas->stride[p];
+                }
+              } else if (canvas_bps == 2 && frame_bps == 1) {
+                // 8-bit frame into 16-bit canvas
+                int total_shift = (int)canvas_bd - 8;
+                for (unsigned int row = 0; row < ph; row++) {
+                  uint16_t *d = (uint16_t *)dst_row;
+                  for (unsigned int col = 0; col < pw; col++)
+                    d[col] =
+                        (uint16_t)((unsigned int)src_row[col] << total_shift);
+                  src_row += img->stride[p];
+                  dst_row += cg->canvas->stride[p];
+                }
+              } else {
+                // Same sample size, or canvas narrower than the frame: copy
+                // whole rows using the smaller of the two sample sizes.
+                const int copy_bps =
+                    canvas_bps < frame_bps ? canvas_bps : frame_bps;
+                const unsigned int row_bytes = pw * (unsigned int)copy_bps;
+                for (unsigned int row = 0; row < ph; row++) {
+                  memcpy(dst_row, src_row, row_bytes);
+                  src_row += img->stride[p];
+                  dst_row += cg->canvas->stride[p];
+                }
+              }
+            }
+
+            // Output composite when all xlayers for this group are placed
+            cg->layers_placed++;
+            if (cg->layers_placed >= cg->num_xlayers) {
+              cg->layers_placed = 0;
+              cg->frame_count++;
+              FILE *cg_out = cg->outfile_cg;
+              if (cg_out && single_file) {
+                avm_image_t *cimg = cg->canvas;
+                int num_planes_out = (opt_raw && cimg->monochrome) ? 1 : 3;
+                if (use_y4m) {
+                  char y4m_buf[Y4M_BUFFER_SIZE] = { 0 };
+                  if (cg->frame_count == 1) {
+                    y4m_write_file_header(
+                        y4m_buf, sizeof(y4m_buf), cimg->d_w, cimg->d_h,
+                        &avm_input_ctx.framerate, cimg->monochrome, cimg->csp,
+                        cimg->fmt, cimg->bit_depth, cimg->range);
+                    fputs(y4m_buf, cg_out);
+                  }
+                  y4m_write_frame_header(y4m_buf, sizeof(y4m_buf));
+                  fputs(y4m_buf, cg_out);
+                  y4m_write_image_file(cimg, planes, cg_out);
+                } else {
+                  raw_write_image_file(cimg, planes, num_planes_out, cg_out);
+                }
+              }
+              // Zero-fill canvas for next frame
+              for (int p = 0; p < 3; p++) {
+                unsigned int ph = avm_img_plane_height(cg->canvas, p);
+                memset(cg->canvas->planes[p], 0,
+                       (size_t)cg->canvas->stride[p] * ph);
+              }
+            }
+            continue;  // skip normal output path
+          }
+        }
+
         if (do_scale) {
           if (frame_out == 1) {
             // If the output frames are to be scaled to a fixed display size
@@ -1375,11 +1912,80 @@ static int main_loop(int argc, const char **argv_) {
     }
   }
 
+  // Write buffered frames in display order for interleaved output
+  if (flush_buf_count > 0) {
+    qsort(flush_buf, (size_t)flush_buf_count, sizeof(FlushFrame),
+          compare_flush_frames);
+    const int PLANES_YUV[] = { AVM_PLANE_Y, AVM_PLANE_U, AVM_PLANE_V };
+    const int PLANES_YVU[] = { AVM_PLANE_Y, AVM_PLANE_V, AVM_PLANE_U };
+    const int *planes = flipuv ? PLANES_YVU : PLANES_YUV;
+    for (int fi = 0; fi < flush_buf_count; fi++) {
+      avm_image_t *fimg = flush_buf[fi].img;
+      unsigned int output_bit_depth;
+      if (!fixed_output_bit_depth && single_file) {
+        output_bit_depth = fimg->bit_depth;
+      } else {
+        output_bit_depth = fixed_output_bit_depth;
+      }
+      if (output_bit_depth != 0)
+        avm_shift_img(output_bit_depth, &fimg, &img_shifted);
+
+      if (use_y4m) {
+        char y4m_buf[Y4M_BUFFER_SIZE] = { 0 };
+        if (fi == 0) {
+          // Write y4m file header for the first sorted frame
+          y4m_write_file_header(y4m_buf, sizeof(y4m_buf), fimg->d_w, fimg->d_h,
+                                &avm_input_ctx.framerate, fimg->monochrome,
+                                fimg->csp, fimg->fmt, fimg->bit_depth,
+                                fimg->range);
+          fputs(y4m_buf, outfile);
+        }
+        y4m_write_frame_header(y4m_buf, sizeof(y4m_buf));
+        fputs(y4m_buf, outfile);
+        y4m_write_image_file(fimg, planes, outfile);
+      } else {
+        int num_planes = (opt_raw && fimg->monochrome) ? 1 : 3;
+        raw_write_image_file(fimg, planes, num_planes, outfile);
+      }
+      avm_img_free(flush_buf[fi].img);
+    }
+    // frame_out was already incremented in the main loop for each
+    // buffered frame, so don't add flush_buf_count again.
+    free(flush_buf);
+    flush_buf = NULL;
+    flush_buf_count = 0;
+  }
+
   if (summary || progress) {
     show_progress(frame_in, frame_out, dx_time);
     fprintf(stderr, "\n");
   }
 
+  // Output summary report
+  if (!noblit && outfile_pattern && strcmp(outfile_pattern, "-") != 0) {
+    fprintf(stderr, "\nDecode complete:\n");
+    if (atlas_composite && comp_groups_built) {
+      for (int g = 0; g < num_comp_groups; g++) {
+        fprintf(stderr, "  Output: %s (%d frames)\n", comp_groups[g].label,
+                comp_groups[g].frame_count);
+      }
+    } else if (num_streams > 1) {
+      for (int sub = 0; sub < num_streams; sub++) {
+        char outfile_substream_name[PATH_MAX] = { 0 };
+        add_postfix_stream_id(outfile_name, outfile_substream_name, sub);
+        fprintf(stderr, "  Output: %s (%d frames)\n", outfile_substream_name,
+                substream_frame_out[sub]);
+      }
+    } else {
+      fprintf(stderr, "  Output: %s (%d frames)\n", outfile_name, frame_out);
+    }
+    if (total_decode_errors > 0) {
+      fprintf(stderr, "  Errors: %d\n", total_decode_errors);
+    } else {
+      fprintf(stderr, "  Errors: 0\n");
+    }
+  }
+
   if (frames_corrupted) {
     fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted);
   } else {
@@ -1388,6 +1994,14 @@ static int main_loop(int argc, const char **argv_) {
 
 fail:
 
+  // Clean up flush buffer if we exited early
+  if (flush_buf) {
+    for (int fi = 0; fi < flush_buf_count; fi++) {
+      if (flush_buf[fi].img) avm_img_free(flush_buf[fi].img);
+    }
+    free(flush_buf);
+  }
+
   if (avm_codec_destroy(&decoder)) {
     fprintf(stderr, "Failed to destroy decoder: %s\n",
             avm_codec_error(&decoder));
@@ -1433,6 +2047,15 @@ static int main_loop(int argc, const char **argv_) {
 
   if (scaled_img) avm_img_free(scaled_img);
   if (img_shifted) avm_img_free(img_shifted);
+  if (comp_groups) {
+    for (int g = 0; g < num_comp_groups; g++) {
+      if (comp_groups[g].canvas) avm_img_free(comp_groups[g].canvas);
+      // Close per-group files (but not if it's the shared main outfile)
+      if (comp_groups[g].outfile_cg && comp_groups[g].outfile_cg != outfile)
+        fclose(comp_groups[g].outfile_cg);
+    }
+    free(comp_groups);
+  }
 
   for (i = 0; i < ext_fb_list.num_external_frame_buffers; ++i) {
     free(ext_fb_list.ext_fb[i].data);
diff --git a/apps/avmenc.c b/apps/avmenc.c
index 6375df5d8a..257dba557a 100644
--- a/apps/avmenc.c
+++ b/apps/avmenc.c
@@ -40,6 +40,8 @@
 #include "common/tools_common.h"
 #include "common/warnings.h"
 #include "av2/common/blockd.h"
+#include "common/xlayer_config_parse.h"
+#include "apps/avmenc_xlayer.h"
 
 #if CONFIG_WEBM_IO
 #include "common/webmenc.h"
@@ -136,6 +138,7 @@ const arg_def_t *main_args[] = { &g_av2_codec_arg_defs.help,
                                  &g_av2_codec_arg_defs.debugmode,
                                  &g_av2_codec_arg_defs.outputfile,
                                  &g_av2_codec_arg_defs.reconfile,
+                                 &g_av2_codec_arg_defs.xlayer_config,
                                  &g_av2_codec_arg_defs.codecarg,
                                  &g_av2_codec_arg_defs.passes,
                                  &g_av2_codec_arg_defs.pass_arg,
@@ -737,6 +740,8 @@ static void parse_global_config(struct AvxEncoderConfig *global, char ***argv) {
       global->disable_warning_prompt = 1;
     } else if (arg_match(&arg, &g_av2_codec_arg_defs.icc_file, argi)) {
       read_icc_profile(global, arg.val);
+    } else if (arg_match(&arg, &g_av2_codec_arg_defs.xlayer_config, argi)) {
+      global->xlayer_config_path = arg.val;
     } else {
       argj++;
     }
@@ -1624,12 +1629,7 @@ static void setup_pass(struct stream_state *stream,
 static void initialize_encoder(struct stream_state *stream,
                                struct AvxEncoderConfig *global) {
   int i;
-  int flags = 0;
-
-  flags |= (global->show_psnr >= 1) ? AVM_CODEC_USE_PSNR : 0;
-  flags |= (global->show_psnr == 2) ? AVM_CODEC_USE_STREAM_PSNR : 0;
-  flags |= global->quiet ? 0 : AVM_CODEC_USE_PER_FRAME_STATS;
-  flags |= global->verbose ? AVM_CODEC_USE_PER_FRAME_HLS_INFO : 0;
+  int flags = avx_encoder_init_flags(global);
 
   /* Construct Encoder Context */
   avm_codec_enc_init(&stream->encoder, global->codec, &stream->config.cfg,
@@ -1907,40 +1907,46 @@ static float usec_to_fps(uint64_t usec, unsigned int frames) {
 }
 
 static void write_recon_file(struct stream_state *stream, FILE *file) {
-  avm_image_t enc_img;
-
-  AVM_CODEC_CONTROL_TYPECHECKED(&stream->encoder, AV2_GET_NEW_FRAME_IMAGE,
-                                &enc_img);
+  const avm_image_t *enc_img = avm_codec_get_preview_frame(&stream->encoder);
 
-  ctx_exit_on_error(&stream->encoder,
-                    "Failed to get encoder reconstructed frame");
+  if (!enc_img) {
+    ctx_exit_on_error(&stream->encoder,
+                      "Failed to get encoder reconstructed frame");
+    return;
+  }
 
-  int num_planes = enc_img.monochrome ? 1 : 3;
+  int num_planes = enc_img->monochrome ? 1 : 3;
   const int PLANES_YUV[] = { AVM_PLANE_Y, AVM_PLANE_U, AVM_PLANE_V };
   const int *planes = PLANES_YUV;
-  raw_write_image_file(&enc_img, planes, num_planes, file);
+  raw_write_image_file(enc_img, planes, num_planes, file);
 }
 
 static void test_decode(struct stream_state *stream,
                         enum TestDecodeFatality fatal) {
-  avm_image_t enc_img, dec_img;
+  avm_image_t dec_img;
 
-  // fprintf(stderr, "DEBUG: Running test_decode at POC: %d\n",
-  //         stream->frames_out - 1);
   if (stream->mismatch_seen) return;
 
-  /* Get the internal reference frame */
-  AVM_CODEC_CONTROL_TYPECHECKED(&stream->encoder, AV2_GET_NEW_FRAME_IMAGE,
-                                &enc_img);
+  /* Get the internal reference frame from the encoder via preview API.
+   * AV2_GET_NEW_FRAME_IMAGE relies on last_show_frame_buf which is only set
+   * for immediate-output frames, so it fails for hidden frames encoded with
+   * SEF mode.  The preview API accesses cm->cur_frame directly and always
+   * works. */
+  const avm_image_t *enc_img = avm_codec_get_preview_frame(&stream->encoder);
+
   AVM_CODEC_CONTROL_TYPECHECKED(&stream->decoder, AV2_GET_NEW_FRAME_IMAGE,
                                 &dec_img);
 
-  ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame");
+  if (!enc_img) {
+    ctx_exit_on_error(&stream->encoder,
+                      "Failed to get encoder reference frame");
+    return;
+  }
   ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame");
 
-  if (!avm_compare_img(&enc_img, &dec_img)) {
+  if (!avm_compare_img(enc_img, &dec_img)) {
     int y[4], u[4], v[4];
-    avm_find_mismatch_high(&enc_img, &dec_img, y, u, v);
+    avm_find_mismatch_high(enc_img, &dec_img, y, u, v);
     stream->decoder.err = 1;
     warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL,
                           "Stream %d: Encode/decode mismatch on POC %d at"
@@ -1953,7 +1959,6 @@ static void test_decode(struct stream_state *stream,
     stream->mismatch_seen = stream->frames_out;
   }
 
-  avm_img_free(&enc_img);
   avm_img_free(&dec_img);
 }
 
@@ -1991,6 +1996,35 @@ int main(int argc, const char **argv_) {
 
   if (argc < 2) usage_exit();
 
+  // Multi-xlayer encoding: dispatch to separate path if config is provided
+  if (global.xlayer_config_path != NULL) {
+    // Warn about unconsumed CLI args that will be ignored in xlayer mode
+    for (argi = argv; *argi; argi++) {
+      if (argi[0][0] == '-' && argi[0][1])
+        warn(
+            "option \"%s\" ignored in xlayer mode "
+            "(use JSON config instead)",
+            *argi);
+    }
+
+    MultiXLayerConfig mcfg;
+    if (parse_multi_xlayer_config(global.xlayer_config_path, &mcfg) != 0) {
+      die("Error: failed to parse xlayer config \"%s\"\n",
+          global.xlayer_config_path);
+    }
+    if (resolve_input_sources(&mcfg) != 0) {
+      die("Error: failed to resolve input sources in xlayer config \"%s\"\n",
+          global.xlayer_config_path);
+    }
+    resolve_mlayer_ci(&mcfg);
+    if (validate_multi_xlayer_config(&mcfg) != 0) {
+      die("Error: invalid xlayer config \"%s\"\n", global.xlayer_config_path);
+    }
+    res = encode_multi_xlayer(&mcfg, &global);
+    free(argv);
+    return res;
+  }
+
   switch (global.color_type) {
     case I420: input.fmt = AVM_IMG_FMT_I420; break;
     case I422: input.fmt = AVM_IMG_FMT_I422; break;
diff --git a/apps/avmenc.h b/apps/avmenc.h
index 17c9f94613..1ca93f9ede 100644
--- a/apps/avmenc.h
+++ b/apps/avmenc.h
@@ -52,8 +52,20 @@ struct AvxEncoderConfig {
   int experimental_bitstream;
   avm_chroma_sample_position_t csp;
   cfg_options_t encoder_config;
+  const char *xlayer_config_path;  // Path to multi-xlayer JSON config
 };
 
+// Map the global config's reporting options onto the AVM_CODEC_USE_* init
+// flags; shared by the single-stream and multi-xlayer encode paths.
+static inline int avx_encoder_init_flags(const struct AvxEncoderConfig *cfg) {
+  int init_flags = 0;
+  if (cfg->show_psnr >= 1) init_flags |= AVM_CODEC_USE_PSNR;
+  if (cfg->show_psnr == 2) init_flags |= AVM_CODEC_USE_STREAM_PSNR;
+  if (!cfg->quiet) init_flags |= AVM_CODEC_USE_PER_FRAME_STATS;
+  if (cfg->verbose) init_flags |= AVM_CODEC_USE_PER_FRAME_HLS_INFO;
+  return init_flags;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/apps/avmenc_xlayer.c b/apps/avmenc_xlayer.c
new file mode 100644
index 0000000000..9c373e848e
--- /dev/null
+++ b/apps/avmenc_xlayer.c
@@ -0,0 +1,1163 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include "apps/avmenc_xlayer.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avm/avm_encoder.h"
+#include "avm/avm_integer.h"
+#include "avm/avmcx.h"
+#include "avm_ports/avm_timer.h"
+#include "common/tools_common.h"
+#include "common/y4minput.h"
+
+// Shared source reader for subpicture encoding from a single input
+typedef struct SharedSourceReader {
+  struct AvxInputContext input;  // file handle and stream parameters
+  avm_image_t raw;  // full-resolution frame
+  int initialized;  // 1 once shared_source_init() has set the source up
+  int eof;  // 1 after the first failed frame read
+} SharedSourceReader;
+
+// Open a file and detect its type (Y4M or raw YUV) from its first four bytes.
+// On success, populates input->file, file_type, and (for Y4M) dimensions,
+// framerate, format, bit_depth, and color_range; a raw file is rewound so
+// that reading starts at byte 0. Returns 0 on success. Returns -1 if the
+// file cannot be opened or its Y4M header is rejected; in both cases no file
+// is left open and input->file is NULL.
+static int open_and_detect_input(struct AvxInputContext *input,
+                                 const char *filename) {
+  input->file = fopen(filename, "rb");
+  if (!input->file) {
+    fprintf(stderr, "Error: cannot open input file \"%s\"\n", filename);
+    return -1;
+  }
+
+  struct FileTypeDetectionBuffer *detect = &input->detect;
+  detect->buf_read = (int)fread(detect->buf, 1, 4, input->file);
+  detect->position = 0;
+
+  if (detect->buf_read >= 4 && memcmp(detect->buf, "YUV4", 4) == 0) {
+    input->file_type = FILE_TYPE_Y4M;
+    // A malformed or unsupported header must not be reported as success:
+    // the y4m fields copied below would otherwise be meaningless.
+    if (y4m_input_open(&input->y4m, input->file, (char *)detect->buf, 4,
+                       AVM_CSP_UNSPECIFIED, 0) < 0) {
+      fprintf(stderr, "Error: unsupported Y4M stream \"%s\"\n", filename);
+      fclose(input->file);
+      input->file = NULL;
+      return -1;
+    }
+    input->width = input->y4m.pic_w;
+    input->height = input->y4m.pic_h;
+    input->framerate.numerator = input->y4m.fps_n;
+    input->framerate.denominator = input->y4m.fps_d;
+    input->fmt = input->y4m.avm_fmt;
+    input->bit_depth = input->y4m.bit_depth;
+    input->color_range = input->y4m.color_range;
+  } else {
+    input->file_type = FILE_TYPE_RAW;
+    fseek(input->file, 0, SEEK_SET);
+    // Reset detect buffer so read_yuv_frame doesn't replay detection bytes
+    detect->buf_read = 0;
+    detect->position = 0;
+  }
+  return 0;
+}
+
+// Prepare `src` to read full-resolution frames from input source `inp`.
+// An empty inp->filename means the source is unused: `src` is left zeroed
+// (initialized == 0) and 0 is returned. Otherwise the file is opened, config
+// values for width/height/format/bit_depth override anything detected from
+// the file, and for raw input a frame buffer is allocated. If no format was
+// detected or configured, it is derived from the profile of the first xlayer
+// in `mcfg` that references this source, defaulting to 4:2:0.
+// Returns 0 on success, -1 on failure (with the input file closed again).
+static int shared_source_init(SharedSourceReader *src,
+                              const InputSourceConfig *inp,
+                              const MultiXLayerConfig *mcfg) {
+  memset(src, 0, sizeof(*src));
+  if (inp->filename[0] == '\0') return 0;
+
+  src->input.filename = inp->filename;
+  src->input.framerate.numerator = 30;
+  src->input.framerate.denominator = 1;
+  src->input.only_i420 = 0;
+  src->input.bit_depth = 0;
+
+  if (open_and_detect_input(&src->input, inp->filename) != 0) return -1;
+
+  // Raw input has no header, so its dimensions can only come from the config
+  // (they stay 0 otherwise); for Y4M a config value overrides the header.
+  if (inp->width > 0) src->input.width = inp->width;
+  if (inp->height > 0) src->input.height = inp->height;
+
+  // Apply explicit format/bit_depth (overrides Y4M detection too)
+  if (inp->format == 422)
+    src->input.fmt = AVM_IMG_FMT_I422;
+  else if (inp->format == 444)
+    src->input.fmt = AVM_IMG_FMT_I444;
+  else if (inp->format == 420)
+    src->input.fmt = AVM_IMG_FMT_I420;
+
+  if (inp->bit_depth > 0) src->input.bit_depth = inp->bit_depth;
+
+  // If format still unknown, derive from the first xlayer using this source.
+  // Raw input is deliberately NOT pre-set to I420 above: doing so made this
+  // fallback unreachable and read 4:2:2 / 4:4:4 raw sources as 4:2:0.
+  if (src->input.fmt == 0) {
+    // Find first xlayer referencing this input source
+    int src_idx = (int)(inp - mcfg->input_sources);
+    for (int i = 0; i < mcfg->num_xlayers; i++) {
+      if (mcfg->xlayers[i].input_source_idx == src_idx) {
+        switch (mcfg->xlayers[i].profile) {
+          case MAIN_422_10_IP1: src->input.fmt = AVM_IMG_FMT_I422; break;
+          case MAIN_444_10_IP1: src->input.fmt = AVM_IMG_FMT_I444; break;
+          default: src->input.fmt = AVM_IMG_FMT_I420; break;
+        }
+        break;
+      }
+    }
+    if (src->input.fmt == 0) src->input.fmt = AVM_IMG_FMT_I420;
+  }
+
+  // Allocate full-resolution raw frame
+  if (src->input.file_type != FILE_TYPE_Y4M) {
+    if (!avm_img_alloc(&src->raw, src->input.fmt, src->input.width,
+                       src->input.height, 32)) {
+      fprintf(stderr, "Error: failed to allocate shared source image\n");
+      // shared_source_destroy() ignores readers with initialized == 0, so
+      // the file has to be released here or it would leak.
+      fclose(src->input.file);
+      src->input.file = NULL;
+      return -1;
+    }
+  }
+
+  src->initialized = 1;
+  fprintf(stderr, "Input source \"%s\": %ux%u, \"%s\"\n", inp->name,
+          src->input.width, src->input.height, inp->filename);
+  return 0;
+}
+
+// Fetch the next full-resolution frame from the shared source into src->raw.
+// Returns 1 when a frame was read, 0 if the source is unused or exhausted.
+static int shared_source_read_frame(SharedSourceReader *src) {
+  if (!src->initialized || src->eof) return 0;
+
+  const int got_frame =
+      (src->input.file_type == FILE_TYPE_Y4M)
+          ? (y4m_input_fetch_frame(&src->input.y4m, src->input.file,
+                                   &src->raw) >= 1)
+          : (read_yuv_frame(&src->input, &src->raw) == 0);
+
+  // Latch end-of-stream so later calls return without touching the file.
+  if (!got_frame) src->eof = 1;
+  return got_frame;
+}
+
+// Crop a region from the shared source into an xlayer's raw buffer.
+// Copies the rectangle at (pos_x, pos_y) with size (crop_w, crop_h), given
+// in luma samples, from src_img into the top-left corner of dst_img. For
+// the chroma planes the origin is shifted down by the subsampling factors
+// and the size is rounded up, so an odd-sized crop still fills the last
+// chroma row/column of dst_img.
+// No bounds checking is done here: the caller must pass a rectangle that
+// lies inside src_img and a dst_img large enough to hold it.
+static void crop_region_to_xlayer(avm_image_t *dst_img,
+                                  const avm_image_t *src_img, int pos_x,
+                                  int pos_y, unsigned int crop_w,
+                                  unsigned int crop_h) {
+  // The sample size is a property of the image, not of the plane.
+  const int bytes_per_sample =
+      (src_img->fmt & AVM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
+
+  for (int plane = 0; plane < 3; plane++) {
+    int sx = pos_x;
+    int sy = pos_y;
+    unsigned int cw = crop_w;
+    unsigned int ch = crop_h;
+
+    if (plane > 0) {
+      sx >>= (int)src_img->x_chroma_shift;
+      sy >>= (int)src_img->y_chroma_shift;
+      // Round up rather than truncate: with a plain shift an odd luma size
+      // drops the final chroma sample and leaves it uncopied in dst_img.
+      // NOTE(review): presumably matches avm_img_alloc's chroma sizing —
+      // confirm.
+      cw = (cw + src_img->x_chroma_shift) >> src_img->x_chroma_shift;
+      ch = (ch + src_img->y_chroma_shift) >> src_img->y_chroma_shift;
+    }
+
+    const unsigned char *src_row = src_img->planes[plane] +
+                                   sy * src_img->stride[plane] +
+                                   sx * bytes_per_sample;
+    unsigned char *dst_row = dst_img->planes[plane];
+    unsigned int row_bytes = cw * (unsigned int)bytes_per_sample;
+
+    for (unsigned int y = 0; y < ch; y++) {
+      memcpy(dst_row, src_row, row_bytes);
+      src_row += src_img->stride[plane];
+      dst_row += dst_img->stride[plane];
+    }
+  }
+}
+
+// Release everything an initialized reader holds: the Y4M parser state (if
+// any), the input file and the frame buffer. Does nothing for a reader that
+// was never initialized or has already been destroyed.
+static void shared_source_destroy(SharedSourceReader *src) {
+  if (!src->initialized) return;
+  // The Y4M reader owns buffers of its own that fclose() does not release.
+  if (src->input.file_type == FILE_TYPE_Y4M) y4m_input_close(&src->input.y4m);
+  if (src->input.file) fclose(src->input.file);
+  avm_img_free(&src->raw);
+  src->initialized = 0;
+}
+
+// Forward declaration — defined after get_frame_to_encode
+static int mlayer_crop_differs(const XLayerEncConfig *xlcfg, int ml);
+
+// Initialize a single xlayer encoder from its config entry.
+// Uses the global config for defaults that aren't overridden per-layer.
+// When use_shared_source is true, input file opening is skipped (source is
+// provided externally via crop_region_to_xlayer).
+// Steps, in order: open or describe the input, fill state->cfg from the
+// xlayer/global config, create the encoder, apply post-init codec controls,
+// then allocate the raw frame buffer(s).
+// Returns 0 on success and -1 on failure.
+// NOTE(review): the failure paths return without closing state->input.file
+// or destroying state->encoder; presumably the caller cleans up — confirm.
+static int init_xlayer_encoder(XLayerEncoderState *state,
+                               const XLayerEncConfig *xlcfg,
+                               const MultiXLayerConfig *mcfg,
+                               const struct AvxEncoderConfig *global,
+                               int use_shared_source) {
+  avm_codec_iface_t *iface = get_avm_encoder_by_short_name("av2");
+  if (!iface) {
+    fprintf(stderr, "Error: AV2 encoder not available\n");
+    return -1;
+  }
+
+  state->xlayer_id = xlcfg->xlayer_id;
+  state->frames_out = 0;
+  state->frame_count = 0;
+  state->cx_time = 0;
+  state->eof = 0;
+  state->allocated_raw_shift = 0;
+  state->input_shift = 0;
+
+  // Open input file (skip when using shared source — frames come from crop)
+  if (!use_shared_source) {
+    memset(&state->input, 0, sizeof(state->input));
+    state->input.filename = xlcfg->input_filename;
+    state->input.framerate.numerator = 30;
+    state->input.framerate.denominator = 1;
+    state->input.only_i420 = 0;
+    state->input.bit_depth = 0;
+
+    if (open_and_detect_input(&state->input, xlcfg->input_filename) != 0) {
+      fprintf(stderr, "Error: failed to open input for xlayer %d\n",
+              xlcfg->xlayer_id);
+      return -1;
+    }
+
+    // Override dimensions from config if specified
+    if (xlcfg->width > 0) state->input.width = xlcfg->width;
+    if (xlcfg->height > 0) state->input.height = xlcfg->height;
+    if (state->input.fmt == 0)
+      state->input.fmt = AVM_IMG_FMT_I420;  // default, profile may override
+  } else {
+    // Shared source mode: dimensions come from xlayer config
+    memset(&state->input, 0, sizeof(state->input));
+    state->input.width = xlcfg->width;
+    state->input.height = xlcfg->height;
+    state->input.fmt = AVM_IMG_FMT_I420;  // default, profile may override
+    state->input.framerate.numerator = 30;
+    state->input.framerate.denominator = 1;
+  }
+
+  // Derive input image format from profile (chroma subsampling)
+  switch (xlcfg->profile) {
+    case MAIN_422_10_IP1: state->input.fmt = AVM_IMG_FMT_I422; break;
+    case MAIN_444_10_IP1: state->input.fmt = AVM_IMG_FMT_I444; break;
+    default:  // MAIN_420_10_IP0..MAIN_420_10: 4:2:0
+      // Keep whatever was detected from file, or default I420
+      if (state->input.fmt != AVM_IMG_FMT_I422 &&
+          state->input.fmt != AVM_IMG_FMT_I444)
+        state->input.fmt = AVM_IMG_FMT_I420;
+      break;
+  }
+
+  // Get default encoder config
+  avm_codec_err_t res = avm_codec_enc_config_default(iface, &state->cfg, 0);
+  if (res) {
+    fprintf(stderr, "Error: failed to get default config for xlayer %d\n",
+            xlcfg->xlayer_id);
+    return -1;
+  }
+
+  // Set dimensions
+  state->cfg.g_w = state->input.width;
+  state->cfg.g_h = state->input.height;
+
+  // Set timebase from input framerate (or global).
+  // When using a named input source with an explicit frame rate, use that
+  // rate so the encoder's internal timing matches the source content rate.
+  // The timebase is the reciprocal of the frame rate, hence num/den swap.
+  if (xlcfg->input_source_idx >= 0 &&
+      mcfg->input_sources[xlcfg->input_source_idx].frame_rate_num > 0) {
+    state->cfg.g_timebase.num =
+        mcfg->input_sources[xlcfg->input_source_idx].frame_rate_den;
+    state->cfg.g_timebase.den =
+        mcfg->input_sources[xlcfg->input_source_idx].frame_rate_num;
+  } else if (global->have_framerate) {
+    state->cfg.g_timebase.num = global->framerate.den;
+    state->cfg.g_timebase.den = global->framerate.num;
+  } else {
+    state->cfg.g_timebase.num = state->input.framerate.denominator;
+    state->cfg.g_timebase.den = state->input.framerate.numerator;
+  }
+
+  // Set profile
+  state->cfg.g_profile = xlcfg->profile;
+
+  // Set rate control: use QP if specified, otherwise use global settings
+  if (xlcfg->qp >= 0) {
+    state->cfg.rc_end_usage = AVM_Q;
+    // use_fixed_qp_offsets=1 tells the rate control to honor the specified QP
+    // directly, bypassing adaptive KF quality boosting that would otherwise
+    // ignore the QP and encode keyframes at minimum quantizer.
+    state->cfg.use_fixed_qp_offsets = 1;
+    state->cfg.rc_min_quantizer = 0;
+    state->cfg.rc_max_quantizer = 255;
+  } else if (xlcfg->bitrate > 0) {
+    state->cfg.rc_end_usage = AVM_VBR;
+    state->cfg.rc_target_bitrate = xlcfg->bitrate;
+  }
+
+  // Set lag_in_frames
+  if (xlcfg->lag_in_frames >= 0) {
+    state->cfg.g_lag_in_frames = xlcfg->lag_in_frames;
+  }
+
+  // Set keyframe interval.
+  // For multi-mlayer xlayers with lag_in_frames == 0, disable encoder-internal
+  // keyframe placement because the encoder's keyframe counter advances per
+  // encode call (not per TU), causing misaligned keyframes across mlayers.
+  // The xlayer encode loop manages keyframes externally via AVM_EFLAG_FORCE_KF.
+  // For multi-mlayer with lag_in_frames > 0, use multi_layers_lag_test which
+  // fixes the per-encode-call counter and enables forward keyframe support.
+  if (xlcfg->num_embedded_layers > 1 && state->cfg.g_lag_in_frames == 0) {
+    state->cfg.kf_mode = AVM_KF_DISABLED;
+    // Set kf_max_dist to the spec conformance limit for display_order_hint:
+    // get_disp_order_hint must return < (1 << (DISPLAY_ORDER_HINT_BITS - 1)).
+    state->cfg.kf_max_dist = (1 << 29);
+  } else if (xlcfg->kf_max_dist >= 0) {
+    state->cfg.kf_max_dist = xlcfg->kf_max_dist;
+  }
+
+  // Enable LCR and OPS based on config
+  state->cfg.enable_lcr =
+      (mcfg->enable_global_lcr || mcfg->enable_local_lcr) ? 1 : 0;
+  if (mcfg->num_ops_sets > 0) {
+    state->cfg.enable_ops = 1;
+  }
+
+  // Set bit depth based on profile (all standard AV2 profiles are 10-bit)
+  state->cfg.g_bit_depth = AVM_BITS_10;
+#if CONFIG_TESTONLY_12BIT_SUPPORT
+  if (xlcfg->profile == TEST_ONLY_12BIT_PROFILE)
+    state->cfg.g_bit_depth = AVM_BITS_12;
+#endif
+  // An input bit depth of 0 (nothing detected) is treated as 8-bit.
+  // NOTE(review): in shared-source mode state->input was zeroed above, so
+  // this is always 8 there — confirm that is intended for deeper sources.
+  state->cfg.g_input_bit_depth =
+      state->input.bit_depth > 0 ? state->input.bit_depth : 8;
+  state->input_shift =
+      (int)state->cfg.g_bit_depth - (int)state->cfg.g_input_bit_depth;
+
+  // Set fwd_kf_enabled from GOP mode (must be set before encoder init)
+  {
+    int fwd_kf = 0;
+    switch (xlcfg->gop_mode) {
+      case 1:  // open_leading
+      case 2:  // open_sef
+        fwd_kf = 1;
+        break;
+      default:  // 0 = closed
+        fwd_kf = 0;
+        break;
+    }
+    // An explicit per-xlayer setting wins over the GOP-mode default.
+    if (xlcfg->fwd_kf_enabled >= 0) fwd_kf = xlcfg->fwd_kf_enabled;
+    state->cfg.fwd_kf_enabled = fwd_kf;
+  }
+
+  // Set S-Frame pre-init config fields
+  if (xlcfg->sframe_dist >= 0) {
+    state->cfg.sframe_dist = (unsigned int)xlcfg->sframe_dist;
+  }
+  if (xlcfg->sframe_mode >= 0) {
+    state->cfg.sframe_mode = (unsigned int)xlcfg->sframe_mode;
+  }
+  if (xlcfg->sframe_type >= 0) {
+    state->cfg.sframe_type = (unsigned int)xlcfg->sframe_type;
+  }
+
+  // Initialize encoder with reporting flags matching single-stream path
+  int flags = avx_encoder_init_flags(global);
+  res = avm_codec_enc_init(&state->encoder, iface, &state->cfg, flags);
+  if (res) {
+    fprintf(stderr, "Error: encoder init failed for xlayer %d: %s\n",
+            xlcfg->xlayer_id, avm_codec_error(&state->encoder));
+    return -1;
+  }
+
+  // Apply encoder controls
+  // cpu_used falls back to 5 when the config leaves it negative (unset).
+  // NOTE(review): none of the avm_codec_control() calls below check the
+  // returned status, so a rejected control goes unnoticed.
+  int cpu = xlcfg->cpu_used >= 0 ? xlcfg->cpu_used : 5;
+  avm_codec_control(&state->encoder, AVME_SET_CPUUSED, cpu);
+  avm_codec_control(&state->encoder, AVME_SET_XLAYER_ID, xlcfg->xlayer_id);
+
+  // Set QP via codec control (not via rc_min/max_quantizer)
+  if (xlcfg->qp >= 0) {
+    avm_codec_control(&state->encoder, AVME_SET_QP, (unsigned int)xlcfg->qp);
+  }
+
+  // Apply GOP mode controls (post-init codec controls)
+  // Note: kf_filt (keyframe filtering) is independent of GOP mode. The first
+  // frame is always a displayed CLK. For open GOP modes, fwd_kf_enabled=1
+  // (set pre-init above) causes subsequent keyframes to be OLK. In AV2,
+  // OLK frames can be displayed directly — they do not need to be hidden.
+  // kf_filt can be set separately via the "enable_keyframe_filtering" config.
+  {
+    int kf_filt = 0, sef_hidden = 0, intra_only_fwd = 0;
+    switch (xlcfg->gop_mode) {
+      case 1:  // open_leading: OLK at subsequent GOP boundaries
+        sef_hidden = 0;
+        break;
+      case 2:  // open_sef (monotonic: hidden INTRA_ONLY_FRAME + SEF)
+        sef_hidden = 1;
+        if (mcfg->monotonic_output_order) intra_only_fwd = 1;
+        break;
+      default:  // 0 = closed
+        sef_hidden = 0;
+        break;
+    }
+    // Monotonic output requires SEF for all hidden frames — implicit output
+    // is not allowed when monotonic_output_order_flag is set.
+    if (mcfg->monotonic_output_order) sef_hidden = 1;
+
+    // Explicit per-xlayer values (>= 0) override everything derived above,
+    // including the monotonic-output default for sef_hidden.
+    if (xlcfg->enable_keyframe_filtering >= 0)
+      kf_filt = xlcfg->enable_keyframe_filtering;
+    if (xlcfg->add_sef_for_hidden_frames >= 0)
+      sef_hidden = xlcfg->add_sef_for_hidden_frames;
+
+    avm_codec_control(&state->encoder, AV2E_SET_ENABLE_KEYFRAME_FILTERING,
+                      (unsigned int)kf_filt);
+    avm_codec_control(&state->encoder, AV2E_SET_ADD_SEF_FOR_HIDDEN_FRAMES,
+                      sef_hidden);
+    if (intra_only_fwd) {
+      avm_codec_control(&state->encoder, AV2E_SET_INTRA_ONLY_FWD_KF, 1);
+    }
+  }
+
+  // Enable multi_layers_lag_test for multi-mlayer with lag > 0.
+  // This fixes per-encode-call keyframe counting and GF group management.
+  // The GF interval must be set to (lag - 1) / num_mlayers to account for
+  // mlayer interleaving in the lookahead — each source frame generates
+  // num_mlayers encode calls, so the effective lag in source frames is
+  // lag / num_mlayers.  Without this, the GF group is too large for the
+  // lookahead and the encoder never produces output beyond the keyframe.
+  // NOTE(review): the integer division gives gop_size == 0 whenever
+  // lag_in_frames <= num_embedded_layers; confirm the encoder accepts a
+  // zero min/max GF interval.
+  if (xlcfg->num_embedded_layers > 1 && state->cfg.g_lag_in_frames > 0) {
+    avm_codec_control(&state->encoder,
+                      AV2E_SET_ENABLE_FLAG_MULTI_LAYER_LAG_TEST, 1);
+    int gop_size =
+        (state->cfg.g_lag_in_frames - 1) / xlcfg->num_embedded_layers;
+    avm_codec_control(&state->encoder, AV2E_SET_MIN_GF_INTERVAL, gop_size);
+    avm_codec_control(&state->encoder, AV2E_SET_MAX_GF_INTERVAL, gop_size);
+  }
+
+  if (xlcfg->num_embedded_layers > 1) {
+    avm_codec_control(&state->encoder, AVME_SET_NUMBER_MLAYERS,
+                      xlcfg->num_embedded_layers);
+  }
+  if (xlcfg->num_temporal_layers > 1) {
+    avm_codec_control(&state->encoder, AVME_SET_NUMBER_TLAYERS,
+                      xlcfg->num_temporal_layers);
+  }
+
+  if (mcfg->monotonic_output_order) {
+    avm_codec_control(&state->encoder, AV2E_SET_MONOTONIC_OUTPUT_ORDER, 1);
+  }
+
+  // Propagate xlayer-level color configuration to the encoder.
+  // These were previously parsed from JSON but never applied.
+  // A negative value means "not specified" and leaves the encoder default.
+  if (xlcfg->color_primaries >= 0)
+    avm_codec_control(&state->encoder, AV2E_SET_COLOR_PRIMARIES,
+                      (unsigned int)xlcfg->color_primaries);
+  if (xlcfg->transfer_characteristics >= 0)
+    avm_codec_control(&state->encoder, AV2E_SET_TRANSFER_CHARACTERISTICS,
+                      (unsigned int)xlcfg->transfer_characteristics);
+  if (xlcfg->matrix_coefficients >= 0)
+    avm_codec_control(&state->encoder, AV2E_SET_MATRIX_COEFFICIENTS,
+                      (unsigned int)xlcfg->matrix_coefficients);
+  if (xlcfg->full_range_flag >= 0)
+    avm_codec_control(&state->encoder, AV2E_SET_COLOR_RANGE,
+                      (unsigned int)xlcfg->full_range_flag);
+
+  // Apply per-mlayer CI overrides (after resolving inheritance from xlayer).
+  // Only set controls for mlayers whose CI differs from the xlayer base.
+  for (int m = 0; m < xlcfg->num_embedded_layers; m++) {
+    const MLayerSourceConfig *ms = &xlcfg->mlayer_sources[m];
+    if (ms->color_primaries >= 0 &&
+        ms->color_primaries != xlcfg->color_primaries)
+      avm_codec_control(&state->encoder, AV2E_SET_MLAYER_COLOR_PRIMARIES,
+                        (unsigned int)m, (unsigned int)ms->color_primaries);
+    if (ms->transfer_characteristics >= 0 &&
+        ms->transfer_characteristics != xlcfg->transfer_characteristics)
+      avm_codec_control(
+          &state->encoder, AV2E_SET_MLAYER_TRANSFER_CHARACTERISTICS,
+          (unsigned int)m, (unsigned int)ms->transfer_characteristics);
+    if (ms->matrix_coefficients >= 0 &&
+        ms->matrix_coefficients != xlcfg->matrix_coefficients)
+      avm_codec_control(&state->encoder, AV2E_SET_MLAYER_MATRIX_COEFFICIENTS,
+                        (unsigned int)m, (unsigned int)ms->matrix_coefficients);
+    if (ms->full_range_flag >= 0 &&
+        ms->full_range_flag != xlcfg->full_range_flag)
+      avm_codec_control(&state->encoder, AV2E_SET_MLAYER_COLOR_RANGE,
+                        (unsigned int)m, (unsigned int)ms->full_range_flag);
+  }
+
+  // Apply mlayer dependency controls
+  if (xlcfg->has_mlayer_dependencies) {
+    avm_codec_control(&state->encoder, AV2E_SET_MLAYER_DEPENDENCY_PRESENT,
+                      (unsigned int)1);
+    for (int m = 0; m < xlcfg->num_embedded_layers; m++) {
+      unsigned int mask =
+          (unsigned int)resolve_mlayer_dep_mask(&xlcfg->mlayer_sources[m], m);
+      avm_codec_control(&state->encoder, AV2E_SET_MLAYER_DEPENDENCY_MAP,
+                        (unsigned int)m, mask);
+    }
+  }
+
+  // Apply per-xlayer sub-GOP config if specified
+  if (xlcfg->subgop_config_path[0] != '\0') {
+    avm_codec_control(&state->encoder, AV2E_SET_SUBGOP_CONFIG_PATH,
+                      xlcfg->subgop_config_path);
+  }
+
+  // Apply generic codec controls from JSON "codec_controls" array.
+  // Each control is a (name, value) pair mapped to an AV2E_SET_* control ID.
+  // Names missing from the table below are reported and skipped.
+  {
+    static const struct {
+      const char *name;
+      int ctrl_id;
+    } ctrl_map[] = {
+      { "enable_deblocking", AV2E_SET_ENABLE_DEBLOCKING },
+      { "enable_cdef", AV2E_SET_ENABLE_CDEF },
+      { "enable_restoration", AV2E_SET_ENABLE_RESTORATION },
+      { "enable_tpl_model", AV2E_SET_ENABLE_TPL_MODEL },
+      { "enable_keyframe_filtering", AV2E_SET_ENABLE_KEYFRAME_FILTERING },
+      { "enable_global_motion", AV2E_SET_ENABLE_GLOBAL_MOTION },
+      { "enable_warped_motion", AV2E_SET_ENABLE_WARPED_MOTION },
+      { "enable_intrabc", AV2E_SET_ENABLE_INTRABC },
+      { "enable_palette", AV2E_SET_ENABLE_PALETTE },
+      { "enable_interintra_comp", AV2E_SET_ENABLE_INTERINTRA_COMP },
+      { "enable_smooth_interintra", AV2E_SET_ENABLE_SMOOTH_INTERINTRA },
+      { "enable_interintra_wedge", AV2E_SET_ENABLE_INTERINTRA_WEDGE },
+      { "enable_onesided_comp", AV2E_SET_ENABLE_ONESIDED_COMP },
+      { "enable_masked_comp", AV2E_SET_ENABLE_MASKED_COMP },
+      { "enable_diff_wtd_comp", AV2E_SET_ENABLE_DIFF_WTD_COMP },
+      { "enable_interinter_wedge", AV2E_SET_ENABLE_INTERINTER_WEDGE },
+      { "enable_ref_frame_mvs", AV2E_SET_ENABLE_REF_FRAME_MVS },
+      { "enable_overlay", AV2E_SET_ENABLE_OVERLAY },
+      { "enable_angle_delta", AV2E_SET_ENABLE_ANGLE_DELTA },
+    };
+    static const int num_ctrl_map =
+        (int)(sizeof(ctrl_map) / sizeof(ctrl_map[0]));
+
+    for (int c = 0; c < xlcfg->num_codec_controls; c++) {
+      const char *name = xlcfg->codec_controls[c].name;
+      int value = xlcfg->codec_controls[c].value;
+      int found = 0;
+      for (int k = 0; k < num_ctrl_map; k++) {
+        if (strcmp(name, ctrl_map[k].name) == 0) {
+          avm_codec_control(&state->encoder, ctrl_map[k].ctrl_id, value);
+          found = 1;
+          break;
+        }
+      }
+      if (!found) {
+        fprintf(stderr,
+                "Warning: xlayer %d unknown codec_control \"%s\" (ignored)\n",
+                xlcfg->xlayer_id, name);
+      }
+    }
+  }
+
+  // Allocate raw frame buffer
+  // A Y4M input read from its own file gets no buffer here; state->raw is
+  // only zeroed in that case.
+  if (use_shared_source || state->input.file_type != FILE_TYPE_Y4M) {
+    if (!avm_img_alloc(&state->raw, state->input.fmt, state->input.width,
+                       state->input.height, 32)) {
+      fprintf(stderr, "Error: failed to allocate image for xlayer %d\n",
+              xlcfg->xlayer_id);
+      return -1;
+    }
+  } else {
+    memset(&state->raw, 0, sizeof(state->raw));
+  }
+
+  // Allocate per-mlayer raw frame buffers for mlayers with their own source
+  // An mlayer gets its own buffer when it names an input source and either
+  // that source or its crop differs from the xlayer's; a missing mlayer
+  // width/height falls back to the xlayer's input dimensions.
+  if (xlcfg->has_per_mlayer_sources) {
+    for (int m = 0; m < xlcfg->num_embedded_layers; m++) {
+      const MLayerSourceConfig *ms = &xlcfg->mlayer_sources[m];
+      if (ms->input_source_idx >= 0 &&
+          (ms->input_source_idx != xlcfg->input_source_idx ||
+           mlayer_crop_differs(xlcfg, m))) {
+        unsigned int mw = ms->width > 0 ? ms->width : state->input.width;
+        unsigned int mh = ms->height > 0 ? ms->height : state->input.height;
+        if (!avm_img_alloc(&state->mlayer_raw[m], state->input.fmt, mw, mh,
+                           32)) {
+          fprintf(stderr,
+                  "Error: failed to allocate mlayer %d image for xlayer %d\n",
+                  m, xlcfg->xlayer_id);
+          return -1;
+        }
+        state->mlayer_raw_allocated[m] = 1;
+      }
+    }
+  }
+
+  fprintf(stderr, "Initialized xlayer %d: %ux%u%s\n", xlcfg->xlayer_id,
+          state->input.width, state->input.height,
+          use_shared_source ? " (shared source)" : "");
+
+  return 0;
+}
+
+// Fetch the next frame of this xlayer's own input into state->raw.
+// Returns 1 if a frame was read; otherwise latches state->eof and returns 0.
+static int read_xlayer_frame(XLayerEncoderState *state) {
+  int got_frame;
+  if (state->eof) return 0;
+
+  if (state->input.file_type != FILE_TYPE_Y4M) {
+    // Non-Y4M path: a 0 result from read_yuv_frame() counts as a frame.
+    got_frame = read_yuv_frame(&state->input, &state->raw) == 0;
+  } else {
+    // Y4M path: a result >= 1 from y4m_input_fetch_frame() counts as a frame.
+    got_frame = y4m_input_fetch_frame(&state->input.y4m, state->input.file,
+                                      &state->raw) >= 1;
+  }
+
+  if (!got_frame) state->eof = 1;
+  return got_frame;
+}
+
+// Upshift a raw frame to the encoder's internal bit depth if needed, lazily
+// allocating the shift buffer (fatal on failure).  Returns the frame to encode.
+static avm_image_t *upshift_frame_if_needed(avm_image_t *raw,
+                                            avm_image_t *raw_shift,
+                                            int *allocated_shift,
+                                            int input_shift,
+                                            int input_bit_depth) {
+  if (!input_shift && input_bit_depth != 8) return raw;
+  if (!*allocated_shift) {
+    if (!avm_img_alloc(raw_shift, raw->fmt | AVM_IMG_FMT_HIGHBITDEPTH,
+                       raw->d_w, raw->d_h, 32)) {
+      fprintf(stderr, "Error: failed to allocate upshift image buffer\n");
+      exit(EXIT_FAILURE);
+    }
+    *allocated_shift = 1;
+  }
+  avm_img_upshift(raw_shift, raw, input_shift);
+  return raw_shift;
+}
+
+// Returns 1 if mlayer `ml` sets a crop position/size unlike the xlayer's.
+static int mlayer_crop_differs(const XLayerEncConfig *xlcfg, int ml) {
+  const MLayerSourceConfig *src = &xlcfg->mlayer_sources[ml];
+  // Positions < 0 and sizes <= 0 are skipped rather than compared.
+  return (src->atlas_pos_x >= 0 && src->atlas_pos_x != xlcfg->atlas_pos_x) ||
+         (src->atlas_pos_y >= 0 && src->atlas_pos_y != xlcfg->atlas_pos_y) ||
+         (src->width > 0 && src->width != xlcfg->width) ||
+         (src->height > 0 && src->height != xlcfg->height);
+}
+
+// Push per-mlayer controls to the encoder ahead of encoding layer `ml`.
+// Does nothing when n_ml <= 1 (single embedded layer).
+// AVME_SET_SCALEMODE is always issued; AVME_SET_MLAYER_ID is skipped when
+// use_internal_kf is set, because the encoder then switches mlayers
+// internally (multi_layers_lag_test mode).
+static void apply_mlayer_settings(avm_codec_ctx_t *encoder, int n_ml, int ml,
+                                  const int *scaling_modes,
+                                  int use_internal_kf) {
+  if (n_ml > 1) {
+    // The layer's scaling mode fills both members of the struct.
+    const int layer_mode = scaling_modes[ml];
+    struct avm_scaling_mode scale = { layer_mode, layer_mode };
+    avm_codec_control(encoder, AVME_SET_SCALEMODE, &scale);
+
+    if (use_internal_kf) return;
+    // External mlayer switching: select the mlayer explicitly.
+    avm_codec_control(encoder, AVME_SET_MLAYER_ID, (unsigned int)ml);
+  }
+}
+
+// Tear down one xlayer: codec context, input file, and owned image buffers.
+static void destroy_xlayer_encoder(XLayerEncoderState *state) {
+  avm_codec_destroy(&state->encoder);
+  if (state->input.file != NULL) fclose(state->input.file);
+  avm_img_free(&state->raw);
+  if (state->allocated_raw_shift) avm_img_free(&state->raw_shift);
+  for (int k = 0; k < MAX_NUM_MLAYERS; ++k) {
+    if (state->mlayer_raw_shift_allocated[k])
+      avm_img_free(&state->mlayer_raw_shift[k]);
+    if (state->mlayer_raw_allocated[k]) avm_img_free(&state->mlayer_raw[k]);
+  }
+}
+
+// Per-xlayer packet buffer for collecting encoder output
+typedef struct XLayerPacketBuf {
+  uint8_t *data;  // heap storage holding the appended packet bytes
+  size_t size;  // number of bytes currently stored in data
+  size_t capacity;  // number of bytes allocated for data
+  int has_keyframe;  // set when a collected frame packet is flagged as key
+  int has_data;  // set when at least one frame packet was collected
+} XLayerPacketBuf;
+
+static void pktbuf_init(XLayerPacketBuf *pb) {
+  memset(pb, 0, sizeof(*pb));  // start empty: no data, size 0, capacity 0
+  pb->data = (uint8_t *)malloc(64 * 1024);
+  if (pb->data) pb->capacity = 64 * 1024;  // keep capacity 0 if malloc failed
+}
+
+static void pktbuf_reset(XLayerPacketBuf *pb) {
+  // Forget the contents but keep data/capacity so the storage is reused.
+  pb->has_data = pb->has_keyframe = 0;
+  pb->size = 0;
+}
+
+static void pktbuf_free(XLayerPacketBuf *pb) {
+  // Release the storage and leave the buffer with no data and zero size.
+  free(pb->data);
+  pb->capacity = pb->size = 0;
+  pb->data = NULL;
+}
+
+// Append sz bytes, growing (at least 2x) as needed.  Returns 0, or -1 on OOM.
+static int pktbuf_append(XLayerPacketBuf *pb, const uint8_t *data, size_t sz) {
+  const size_t need = pb->size + sz;
+  if (need > pb->capacity) {
+    const size_t cap = pb->capacity * 2 > need ? pb->capacity * 2 : need;
+    uint8_t *grown = (uint8_t *)realloc(pb->data, cap);
+    if (grown == NULL) return -1;
+    pb->capacity = cap;
+    pb->data = grown;
+  }
+  memcpy(pb->data + pb->size, data, sz);
+  pb->size = need;
+  return 0;
+}
+
+// Drain all pending packets from an encoder into a packet buffer; exits if a
+// packet cannot be stored.  Returns 1 if any frame packet was collected.
+static int drain_encoder_packets(avm_codec_ctx_t *encoder,
+                                 XLayerEncoderState *state,
+                                 XLayerPacketBuf *pb) {
+  int got_data = 0;
+  avm_codec_iter_t iter = NULL;
+  const avm_codec_cx_pkt_t *pkt;
+  while ((pkt = avm_codec_get_cx_data(encoder, &iter))) {
+    if (pkt->kind != AVM_CODEC_CX_FRAME_PKT) continue;
+    // Append first: dropping bytes here would silently truncate the output.
+    if (pktbuf_append(pb, (const uint8_t *)pkt->data.frame.buf,
+                      pkt->data.frame.sz) != 0) {
+      fprintf(stderr, "Error: failed to buffer encoder output packet\n");
+      exit(EXIT_FAILURE);
+    }
+    if (pkt->data.frame.flags & AVM_FRAME_IS_KEY) pb->has_keyframe = 1;
+    pb->has_data = got_data = 1;
+    state->frames_out++;
+  }
+  return got_data;
+}
+
+// Write combined TUs built from the xlayers' collected encoder output.
+// Each xlayer's pktbuf is parsed into TU segments (split at TD boundaries),
+// then segments at the same position across xlayers are merged into a single
+// output TU, so that all xlayers' frames for one temporal unit share one TD
+// and one set of structural OBUs, satisfying the DOH constraint.
+// *tu_count is incremented once for every TU flushed to outfile.
+static void write_combined_internal_kf_tus(TUAssembler *tu_asm,
+                                           const MultiXLayerConfig *mcfg,
+                                           const XLayerEncoderState *states,
+                                           const XLayerPacketBuf *pktbufs,
+                                           int num_xlayers, int *first_output,
+                                           FILE *outfile, int verbose,
+                                           int *tu_count) {
+  // Per-xlayer segment tables; xlayers without data contribute 0 segments.
+  TUSegmentInfo xl_segs[MAX_NUM_XLAYERS - 1][MAX_TU_SEGMENTS];
+  int xl_nseg[MAX_NUM_XLAYERS - 1];
+  int max_nseg = 0;
+
+  for (int xl = 0; xl < num_xlayers; xl++) {
+    xl_nseg[xl] = 0;
+    if (!pktbufs[xl].has_data) continue;
+    xl_nseg[xl] = tu_assembler_parse_tu_segments(
+        pktbufs[xl].data, pktbufs[xl].size, xl_segs[xl], MAX_TU_SEGMENTS);
+    if (max_nseg < xl_nseg[xl]) max_nseg = xl_nseg[xl];
+  }
+
+  // One output TU per segment position.
+  for (int seg = 0; seg < max_nseg; seg++) {
+    tu_asm->size = 0;
+    tu_assembler_write_td(tu_asm);
+
+    // The TU is a keyframe TU if any xlayer's segment holds a keyframe.
+    int any_kf = 0;
+    for (int xl = 0; xl < num_xlayers && !any_kf; xl++) {
+      any_kf = (seg < xl_nseg[xl]) && xl_segs[xl][seg].has_keyframe;
+    }
+
+    // Decide on local LCRs before the structural-OBU writer is handed
+    // first_output, which it may modify.
+    const int emit_local_lcr =
+        mcfg->enable_local_lcr && (any_kf || *first_output);
+    tu_assembler_write_structural_obus(tu_asm, mcfg, first_output, any_kf);
+
+    for (int xl = 0; xl < num_xlayers; xl++) {
+      if (seg >= xl_nseg[xl]) continue;
+      // Local LCR goes right before this xlayer's data (per spec: local
+      // config precedes the xlayer's SH/frame OBUs within each xlayer group).
+      if (emit_local_lcr) tu_assembler_write_local_lcr(tu_asm, xl);
+      const TUSegmentInfo *info = &xl_segs[xl][seg];
+      tu_assembler_append_xlayer_obus(tu_asm, states[xl].xlayer_id,
+                                      pktbufs[xl].data + info->offset,
+                                      info->size);
+    }
+    if (verbose) tu_assembler_print_contents(tu_asm, (*tu_count));
+    ++*tu_count;
+    tu_assembler_flush(tu_asm, outfile);
+  }
+}
+
+// Encode every xlayer in mcfg and write the combined TUs to
+// mcfg->output_filename.  A positive global->limit overrides mcfg->limit.
+// Returns 0 on success, -1 if setup or a main-loop encode call fails.
+int encode_multi_xlayer(const MultiXLayerConfig *mcfg,
+                        const struct AvxEncoderConfig *global) {
+  const int num_xlayers = mcfg->num_xlayers;
+  XLayerEncoderState *states = NULL;
+  XLayerPacketBuf *pktbufs = NULL;
+  TUAssembler tu_asm;
+  SharedSourceReader shared_srcs[MAX_INPUT_SOURCES];
+  int num_shared_srcs = mcfg->num_input_sources;
+  FILE *outfile = NULL;
+  int ret = -1;
+  int use_shared_source = (mcfg->num_input_sources > 0);
+
+  // Merge CLI and JSON limits (CLI overrides JSON)
+  int limit = global->limit;
+  if (limit <= 0 && mcfg->limit > 0) limit = mcfg->limit;
+
+  memset(&tu_asm, 0, sizeof(tu_asm));
+  memset(shared_srcs, 0, sizeof(shared_srcs));
+
+  // Allocate per-xlayer encoder states and packet buffers
+  states = (XLayerEncoderState *)calloc(num_xlayers, sizeof(*states));
+  pktbufs = (XLayerPacketBuf *)calloc(num_xlayers, sizeof(*pktbufs));
+  if (!states || !pktbufs) {
+    fprintf(stderr, "Error: failed to allocate xlayer encoder states\n");
+    goto cleanup;
+  }
+  for (int i = 0; i < num_xlayers; i++) pktbuf_init(&pktbufs[i]);
+
+  // Initialize shared source readers for each input source
+  if (use_shared_source) {
+    for (int s = 0; s < num_shared_srcs; s++) {
+      if (shared_source_init(&shared_srcs[s], &mcfg->input_sources[s], mcfg) !=
+          0)
+        goto cleanup;
+    }
+  }
+
+  // Initialize TU assembler
+  if (tu_assembler_init(&tu_asm, mcfg) != 0) {
+    fprintf(stderr, "Error: failed to initialize TU assembler\n");
+    goto cleanup;
+  }
+
+  // Open output file
+  const char *outpath = mcfg->output_filename;
+  if (outpath[0] == '\0') {
+    fprintf(stderr, "Error: no output filename specified in xlayer config\n");
+    goto cleanup;
+  }
+  outfile = fopen(outpath, "wb");
+  if (!outfile) {
+    fprintf(stderr, "Error: cannot open output file \"%s\"\n", outpath);
+    goto cleanup;
+  }
+
+  // Initialize all xlayer encoders
+  for (int i = 0; i < num_xlayers; i++) {
+    int xl_uses_shared = (mcfg->xlayers[i].input_source_idx >= 0);
+    if (init_xlayer_encoder(&states[i], &mcfg->xlayers[i], mcfg, global,
+                            xl_uses_shared) != 0) {
+      goto cleanup;
+    }
+  }
+
+  fprintf(stderr, "Multi-xlayer encoding: %d xlayers, output=\"%s\"\n",
+          num_xlayers, outpath);
+
+  // Pre-index: for each input source, store which xlayer indices use it.
+  // Avoids O(num_xlayers) scan per source per frame in the hot loop.
+  int src_xl_count[MAX_INPUT_SOURCES] = { 0 };
+  int src_xl_indices[MAX_INPUT_SOURCES][MAX_NUM_XLAYERS - 1];
+  for (int i = 0; i < num_xlayers; i++) {
+    int sidx = mcfg->xlayers[i].input_source_idx;
+    if (sidx >= 0 && sidx < MAX_INPUT_SOURCES) {
+      src_xl_indices[sidx][src_xl_count[sidx]++] = i;
+    }
+  }
+
+  // Main encoding loop
+  unsigned int frame_idx = 0;
+  int any_active = 1;
+  int first_output = 1;
+  int tu_count = 0;
+  const int verbose = global->verbose;
+
+  while (any_active) {
+    any_active = 0;
+
+    if (limit > 0 && (int)frame_idx >= limit) break;
+
+    // Read frames: from shared sources and/or per-xlayer inputs
+    // Only read from sources whose frame_skip aligns with this TU
+    for (int s = 0; s < num_shared_srcs; s++) {
+      if (!shared_srcs[s].initialized || shared_srcs[s].eof) continue;
+      int skip = mcfg->input_sources[s].frame_skip;
+      if (skip > 1 && (frame_idx % (unsigned int)skip) != 0) continue;
+      if (!shared_source_read_frame(&shared_srcs[s])) {
+        // Mark all xlayers using this source as EOF
+        for (int j = 0; j < src_xl_count[s]; j++)
+          states[src_xl_indices[s][j]].eof = 1;
+      } else {
+        // Crop regions for xlayers using this source
+        for (int j = 0; j < src_xl_count[s]; j++) {
+          int i = src_xl_indices[s][j];
+          crop_region_to_xlayer(
+              &states[i].raw, &shared_srcs[s].raw, mcfg->xlayers[i].atlas_pos_x,
+              mcfg->xlayers[i].atlas_pos_y, mcfg->xlayers[i].width,
+              mcfg->xlayers[i].height);
+        }
+      }
+    }
+
+    // Read from per-xlayer inputs for xlayers not using any shared source
+    for (int i = 0; i < num_xlayers; i++) {
+      if (mcfg->xlayers[i].input_source_idx < 0 && !states[i].eof) {
+        read_xlayer_frame(&states[i]);
+      }
+    }
+
+    // Encode xlayers for this frame.
+    // Xlayers whose source is skipped this TU are not encoded.
+    // For multi-mlayer xlayers without lag, keyframes are placed externally:
+    // frame 0 is a keyframe TU and, if kf_max_dist > 0, so is every
+    // kf_max_dist-th frame.  On a keyframe TU, AVM_EFLAG_FORCE_KF is set for
+    // each mlayer whose resolved dependency mask is 0, keeping the independent
+    // layers' keyframes aligned.
+
+    // Reset packet buffers before encoding this TU
+    for (int i = 0; i < num_xlayers; i++) pktbuf_reset(&pktbufs[i]);
+    int got_data = 0;
+
+    for (int i = 0; i < num_xlayers; i++) {
+      // Check if this xlayer's source is active this TU
+      int sidx = mcfg->xlayers[i].input_source_idx;
+      if (sidx >= 0) {
+        int skip = mcfg->input_sources[sidx].frame_skip;
+        if (skip > 1 && (frame_idx % (unsigned int)skip) != 0) continue;
+      }
+
+      const XLayerEncConfig *xlcfg = &mcfg->xlayers[i];
+      int n_ml = xlcfg->num_embedded_layers;
+      int use_internal_kf = (n_ml > 1 && states[i].cfg.g_lag_in_frames > 0);
+
+      for (int ml = 0; ml < n_ml; ml++) {
+        avm_image_t *img = NULL;
+        if (!states[i].eof) {
+          if (xlcfg->has_per_mlayer_sources &&
+              states[i].mlayer_raw_allocated[ml]) {
+            // Per-mlayer source: crop from the mlayer's own source
+            int msrc = xlcfg->mlayer_sources[ml].input_source_idx;
+            if (msrc >= 0 && shared_srcs[msrc].initialized &&
+                !shared_srcs[msrc].eof) {
+              crop_region_to_xlayer(&states[i].mlayer_raw[ml],
+                                    &shared_srcs[msrc].raw,
+                                    xlcfg->mlayer_sources[ml].atlas_pos_x,
+                                    xlcfg->mlayer_sources[ml].atlas_pos_y,
+                                    xlcfg->mlayer_sources[ml].width,
+                                    xlcfg->mlayer_sources[ml].height);
+            }
+            img = upshift_frame_if_needed(
+                &states[i].mlayer_raw[ml], &states[i].mlayer_raw_shift[ml],
+                &states[i].mlayer_raw_shift_allocated[ml],
+                states[i].input_shift, states[i].input.bit_depth);
+          } else {
+            // Default: use xlayer's shared image
+            img = upshift_frame_if_needed(&states[i].raw, &states[i].raw_shift,
+                                          &states[i].allocated_raw_shift,
+                                          states[i].input_shift,
+                                          states[i].input.bit_depth);
+          }
+        }
+
+        apply_mlayer_settings(&states[i].encoder, n_ml, ml, xlcfg->scaling_mode,
+                              use_internal_kf);
+
+        // For multi-mlayer with lag == 0: force KF on independent mlayers
+        // (dependency_mask == 0) on keyframe TUs.  Dependent layers use
+        // inter-layer prediction from the KF of lower layers.
+        // For multi-mlayer with lag > 0: internal KF management handles
+        // keyframes via multi_layers_lag_test, so no external FORCE_KF.
+        // For single-mlayer: use standard encoder-internal keyframe handling.
+        int frame_flags = 0;
+        if (n_ml > 1 && !use_internal_kf) {
+          int is_kf_tu = (frame_idx == 0);
+          if (xlcfg->kf_max_dist > 0 && frame_idx > 0) {
+            is_kf_tu = (frame_idx % xlcfg->kf_max_dist == 0);
+          }
+          if (is_kf_tu) {
+            int mask = resolve_mlayer_dep_mask(&xlcfg->mlayer_sources[ml], ml);
+            if (mask == 0) frame_flags |= AVM_EFLAG_FORCE_KF;
+          }
+        } else {
+          if (frame_idx == 0) frame_flags |= AVM_EFLAG_FORCE_KF;
+        }
+
+        struct avm_usec_timer timer;
+        avm_usec_timer_start(&timer);
+
+        avm_codec_err_t res = avm_codec_encode(
+            &states[i].encoder, img, states[i].frame_count, 1, frame_flags);
+        avm_usec_timer_mark(&timer);
+        states[i].cx_time += avm_usec_timer_elapsed(&timer);
+        states[i].frame_count++;
+
+        if (res != AVM_CODEC_OK) {
+          fprintf(stderr,
+                  "Error: encode failed for xlayer %d frame %u ml %d: %s\n",
+                  states[i].xlayer_id, frame_idx, ml,
+                  avm_codec_error(&states[i].encoder));
+          goto cleanup;
+        }
+
+        // Drain packets immediately — the encoder clears its packet list
+        // on the next avm_codec_encode call, so we must collect before then.
+        if (drain_encoder_packets(&states[i].encoder, &states[i], &pktbufs[i]))
+          got_data = 1;
+      }
+    }
+
+    // Assemble TU(s) from collected packets.
+    // Always use segment-based TU assembly: the encoder may emit TDs within
+    // a single packet blob (e.g. OLK in its own TU, then leading frames in
+    // subsequent TUs; or internal-KF mode with multiple TUs per GF group).
+    // The segment parser splits at TD boundaries and writes one output TU
+    // per segment, combining matching segments across xlayers.
+    if (got_data) {
+      write_combined_internal_kf_tus(&tu_asm, mcfg, states, pktbufs,
+                                     num_xlayers, &first_output, outfile,
+                                     verbose, &tu_count);
+      for (int i = 0; i < num_xlayers; i++) pktbuf_reset(&pktbufs[i]);
+    }
+
+    // Check if any encoder still has input
+    for (int i = 0; i < num_xlayers; i++) {
+      if (!states[i].eof) any_active = 1;
+    }
+
+    frame_idx++;
+  }
+
+  // Flush all encoders.  For internal KF mode (multi_layers_lag_test),
+  // each xlayer's encoder output may contain multiple TUs; we parse them
+  // into segments and combine matching segments across xlayers into shared
+  // TUs.  The internal pipeline may need many NULL pushes before it starts
+  // producing output (e.g. lag_in_frames rounds).  `flushing` is cleared at
+  // the end of every round, so the loop runs until max_dry_rounds consecutive
+  // rounds have produced no data.
+  int flushing = 1;
+  int dry_rounds = 0;
+  const int max_dry_rounds = 50;  // generous upper bound
+  while (flushing || dry_rounds < max_dry_rounds) {
+    flushing = 0;
+
+    for (int i = 0; i < num_xlayers; i++) {
+      int n_ml = mcfg->xlayers[i].num_embedded_layers;
+      int internal_kf = (n_ml > 1 && states[i].cfg.g_lag_in_frames > 0);
+
+      if (internal_kf) {
+        // Internal KF mode: the encoder manages mlayer switching internally.
+        // Push n_ml NULLs to advance all mlayers for one frame.  Accumulate
+        // all output before combining with other xlayers.
+        pktbuf_reset(&pktbufs[i]);
+        for (int ml = 0; ml < n_ml; ml++) {
+          struct avm_usec_timer timer;
+          avm_usec_timer_start(&timer);
+          avm_codec_encode(&states[i].encoder, NULL, states[i].frame_count, 1,
+                           0);
+          avm_usec_timer_mark(&timer);
+          states[i].cx_time += avm_usec_timer_elapsed(&timer);
+          states[i].frame_count++;
+
+          int got = drain_encoder_packets(&states[i].encoder, &states[i],
+                                          &pktbufs[i]);
+          if (got) flushing = 1;
+        }
+      } else {
+        // Non-internal-KF: flush each mlayer, one TU per flush round.
+        pktbuf_reset(&pktbufs[i]);
+        for (int ml = 0; ml < n_ml; ml++) {
+          apply_mlayer_settings(&states[i].encoder, n_ml, ml,
+                                mcfg->xlayers[i].scaling_mode, internal_kf);
+          struct avm_usec_timer timer;
+          avm_usec_timer_start(&timer);
+          avm_codec_encode(&states[i].encoder, NULL, states[i].frame_count, 1,
+                           0);
+          avm_usec_timer_mark(&timer);
+          states[i].cx_time += avm_usec_timer_elapsed(&timer);
+          states[i].frame_count++;
+
+          if (drain_encoder_packets(&states[i].encoder, &states[i],
+                                    &pktbufs[i]))
+            flushing = 1;
+        }
+      }
+    }
+
+    // After all xlayers have been flushed for this round, write combined TUs.
+    if (flushing) {
+      write_combined_internal_kf_tus(&tu_asm, mcfg, states, pktbufs,
+                                     num_xlayers, &first_output, outfile,
+                                     verbose, &tu_count);
+      for (int i = 0; i < num_xlayers; i++) pktbuf_reset(&pktbufs[i]);
+    }
+    if (flushing) {
+      dry_rounds = 0;
+    } else {
+      dry_rounds++;
+    }
+    flushing = 0;
+  }
+
+  // Print summary
+  fprintf(stderr, "\nMulti-xlayer encoding complete:\n");
+  for (int i = 0; i < num_xlayers; i++) {
+    fprintf(stderr, "  xlayer %d: %u frames, %.1fs (%.1f fps)\n",
+            states[i].xlayer_id, states[i].frames_out,
+            states[i].cx_time / 1000000.0,
+            states[i].frames_out > 0
+                ? (double)states[i].frames_out / (states[i].cx_time / 1000000.0)
+                : 0.0);
+  }
+
+  ret = 0;
+
+cleanup:
+  if (pktbufs) {
+    for (int i = 0; i < num_xlayers; i++) pktbuf_free(&pktbufs[i]);
+    free(pktbufs);
+  }
+  if (states) {
+    for (int i = 0; i < num_xlayers; i++) {
+      destroy_xlayer_encoder(&states[i]);
+    }
+    free(states);
+  }
+  tu_assembler_free(&tu_asm);
+  for (int s = 0; s < num_shared_srcs; s++)
+    shared_source_destroy(&shared_srcs[s]);
+  if (outfile) fclose(outfile);
+  return ret;
+}
diff --git a/apps/avmenc_xlayer.h b/apps/avmenc_xlayer.h
new file mode 100644
index 0000000000..77cee603b1
--- /dev/null
+++ b/apps/avmenc_xlayer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#ifndef AVM_APPS_AVMENC_XLAYER_H_
+#define AVM_APPS_AVMENC_XLAYER_H_
+
+#include "avm/avm_encoder.h"
+#include "avm/avmcx.h"
+#include "common/tools_common.h"
+#include "common/xlayer_config.h"
+#include "common/tu_assembler.h"
+#include "apps/avmenc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Per-xlayer encoder state
+typedef struct XLayerEncoderState {
+  int xlayer_id;
+  struct AvxInputContext input;  // this xlayer's input (file, format, size)
+  avm_codec_ctx_t encoder;
+  avm_codec_enc_cfg_t cfg;
+  avm_image_t raw;  // current input frame for this xlayer
+  avm_image_t raw_shift;  // upshifted copy of raw, allocated lazily
+  int allocated_raw_shift;  // nonzero once raw_shift has been allocated
+  int input_shift;  // shift amount passed to avm_img_upshift()
+  unsigned int frames_out;  // frame packets drained from the encoder
+  uint32_t
+      frame_count;  // PTS counter (advances per encode call, not per frame)
+  uint64_t cx_time;  // accumulated avm_codec_encode() time, in microseconds
+  int eof;  // input exhausted
+  // Per-embedded-layer raw buffers (for per-mlayer input sources)
+  avm_image_t mlayer_raw[MAX_NUM_MLAYERS];
+  avm_image_t mlayer_raw_shift[MAX_NUM_MLAYERS];  // upshifted mlayer_raw[m]
+  int mlayer_raw_allocated[MAX_NUM_MLAYERS];  // mlayer_raw[m] is allocated
+  int mlayer_raw_shift_allocated[MAX_NUM_MLAYERS];  // same for the shift copy
+} XLayerEncoderState;
+
+// Run multi-xlayer encoding for mcfg.  Returns 0 on success, -1 on failure.
+int encode_multi_xlayer(const MultiXLayerConfig *mcfg,
+                        const struct AvxEncoderConfig *global);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AVM_APPS_AVMENC_XLAYER_H_
diff --git a/av2/arg_defs.c b/av2/arg_defs.c
index 3a825b7b9b..2cede62aa9 100644
--- a/av2/arg_defs.c
+++ b/av2/arg_defs.c
@@ -904,4 +904,6 @@ const av2_codec_arg_definitions_t g_av2_codec_arg_defs = {
               "Cross frame CDF for context initialization "
               "(0: disable cross frame CDF init, 1: enable cross frame CDF "
               "init(default), "),
+  .xlayer_config = ARG_DEF(NULL, "xlayer-config", 1,
+                           "Path to JSON config for multi-xlayer encoding"),
 };
diff --git a/av2/arg_defs.h b/av2/arg_defs.h
index 7fe25641bc..06a774a1b2 100644
--- a/av2/arg_defs.h
+++ b/av2/arg_defs.h
@@ -294,6 +294,7 @@ typedef struct av2_codec_arg_definitions {
   arg_def_t enable_mfh_obu_signaling;
   arg_def_t operating_points_count;
   arg_def_t cross_frame_cdf_init_mode;
+  arg_def_t xlayer_config;
 } av2_codec_arg_definitions_t;
 
 extern const av2_codec_arg_definitions_t g_av2_codec_arg_defs;
diff --git a/av2/av2_cx_iface.c b/av2/av2_cx_iface.c
index 845d11b06e..8650fa6211 100644
--- a/av2/av2_cx_iface.c
+++ b/av2/av2_cx_iface.c
@@ -245,6 +245,7 @@ struct av2_extracfg {
   int buffer_refresh_multi_layers_test[REF_FRAMES];
   int multi_layers_lag_test;
   int force_deferred_frames_for_ras_test;
+  int intra_only_fwd_kf;
 };
 
 // Example subgop configs. Currently not used by default.
@@ -574,6 +575,7 @@ static struct av2_extracfg default_extra_cfg = {
   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // buffer_refresh_multi_layers_test
   0,      // multi_layers_test for nozero lag
   0,      // force_deferred_frames_for_ras_test
+  0,
 };
 // clang-format on
 
@@ -595,6 +597,9 @@ struct avm_codec_alg_priv {
   avm_enc_frame_flags_t next_frame_flags;
   avm_codec_pkt_list_decl(256) pkt_list;
   unsigned int fixed_kf_cntr;
+  // For multi-mlayer with lag: persists across encoder_encode calls to prevent
+  // TDs from being inserted between hidden frames within the same TU.
+  int mlayer_tu_ready;
   // BufferPool that holds all reference frames.
   BufferPool *buffer_pool;
 
@@ -817,6 +822,7 @@ static avm_codec_err_t validate_config(avm_codec_alg_priv_t *ctx,
   RANGE_CHECK(extra_cfg, explicit_ref_frame_map, 0, 1);
   RANGE_CHECK(extra_cfg, add_sef_for_hidden_frames, 0, 1);
   RANGE_CHECK(extra_cfg, monotonic_output_order, 0, 1);
+  RANGE_CHECK(extra_cfg, intra_only_fwd_kf, 0, 1);
   if (extra_cfg->monotonic_output_order &&
       extra_cfg->enable_keyframe_filtering > 0)
     ERROR("monotonic_output_order=1 requires enable_keyframe_filtering=0");
@@ -1515,6 +1521,7 @@ static avm_codec_err_t set_encoder_config(AV2EncoderConfig *oxcf,
 
   // Set Key frame configuration.
   kf_cfg->fwd_kf_enabled = cfg->fwd_kf_enabled;
+  kf_cfg->intra_only_fwd_kf = extra_cfg->intra_only_fwd_kf;
   kf_cfg->auto_key =
       cfg->kf_mode == AVM_KF_AUTO && cfg->kf_min_dist != cfg->kf_max_dist;
   kf_cfg->key_freq_min = cfg->kf_min_dist;
@@ -1607,11 +1614,20 @@ static avm_codec_err_t set_encoder_config(AV2EncoderConfig *oxcf,
   oxcf->ref_frm_cfg.add_sef_for_hidden_frames =
       extra_cfg->add_sef_for_hidden_frames;
   oxcf->tool_cfg.monotonic_output_order = extra_cfg->monotonic_output_order;
+  // Monotonic output requires SEF OBUs for hidden frames — implicit output
+  // is not allowed when monotonic_output_order_flag is set in the sequence
+  // header.  Force add_sef_for_hidden_frames on so the encoder produces SEF
+  // OBUs instead of relying on implicit output.
   if (oxcf->tool_cfg.monotonic_output_order &&
       !oxcf->ref_frm_cfg.add_sef_for_hidden_frames) {
-    // `monotonic_output_order = 1` implies that `implicit_output_frame = 0`.
-    // So, explicit SEF OBUs must be signaled.
     oxcf->ref_frm_cfg.add_sef_for_hidden_frames = 1;
+    static int warned_sef_override;
+    if (!warned_sef_override) {
+      warned_sef_override = 1;
+      fprintf(stderr,
+              "Warning: monotonic_output_order=1 forces "
+              "add_sef_for_hidden_frames=1 (--add-sef-for-output=1)\n");
+    }
   }
 
   oxcf->row_mt = extra_cfg->row_mt;
@@ -2752,6 +2768,130 @@ static avm_codec_err_t ctrl_set_force_deferred_frames_for_ras_test(
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static avm_codec_err_t ctrl_set_xlayer_id(avm_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  const int id = va_arg(args, int);
+  const int in_range = id >= 0 && id <= 30;  // accepted ids: 0..30
+  if (in_range) ctx->cpi->common.xlayer_id = id;
+  return in_range ? AVM_CODEC_OK : AVM_CODEC_INVALID_PARAM;
+}
+
+static avm_codec_err_t ctrl_set_mlayer_dependency_present(
+    avm_codec_alg_priv_t *ctx, va_list args) {
+  const unsigned int present = va_arg(args, unsigned int);
+  if (present >= 2) return AVM_CODEC_INVALID_PARAM;  // only 0 or 1 accepted
+  ctx->cpi->common.seq_params.mlayer_dependency_present_flag = (int)present;
+  return AVM_CODEC_OK;
+}
+
+static avm_codec_err_t ctrl_set_mlayer_dependency_map(avm_codec_alg_priv_t *ctx,
+                                                      va_list args) {
+  // args: mlayer index, then a mask whose bit j flags a dependency on mlayer j
+  const unsigned int layer = va_arg(args, unsigned int);
+  const unsigned int dep_bits = va_arg(args, unsigned int);
+  if (layer >= MAX_NUM_MLAYERS) return AVM_CODEC_INVALID_PARAM;
+  SequenceHeader *const seq_hdr = &ctx->cpi->common.seq_params;
+  for (unsigned int lower = 0; lower < layer; ++lower) {
+    seq_hdr->mlayer_dependency_map[layer][lower] = (dep_bits >> lower) & 1;
+  }
+  seq_hdr->mlayer_dependency_map[layer][layer] = 1;  // self-dependency
+  return AVM_CODEC_OK;
+}
+
+static avm_codec_err_t ctrl_set_intra_only_fwd_kf(avm_codec_alg_priv_t *ctx,
+                                                  va_list args) {
+  struct av2_extracfg updated = ctx->extra_cfg;  // staged copy of the config
+  updated.intra_only_fwd_kf = CAST(AV2E_SET_INTRA_ONLY_FWD_KF, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Map a CICP (primaries, transfer, matrix) triplet to its
+// color_description_idc; unlisted triplets yield AVM_COLOR_DESC_IDC_EXPLICIT.
+static int derive_color_description_idc(avm_color_primaries_t cp,
+                                        avm_transfer_characteristics_t tc,
+                                        avm_matrix_coefficients_t mc) {
+  // Triplets that share the BT_709 primaries.
+  if (cp == AVM_CICP_CP_BT_709) {
+    if (tc == AVM_CICP_TC_BT_709 && mc == AVM_CICP_MC_BT_709)
+      return AVM_COLOR_DESC_IDC_BT709SDR;
+    if (tc == AVM_CICP_TC_SRGB && mc == AVM_CICP_MC_IDENTITY)
+      return AVM_COLOR_DESC_IDC_SRGB;
+    if (tc == AVM_CICP_TC_SRGB && mc == AVM_CICP_MC_BT_470_B_G)
+      return AVM_COLOR_DESC_IDC_SYCC;
+  } else if (cp == AVM_CICP_CP_BT_2020 && mc == AVM_CICP_MC_BT_2020_NCL) {
+    // BT_2020 primaries with the BT_2020_NCL matrix: split on the transfer.
+    if (tc == AVM_CICP_TC_SMPTE_2084) return AVM_COLOR_DESC_IDC_BT2100PQ;
+    if (tc == AVM_CICP_TC_HLG) return AVM_COLOR_DESC_IDC_BT2100HLG;
+  }
+  // No listed triplet matched.
+  return AVM_COLOR_DESC_IDC_EXPLICIT;
+}
+
+// Re-derive color_description_idc and ci_color_description_present_flag.
+static void update_mlayer_ci_flags(ContentInterpretation *ci) {
+  ColorInfo *const info = &ci->color_info;
+  info->color_description_idc = derive_color_description_idc(
+      info->color_primaries, info->transfer_characteristics,
+      info->matrix_coefficients);
+
+  // Nothing to signal: explicit idc, all CICP fields unspecified, range 0.
+  const int nothing_to_signal =
+      info->color_description_idc == AVM_COLOR_DESC_IDC_EXPLICIT &&
+      info->color_primaries == AVM_CICP_CP_UNSPECIFIED &&
+      info->transfer_characteristics == AVM_CICP_TC_UNSPECIFIED &&
+      info->matrix_coefficients == AVM_CICP_MC_UNSPECIFIED &&
+      info->full_range_flag == 0;
+  ci->ci_color_description_present_flag = !nothing_to_signal;
+}
+
+// Shared body of the per-mlayer CI control handlers.  Reads (mlayer index,
+// value) from args and stores value into the field selected by `field`:
+//   0 = color_primaries, 1 = transfer_characteristics,
+//   2 = matrix_coefficients, 3 = full_range_flag (stored as 0 or 1).
+static avm_codec_err_t set_mlayer_ci_field(avm_codec_alg_priv_t *ctx,
+                                           va_list args, int field) {
+  const unsigned int layer = va_arg(args, unsigned int);
+  const unsigned int new_val = va_arg(args, unsigned int);
+  if (layer >= MAX_NUM_MLAYERS) return AVM_CODEC_INVALID_PARAM;
+  ContentInterpretation *const ci =
+      &ctx->cpi->common.ci_params_per_layer[layer];
+  ColorInfo *const color = &ci->color_info;
+  if (field == 0) {
+    color->color_primaries = (avm_color_primaries_t)new_val;
+  } else if (field == 1) {
+    color->transfer_characteristics = (avm_transfer_characteristics_t)new_val;
+  } else if (field == 2) {
+    color->matrix_coefficients = (avm_matrix_coefficients_t)new_val;
+  } else if (field == 3) {
+    color->full_range_flag = new_val != 0;
+  }
+  // Re-derive dependent flags, then set the override and CI-OBU write flags.
+  update_mlayer_ci_flags(ci);
+  ctx->cpi->ci_per_layer_overridden[layer] = 1;
+  ctx->cpi->write_ci_obu_flag = 1;
+  return AVM_CODEC_OK;
+}
+
+static avm_codec_err_t ctrl_set_mlayer_color_primaries(
+    avm_codec_alg_priv_t *ctx, va_list args) {
+  return set_mlayer_ci_field(ctx, args, 0);  // field 0: color_primaries
+}
+
+static avm_codec_err_t ctrl_set_mlayer_transfer_characteristics(
+    avm_codec_alg_priv_t *ctx, va_list args) {
+  return set_mlayer_ci_field(ctx, args, 1);  // field 1: transfer_characteristics
+}
+
+static avm_codec_err_t ctrl_set_mlayer_matrix_coefficients(
+    avm_codec_alg_priv_t *ctx, va_list args) {
+  return set_mlayer_ci_field(ctx, args, 2);  // field 2: matrix_coefficients
+}
+
+static avm_codec_err_t ctrl_set_mlayer_color_range(avm_codec_alg_priv_t *ctx,
+                                                   va_list args) {
+  return set_mlayer_ci_field(ctx, args, 3);  // field 3: full_range_flag
+}
+
 static avm_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
                                            STATS_BUFFER_CTX *stats_buf_context,
                                            int num_lap_buffers) {
@@ -2819,6 +2959,7 @@ static avm_codec_err_t encoder_init(avm_codec_ctx_t *ctx) {
     }
 
     priv->extra_cfg = default_extra_cfg;
+    priv->mlayer_tu_ready = 1;  // First frame starts a new TU.
     avm_once(av2_initialize_enc);
 
     res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
@@ -3065,18 +3206,19 @@ static void report_stats(AV2_COMP *cpi, size_t frame_size, uint64_t cx_time) {
         const bool use_hbd_psnr = (cpi->b_calculate_psnr == 2);
         if (cpi->oxcf.tool_cfg.enable_bru) {
           fprintf(stdout,
-                  "POC:%6d [%s][BRU%1d:%1d][Level:%d][Q:%3d][LTID:%d]"
-                  "[ELID:%d][TLID:%d]: %10" PRIu64
+                  "POC:%6d [XL:%d][%s][BRU%1d:%1d][Level:%d][Q:%3d][LTID:%d]"
+                  "[ELID:%d][TLID:%d][OH:%d][DOH:%d]: %10" PRIu64
                   " Bytes, "
                   "%6.1fms, %2.4f dB(Y), %2.4f dB(U), "
                   "%2.4f dB(V), "
                   "%2.4f dB(Avg)",
-                  cm->cur_frame->absolute_poc,
+                  cm->cur_frame->absolute_poc, cm->xlayer_id,
                   frameType[cm->current_frame.frame_type + cpi->is_ras_frame],
                   cm->bru.enabled, cm->bru.update_ref_idx,
                   cm->cur_frame->pyramid_level, base_qindex,
                   cm->cur_frame->long_term_id, cm->cur_frame->mlayer_id,
-                  (int)cm->cur_frame->tlayer_id, (uint64_t)frame_size,
+                  (int)cm->cur_frame->tlayer_id, cm->cur_frame->order_hint,
+                  cm->cur_frame->display_order_hint, (uint64_t)frame_size,
                   cx_time / 1000.0,
                   use_hbd_psnr ? psnr.psnr_hbd[1] : psnr.psnr[1],
                   use_hbd_psnr ? psnr.psnr_hbd[2] : psnr.psnr[2],
@@ -3084,17 +3226,18 @@ static void report_stats(AV2_COMP *cpi, size_t frame_size, uint64_t cx_time) {
                   use_hbd_psnr ? psnr.psnr_hbd[0] : psnr.psnr[0]);
         } else {
           fprintf(stdout,
-                  "POC:%6d [%s][Level:%d][Q:%3d][LTID:%d]"
-                  "[ELID:%d][TLID:%d]: %10" PRIu64
+                  "POC:%6d [XL:%d][%s][Level:%d][Q:%3d][LTID:%d]"
+                  "[ELID:%d][TLID:%d][OH:%d][DOH:%d]: %10" PRIu64
                   " Bytes, "
                   "%6.1fms, %2.4f dB(Y), %2.4f dB(U), "
                   "%2.4f dB(V), "
                   "%2.4f dB(Avg)",
-                  cm->cur_frame->absolute_poc,
+                  cm->cur_frame->absolute_poc, cm->xlayer_id,
                   frameType[cm->current_frame.frame_type + cpi->is_ras_frame],
                   cm->cur_frame->pyramid_level, base_qindex,
                   cm->cur_frame->long_term_id, cm->cur_frame->mlayer_id,
-                  (int)cm->cur_frame->tlayer_id, (uint64_t)frame_size,
+                  (int)cm->cur_frame->tlayer_id, cm->cur_frame->order_hint,
+                  cm->cur_frame->display_order_hint, (uint64_t)frame_size,
                   cx_time / 1000.0,
                   use_hbd_psnr ? psnr.psnr_hbd[1] : psnr.psnr[1],
                   use_hbd_psnr ? psnr.psnr_hbd[2] : psnr.psnr[2],
@@ -3104,28 +3247,30 @@ static void report_stats(AV2_COMP *cpi, size_t frame_size, uint64_t cx_time) {
       } else {
         if (cpi->oxcf.tool_cfg.enable_bru) {
           fprintf(stdout,
-                  "POC:%6d [%s][BRU%1d:%1d][Level:%d][Q:%3d][LTID:%d]"
-                  "[ELID:%d][TLID:%d]: %10" PRIu64
+                  "POC:%6d [XL:%d][%s][BRU%1d:%1d][Level:%d][Q:%3d][LTID:%d]"
+                  "[ELID:%d][TLID:%d][OH:%d][DOH:%d]: %10" PRIu64
                   " Bytes, "
                   "%6.1fms",
-                  cm->cur_frame->absolute_poc,
+                  cm->cur_frame->absolute_poc, cm->xlayer_id,
                   frameType[cm->current_frame.frame_type + cpi->is_ras_frame],
                   cm->bru.enabled, cm->bru.update_ref_idx,
                   cm->cur_frame->pyramid_level, base_qindex,
                   cm->cur_frame->long_term_id, cm->cur_frame->mlayer_id,
-                  (int)cm->cur_frame->tlayer_id, (uint64_t)frame_size,
+                  (int)cm->cur_frame->tlayer_id, cm->cur_frame->order_hint,
+                  cm->cur_frame->display_order_hint, (uint64_t)frame_size,
                   cx_time / 1000.0);
         } else {
           fprintf(stdout,
-                  "POC:%6d [%s][Level:%d][Q:%3d][LTID:%d]"
-                  "[ELID:%d][TLID:%d]: %10" PRIu64
+                  "POC:%6d [XL:%d][%s][Level:%d][Q:%3d][LTID:%d]"
+                  "[ELID:%d][TLID:%d][OH:%d][DOH:%d]: %10" PRIu64
                   " Bytes, "
                   "%6.1fms",
-                  cm->cur_frame->absolute_poc,
+                  cm->cur_frame->absolute_poc, cm->xlayer_id,
                   frameType[cm->current_frame.frame_type + cpi->is_ras_frame],
                   cm->cur_frame->pyramid_level, base_qindex,
                   cm->cur_frame->long_term_id, cm->cur_frame->mlayer_id,
-                  (int)cm->cur_frame->tlayer_id, (uint64_t)frame_size,
+                  (int)cm->cur_frame->tlayer_id, cm->cur_frame->order_hint,
+                  cm->cur_frame->display_order_hint, (uint64_t)frame_size,
                   cx_time / 1000.0);
         }
       }
@@ -3385,7 +3530,7 @@ static avm_codec_err_t encoder_encode(avm_codec_alg_priv_t *ctx,
 
     // Get the next visible frame. Invisible frames get packed with the next
     // visible frame.
-    int64_t dst_time_stamp;
+    int64_t dst_time_stamp = 0;
     int64_t dst_end_time_stamp;
     struct avm_usec_timer timer;
     if (cpi->compressor_stage == ENCODE_STAGE) {
@@ -3395,7 +3540,12 @@ static avm_codec_err_t encoder_encode(avm_codec_alg_priv_t *ctx,
           cpi->subgop_stats.num_references[stat_idx] = -1;
       }
     }
-    int ready_for_next_tu = 1;
+    // In multi-mlayer with lag mode, TU boundaries persist across
+    // encoder_encode calls.  Use the persistent flag so that hidden frames
+    // for ml>0 (processed in a separate call) don't start a new TU.
+    const int multi_ml_lag = cpi->oxcf.unit_test_cfg.multi_layers_lag_test &&
+                             cpi->common.number_mlayers > 1;
+    int ready_for_next_tu = multi_ml_lag ? ctx->mlayer_tu_ready : 1;
 
     while (cx_data_sz - index_size >= ctx->cx_data_sz / 2 &&
            !is_frame_visible) {
@@ -3455,9 +3605,25 @@ static avm_codec_err_t encoder_encode(avm_codec_alg_priv_t *ctx,
           ready_for_next_tu = 0;
         }
 
-        if (mlayer_id == 0 && (cpi->common.immediate_output_picture ||
-                               cpi->common.implicit_output_picture)) {
-          ready_for_next_tu = 1;
+        if (multi_ml_lag) {
+          // Multi-mlayer with lag: mark TU boundary after the last mlayer
+          // produces an output frame.  In non-monotonic mode, implicit
+          // output frames are also output (decoder reorders), so each gets
+          // its own TU.  In monotonic mode, only immediate output triggers
+          // a TU boundary (hidden frames are bundled with their SEF).
+          const int is_output =
+              cpi->common.immediate_output_picture ||
+              (!cpi->common.seq_params.monotonic_output_order_flag &&
+               cpi->common.implicit_output_picture);
+          if ((unsigned int)mlayer_id == cpi->common.number_mlayers - 1 &&
+              is_output) {
+            ready_for_next_tu = 1;
+          }
+        } else {
+          if (mlayer_id == 0 && (cpi->common.immediate_output_picture ||
+                                 cpi->common.implicit_output_picture)) {
+            ready_for_next_tu = 1;
+          }
         }
 
         size_t curr_frame_size = frame_size;
@@ -3544,6 +3710,8 @@ static avm_codec_err_t encoder_encode(avm_codec_alg_priv_t *ctx,
 #endif  // CONFIG_MIXED_LOSSLESS_ENCODE
       }
     }
+    // Persist TU readiness for multi-mlayer mode across encoder_encode calls.
+    if (multi_ml_lag) ctx->mlayer_tu_ready = ready_for_next_tu;
     if (is_frame_visible) {
       // Add the frame packet to the list of returned packets.
       avm_codec_cx_pkt_t pkt;
@@ -3571,6 +3739,33 @@ static avm_codec_err_t encoder_encode(avm_codec_alg_priv_t *ctx,
 
       avm_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
 
+      ctx->pending_cx_data = NULL;
+      ctx->pending_cx_data_sz = 0;
+      ctx->pending_frame_count = 0;
+    } else if (!img && ctx->pending_cx_data_sz > 0) {
+      // Flush mode: the encoder ran out of frames but has accumulated
+      // implicit-output frames that were never followed by an
+      // immediate-output frame.  Emit them as a packet so they are not lost.
+      avm_codec_cx_pkt_t pkt;
+
+      cpi->frames_left = AVMMAX(0, cpi->frames_left - 1);
+      pkt.kind = AVM_CODEC_CX_FRAME_PKT;
+      pkt.data.frame.buf = ctx->pending_cx_data;
+      pkt.data.frame.sz = ctx->pending_cx_data_sz;
+      pkt.data.frame.partition_id = -1;
+      pkt.data.frame.vis_frame_size = 0;
+
+      pkt.data.frame.pts =
+          ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+          ctx->pts_offset;
+      pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+      if (has_no_show_keyframe) {
+        pkt.data.frame.flags |= AVM_FRAME_IS_DELAYED_RANDOM_ACCESS_POINT;
+      }
+      pkt.data.frame.duration = 0;
+
+      avm_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
       ctx->pending_cx_data = NULL;
       ctx->pending_cx_data_sz = 0;
       ctx->pending_frame_count = 0;
@@ -3652,7 +3847,6 @@ static avm_codec_err_t ctrl_get_new_frame_image(avm_codec_alg_priv_t *ctx,
 
   if (new_img != NULL) {
     YV12_BUFFER_CONFIG new_frame;
-
     if (av2_get_last_show_frame(ctx->cpi, &new_frame) == 0) {
       yuvconfig2image(new_img, &new_frame, NULL);
       return AVM_CODEC_OK;
@@ -4704,6 +4898,15 @@ static avm_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   { AV2E_SET_MONOTONIC_OUTPUT_ORDER, ctrl_set_monotonic_output_order },
   { AV2E_SET_FORCE_DEFERRED_FRAMES_FOR_RAS_TEST,
     ctrl_set_force_deferred_frames_for_ras_test },
+  { AVME_SET_XLAYER_ID, ctrl_set_xlayer_id },
+  { AV2E_SET_MLAYER_DEPENDENCY_PRESENT, ctrl_set_mlayer_dependency_present },
+  { AV2E_SET_MLAYER_DEPENDENCY_MAP, ctrl_set_mlayer_dependency_map },
+  { AV2E_SET_INTRA_ONLY_FWD_KF, ctrl_set_intra_only_fwd_kf },
+  { AV2E_SET_MLAYER_COLOR_PRIMARIES, ctrl_set_mlayer_color_primaries },
+  { AV2E_SET_MLAYER_TRANSFER_CHARACTERISTICS,
+    ctrl_set_mlayer_transfer_characteristics },
+  { AV2E_SET_MLAYER_MATRIX_COEFFICIENTS, ctrl_set_mlayer_matrix_coefficients },
+  { AV2E_SET_MLAYER_COLOR_RANGE, ctrl_set_mlayer_color_range },
 
   // Getters
   { AVME_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/av2/av2_dx_iface.c b/av2/av2_dx_iface.c
index 7951f1c727..083d812e54 100644
--- a/av2/av2_dx_iface.c
+++ b/av2/av2_dx_iface.c
@@ -1163,6 +1163,7 @@ static avm_image_t *add_grain_if_needed(avm_codec_alg_priv_t *ctx,
   grain_img->mlayer_id = img->mlayer_id;
   grain_img->xlayer_id = img->xlayer_id;
   grain_img->stream_id = img->stream_id;
+  grain_img->display_order_hint = img->display_order_hint;
   img->metadata = NULL;
   if (av2_add_film_grain(grain_params, img, grain_img)) {
     pool->release_fb_cb(pool->cb_priv, fb);
@@ -1247,6 +1248,7 @@ static avm_image_t *decoder_get_frame_(avm_codec_alg_priv_t *ctx,
         img->mlayer_id = output_frame_buf->mlayer_id;
         img->xlayer_id = output_frame_buf->xlayer_id;
         img->stream_id = output_frame_buf->stream_id;
+        img->display_order_hint = output_frame_buf->display_order_hint;
 
         if (pbi->skip_film_grain) grain_params->apply_grain = 0;
         avm_image_t *res =
@@ -2007,6 +2009,196 @@ static avm_codec_err_t ctrl_set_row_mt(avm_codec_alg_priv_t *ctx,
   return AVM_CODEC_OK;
 }
 
+// Copies the fields exposed through avm_xlayer_layer_info_t out of one parsed
+// LCR xlayer entry.  layer_type/auxiliary_type/view_type are taken from
+// mlayer 0 only; auxiliary_type is -1 unless that layer's type is 1.
+static void fill_xlayer_info_from_lcr(avm_xlayer_layer_info_t *xl,
+                                      int xlayer_id,
+                                      const struct LCRXLayerInfo *xli) {
+  xl->xlayer_id = xlayer_id;
+  xl->max_width = xli->rep_params.lcr_max_pic_width;
+  xl->max_height = xli->rep_params.lcr_max_pic_height;
+  if (xli->lcr_embedded_layer_info_present_flag) {
+    const struct EmbeddedLayerInfo *ml = &xli->mlayer_params;
+    xl->num_mlayers = ml->MLayerCount;
+    xl->layer_type = ml->lcr_layer_type[0];
+    xl->auxiliary_type =
+        (ml->lcr_layer_type[0] == 1) ? ml->lcr_auxiliary_type[0] : -1;
+    xl->view_type = ml->lcr_view_type[0];
+  } else {
+    xl->num_mlayers = 0;
+    xl->layer_type = 0;
+    xl->auxiliary_type = -1;
+    xl->view_type = 0;
+  }
+}
+
+// AV2D_GET_LCR_INFO handler.  Zeroes the caller's avm_lcr_info_t and fills it
+// from a valid global LCR if one is stored, otherwise from the first valid
+// local LCR of each xlayer.  At most 31 entries are written, and num_xlayers
+// never exceeds the number written.  Returns AVM_CODEC_INVALID_PARAM for a
+// NULL argument, AVM_CODEC_ERROR without a frame worker or any LCR, else OK.
+static avm_codec_err_t ctrl_get_lcr_info(avm_codec_alg_priv_t *ctx,
+                                         va_list args) {
+  avm_lcr_info_t *const info = va_arg(args, avm_lcr_info_t *);
+  if (!info) return AVM_CODEC_INVALID_PARAM;
+
+  memset(info, 0, sizeof(*info));
+
+  if (!ctx->frame_worker) return AVM_CODEC_ERROR;
+
+  AVxWorker *const worker = ctx->frame_worker;
+  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+  AV2Decoder *const pbi = frame_worker_data->pbi;
+  const int max_entries = 31;  // entry limit used by both paths below
+
+  // Try global LCR first (stored at xlayer_id = GLOBAL_XLAYER_ID = 31)
+  const struct GlobalLayerConfigurationRecord *glcr = NULL;
+  for (int lcr_idx = 0; lcr_idx < MAX_NUM_LCR; lcr_idx++) {
+    const struct LayerConfigurationRecord *lcr =
+        &pbi->lcr_list[GLOBAL_XLAYER_ID][lcr_idx];
+    if (lcr->valid && lcr->is_global) {
+      glcr = &lcr->global_lcr;
+      break;
+    }
+  }
+
+  if (glcr) {
+    // Clamp the reported count so callers iterating up to num_xlayers never
+    // walk past the entries that were actually filled in.
+    const int signalled = glcr->LcrMaxNumXLayerCount;
+    info->num_xlayers = signalled < max_entries ? signalled : max_entries;
+    for (int i = 0; i < info->num_xlayers; i++) {
+      fill_xlayer_info_from_lcr(&info->xlayers[i], glcr->LcrXLayerID[i],
+                                &glcr->xlayer_info[i]);
+    }
+    return AVM_CODEC_OK;
+  }
+
+  // Fallback: assemble from local LCRs per xlayer
+  int count = 0;
+  for (int xlid = 0; xlid < GLOBAL_XLAYER_ID && count < max_entries; xlid++) {
+    for (int lcr_idx = 0; lcr_idx < MAX_NUM_LCR; lcr_idx++) {
+      const struct LayerConfigurationRecord *lcr =
+          &pbi->lcr_list[xlid][lcr_idx];
+      if (lcr->valid && !lcr->is_global) {
+        fill_xlayer_info_from_lcr(&info->xlayers[count], xlid,
+                                  &lcr->local_lcr.xlayer_info);
+        count++;
+        break;  // found LCR for this xlayer, move to next
+      }
+    }
+  }
+  info->num_xlayers = count;
+
+  return (count > 0) ? AVM_CODEC_OK : AVM_CODEC_ERROR;
+}
+
+// AV2D_GET_ATLAS_INFO handler: zeroes *info, then fills it from the first
+// usable valid atlas_list entry.  num_segments is clamped to the 256 entries
+// this function writes.  Returns AVM_CODEC_ERROR if nothing usable is found.
+static avm_codec_err_t ctrl_get_atlas_info(avm_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  avm_atlas_info_t *const info = va_arg(args, avm_atlas_info_t *);
+  if (!info) return AVM_CODEC_INVALID_PARAM;
+  memset(info, 0, sizeof(*info));
+  if (!ctx->frame_worker) return AVM_CODEC_ERROR;
+
+  AVxWorker *const worker = ctx->frame_worker;
+  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+  AV2Decoder *const pbi = frame_worker_data->pbi;
+  const int max_segs = 256;  // segment limit used by the loops below
+
+  // Scan atlas_list for the first valid entry
+  for (int xlid = 0; xlid < MAX_NUM_XLAYERS; xlid++) {
+    for (int seg_idx = 0; seg_idx < MAX_NUM_ATLAS_SEG_ID; seg_idx++) {
+      const struct AtlasSegmentInfo *asi = &pbi->atlas_list[xlid][seg_idx];
+      if (!asi->valid) continue;
+      if (asi->atlas_segment_mode_idc == ENHANCED_ATLAS) {
+        // Enhanced atlas: dimensions and segments from region info + mapping
+        const struct AtlasRegionInfo *reg = &asi->ats_reg_params;
+        const struct AtlasRegionToSegmentMapping *rsm = &asi->ats_reg_seg_map;
+        const struct AtlasLabelSegmentInfo *lsi = &asi->ats_label_seg;
+        const int has_seg_ids = lsi->ats_signalled_atlas_segment_ids_flag;
+        const int signalled = rsm->ats_num_atlas_segments_minus_1 + 1;
+        info->atlas_width = reg->AtlasWidth;
+        info->atlas_height = reg->AtlasHeight;
+        info->num_segments = signalled < max_segs ? signalled : max_segs;
+        for (int s = 0; s < info->num_segments; s++) {
+          // Compute segment position from top-left region column/row
+          int col = rsm->ats_top_left_region_column[s];
+          int row = rsm->ats_top_left_region_row[s];
+          int seg_x = 0, seg_y = 0, seg_w = 0, seg_h = 0;
+          for (int c = 0; c < col; c++)
+            seg_x += reg->ats_column_width_minus_1[c] + 1;
+          for (int r = 0; r < row; r++)
+            seg_y += reg->ats_row_height_minus_1[r] + 1;
+          int br_col = rsm->ats_bottom_right_region_column[s];
+          int br_row = rsm->ats_bottom_right_region_row[s];
+          for (int c = col; c <= br_col; c++)
+            seg_w += reg->ats_column_width_minus_1[c] + 1;
+          for (int r = row; r <= br_row; r++)
+            seg_h += reg->ats_row_height_minus_1[r] + 1;
+          // Use label segment info for xlayer_id mapping
+          info->segments[s].xlayer_id =
+              has_seg_ids ? lsi->AtlasSegmentIndexToID[s] : s;
+          info->segments[s].pos_x = seg_x;
+          info->segments[s].pos_y = seg_y;
+          info->segments[s].width = seg_w;
+          info->segments[s].height = seg_h;
+        }
+        return AVM_CODEC_OK;
+      } else if (asi->atlas_segment_mode_idc == BASIC_ATLAS ||
+                 asi->atlas_segment_mode_idc == MULTISTREAM_ATLAS ||
+                 asi->atlas_segment_mode_idc == MULTISTREAM_ALPHA_ATLAS) {
+        // Basic/multistream atlas: dimensions and segments from basic info
+        const struct AtlasBasicInfo *abi = asi->ats_basic_info;
+        if (!abi) abi = &asi->ats_basic_info_s;
+        if (abi->AtlasWidth == 0 && abi->AtlasHeight == 0) continue;
+        const int signalled = abi->ats_num_atlas_segments_minus_1 + 1;
+        info->atlas_width = abi->AtlasWidth;
+        info->atlas_height = abi->AtlasHeight;
+        info->num_segments = signalled < max_segs ? signalled : max_segs;
+        for (int s = 0; s < info->num_segments; s++) {
+          info->segments[s].xlayer_id =
+              abi->ats_stream_id_present ? abi->ats_input_stream_id[s] : s;
+          info->segments[s].pos_x = abi->ats_segment_top_left_pos_x[s];
+          info->segments[s].pos_y = abi->ats_segment_top_left_pos_y[s];
+          info->segments[s].width = abi->ats_segment_width[s];
+          info->segments[s].height = abi->ats_segment_height[s];
+        }
+        return AVM_CODEC_OK;
+      } else if (asi->atlas_segment_mode_idc == SINGLE_ATLAS) {
+        // Single atlas: single segment, dimensions from nominal_width/height
+        info->atlas_width = asi->ats_nominal_width_minus1 + 1;
+        info->atlas_height = asi->ats_nominal_height_minus1 + 1;
+        info->num_segments = 1;
+        info->segments[0].xlayer_id = xlid;
+        info->segments[0].pos_x = 0;
+        info->segments[0].pos_y = 0;
+        info->segments[0].width = info->atlas_width;
+        info->segments[0].height = info->atlas_height;
+        return AVM_CODEC_OK;
+      }
+    }
+  }
+
+  return AVM_CODEC_ERROR;
+}
+
+// AV2D_GET_MONOTONIC_OUTPUT_ORDER handler: stores the decoder's
+// seq_params.monotonic_output_order_flag through the unsigned int * argument.
+static avm_codec_err_t ctrl_get_monotonic_output_order(
+    avm_codec_alg_priv_t *ctx, va_list args) {
+  unsigned int *const out_flag = va_arg(args, unsigned int *);
+  if (out_flag == NULL) return AVM_CODEC_INVALID_PARAM;
+  if (ctx->frame_worker == NULL) return AVM_CODEC_ERROR;
+  const FrameWorkerData *const fwd =
+      (const FrameWorkerData *)ctx->frame_worker->data1;
+  *out_flag = fwd->pbi->common.seq_params.monotonic_output_order_flag;
+  return AVM_CODEC_OK;
+}
+
 static avm_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
   { AV2_COPY_REFERENCE, ctrl_copy_reference },
 
@@ -2051,6 +2243,9 @@ static avm_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
   { AVMD_GET_SHOW_EXISTING_FRAME_FLAG, ctrl_get_show_existing_frame_flag },
   { AVMD_GET_S_FRAME_INFO, ctrl_get_s_frame_info },
   { AVMD_GET_FRAME_INFO, ctrl_get_dec_frame_info },
+  { AV2D_GET_LCR_INFO, ctrl_get_lcr_info },
+  { AV2D_GET_ATLAS_INFO, ctrl_get_atlas_info },
+  { AV2D_GET_MONOTONIC_OUTPUT_ORDER, ctrl_get_monotonic_output_order },
 
   CTRL_MAP_END,
 };
diff --git a/av2/common/av2_common_int.h b/av2/common/av2_common_int.h
index 83394d3efd..e00ea641ce 100644
--- a/av2/common/av2_common_int.h
+++ b/av2/common/av2_common_int.h
@@ -3004,6 +3004,13 @@ typedef struct AV2Common {
    * Initialized to -1 (unset).
    */
   int olk_refresh_frame_flags[MAX_NUM_MLAYERS];
+  /*!
+   * Refresh frame flags of a hidden intra forward keyframe (intra_only_fwd_kf)
+   * per layer.  Used by the encoder to protect the hidden intra's DPB slot
+   * from being overwritten by subsequent pyramid frames.  Initialized to -1
+   * (unset).  Not used by the decoder.
+   */
+  int fwd_intra_refresh_frame_flags[MAX_NUM_MLAYERS];
   /*!
    * Accumulated refresh_frame_flags of regular VCL OBUs co-signalled with an
    * OLK in the same temporal unit, per mlayer. Initialized to -1 (unset).
@@ -3342,6 +3349,16 @@ static INLINE int is_mlayer_transitively_dependent(
   return 0;
 }
 
+// Returns 1 when row `layer_id` of seq->mlayer_dependency_map is zero for
+// every other mlayer j in [0, max_mlayer_id], and 0 otherwise.
+static INLINE int is_mlayer_independent(const SequenceHeader *const seq,
+                                        const int layer_id) {
+  int has_dependency = 0;
+  for (int j = 0; j <= seq->max_mlayer_id && !has_dependency; j++)
+    has_dependency = j != layer_id && seq->mlayer_dependency_map[layer_id][j];
+  return !has_dependency;
+}
+
 static INLINE void get_secondary_reference_frame_idx(const AV2_COMMON *const cm,
                                                      int *ref_frame_used,
                                                      int *secondary_map_idx) {
diff --git a/av2/decoder/decodeframe.c b/av2/decoder/decodeframe.c
index 404d59fb6e..01306f51c2 100644
--- a/av2/decoder/decodeframe.c
+++ b/av2/decoder/decodeframe.c
@@ -7446,15 +7446,16 @@ static void activate_layer_configuration_record(AV2Decoder *pbi,
     // so that embedded layer info can fall back to it.
     if (!lcr->is_global) {
       int global_id = lcr->local_lcr.lcr_global_id;
-      if (global_id != LCR_ID_UNSPECIFIED) {
-        LayerConfigurationRecord *parent_glcr =
-            &pbi->lcr_list[GLOBAL_XLAYER_ID][global_id];
-        if (parent_glcr->valid && parent_glcr->is_global) {
-          cm->global_lcr_params = *parent_glcr;
-          // Conformance: when a local LCR is present and its parent global LCR
-          // has xlayer_info for the same extended layer, the local LCR's
-          // xlayer_info shall be the same as the global LCR's xlayer_info.
-          const GlobalLayerConfigurationRecord *glcr = &parent_glcr->global_lcr;
+      LayerConfigurationRecord *parent_glcr =
+          &pbi->lcr_list[GLOBAL_XLAYER_ID][global_id];
+      if (parent_glcr->valid && parent_glcr->is_global) {
+        cm->global_lcr_params = *parent_glcr;
+        // Conformance: when a local LCR is present and its parent global LCR
+        // has xlayer_info for the same extended layer, the local LCR's
+        // xlayer_info shall be the same as the global LCR's xlayer_info.
+        // This check only applies when the global LCR carries payload data.
+        const GlobalLayerConfigurationRecord *glcr = &parent_glcr->global_lcr;
+        if (glcr->lcr_global_payload_present_flag) {
           for (int i = 0; i < glcr->LcrMaxNumXLayerCount; i++) {
             if (glcr->LcrXLayerID[i] == lcr->xlayer_id) {
               if (memcmp(&lcr->local_lcr.xlayer_info, &glcr->xlayer_info[i],
diff --git a/av2/decoder/obu.c b/av2/decoder/obu.c
index 66ed285e97..95df5a297c 100644
--- a/av2/decoder/obu.c
+++ b/av2/decoder/obu.c
@@ -2687,6 +2687,7 @@ int avm_decode_frame_from_obus(struct AV2Decoder *pbi, const uint8_t *data,
         pbi->seen_frame_header = 0;
         pbi->next_start_tile = 0;
         pbi->seen_vcl_obu_in_this_tu = 0;
+        pbi->this_is_first_vcl_obu_in_tu = 0;
         pbi->doh_tu_order_hint_bits_set = 0;
         for (int i = 0; i < NUM_CUSTOM_QMS; i++) pbi->qm_protected[i] = 0;
 
@@ -2700,6 +2701,7 @@ int avm_decode_frame_from_obus(struct AV2Decoder *pbi, const uint8_t *data,
             pbi->seen_frame_header = 0;
             pbi->next_start_tile = 0;
             pbi->seen_vcl_obu_in_this_tu = 0;
+            pbi->this_is_first_vcl_obu_in_tu = 0;
             pbi->doh_tu_order_hint_bits_set = 0;
             for (int i = 0; i < NUM_CUSTOM_QMS; i++) pbi->qm_protected[i] = 0;
           }
diff --git a/av2/encoder/bitstream.c b/av2/encoder/bitstream.c
index 558049d3df..f5f94ea752 100644
--- a/av2/encoder/bitstream.c
+++ b/av2/encoder/bitstream.c
@@ -5368,7 +5368,8 @@ static AVM_INLINE void write_uncompressed_header(
     }
   }
 
-  if (obu_type == OBU_OPEN_LOOP_KEY) {
+  if (obu_type == OBU_OPEN_LOOP_KEY || av2_is_olk_forward_keyframe(cpi)) {
+    // OLK (non-monotonic open GOP): set OLK state.
     cpi->olk_encountered = 1;
     cm->last_olk_disp_order_hint = cm->current_frame.display_order_hint;
     cm->last_olk_order_hint = cm->current_frame.order_hint;
@@ -5377,6 +5378,13 @@ static AVM_INLINE void write_uncompressed_header(
     // In this encoder, the OLK updates only one reference slot
     cm->olk_refresh_frame_flags[cm->mlayer_id] =
         current_frame->refresh_frame_flags;
+  } else if (av2_is_fwd_intra_keyframe(cpi)) {
+    // Hidden intra forward keyframe (monotonic open GOP): separate state from
+    // OLK.  Protects the hidden intra's DPB slot but does not use OLK/leading
+    // frame machinery.
+    cpi->fwd_intra_encountered = 1;
+    cm->fwd_intra_refresh_frame_flags[cm->mlayer_id] =
+        current_frame->refresh_frame_flags;
   } else if (obu_type == OBU_CLOSED_LOOP_KEY ||
              (cm->is_leading_picture == 0 &&
               cpi->gf_group.update_type[cpi->gf_group.index] !=
@@ -5390,7 +5398,8 @@ static AVM_INLINE void write_uncompressed_header(
     cm->prev_olk_co_vcl_refresh_frame_flags[cm->mlayer_id] = INVALID_IDX;
   } else if (cpi->olk_encountered && cm->current_frame.display_order_hint >=
                                          cm->last_olk_disp_order_hint) {
-    // This is a frame within the same TU as the OLK. Cannot refresh it either.
+    // This is a co-VCL frame within the same TU as the OLK (non-monotonic
+    // only). Accumulate its refresh flags so the OLK slot set is complete.
     cm->olk_refresh_frame_flags[cm->mlayer_id] |=
         current_frame->refresh_frame_flags;
   }
@@ -6941,6 +6950,83 @@ size_t av2_write_metadata_user_data_unregistered(AV2_COMP *const cpi,
   return total_bytes_written;
 }
 
+// Returns 1 when `a` and `b` match on every field compared below, else 0.
+// Used to decide whether a per-mlayer CI OBU is needed or if inheritance
+// from a dependent layer suffices.  The color description values and the
+// chroma sample positions are compared only when their present flag is set.
+// NOTE(review): for aspect ratio and timing info only the present flags are
+// compared, not the payloads - confirm that is intended.
+static int ci_params_equal(const ContentInterpretation *a,
+                           const ContentInterpretation *b) {
+  // All four present flags must agree.
+  if (a->ci_color_description_present_flag !=
+          b->ci_color_description_present_flag ||
+      a->ci_chroma_sample_position_present_flag !=
+          b->ci_chroma_sample_position_present_flag ||
+      a->ci_aspect_ratio_info_present_flag !=
+          b->ci_aspect_ratio_info_present_flag ||
+      a->ci_timing_info_present_flag != b->ci_timing_info_present_flag)
+    return 0;
+
+  // Color description, checked only when signalled.
+  if (a->ci_color_description_present_flag &&
+      (a->color_info.color_description_idc !=
+           b->color_info.color_description_idc ||
+       a->color_info.color_primaries != b->color_info.color_primaries ||
+       a->color_info.transfer_characteristics !=
+           b->color_info.transfer_characteristics ||
+       a->color_info.matrix_coefficients !=
+           b->color_info.matrix_coefficients ||
+       a->color_info.full_range_flag != b->color_info.full_range_flag))
+    return 0;
+
+  // Chroma sample positions, checked only when signalled.
+  if (a->ci_chroma_sample_position_present_flag &&
+      (a->ci_chroma_sample_position[0] != b->ci_chroma_sample_position[0] ||
+       a->ci_chroma_sample_position[1] != b->ci_chroma_sample_position[1]))
+    return 0;
+
+  return 1;
+}
+
+// Emits a CONTENT_INTERPRETATION OBU for the current mlayer at `data`.
+// Nothing is written when cpi->write_ci_obu_flag is clear, or when the
+// current mlayer (> 0) has the same CI as the lowest-indexed mlayer it
+// depends on per seq_params.mlayer_dependency_map.
+// Returns the bytes written (0 if skipped or on failure); *err receives
+// AVM_CODEC_OK or AVM_CODEC_ERROR.
+static size_t write_ci_obu_for_mlayer(AV2_COMP *const cpi, uint8_t *data,
+                                      avm_codec_err_t *err) {
+  AV2_COMMON *const cm = &cpi->common;
+  const int cur_ml = cm->mlayer_id;
+  ContentInterpretation *const cur_ci = &cm->ci_params_per_layer[cur_ml];
+  *err = AVM_CODEC_OK;
+  // Skip if CI isn't needed globally
+  if (!cpi->write_ci_obu_flag) return 0;
+
+  // Only the first dependency found decides whether the OBU can be skipped.
+  int dep_ml = 0;
+  while (dep_ml < cur_ml &&
+         !cm->seq_params.mlayer_dependency_map[cur_ml][dep_ml])
+    dep_ml++;
+  if (dep_ml < cur_ml &&
+      ci_params_equal(cur_ci, &cm->ci_params_per_layer[dep_ml]))
+    return 0;
+
+  // obu_layer byte: mlayer in the upper bits, xlayer in the low five.
+  const int obu_layer_ci = (cur_ml << 5) | cm->xlayer_id;
+  const uint32_t hdr_sz =
+      av2_write_obu_header(OBU_CONTENT_INTERPRETATION, 0, obu_layer_ci, data);
+  const uint32_t payload_sz =
+      av2_write_content_interpretation_obu(cur_ci, data + hdr_sz);
+  const size_t len_field_sz = obu_memmove(hdr_sz, payload_sz, data);
+  if (av2_write_uleb_obu_size(hdr_sz, payload_sz, data) != AVM_CODEC_OK) {
+    *err = AVM_CODEC_ERROR;
+    return 0;
+  }
+  return hdr_sz + payload_sz + len_field_sz;
+}
+
 // This function actually writes to the bistream. The av2_pack_bitstream()
 // function is a thin wrapper around this function.
 static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
@@ -6958,6 +7044,19 @@ static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
       obu_mlayer << 5 |
       obu_xlayer;  // obu_layer byte (mlayer (3-bit) | xlayer (5-bit))
 
+  // Track which higher mlayers need CI OBUs at this RAP.  ci_rap_tu is used
+  // as a bitmask: bit i is set when mlayer i still needs its CI OBU written.
+  // Only set at CLK/OLK for mlayer 0; each higher mlayer clears its bit once
+  // its CI OBU is emitted.  This survives the intervening mlayer 0 non-CLK
+  // frames that are encoded before mlayer 1+ starts (due to lag encoding).
+  if (cm->mlayer_id == 0 &&
+      (cm->current_frame.cm_obu_type == OBU_CLOSED_LOOP_KEY ||
+       cm->current_frame.cm_obu_type == OBU_OPEN_LOOP_KEY)) {
+    // Set bits for mlayers 1..max_mlayer_id (mlayer 0 is handled inline)
+    const int max_ml = cm->seq_params.max_mlayer_id;
+    cpi->ci_rap_tu = max_ml > 0 ? (((1 << (max_ml + 1)) - 1) & ~1) : 0;
+  }
+
   bool add_new_user_qm = false;
   // If no non-zero delta_q has been used, reset delta_q_present_flag
   if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
@@ -7056,19 +7155,11 @@ static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
 
   if (cm->current_frame.cm_obu_type == OBU_CLOSED_LOOP_KEY) {
     size_t length_field_size;
-    if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf &&
-        cpi->write_ci_obu_flag) {
-      obu_header_size =
-          av2_write_obu_header(OBU_CONTENT_INTERPRETATION, 0, 0, data);
-      obu_payload_size = av2_write_content_interpretation_obu(
-          &cm->ci_params_encoder, data + obu_header_size);
-      size_t length_field_size1 =
-          obu_memmove(obu_header_size, obu_payload_size, data);
-      if (av2_write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
-          AVM_CODEC_OK) {
-        return AVM_CODEC_ERROR;
-      }
-      data += obu_header_size + obu_payload_size + length_field_size1;
+    if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
+      avm_codec_err_t ci_err = AVM_CODEC_OK;  // helper may leave it unset
+      size_t ci_bytes = write_ci_obu_for_mlayer(cpi, data, &ci_err);
+      if (ci_err != AVM_CODEC_OK) return AVM_CODEC_ERROR;
+      data += ci_bytes;
     }
 
     if (cm->cur_mfh_id != 0) {
@@ -7133,6 +7224,29 @@ static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
       }
     }
   }
+
+  // Write CI OBU at OLK (random access point) for conformance.
+  // The SH is not written at OLK (noted above), but CI must be present at
+  // all RAPs so decoders can recover color interpretation after random access.
+  if (cm->current_frame.cm_obu_type == OBU_OPEN_LOOP_KEY) {
+    avm_codec_err_t ci_err = AVM_CODEC_OK;  // helper may leave it unset
+    size_t ci_bytes = write_ci_obu_for_mlayer(cpi, data, &ci_err);
+    if (ci_err != AVM_CODEC_OK) return AVM_CODEC_ERROR;
+    data += ci_bytes;
+  }
+
+  // Write CI OBU for mlayer > 0 when in a RAP TU.
+  // Higher mlayers are encoded as REGULAR_TILE_GROUP (not CLK/OLK), so they
+  // don't enter the blocks above.  ci_rap_tu bitmask was set by mlayer 0's
+  // CLK/OLK; each bit is cleared once that mlayer's CI has been emitted.
+  if (cm->mlayer_id > 0 && (cpi->ci_rap_tu & (1 << cm->mlayer_id))) {
+    avm_codec_err_t ci_err = AVM_CODEC_OK;  // helper may leave it unset
+    size_t ci_bytes = write_ci_obu_for_mlayer(cpi, data, &ci_err);
+    if (ci_err != AVM_CODEC_OK) return AVM_CODEC_ERROR;
+    data += ci_bytes;
+    cpi->ci_rap_tu &= ~(1 << cm->mlayer_id);
+  }
+
   if (add_new_user_qm && !cpi->obu_is_written) {
     assert(cpi->total_signalled_qmobu_count > 0);
     obu_header_size = av2_write_obu_header(OBU_QUANTIZATION_MATRIX,
@@ -7154,7 +7268,7 @@ static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
     struct film_grain_model fgm_current;
     set_film_grain_model(cpi, &fgm_current);
     int use_existing_fgm = -1;
-    if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
+    if (cm->current_frame.frame_type == KEY_FRAME && !cpi->is_fwd_kf) {
       cpi->written_fgm_num =
           0;  // clear the list, it is increased before uncompressed_header()
       fgm_current.fgm_id = 0;
@@ -7313,7 +7427,7 @@ static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
   OBU_TYPE obu_type = cm->is_leading_picture == 1 ? OBU_LEADING_TILE_GROUP
                                                   : OBU_REGULAR_TILE_GROUP;
   if (cm->current_frame.frame_type == KEY_FRAME)
-    obu_type = cpi->no_show_fwd_kf ? OBU_OPEN_LOOP_KEY : OBU_CLOSED_LOOP_KEY;
+    obu_type = cpi->is_fwd_kf ? OBU_OPEN_LOOP_KEY : OBU_CLOSED_LOOP_KEY;
   if (cm->current_frame.frame_type == S_FRAME)
     obu_type = (cpi->is_ras_frame == 1) ? OBU_RAS_FRAME : OBU_SWITCH;
 
@@ -7418,8 +7532,10 @@ static int av2_pack_bitstream_internal(AV2_COMP *const cpi, uint8_t *dst,
 
   int write_temporal_point_metadata =
       (cpi->write_ci_obu_flag &&
-       cpi->common.ci_params_encoder.ci_timing_info_present_flag &&
-       cpi->common.ci_params_encoder.timing_info.equal_elemental_interval == 0)
+       cpi->common.ci_params_per_layer[cpi->common.mlayer_id]
+           .ci_timing_info_present_flag &&
+       cpi->common.ci_params_per_layer[cpi->common.mlayer_id]
+               .timing_info.equal_elemental_interval == 0)
           ? 1
           : 0;
   if (write_temporal_point_metadata) {
diff --git a/av2/encoder/encode_strategy.c b/av2/encoder/encode_strategy.c
index fa786e6a87..2db7b6f4ce 100644
--- a/av2/encoder/encode_strategy.c
+++ b/av2/encoder/encode_strategy.c
@@ -178,45 +178,66 @@ static INLINE void update_gf_group_index(AV2_COMP *cpi) {
       cpi->common.number_mlayers == 1) {
     ++cpi->gf_group.index;
   } else {
-    // To be updated based on the (multi_layers) tests for nonzero lag.
-    // The current test is for fixed GOP with keyframe_filtering off.
+    // Multi-mlayer with lag: within each temporal unit, complete all frames
+    // for the current embedded layer before switching to the next.
+    //
+    // ARFs are "hidden" (batched with their overlay) when monotonic mode is
+    // on OR when add_sef_for_hidden_frames is on.  In both cases, the hidden
+    // frames are grouped together with the first displayable frame in a
+    // single TU, and the index rewinds so each mlayer processes the same
+    // hidden batch.
+    //
+    // In non-monotonic mode WITHOUT SEF, ARF and INTNL_ARF frames are
+    // implicit output (the decoder reorders them), so each gets its own TU —
+    // no batching.  Overlay entries still execute (popping from the lookahead
+    // to stay in sync) but become zero-byte FRAME_NULL_PKT via the
+    // forced_implicit path in av2_cx_iface.c.
     GF_GROUP *const gf_group = &cpi->gf_group;
-    if (gf_group->update_type[cpi->gf_group.index] == ARF_UPDATE ||
-        gf_group->update_type[cpi->gf_group.index] == INTNL_ARF_UPDATE ||
-        gf_group->update_type[cpi->gf_group.index] == KFFLT_UPDATE) {
+    const FRAME_UPDATE_TYPE cur_type = gf_group->update_type[gf_group->index];
+    const int nonmono = !cpi->common.seq_params.monotonic_output_order_flag;
+    // ARFs are hidden (not implicit output) when either monotonic mode is on
+    // OR add_sef_for_hidden_frames is on (SEF mode forces hidden+SEF output).
+    const int arfs_are_hidden =
+        !nonmono || cpi->oxcf.ref_frm_cfg.add_sef_for_hidden_frames;
+
+    if (arfs_are_hidden &&
+        (cur_type == ARF_UPDATE || cur_type == INTNL_ARF_UPDATE ||
+         cur_type == KFFLT_UPDATE)) {
+      // Hidden frame — advance index, stay on same mlayer.
       ++gf_group->index;
-      // Continue on the same mlayer.
-      if (cpi->common.mlayer_id == 0) gf_group->arf_update_counter++;
-    } else if (cpi->common.mlayer_id == 0 && cpi->gf_group.index > 0 &&
-               (gf_group->update_type[cpi->gf_group.index] == LF_UPDATE ||
-                gf_group->update_type[cpi->gf_group.index] ==
-                    FWD_KF_OVERLAY_UPDATE ||
-                gf_group->update_type[cpi->gf_group.index] ==
-                    FWD_KF_SUCCESSOR_UPDATE) &&
-               (gf_group->update_type[cpi->gf_group.index - 1] == ARF_UPDATE ||
-                gf_group->update_type[cpi->gf_group.index - 1] ==
-                    INTNL_ARF_UPDATE ||
-                gf_group->update_type[cpi->gf_group.index - 1] ==
-                    OVERLAY_UPDATE ||
-                gf_group->update_type[cpi->gf_group.index - 1] ==
-                    INTNL_OVERLAY_UPDATE ||
-                gf_group->update_type[cpi->gf_group.index - 1] ==
-                    KFFLT_OVERLAY_UPDATE)) {
-      // This willl force the next encode_call to encode ARFs followed by LF
-      // at the next ml layer.
-      gf_group->index = gf_group->index - gf_group->arf_update_counter;
+      gf_group->arf_update_counter++;
+    } else if (nonmono && cur_type == KFFLT_UPDATE) {
+      // Non-monotonic KFFLT without SEF: the filtered keyframe is hidden
+      // (not implicit output like ARF/INTNL_ARF).  All frames of a given
+      // embedded layer must be grouped together up to the output frame before
+      // moving to the next layer.  Advance to the KFFLT_OVERLAY (displayable)
+      // while staying on the same mlayer, and track the hidden frame count so
+      // the rewind logic below replays for the next mlayer.
+      ++gf_group->index;
+      gf_group->arf_update_counter++;
+    } else if (gf_group->arf_update_counter > 0 &&
+               (unsigned int)cpi->common.mlayer_id <
+                   cpi->common.number_mlayers - 1) {
+      // End of hidden batch + displayable for current mlayer, more mlayers
+      // remain: rewind to start of hidden batch and advance mlayer.  A nonzero
+      // counter alone marks a pending batch, in either output-order mode.
+      gf_group->index -= gf_group->arf_update_counter;
       gf_group->arf_update_counter = 0;
-      // Go to next mlayer
-      cpi->common.next_mlayer_id = 1;
-    } else if ((unsigned int)cpi->common.mlayer_id ==
+      cpi->common.next_mlayer_id = cpi->common.mlayer_id + 1;
+    } else if ((unsigned int)cpi->common.mlayer_id <
                cpi->common.number_mlayers - 1) {
-      // Every regular frame is encoded with same source up to number_mlayers.
+      // Not last mlayer: stay at same index, switch to next mlayer.
+      cpi->common.next_mlayer_id = cpi->common.mlayer_id + 1;
+    } else {
+      // Last mlayer: advance index, switch back to ml=0.
       ++gf_group->index;
-      // Go back to mlayer 0
+      gf_group->arf_update_counter = 0;
       cpi->common.next_mlayer_id = 0;
-    } else {
-      // Go to next mlayer
-      cpi->common.next_mlayer_id = 1;
+
+      // Non-monotonic: overlay entries are NOT skipped.  They pop from
+      // the lookahead (keeping it in sync) and become zero-byte
+      // FRAME_NULL_PKT via the forced_implicit path when the underlying
+      // ARF/INTNL was implicit output.
     }
   }
 }
@@ -756,9 +777,9 @@ int av2_get_refresh_frame_flags(
     AV2_COMP *const cpi, const EncodeFrameParams *const frame_params,
     FRAME_UPDATE_TYPE frame_update_type, int gf_index, int cur_disp_order,
     RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]) {
-  // Shown key-frames overwrite all reference slots
+  // Shown key-frames overwrite all reference slots (CLK only, not OLK)
   if (av2_is_shown_keyframe(cpi, frame_params->frame_type) &&
-      cpi->common.seq_params.max_mlayer_id == 0 && !cpi->no_show_fwd_kf) {
+      cpi->common.seq_params.max_mlayer_id == 0 && !cpi->is_fwd_kf) {
     return (1 << cpi->common.seq_params.ref_frames) - 1;
   }
 
@@ -784,23 +805,30 @@ int av2_get_refresh_frame_flags(
         }
       }
     }
-    // For fwd kf, only refresh one buffer. The other buffers will be refreshed
-    // on the first regular TU it encounters after the OLK TU.
-    if (cpi->no_show_fwd_kf) {
-      int refresh_idx = -1;
-      for (int i = 0; i < cm->seq_params.ref_frames; ++i) {
-        if ((refresh_frame_flags >> i) & 1) {
-          // Skip slots containing implicit-output frames that have not
-          // been output yet and whose DOH is at least the current
-          // frame's DOH. (DOH requirement)
-          if (cm->ref_frame_map[i] != NULL &&
-              cm->ref_frame_map[i]->implicit_output_picture &&
-              !cm->ref_frame_map[i]->frame_output_done &&
-              (int)cm->ref_frame_map[i]->display_order_hint >= cur_disp_order) {
-            continue;
+    // For fwd kf (displayed or hidden OLK), only refresh one buffer.
+    // The other buffers will be refreshed on the first regular TU it
+    // encounters after the OLK TU.
+    if (cpi->is_fwd_kf) {
+      // With multiple embedded layers, each mlayer's forward keyframe must
+      // refresh a different DPB slot.  Use the mlayer-aware slot allocator and
+      // protect slots already claimed by earlier mlayers.
+      int fwd_kf_flags_to_keep = 0;
+      const int is_fwd_intra = cpi->oxcf.kf_cfg.intra_only_fwd_kf;
+      for (int ml = 0; ml < cm->mlayer_id; ml++) {
+        int flags = is_fwd_intra ? cm->fwd_intra_refresh_frame_flags[ml]
+                                 : cm->olk_refresh_frame_flags[ml];
+        if (flags != -1) fwd_kf_flags_to_keep |= flags;
+      }
+      int refresh_idx = get_free_ref_map_index_multi_layer(
+          ref_frame_map_pairs, cm->seq_params.ref_frames, fwd_kf_flags_to_keep,
+          cm->mlayer_id);
+      if (refresh_idx == INVALID_IDX) {
+        // Fallback: pick the first available bit in refresh_frame_flags
+        for (int i = 0; i < cm->seq_params.ref_frames; ++i) {
+          if ((refresh_frame_flags >> i) & 1) {
+            refresh_idx = i;
+            break;
           }
-          refresh_idx = i;
-          break;
         }
       }
       assert(refresh_idx >= 0);
@@ -842,6 +870,14 @@ int av2_get_refresh_frame_flags(
       olk_flags_to_keep |= cpi->common.olk_refresh_frame_flags[layer];
     }
   }
+  // Also protect the hidden intra forward keyframe's DPB slot.
+  if (cpi->fwd_intra_encountered) {
+    for (int layer = 0; layer <= cpi->common.seq_params.max_mlayer_id;
+         layer++) {
+      if (cpi->common.fwd_intra_refresh_frame_flags[layer] == -1) continue;
+      olk_flags_to_keep |= cpi->common.fwd_intra_refresh_frame_flags[layer];
+    }
+  }
 
   // Protect ref buffer slots containing implicit-output frames with DOH
   // at least the current frame's DOH. (DOH requirement)
@@ -970,8 +1006,8 @@ static int denoise_and_encode(AV2_COMP *const cpi, uint8_t *const dest,
       av2_frame_init_quantizer(cpi);
       av2_setup_past_independence(cm);
 
-      if (!frame_params->immediate_output_picture && cpi->no_show_fwd_kf) {
-        // fwd kf
+      if (!frame_params->immediate_output_picture && cpi->is_fwd_kf) {
+        // fwd kf (displayed or hidden OLK)
         arf_src_index = -1 * gf_group->arf_src_offset[gf_group->index];
       } else if (!frame_params->immediate_output_picture) {
         arf_src_index = 0;
@@ -1038,14 +1074,26 @@ static int denoise_and_encode(AV2_COMP *const cpi, uint8_t *const dest,
 
   cm->cur_frame->allow_direct_use = cm->allow_direct_use;
 
+  // Non-monotonic multi-mlayer: track the ARF's allow_direct_use decision.
+  // This persists across mlayers so the OVERLAY_UPDATE can produce null
+  // output even when the DPB reference has been evicted for later mlayers.
+  // Only applies when ARFs are truly implicit output (not SEF mode).
+  if (cpi->oxcf.unit_test_cfg.multi_layers_lag_test && cm->number_mlayers > 1 &&
+      !cm->seq_params.monotonic_output_order_flag &&
+      !cpi->oxcf.ref_frm_cfg.add_sef_for_hidden_frames &&
+      get_frame_update_type(&cpi->gf_group) == ARF_UPDATE) {
+    cpi->gf_group.arf_is_implicit_output = cm->allow_direct_use;
+  }
+
   // perform tpl after filtering
   int allow_tpl = oxcf->gf_cfg.lag_in_frames > 1 &&
                   !is_stat_generation_stage(cpi) &&
                   oxcf->algo_cfg.enable_tpl_model;
-  if (frame_params->frame_type == KEY_FRAME) {
-    // Don't do tpl for fwd key frames
+  if (frame_params->frame_type == KEY_FRAME ||
+      frame_params->frame_type == INTRA_ONLY_FRAME) {
+    // Don't do tpl for fwd key frames or intra-only fwd frames
     allow_tpl = allow_tpl && !cpi->sf.tpl_sf.disable_filtered_key_tpl &&
-                !cpi->no_show_fwd_kf;
+                !cpi->is_fwd_kf;
   } else {
     // Do tpl after ARF is filtered, or if no ARF, at the second frame of GF
     // group.
@@ -1142,11 +1190,12 @@ int av2_encode_strategy(AV2_COMP *const cpi, size_t *const size,
   if (cpi->oxcf.ref_frm_cfg.add_sef_for_hidden_frames) {
     cpi->common.implicit_output_picture = 0;
   }
-  if (gf_group->update_type[gf_group->index] == FWD_KF_OVERLAY_UPDATE ||
-      gf_group->update_type[gf_group->index] == FWD_KF_SUCCESSOR_UPDATE) {
+  if ((gf_group->update_type[gf_group->index] == FWD_KF_OVERLAY_UPDATE ||
+       gf_group->update_type[gf_group->index] == FWD_KF_SUCCESSOR_UPDATE) &&
+      !cpi->oxcf.kf_cfg.intra_only_fwd_kf) {
     // These have to use implicit output since they need to be
     // coded_output_picture OBUs, to be put together with a hidden OLK obu in
-    // the same TU.
+    // the same TU.  Not applicable for intra_only_fwd_kf.
     cpi->common.implicit_output_picture = 1;
   }
 
@@ -1253,22 +1302,27 @@ int av2_encode_strategy(AV2_COMP *const cpi, size_t *const size,
   if (cpi->oxcf.ref_frm_cfg.add_sef_for_hidden_frames) {
     cm->implicit_output_picture = 0;
   }
-  if (gf_group->update_type[gf_group->index] == FWD_KF_OVERLAY_UPDATE ||
-      gf_group->update_type[gf_group->index] == FWD_KF_SUCCESSOR_UPDATE) {
+  if ((gf_group->update_type[gf_group->index] == FWD_KF_OVERLAY_UPDATE ||
+       gf_group->update_type[gf_group->index] == FWD_KF_SUCCESSOR_UPDATE) &&
+      !cpi->oxcf.kf_cfg.intra_only_fwd_kf) {
     // These have to use implicit output since they need to be
     // coded_output_picture OBUs, to be put together with a hidden OLK obu in
-    // the same TU.
+    // the same TU.  Not applicable for intra_only_fwd_kf which uses regular
+    // SEF instead of OLK TU structure.
     cpi->common.implicit_output_picture = 1;
   }
 
-  if (frame_params.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
+  if (frame_params.frame_type == KEY_FRAME && !cpi->is_fwd_kf) {
+    // CLK: not implicit output, not direct use.
     cm->allow_direct_use = 0;
     cm->implicit_output_picture = 0;
   }
 
-  if (cpi->no_show_fwd_kf && cpi->oxcf.kf_cfg.enable_keyframe_filtering > 1) {
+  if (cpi->no_show_fwd_kf && (cpi->oxcf.kf_cfg.enable_keyframe_filtering > 1 ||
+                              cpi->oxcf.kf_cfg.intra_only_fwd_kf)) {
     // An overlay of the fwd kf is going to be added. The fwd kf cannot be
-    // directly displayed.
+    // directly displayed.  For intra_only_fwd_kf, the hidden INTRA_ONLY_FRAME
+    // must always be shown via SEF, never directly.
     cm->allow_direct_use = 0;
     cm->implicit_output_picture = 0;
   }
@@ -1297,7 +1351,9 @@ int av2_encode_strategy(AV2_COMP *const cpi, size_t *const size,
   cm->current_frame.display_order_hint_restricted = cur_frame_disp;
   cm->current_frame.pyramid_level = get_true_pyr_level(
       cpi->gf_group.layer_depth[cpi->gf_group.index],
-      cm->current_frame.frame_type == KEY_FRAME, cpi->gf_group.max_layer_depth,
+      frame_params.frame_type == KEY_FRAME ||
+          frame_params.frame_type == INTRA_ONLY_FRAME,
+      cpi->gf_group.max_layer_depth,
       cpi->gf_group.update_type[cpi->gf_group.index] == KFFLT_OVERLAY_UPDATE);
 
   cm->current_frame.tlayer_id = cm->tlayer_id;
@@ -1349,9 +1405,11 @@ int av2_encode_strategy(AV2_COMP *const cpi, size_t *const size,
     // encoded
     cpi->gf_state.olk_overlay_last = 1;
   }
-  int use_olk_ref_only =
-      cpi->gf_group.update_type[cpi->gf_group.index] == FWD_KF_OVERLAY_UPDATE ||
-      cpi->gf_group.update_type[cpi->gf_group.index] == FWD_KF_SUCCESSOR_UPDATE;
+  int use_olk_ref_only = !cpi->oxcf.kf_cfg.intra_only_fwd_kf &&
+                         (cpi->gf_group.update_type[cpi->gf_group.index] ==
+                              FWD_KF_OVERLAY_UPDATE ||
+                          cpi->gf_group.update_type[cpi->gf_group.index] ==
+                              FWD_KF_SUCCESSOR_UPDATE);
   init_ref_map_pair(&cpi->common, cm->ref_frame_map_pairs,
                     frame_params.frame_type == KEY_FRAME,
                     cpi->is_ras_frame == 1, use_olk_ref_only);
@@ -1507,7 +1565,7 @@ int av2_encode_strategy(AV2_COMP *const cpi, size_t *const size,
 
       // If this is a forward keyframe, mark as a show_existing_frame
       // TODO(bohanli): find a consistent condition for fwd keyframes
-      if (oxcf->kf_cfg.fwd_kf_enabled &&
+      if (oxcf->kf_cfg.fwd_kf_enabled && !oxcf->kf_cfg.intra_only_fwd_kf &&
           (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE ||
            gf_group->update_type[gf_group->index] == KFFLT_OVERLAY_UPDATE) &&
           gf_group->arf_index >= 0 && cpi->rc.frames_to_key == 0) {
@@ -1529,6 +1587,10 @@ int av2_encode_strategy(AV2_COMP *const cpi, size_t *const size,
                   KFFLT_OVERLAY_UPDATE)) ||
             gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE;
       }
+      // In non-monotonic multi-mlayer mode, overlays for implicit-output
+      // frames remain as show_existing_frame.  The forced_implicit path in
+      // av2_cx_iface.c converts them to zero-byte FRAME_NULL_PKT, keeping
+      // the lookahead in sync without emitting redundant OBUs.
       frame_params.frame_params_update_type_was_overlay &=
           allow_show_existing(cpi, *frame_flags);
     } else {
diff --git a/av2/encoder/encoder.c b/av2/encoder/encoder.c
index b38b0b0ec8..92ef97a1d9 100644
--- a/av2/encoder/encoder.c
+++ b/av2/encoder/encoder.c
@@ -903,6 +903,15 @@ static void set_content_interpreation_params(struct AV2_COMP *cpi,
     cpi->write_ci_obu_flag = 1;
   else
     cpi->write_ci_obu_flag = 0;
+
+  // Propagate the base CI to all mlayer slots that haven't been explicitly
+  // overridden by AV2E_SET_MLAYER_COLOR_* controls.  Use MAX_NUM_MLAYERS
+  // rather than max_mlayer_id because max_mlayer_id may not be set yet at
+  // init time (it is set later by av2_init_seq_coding_tools).
+  for (int ml = 0; ml < MAX_NUM_MLAYERS; ml++) {
+    if (!cpi->ci_per_layer_overridden[ml])
+      cpi->common.ci_params_per_layer[ml] = *ci_params;
+  }
 }
 
 static void init_config(struct AV2_COMP *cpi, AV2EncoderConfig *oxcf) {
@@ -4608,6 +4617,42 @@ static int encode_frame_to_data_rate(AV2_COMP *cpi, size_t *size, uint8_t *dest,
       cm->ref_frame_map[cpi->fb_idx_for_overlay] &&
       cm->ref_frame_map[cpi->fb_idx_for_overlay]->implicit_output_picture;
 
+  // Non-monotonic multi-mlayer: when an overlay's underlying frame was
+  // implicit output but the reference has been evicted from the DPB (e.g.,
+  // ml>0 in interleaved coding order), the overlay produces zero bytes.
+  // The source was already popped from the lookahead; we just return early.
+  // This handles both:
+  //   - INTNL_OVERLAY_UPDATE (always null, since INTNL always implicit output)
+  //   - OVERLAY_UPDATE when arf_is_implicit_output (ARF had allow_direct_use=1)
+  if (!forced_implicit && cpi->update_type_was_overlay &&
+      cpi->fb_idx_for_overlay == INVALID_IDX &&
+      !cm->seq_params.monotonic_output_order_flag &&
+      cpi->oxcf.unit_test_cfg.multi_layers_lag_test && cm->number_mlayers > 1) {
+    *size = 0;
+    if (cm->immediate_output_picture) cpi->last_show_frame_buf = cm->cur_frame;
+    if (!av2_check_keyframe_overlay(cpi->gf_group.index, &cpi->gf_group,
+                                    cpi->rc.frames_since_key))
+      ++current_frame->frame_number;
+    return AVM_CODEC_OK;
+  }
+  // Also handle an OVERLAY_UPDATE that was not flagged update_type_was_overlay
+  // but whose ARF was tracked as implicit output (arf_is_implicit_output
+  // persists across mlayers even when the DPB slot has been reused).
+  if (!forced_implicit && !cpi->update_type_was_overlay &&
+      cpi->gf_group.update_type[cpi->gf_group.index] == OVERLAY_UPDATE &&
+      cpi->gf_group.arf_is_implicit_output &&
+      !cm->seq_params.monotonic_output_order_flag &&
+      cpi->oxcf.unit_test_cfg.multi_layers_lag_test && cm->number_mlayers > 1) {
+    *size = 0;
+    cpi->update_type_was_overlay = 1;  // signal av2_cx_iface as null overlay
+    cpi->gf_group.arf_is_implicit_output = 0;  // consumed
+    if (cm->immediate_output_picture) cpi->last_show_frame_buf = cm->cur_frame;
+    if (!av2_check_keyframe_overlay(cpi->gf_group.index, &cpi->gf_group,
+                                    cpi->rc.frames_since_key))
+      ++current_frame->frame_number;
+    return AVM_CODEC_OK;
+  }
+
   if ((!cpi->oxcf.ref_frm_cfg.add_sef_for_hidden_frames || forced_implicit) &&
       cpi->update_type_was_overlay && cpi->fb_idx_for_overlay != INVALID_IDX &&
       cm->ref_frame_map[cpi->fb_idx_for_overlay]) {
@@ -4661,28 +4706,38 @@ static int encode_frame_to_data_rate(AV2_COMP *cpi, size_t *size, uint8_t *dest,
   }
 
   if (need_sef_obu_for_hidden_frame(cpi)) {
-    // If this is an olk SEF, reset the buffers except for the olk.
-    int ref_flags_to_keep = 0;
-    for (int layer = 0; layer <= seq_params->max_mlayer_id; layer++) {
-      ref_flags_to_keep |= cm->olk_refresh_frame_flags[layer];
-    }
-    if (cpi->olk_encountered && ref_flags_to_keep != 0 &&
-        cpi->fb_idx_for_overlay >= 0 &&
-        ((ref_flags_to_keep >> cpi->fb_idx_for_overlay) & 1u)) {
-      for (int ref_index = 0; ref_index < cm->seq_params.ref_frames;
-           ref_index++) {
-        if (!((ref_flags_to_keep >> ref_index) & 1u) &&
-            (cm->ref_frame_map[ref_index] == NULL ||
-             cm->ref_frame_map[ref_index]->long_term_id == -1)) {
-          if (cm->ref_frame_map[ref_index] != NULL) {
-            --cm->ref_frame_map[ref_index]->ref_count;
-            cm->ref_frame_map[ref_index] = NULL;
+    if (cpi->fwd_intra_encountered) {
+      // Monotonic hidden intra forward keyframe: output via SEF.
+      // No DPB clearing — pyramid frames are regular and still referenced.
+      cpi->gf_state.fwd_intra_overlay_last = 1;
+      cpi->fwd_intra_encountered = 0;
+      cm->fwd_intra_refresh_frame_flags[cm->mlayer_id] = -1;
+      cpi->is_olk_overlay = 0;
+    } else if (cpi->olk_encountered) {
+      // Non-monotonic OLK SEF: reset buffers except for the OLK.
+      int ref_flags_to_keep = 0;
+      for (int layer = 0; layer <= seq_params->max_mlayer_id; layer++) {
+        ref_flags_to_keep |= cm->olk_refresh_frame_flags[layer];
+      }
+      if (ref_flags_to_keep != 0 && cpi->fb_idx_for_overlay >= 0 &&
+          ((ref_flags_to_keep >> cpi->fb_idx_for_overlay) & 1u)) {
+        for (int ref_index = 0; ref_index < cm->seq_params.ref_frames;
+             ref_index++) {
+          if (!((ref_flags_to_keep >> ref_index) & 1u) &&
+              (cm->ref_frame_map[ref_index] == NULL ||
+               cm->ref_frame_map[ref_index]->long_term_id == -1)) {
+            if (cm->ref_frame_map[ref_index] != NULL) {
+              --cm->ref_frame_map[ref_index]->ref_count;
+              cm->ref_frame_map[ref_index] = NULL;
+            }
           }
         }
+        cpi->is_olk_overlay = 1;
+        cpi->gf_state.olk_overlay_last = 1;
+        cpi->olk_encountered = 0;
+      } else {
+        cpi->is_olk_overlay = 0;
       }
-      cpi->is_olk_overlay = 1;
-      cpi->gf_state.olk_overlay_last = 1;
-      cpi->olk_encountered = 0;
     } else {
       cpi->is_olk_overlay = 0;
     }
@@ -4697,7 +4752,10 @@ static int encode_frame_to_data_rate(AV2_COMP *cpi, size_t *size, uint8_t *dest,
     cpi->seq_params_locked = 1;
     cm->sef_ref_fb_idx = cpi->fb_idx_for_overlay;
 
-    if (cm->last_olk_disp_order_hint > cm->current_frame.display_order_hint) {
+    if (cm->last_olk_disp_order_hint > cm->current_frame.display_order_hint &&
+        cpi->is_olk_overlay) {
+      // Non-monotonic OLK: SEF for a frame whose display order precedes the
+      // OLK is a leading SEF.
       cm->is_leading_picture = 1;
     } else {
       cm->is_leading_picture = 0;
@@ -4784,17 +4842,28 @@ static int encode_frame_to_data_rate(AV2_COMP *cpi, size_t *size, uint8_t *dest,
     return AVM_CODEC_OK;
   }
 
-  if (current_frame->frame_type == KEY_FRAME) {
+  if (current_frame->frame_type == KEY_FRAME ||
+      av2_is_olk_forward_keyframe(cpi)) {
+    // CLK or OLK (displayed or hidden).
     cm->is_leading_picture = -1;
-    if (cpi->no_show_fwd_kf) {
+    if (cpi->is_fwd_kf) {
+      // OLK (displayed or hidden): subsequent frames before the OLK in display
+      // order are leading pictures.
       cpi->olk_encountered = 1;
       cm->last_olk_order_hint = cm->current_frame.order_hint;
       cm->last_olk_disp_order_hint = cm->current_frame.display_order_hint;
     } else {
       cpi->olk_encountered = 0;
     }
+  } else if (av2_is_fwd_intra_keyframe(cpi)) {
+    // Hidden intra forward keyframe (monotonic open GOP).
+    // No leading-picture state — all frames remain regular in monotonic mode.
+    cm->is_leading_picture = 0;
+    cpi->fwd_intra_encountered = 1;
   } else {
     if (cm->last_olk_disp_order_hint > cm->current_frame.display_order_hint) {
+      // Non-monotonic OLK: frames with display order before the OLK are
+      // leading pictures.
       cm->is_leading_picture = 1;
     } else {
       cm->is_leading_picture = 0;
@@ -5466,6 +5535,7 @@ int av2_get_compressed_data(AV2_COMP *cpi, unsigned int *frame_flags,
 
   // Initialize fields related to forward keyframes
   cpi->no_show_fwd_kf = 0;
+  cpi->is_fwd_kf = 0;
 
   check_ref_count_status_enc(cpi);
   if (assign_cur_frame_new_fb(cm) == NULL) return AVM_CODEC_ERROR;
diff --git a/av2/encoder/encoder.h b/av2/encoder/encoder.h
index 61d623e679..c9fe419ad0 100644
--- a/av2/encoder/encoder.h
+++ b/av2/encoder/encoder.h
@@ -433,6 +433,14 @@ typedef struct {
    */
   bool fwd_kf_enabled;
 
+  /*!
+   * When true and fwd_kf_enabled is true, the forward keyframe is coded as
+   * INTRA_ONLY_FRAME instead of KEY_FRAME.  This enables open GOP with
+   * monotonic output: the hidden intra frame is shown via SEF, and reference
+   * buffers are not reset (allowing inter-prediction across the boundary).
+   */
+  bool intra_only_fwd_kf;
+
   /*!
    * Indicates if S-Frames should be enabled for the sequence.
    */
@@ -1062,6 +1070,11 @@ typedef struct {
   const SubGOPCfg *subgop_cfg;
   // Number of arf updates before a displayeed frame.
   int arf_update_counter;
+  // Non-monotonic multi-mlayer: 1 if the ARF in this GF group had
+  // allow_direct_use=1 (implicit output).  Persists across mlayers so
+  // the OVERLAY_UPDATE can produce null output even when the reference
+  // has been evicted from the DPB for later mlayers.
+  int arf_is_implicit_output;
   /*!\endcond */
 } GF_GROUP;
 /*!\cond */
@@ -1071,6 +1084,8 @@ typedef struct {
   int arf_gf_boost_lst;
   // Track if the last frame in a GOP is a olk overlay
   int olk_overlay_last;
+  // Track if the last frame in a GOP is a fwd intra SEF output
+  int fwd_intra_overlay_last;
 } GF_STATE;
 
 typedef struct {
@@ -2379,15 +2394,30 @@ typedef struct AV2_COMP {
   struct lookahead_ctx *lookahead;
 
   /*!
-   * When set, this flag indicates that the current frame is a forward keyframe.
+   * When set, this flag indicates that the current frame is a hidden forward
+   * keyframe (needs kf_filt overlay or SEF to be shown).
    */
   int no_show_fwd_kf;
+  /*!
+   * When set, this flag indicates that the current ARF is at the KF boundary
+   * and should be coded as a keyframe (OLK or INTRA_ONLY_FRAME).  This is
+   * independent of no_show_fwd_kf: for open_leading the frame is a displayed
+   * OLK (is_fwd_kf=1, no_show_fwd_kf=0).
+   */
+  int is_fwd_kf;
   /*!
    * Indicates an OLK obu is encountered in any layer
    * It is initialized as 0 and set 1 when the first olk is decoded and set 0
    * when the first regular frame or the first CLK after the olk is decoded.
    */
   int olk_encountered;
+  /*!
+   * Indicates a hidden intra forward keyframe (intra_only_fwd_kf) has been
+   * coded.  Set when the hidden INTRA_ONLY_FRAME is encoded and cleared when
+   * its SEF is output.  Used to protect the hidden intra's DPB slot from
+   * being overwritten by subsequent pyramid frames.
+   */
+  int fwd_intra_encountered;
   /*!
    * If true, the update type is one of overlay updates
    */
@@ -2928,6 +2958,17 @@ typedef struct AV2_COMP {
    * write ci obu
    */
   int write_ci_obu_flag;
+  /*!
+   * Bitmask of higher mlayers that still need a CI OBU for the current RAP.
+   * Set at an mlayer-0 CLK/OLK; bit i is cleared once mlayer i writes its CI.
+   */
+  int ci_rap_tu;
+  /*!
+   * Per-mlayer CI override flags.  Set to 1 by AV2E_SET_MLAYER_COLOR_*
+   * controls to indicate that ci_params_per_layer[ml] was explicitly set
+   * and should not be overwritten by set_content_interpreation_params().
+   */
+  int ci_per_layer_overridden[MAX_NUM_MLAYERS];
   /*!
    * Write the Buffer Removal Timing OBU
    */
@@ -3612,10 +3653,36 @@ static INLINE void check_ref_count_status_enc(AV2_COMP *cpi) {
   }
 }
 
-// Returns true if current frame is a shown (visible) keyframe.
+// Returns true if current frame is a shown CLK (closed-loop keyframe).
+// Excludes OLKs (both displayed and hidden) and hidden intra fwd KFs.
 static INLINE bool av2_is_shown_keyframe(const AV2_COMP *cpi,
                                          FRAME_TYPE frame_type) {
-  return (frame_type == KEY_FRAME) && !cpi->no_show_fwd_kf;
+  return (frame_type == KEY_FRAME) && !cpi->is_fwd_kf;
+}
+
+// True for a hidden forward keyframe coded through the OLK mechanism, i.e. a
+// KEY_FRAME with no_show_fwd_kf set (the non-monotonic open GOP case).
+static INLINE bool av2_is_olk_forward_keyframe(const AV2_COMP *cpi) {
+  const bool hidden_fwd_kf = cpi->no_show_fwd_kf != 0;
+  return hidden_fwd_kf && cpi->common.current_frame.frame_type == KEY_FRAME;
+}
+
+// True for a hidden intra forward keyframe (intra_only_fwd_kf, monotonic open
+// GOP): an INTRA_ONLY_FRAME coded with no_show_fwd_kf set.  It provides a
+// random access point but does not use OLK machinery.
+static INLINE bool av2_is_fwd_intra_keyframe(const AV2_COMP *cpi) {
+  if (cpi->common.current_frame.frame_type != INTRA_ONLY_FRAME) return false;
+  return cpi->no_show_fwd_kf && cpi->oxcf.kf_cfg.intra_only_fwd_kf;
+}
+
+// Decides whether the forward keyframe at a GOP boundary is coded as a hidden
+// (no_show) frame.  Hiding requires a way to display it later: an overlay from
+// keyframe filtering (kf_filt >= 2) or SEF-based hidden frame output.  For
+// open_leading (non-monotonic OLK), the OLK is an implicit output frame and
+// must NOT be hidden.
+static INLINE bool av2_fwd_kf_should_be_hidden(const AV2_COMP *cpi) {
+  if (cpi->oxcf.kf_cfg.enable_keyframe_filtering > 1) return true;
+  return cpi->oxcf.ref_frm_cfg.add_sef_for_hidden_frames;
 }
 
 /*!\endcond */
diff --git a/av2/encoder/gop_structure.c b/av2/encoder/gop_structure.c
index cb983bd05c..ea23d51ea2 100644
--- a/av2/encoder/gop_structure.c
+++ b/av2/encoder/gop_structure.c
@@ -322,15 +322,23 @@ static int construct_multi_layer_gf_structure(
     }
 
     if (has_hidden_fwd_kf) {
-      gf_group->update_type[frame_index] =
-          use_fwd_kf_overlay ? FWD_KF_OVERLAY_UPDATE : FWD_KF_SUCCESSOR_UPDATE;
-      gf_group->arf_src_offset[frame_index] =
-          cpi->common.number_mlayers *
-          ((use_fwd_kf_overlay ? 0 : 1) + gf_interval - cur_frame_index - 1);
-      gf_group->cur_frame_idx[frame_index] = cur_frame_index;
-      gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS;
-      gf_group->arf_boost[frame_index] = NORMAL_BOOST;
-      ++frame_index;
+      // For intra_only_fwd_kf (monotonic open GOP), the hidden intra frame
+      // is output via SEF like any other hidden frame — no FWD_KF_SUCCESSOR
+      // is needed (it would violate monotonic output ordering).  Only
+      // non-monotonic OLK or keyframe-filtering modes need the
+      // successor/overlay.
+      if (!cpi->oxcf.kf_cfg.intra_only_fwd_kf) {
+        gf_group->update_type[frame_index] = use_fwd_kf_overlay
+                                                 ? FWD_KF_OVERLAY_UPDATE
+                                                 : FWD_KF_SUCCESSOR_UPDATE;
+        gf_group->arf_src_offset[frame_index] =
+            cpi->common.number_mlayers *
+            ((use_fwd_kf_overlay ? 0 : 1) + gf_interval - cur_frame_index - 1);
+        gf_group->cur_frame_idx[frame_index] = cur_frame_index;
+        gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS;
+        gf_group->arf_boost[frame_index] = NORMAL_BOOST;
+        ++frame_index;
+      }
     }
 
     set_multi_layer_params(twopass, gf_group, rc, frame_info, cur_frame_index,
@@ -347,9 +355,11 @@ static int construct_multi_layer_gf_structure(
       gf_group->arf_boost[frame_index] = NORMAL_BOOST;
       ++frame_index;
       ++cur_frame_index;
-      if (!use_fwd_kf_overlay) {
+      if (!use_fwd_kf_overlay && !cpi->oxcf.kf_cfg.intra_only_fwd_kf) {
         // Add one more for the regular frame after the fwd kf sef (implicit
-        // output).
+        // output).  Not needed for intra_only_fwd_kf: the hidden intra is
+        // output via SEF like any other hidden frame, and the next frame
+        // belongs to the next GF group.
         gf_group->update_type[frame_index] = OVERLAY_UPDATE;
         gf_group->arf_src_offset[frame_index] = 0;
         gf_group->cur_frame_idx[frame_index] = cur_frame_index;
diff --git a/av2/encoder/pass2_strategy.c b/av2/encoder/pass2_strategy.c
index 8418bffc07..e0b97df90e 100644
--- a/av2/encoder/pass2_strategy.c
+++ b/av2/encoder/pass2_strategy.c
@@ -2798,6 +2798,16 @@ static void setup_target_rate(AV2_COMP *cpi) {
   rc->base_frame_target = target_rate;
 }
 
+// Returns 1 if the ARF at src_index sits on the forward keyframe boundary.
+static int is_at_fwd_kf_boundary(const AV2_COMP *cpi, int src_index) {
+  const GF_GROUP *gf_group = &cpi->gf_group;
+  const int kf_src = cpi->rc.frames_to_key * (int)cpi->common.number_mlayers;
+  if (src_index != kf_src || src_index == 0) return 0;
+  if (!cpi->oxcf.kf_cfg.fwd_kf_enabled || gf_group->size <= 1) return 0;
+  const int type = gf_group->update_type[gf_group->index];
+  return type != FWD_KF_OVERLAY_UPDATE && type != FWD_KF_SUCCESSOR_UPDATE;
+}
+
 void av2_get_second_pass_params(AV2_COMP *cpi,
                                 EncodeFrameParams *const frame_params) {
   RATE_CONTROL *const rc = &cpi->rc;
@@ -2813,21 +2823,26 @@ void av2_get_second_pass_params(AV2_COMP *cpi,
 
     setup_target_rate(cpi);
     int src_index = gf_group->arf_src_offset[gf_group->index];
-    if (src_index == cpi->rc.frames_to_key * (int)cpi->common.number_mlayers &&
-        src_index != 0 && cpi->oxcf.kf_cfg.fwd_kf_enabled &&
-        gf_group->size > 1 &&
-        gf_group->update_type[gf_group->index] != FWD_KF_OVERLAY_UPDATE &&
-        gf_group->update_type[gf_group->index] != FWD_KF_SUCCESSOR_UPDATE) {
-      cpi->no_show_fwd_kf = 1;
+    // Determine if this ARF is at the KF boundary (forward keyframe).
+    if (is_at_fwd_kf_boundary(cpi, src_index)) {
+      cpi->is_fwd_kf = 1;
+      // Hidden only when there is a mechanism to show it later
+      // (kf_filt overlay or SEF).  For open_leading, the OLK is displayed
+      // directly as an implicit output frame.
+      cpi->no_show_fwd_kf = av2_fwd_kf_should_be_hidden(cpi) ? 1 : 0;
     }
     // If this is an arf frame then we dont want to read the stats file or
     // advance the input pointer as we already have what we need.
     if (update_type == ARF_UPDATE || update_type == INTNL_ARF_UPDATE ||
         update_type == KFFLT_UPDATE) {
-      if (cpi->no_show_fwd_kf) {
+      if (cpi->is_fwd_kf) {
         assert(update_type == ARF_UPDATE || update_type == KFFLT_UPDATE);
-        frame_params->frame_type = KEY_FRAME;
-        frame_params->frame_params_obu_type = OBU_OPEN_LOOP_KEY;
+        if (cpi->oxcf.kf_cfg.intra_only_fwd_kf) {
+          frame_params->frame_type = INTRA_ONLY_FRAME;
+        } else {
+          frame_params->frame_type = KEY_FRAME;
+          frame_params->frame_params_obu_type = OBU_OPEN_LOOP_KEY;
+        }
       } else {
         frame_params->frame_type = INTER_FRAME;
         update_subgop_stats(&cpi->gf_group, &cpi->subgop_stats,
@@ -2848,6 +2863,11 @@ void av2_get_second_pass_params(AV2_COMP *cpi,
       int sframe_dist = oxcf->kf_cfg.sframe_dist;
       int sframe_mode = oxcf->kf_cfg.sframe_mode;
       int enable_sframe = oxcf->kf_cfg.enable_sframe;
+      // S-frames are only allowed in independent mlayers.
+      if (sframe_dist && !is_mlayer_independent(&cpi->common.seq_params,
+                                                cpi->common.mlayer_id)) {
+        sframe_dist = 0;
+      }
       CurrentFrame *const current_frame = &cpi->common.current_frame;
       // ARF_UPDATE and KFFLT_UPDATE is set as S_FRAME in the RA case
       if (sframe_dist && enable_sframe) {
@@ -2911,13 +2931,15 @@ void av2_get_second_pass_params(AV2_COMP *cpi,
       oxcf->kf_cfg.key_freq_min *= cpi->common.number_mlayers;
       oxcf->kf_cfg.key_freq_max *= cpi->common.number_mlayers;
     }
-    if (cpi->gf_state.olk_overlay_last) {
+    if (cpi->gf_state.olk_overlay_last ||
+        cpi->gf_state.fwd_intra_overlay_last) {
       const int kf_offset = -rc->frames_to_key;
-      // The olk key frame has been encoded. Next is the arf.
+      // The forward keyframe (OLK or hidden intra) has been encoded and output.
+      // Next is the arf — no CLK needed at this key period boundary.
       frame_params->frame_type = INTER_FRAME;
       frame_params->frame_params_obu_type = NUM_OBU_TYPES;
-      // Temporarily change decrease key frame interval since we've already seen
-      // the key frame in the OLK.
+      // Temporarily decrease key frame interval since we've already seen
+      // the key frame in the forward keyframe.
       oxcf->kf_cfg.key_freq_min = AVMMAX(
           0, oxcf->kf_cfg.key_freq_min - (int)cpi->common.number_mlayers);
       oxcf->kf_cfg.key_freq_max =
@@ -2925,6 +2947,7 @@ void av2_get_second_pass_params(AV2_COMP *cpi,
                  oxcf->kf_cfg.key_freq_max - (int)cpi->common.number_mlayers);
       find_next_key_frame(cpi, &this_frame);
       rc->frames_since_key += kf_offset * (int)cpi->common.number_mlayers;
+      cpi->gf_state.fwd_intra_overlay_last = 0;
     } else {
       assert(rc->frames_to_key >= -1);
       frame_params->frame_type = KEY_FRAME;
@@ -3026,13 +3049,11 @@ void av2_get_second_pass_params(AV2_COMP *cpi,
         gf_group->update_type[gf_group->size - 1] == OVERLAY_UPDATE;
 
     cpi->no_show_fwd_kf = 0;
+    cpi->is_fwd_kf = 0;
     int src_index = gf_group->arf_src_offset[gf_group->index];
-    if (src_index == cpi->rc.frames_to_key * (int)cpi->common.number_mlayers &&
-        src_index != 0 && cpi->oxcf.kf_cfg.fwd_kf_enabled &&
-        gf_group->size > 1 &&
-        gf_group->update_type[gf_group->index] != FWD_KF_OVERLAY_UPDATE &&
-        gf_group->update_type[gf_group->index] != FWD_KF_SUCCESSOR_UPDATE) {
-      cpi->no_show_fwd_kf = 1;
+    if (is_at_fwd_kf_boundary(cpi, src_index)) {
+      cpi->is_fwd_kf = 1;
+      cpi->no_show_fwd_kf = av2_fwd_kf_should_be_hidden(cpi) ? 1 : 0;
     }
     const int update_type = gf_group->update_type[gf_group->index];
 
@@ -3040,9 +3061,13 @@ void av2_get_second_pass_params(AV2_COMP *cpi,
         !(update_type == ARF_UPDATE || update_type == INTNL_ARF_UPDATE);
 
     if (update_type == ARF_UPDATE) {
-      if (cpi->no_show_fwd_kf) {
-        frame_params->frame_type = KEY_FRAME;
-        frame_params->frame_params_obu_type = OBU_OPEN_LOOP_KEY;
+      if (cpi->is_fwd_kf) {
+        if (cpi->oxcf.kf_cfg.intra_only_fwd_kf) {
+          frame_params->frame_type = INTRA_ONLY_FRAME;
+        } else {
+          frame_params->frame_type = KEY_FRAME;
+          frame_params->frame_params_obu_type = OBU_OPEN_LOOP_KEY;
+        }
       } else {
         frame_params->frame_type = rc->frames_since_key == 0 ? KEY_FRAME
                                    : (frame_params->frame_type == S_FRAME)
diff --git a/av2/encoder/ratectrl.c b/av2/encoder/ratectrl.c
index a3581edfb6..62e9aa3b60 100644
--- a/av2/encoder/ratectrl.c
+++ b/av2/encoder/ratectrl.c
@@ -1598,11 +1598,9 @@ static int rc_pick_q_and_bounds(const AV2_COMP *cpi, int width, int height,
       gf_group->update_type[gf_index] == INTNL_ARF_UPDATE;
 
   if (frame_is_intra_only(cm)) {
-    const int is_fwd_kf = cm->current_frame.frame_type == KEY_FRAME &&
-                          cm->immediate_output_picture == 0 &&
-                          cpi->no_show_fwd_kf;
+    const int is_fwd_kf_local = cpi->is_fwd_kf;
     get_intra_q_and_bounds(cpi, width, height, &active_best_quality,
-                           &active_worst_quality, qp, is_fwd_kf);
+                           &active_worst_quality, qp, is_fwd_kf_local);
 #ifdef STRICT_RC
     active_best_quality = 0;
 #endif
diff --git a/av2/exports_enc b/av2/exports_enc
index aa0f232a64..365035b002 100644
--- a/av2/exports_enc
+++ b/av2/exports_enc
@@ -1,2 +1,6 @@
 data avm_codec_av2_cx_algo
 text avm_codec_av2_cx
+data av2_level_defs
+text avm_wb_write_rice_golomb
+text avm_wb_is_byte_aligned
+text avm_wb_write_uleb
diff --git a/avm/avm_image.h b/avm/avm_image.h
index 0836005fd8..4b770c4ae0 100644
--- a/avm/avm_image.h
+++ b/avm/avm_image.h
@@ -390,6 +390,8 @@ typedef struct avm_image {
   int mlayer_id; /**< mlayer id of image */
   int xlayer_id; /**< xlayer id of image */
   int stream_id; /**< stream index [0..num_streams-1], -1 for global */
+  unsigned int
+      display_order_hint; /**< display order hint for output reordering */
 
   /*!\brief The following member may be set by the application to associate
    * data with this image.
diff --git a/avm/avmcx.h b/avm/avmcx.h
index 5ec1bca1d7..4f7059cce8 100644
--- a/avm/avmcx.h
+++ b/avm/avmcx.h
@@ -1249,6 +1249,56 @@ enum avme_enc_control_id {
    * the restricted_prediction_switch output ordering path.
    */
   AV2E_SET_FORCE_DEFERRED_FRAMES_FOR_RAS_TEST = 185,
+  /*!\brief Codec control function to set obu_xlayer_id for multi-xlayer
+   * encoding, int parameter (0-30).
+   */
+  AVME_SET_XLAYER_ID = 186,
+
+  /*!\brief Codec control function to set mlayer_dependency_present_flag,
+   * unsigned int parameter (0 or 1).
+   */
+  AV2E_SET_MLAYER_DEPENDENCY_PRESENT = 187,
+
+  /*!\brief Codec control function to set mlayer dependency map for a given
+   * mlayer. Takes two parameters: mlayer index (unsigned int) and dependency
+   * bitmask (unsigned int). Bit j set means mlayer depends on mlayer j.
+   */
+  AV2E_SET_MLAYER_DEPENDENCY_MAP = 188,
+
+  /*!\brief Codec control function to enable intra-only forward keyframes.
+   *
+   * When enabled (and fwd_kf_enabled is true), forward keyframes are coded as
+   * INTRA_ONLY_FRAME instead of KEY_FRAME.  This enables open GOP with
+   * monotonic output: the hidden intra frame is shown via SEF, and reference
+   * buffers are not reset.
+   * int parameter.
+   */
+  AV2E_SET_INTRA_ONLY_FWD_KF = 189,
+
+  /*!\brief Codec control function to set per-mlayer color_primaries.
+   * Takes two parameters: mlayer index (unsigned int) and color_primaries
+   * value (unsigned int). Populates ci_params_per_layer[mlayer_idx].
+   */
+  AV2E_SET_MLAYER_COLOR_PRIMARIES = 190,
+
+  /*!\brief Codec control function to set per-mlayer transfer_characteristics.
+   * Takes two parameters: mlayer index (unsigned int) and
+   * transfer_characteristics value (unsigned int).
+   */
+  AV2E_SET_MLAYER_TRANSFER_CHARACTERISTICS = 191,
+
+  /*!\brief Codec control function to set per-mlayer matrix_coefficients.
+   * Takes two parameters: mlayer index (unsigned int) and
+   * matrix_coefficients value (unsigned int).
+   */
+  AV2E_SET_MLAYER_MATRIX_COEFFICIENTS = 192,
+
+  /*!\brief Codec control function to set per-mlayer color_range.
+   * Takes two parameters: mlayer index (unsigned int) and full_range_flag
+   * value (unsigned int, 0=limited, 1=full).
+   */
+  AV2E_SET_MLAYER_COLOR_RANGE = 193,
+
 };
 
 /*!\brief avm 1-D scaling mode
@@ -1776,6 +1826,13 @@ AVM_CTRL_USE_TYPE(AV2E_SET_MONOTONIC_OUTPUT_ORDER, int)
 
 AVM_CTRL_USE_TYPE(AV2E_SET_FORCE_DEFERRED_FRAMES_FOR_RAS_TEST, int)
 #define AVME_CTRL_AV2E_SET_FORCE_DEFERRED_FRAMES_FOR_RAS_TEST
+
+AVM_CTRL_USE_TYPE(AVME_SET_XLAYER_ID, int)
+#define AVM_CTRL_AVME_SET_XLAYER_ID
+
+AVM_CTRL_USE_TYPE(AV2E_SET_INTRA_ONLY_FWD_KF, int)
+#define AVME_CTRL_AV2E_SET_INTRA_ONLY_FWD_KF
+
 /*!\endcond */
 /*! @} - end defgroup avm_encoder */
 #ifdef __cplusplus
diff --git a/avm/avmdx.h b/avm/avmdx.h
index e83a34305f..6856c7974a 100644
--- a/avm/avmdx.h
+++ b/avm/avmdx.h
@@ -162,6 +162,51 @@ typedef struct avm_screen_content_tools_info {
   int force_integer_mv;
 } avm_screen_content_tools_info;
 
+/*!\brief Per-xlayer layer info exposed by AV2D_GET_LCR_INFO.
+ *
+ * Mirrors the key fields from the internal LCR structures for each
+ * extended layer. Populated from Global or Local LCR OBU data.
+ */
+typedef struct avm_xlayer_layer_info {
+  int xlayer_id;      /**< Extended layer ID (0-30) */
+  int layer_type;     /**< 0=texture, 1=auxiliary, 2=stereo, 3=dependent */
+  int auxiliary_type; /**< 0=alpha, 1=depth, 2=segmentation, 3=gain_map
+                           (-1 if N/A) */
+  int view_type;      /**< 0=unspecified, 1=center, 2=left, 3=right,
+                            4=explicit */
+  int max_width;      /**< lcr_max_pic_width from RepresentationInfo */
+  int max_height;     /**< lcr_max_pic_height from RepresentationInfo */
+  int num_mlayers;    /**< Number of embedded (media) layers */
+} avm_xlayer_layer_info_t;
+
+/*!\brief LCR info exposed by AV2D_GET_LCR_INFO.
+ *
+ * Contains layer configuration for all xlayers in the stream,
+ * assembled from Global and/or Local LCR OBUs.
+ */
+typedef struct avm_lcr_info {
+  int num_xlayers;                     /**< Number of xlayers (capacity 31) */
+  avm_xlayer_layer_info_t xlayers[31]; /**< Per-xlayer info */
+} avm_lcr_info_t;
+
+/*!\brief Atlas info exposed by AV2D_GET_ATLAS_INFO.
+ *
+ * Contains atlas canvas dimensions and per-segment placement,
+ * populated from Atlas OBUs.
+ */
+typedef struct avm_atlas_info {
+  int atlas_width;  /**< Canvas width */
+  int atlas_height; /**< Canvas height */
+  int num_segments; /**< Number of atlas segments (array capacity 256) */
+  struct {
+    int xlayer_id; /**< Which xlayer this segment belongs to */
+    int pos_x;     /**< Top-left X position in canvas */
+    int pos_y;     /**< Top-left Y position in canvas */
+    int width;     /**< Segment width */
+    int height;    /**< Segment height */
+  } segments[256]; /**< Per-segment info (MAX_NUM_ATLAS_SEGMENTS) */
+} avm_atlas_info_t;
+
 /*!\brief Structure to hold the external reference frame pointer.
  *
  * Define a structure to hold the external reference frame pointer.
@@ -342,6 +387,33 @@ enum avm_dec_control_id {
 
   AV2D_SET_BRU_OPT_MODE,
 
+  /*!\brief Codec control function to get LCR (Layer Configuration Record)
+   * info, avm_lcr_info_t* parameter
+   *
+   * Returns layer type, auxiliary type, view type, dimensions, and mlayer
+   * count for each extended layer. Populated from Global/Local LCR OBUs.
+   * Returns AVM_CODEC_ERROR if no LCR has been parsed yet.
+   */
+  AV2D_GET_LCR_INFO,
+
+  /*!\brief Codec control function to get Atlas segment info,
+   * avm_atlas_info_t* parameter
+   *
+   * Returns atlas canvas dimensions and per-segment placement (position,
+   * size, xlayer_id). Populated from Atlas OBUs.
+   * Returns AVM_CODEC_ERROR if no Atlas OBU has been parsed yet.
+   */
+  AV2D_GET_ATLAS_INFO,
+
+  /*!\brief Codec control function to get monotonic_output_order_flag,
+   * unsigned int* parameter
+   *
+   * Returns 1 if monotonic_output_order_flag is set in the sequence header,
+   * 0 otherwise. Returns AVM_CODEC_ERROR if no sequence header has been
+   * parsed yet.
+   */
+  AV2D_GET_MONOTONIC_OUTPUT_ORDER,
+
   AVM_DECODER_CTRL_ID_MAX,
 
   /*!\brief Codec control function to check the presence of forward key frames
@@ -512,6 +584,16 @@ AVM_CTRL_USE_TYPE(AV2D_SET_OUTPUT_ALL_LAYERS, int)
 
 AVM_CTRL_USE_TYPE(AV2_SET_INSPECTION_CALLBACK, avm_inspect_init *)
 #define AVM_CTRL_AV2_SET_INSPECTION_CALLBACK
+
+AVM_CTRL_USE_TYPE(AV2D_GET_LCR_INFO, avm_lcr_info_t *)
+#define AVM_CTRL_AV2D_GET_LCR_INFO
+
+AVM_CTRL_USE_TYPE(AV2D_GET_ATLAS_INFO, avm_atlas_info_t *)
+#define AVM_CTRL_AV2D_GET_ATLAS_INFO
+
+AVM_CTRL_USE_TYPE(AV2D_GET_MONOTONIC_OUTPUT_ORDER, unsigned int *)
+#define AVM_CTRL_AV2D_GET_MONOTONIC_OUTPUT_ORDER
+
 /*!\endcond */
 /*! @} - end defgroup avm_decoder */
 #ifdef __cplusplus
diff --git a/cfg/xlayer/annexG2_360degree_9xlayer.json b/cfg/xlayer/annexG2_360degree_9xlayer.json
new file mode 100644
index 0000000000..f1522f0b2a
--- /dev/null
+++ b/cfg/xlayer/annexG2_360degree_9xlayer.json
@@ -0,0 +1,168 @@
+{
+  "comment": "Annex G.2: 360-degree viewport-dependent streaming with subpictures",
+  "comment2": "9 extended layers in a 3x3 grid covering 3840x1920 equirectangular projection",
+  "comment3": "Each xlayer has 3 embedded layers: texture, alpha, depth",
+  "comment4": "Uses LCR (not MSDO, which is limited to 4 streams)",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "subpic_topleft.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 200,
+      "cpu_used": 5,
+      "comment": "Top-left subpicture, low quality (corner)"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "subpic_topcenter.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Top-center subpicture, medium quality (adjacent)"
+    },
+    {
+      "xlayer_id": 2,
+      "input": "subpic_topright.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 200,
+      "cpu_used": 5,
+      "comment": "Top-right subpicture, low quality (corner)"
+    },
+    {
+      "xlayer_id": 3,
+      "input": "subpic_midleft.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Middle-left subpicture, medium quality (adjacent)"
+    },
+    {
+      "xlayer_id": 4,
+      "input": "subpic_center.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 128,
+      "cpu_used": 5,
+      "comment": "CENTER VIEWPORT subpicture, HIGH quality"
+    },
+    {
+      "xlayer_id": 5,
+      "input": "subpic_midright.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Middle-right subpicture, medium quality (adjacent)"
+    },
+    {
+      "xlayer_id": 6,
+      "input": "subpic_botleft.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 200,
+      "cpu_used": 5,
+      "comment": "Bottom-left subpicture, low quality (corner)"
+    },
+    {
+      "xlayer_id": 7,
+      "input": "subpic_botcenter.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Bottom-center subpicture, medium quality (adjacent)"
+    },
+    {
+      "xlayer_id": 8,
+      "input": "subpic_botright.y4m",
+      "width": 1280,
+      "height": 640,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 3,
+      "qp": 200,
+      "cpu_used": 5,
+      "comment": "Bottom-right subpicture, low quality (corner/back-facing)"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false,
+    "comment": "MSDO limited to 4 streams; 9 xlayers requires LCR only"
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [4],
+          "comment": "OP0: Center viewport subpicture only (low bandwidth)"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [1, 3, 4, 5, 7],
+          "comment": "OP1: Center + adjacent subpictures (medium bandwidth)"
+        },
+        {
+          "intent": 2,
+          "xlayer_map": [0, 1, 2, 3, 4, 5, 6, 7, 8],
+          "comment": "OP2: All 9 subpictures - complete sphere coverage (high bandwidth)"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": true,
+    "mode": 0,
+    "comment": "Enhanced Atlas: 3x3 uniform grid composing 3840x1920 equirectangular"
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "360degree_9subpic_muxed.obu"
+}
diff --git a/cfg/xlayer/annexG3_videoconf_3xlayer.json b/cfg/xlayer/annexG3_videoconf_3xlayer.json
new file mode 100644
index 0000000000..de325233f8
--- /dev/null
+++ b/cfg/xlayer/annexG3_videoconf_3xlayer.json
@@ -0,0 +1,90 @@
+{
+  "comment": "Annex G.3: Subpicture composition for video conferencing",
+  "comment2": "3 extended layers: main speaker (high-res) + 2 participants (medium-res)",
+  "comment3": "Atlas composes into a 1920x1080 virtual canvas with non-uniform grid",
+  "comment4": "Main speaker at left (1280x1080), participants stacked at right (640x540 each)",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "main_speaker.y4m",
+      "width": 1280,
+      "height": 1080,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "comment": "Main speaker, high resolution, high bitrate"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "participant2.y4m",
+      "width": 480,
+      "height": 360,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Participant 2, encoded at 480x360, upsampled to 640x540 in atlas"
+    },
+    {
+      "xlayer_id": 2,
+      "input": "participant3.y4m",
+      "width": 640,
+      "height": 540,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Participant 3, medium resolution"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 6,
+    "dependent_xlayers": false,
+    "doh_constraint": true,
+    "comment": "purpose_id=6: Multiview Playback"
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: Main speaker only (mobile/low bandwidth)"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: Main speaker + participant 2"
+        },
+        {
+          "intent": 2,
+          "xlayer_map": [0, 1, 2],
+          "comment": "OP2: All participants - full conferencing view"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": true,
+    "mode": 0,
+    "comment": "Enhanced Atlas: 2x2 non-uniform grid, main speaker spans left column"
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "videoconf_3participant_muxed.obu"
+}
diff --git a/cfg/xlayer/annexG4_roi_scalable_2xlayer.json b/cfg/xlayer/annexG4_roi_scalable_2xlayer.json
new file mode 100644
index 0000000000..72f8277c16
--- /dev/null
+++ b/cfg/xlayer/annexG4_roi_scalable_2xlayer.json
@@ -0,0 +1,76 @@
+{
+  "comment": "Annex G.4: Region-of-interest scalability for sports broadcast",
+  "comment2": "2 extended layers: base stadium view + high-quality field-of-play enhancement",
+  "comment3": "Base layer coded at 1920x1088 (padded for SB alignment), cropped to 1920x1080",
+  "comment4": "Enhancement layer is 1280x720 field-of-play overlaid at center position (320,180)",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "stadium_full.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "comment": "Full stadium view, base quality. Coded as 1920x1088 with 8px bottom padding, cropped to 1920x1080"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "field_of_play.y4m",
+      "width": 1280,
+      "height": 720,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "comment": "Field-of-play region, high quality enhancement. Overlays base at center (320,180)"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: Base layer only - full stadium at base quality (with normative cropping)"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [1],
+          "comment": "OP1: Enhancement only - field-of-play at high quality (no stadium context)"
+        },
+        {
+          "intent": 2,
+          "xlayer_map": [0, 1],
+          "comment": "OP2: Full quality - stadium with high-quality field-of-play overlay"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": true,
+    "mode": 0,
+    "comment": "Enhanced Atlas: 3x3 non-uniform grid. Segment 0 spans all 9 regions (full frame). Segment 1 is center cell only (1280x720 at position 320,180). Enhancement overlays base via lcr_priority_order."
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "sports_roi_scalable_muxed.obu"
+}
diff --git a/cfg/xlayer/stereo_2layer.json b/cfg/xlayer/stereo_2layer.json
new file mode 100644
index 0000000000..9505683c40
--- /dev/null
+++ b/cfg/xlayer/stereo_2layer.json
@@ -0,0 +1,70 @@
+{
+  "comment": "Stereo simulcast: left and right views as separate xlayers. Each xlayer is encoded independently without inter-layer prediction. For stereo with inter-layer prediction, use stereo_embedded_2ml.json instead.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "left_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "stereo",
+      "view_type": "left",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5
+    },
+    {
+      "xlayer_id": 1,
+      "input": "right_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "stereo",
+      "view_type": "right",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: left view only (mono)"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: stereo (both views)"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "stereo_muxed.obu"
+}
diff --git a/cfg/xlayer/stereo_embedded_2ml.json b/cfg/xlayer/stereo_embedded_2ml.json
new file mode 100644
index 0000000000..8ae3d9a36d
--- /dev/null
+++ b/cfg/xlayer/stereo_embedded_2ml.json
@@ -0,0 +1,33 @@
+{
+  "comment": "Stereo via embedded layers: left and right views as 2 mlayers within a single xlayer. mlayer 1 (right) uses inter-layer prediction from mlayer 0 (left) via depends_on. For simulcast stereo without inter-layer prediction, use stereo_2layer.json instead.",
+  "inputs": [
+    { "name": "left", "filename": "left.yuv", "width": 1920, "height": 1080 },
+    { "name": "right", "filename": "right.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "left", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "stereo", "view_type": "left",
+      "color_primaries": 1, "transfer_characteristics": 1,
+      "matrix_coefficients": 1, "full_range_flag": 0,
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1:1", "input_source": "left",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "right",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [0] }
+      ],
+      "qp": 128, "cpu_used": 5 }
+  ],
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "mlayer_info_idc": 2,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0], "mlayer_count": [1] },
+      { "intent": 1, "xlayer_map": [0], "mlayer_count": [2] }
+    ]
+  }],
+  "output": "stereo_embedded.obu"
+}
diff --git a/cfg/xlayer/subpicture_3region.json b/cfg/xlayer/subpicture_3region.json
new file mode 100644
index 0000000000..1195f4bd82
--- /dev/null
+++ b/cfg/xlayer/subpicture_3region.json
@@ -0,0 +1,101 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "region_a.y4m",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 0,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 1,
+      "input": "region_b.y4m",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 960,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 2,
+      "input": "region_c.y4m",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 0,
+      "atlas_pos_y": 540
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: region A only"
+        },
+        {
+          "intent": 0,
+          "xlayer_map": [1],
+          "comment": "OP1: region B only"
+        },
+        {
+          "intent": 0,
+          "xlayer_map": [2],
+          "comment": "OP2: region C only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1, 2],
+          "comment": "OP3: full picture (all regions)"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": true,
+    "mode": 0,
+    "width": 1920,
+    "height": 1080,
+    "uniform_spacing": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "subpicture_3region_muxed.obu"
+}
diff --git a/cfg/xlayer/subpicture_4quadrant.json b/cfg/xlayer/subpicture_4quadrant.json
new file mode 100644
index 0000000000..b7c73722e0
--- /dev/null
+++ b/cfg/xlayer/subpicture_4quadrant.json
@@ -0,0 +1,126 @@
+{
+  "inputs": [
+    {
+      "name": "default",
+      "filename": "video_1920x1080.yuv",
+      "width": 1920,
+      "height": 1080
+    }
+  ],
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 0,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 1,
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 960,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 2,
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 0,
+      "atlas_pos_y": 540
+    },
+    {
+      "xlayer_id": 3,
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "atlas_pos_x": 960,
+      "atlas_pos_y": 540
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: top-left quadrant"
+        },
+        {
+          "intent": 0,
+          "xlayer_map": [1],
+          "comment": "OP1: top-right quadrant"
+        },
+        {
+          "intent": 0,
+          "xlayer_map": [2],
+          "comment": "OP2: bottom-left quadrant"
+        },
+        {
+          "intent": 0,
+          "xlayer_map": [3],
+          "comment": "OP3: bottom-right quadrant"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1, 2, 3],
+          "comment": "OP4: full picture (all quadrants)"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": true,
+    "mode": 0,
+    "width": 1920,
+    "height": 1080,
+    "uniform_spacing": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "subpicture_4quadrant_muxed.obu"
+}
diff --git a/cfg/xlayer/subpicture_embedded_4q.json b/cfg/xlayer/subpicture_embedded_4q.json
new file mode 100644
index 0000000000..ceb596a17a
--- /dev/null
+++ b/cfg/xlayer/subpicture_embedded_4q.json
@@ -0,0 +1,26 @@
+{
+  "inputs": [
+    { "name": "video", "filename": "video.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "video", "width": 960, "height": 540,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "num_embedded_layers": 4,
+      "embedded_layers": [
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 960, "height": 540,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 960, "atlas_pos_y": 0, "width": 960, "height": 540,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 0, "atlas_pos_y": 540, "width": 960, "height": 540,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 960, "atlas_pos_y": 540, "width": 960, "height": 540,
+          "depends_on": [] }
+      ],
+      "qp": 128, "cpu_used": 5 }
+  ],
+  "output": "subpic_embedded.obu"
+}
diff --git a/cfg/xlayer/subpicture_texture_alpha_4q.json b/cfg/xlayer/subpicture_texture_alpha_4q.json
new file mode 100644
index 0000000000..d2a4258f68
--- /dev/null
+++ b/cfg/xlayer/subpicture_texture_alpha_4q.json
@@ -0,0 +1,127 @@
+{
+  "inputs": [
+    {
+      "name": "texture",
+      "filename": "texture_1920x1080.yuv",
+      "width": 1920,
+      "height": 1080
+    },
+    {
+      "name": "alpha",
+      "filename": "alpha_1920x1080.yuv",
+      "width": 1920,
+      "height": 1080
+    }
+  ],
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input_source": "texture",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "atlas_pos_x": 0,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 1,
+      "input_source": "texture",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "texture",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "atlas_pos_x": 960,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 2,
+      "input_source": "alpha",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "alpha",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "atlas_pos_x": 0,
+      "atlas_pos_y": 0
+    },
+    {
+      "xlayer_id": 3,
+      "input_source": "alpha",
+      "width": 960,
+      "height": 540,
+      "profile": 3,
+      "level": 12,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "alpha",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "atlas_pos_x": 960,
+      "atlas_pos_y": 0
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0, 1],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1, 2, 3],
+          "comment": "OP1: texture + alpha"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": true,
+    "mode": 0,
+    "width": 1920,
+    "height": 1080,
+    "uniform_spacing": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "subpic_tex_alpha_muxed.obu"
+}
diff --git a/cfg/xlayer/test_scalable_closed_mono.json b/cfg/xlayer/test_scalable_closed_mono.json
new file mode 100644
index 0000000000..4362d3f27d
--- /dev/null
+++ b/cfg/xlayer/test_scalable_closed_mono.json
@@ -0,0 +1,41 @@
+{
+  "comment": "Test: spatial scalability 2-mlayer, closed GOP, monotonic output. Uses inter-layer prediction (depends_on).",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "BasketballPass_416x240_50.yuv",
+      "width": 416,
+      "height": 240,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1/2", "depends_on": [] },
+        { "scaling_mode": "1:1", "depends_on": [0] }
+      ],
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "gop_mode": 0,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_keyframe_filtering", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "test_scalable_closed_mono.obu"
+}
diff --git a/cfg/xlayer/test_scalable_closed_nonmono.json b/cfg/xlayer/test_scalable_closed_nonmono.json
new file mode 100644
index 0000000000..24e19d4bc3
--- /dev/null
+++ b/cfg/xlayer/test_scalable_closed_nonmono.json
@@ -0,0 +1,41 @@
+{
+  "comment": "Test: spatial scalability 2-mlayer, closed GOP, non-monotonic output. Uses inter-layer prediction (depends_on).",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "BasketballPass_416x240_50.yuv",
+      "width": 416,
+      "height": 240,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1/2", "depends_on": [] },
+        { "scaling_mode": "1:1", "depends_on": [0] }
+      ],
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "gop_mode": 0,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_keyframe_filtering", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "combined_tu": true,
+  "monotonic_output_order": false,
+  "output": "test_scalable_closed_nonmono.obu"
+}
diff --git a/cfg/xlayer/test_scalable_open_leading_nonmono.json b/cfg/xlayer/test_scalable_open_leading_nonmono.json
new file mode 100644
index 0000000000..b6a23c45c8
--- /dev/null
+++ b/cfg/xlayer/test_scalable_open_leading_nonmono.json
@@ -0,0 +1,41 @@
+{
+  "comment": "Test: spatial scalability 2-mlayer, open GOP (OLK + leading), non-monotonic output. Uses inter-layer prediction (depends_on). Requires lag_in_frames > 0 for the forward keyframe.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "BasketballPass_416x240_50.yuv",
+      "width": 416,
+      "height": 240,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1/2", "depends_on": [] },
+        { "scaling_mode": "1:1", "depends_on": [0] }
+      ],
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 19,
+      "kf_max_dist": 9,
+      "gop_mode": 1,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "combined_tu": true,
+  "monotonic_output_order": false,
+  "output": "test_scalable_open_leading_nonmono.obu"
+}
diff --git a/cfg/xlayer/test_scalable_open_sef_mono.json b/cfg/xlayer/test_scalable_open_sef_mono.json
new file mode 100644
index 0000000000..b223a30844
--- /dev/null
+++ b/cfg/xlayer/test_scalable_open_sef_mono.json
@@ -0,0 +1,41 @@
+{
+  "comment": "Test: spatial scalability 2-mlayer, open GOP (INTRA_ONLY_FRAME + SEF), monotonic output. Uses inter-layer prediction (depends_on). The forward keyframe is coded as INTRA_ONLY_FRAME (no reference reset).",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "BasketballPass_416x240_50.yuv",
+      "width": 416,
+      "height": 240,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1/2", "depends_on": [] },
+        { "scaling_mode": "1:1", "depends_on": [0] }
+      ],
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 19,
+      "kf_max_dist": 9,
+      "gop_mode": 2,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "test_scalable_open_sef_mono.obu"
+}
diff --git a/cfg/xlayer/texture_2mlayer_fast.json b/cfg/xlayer/texture_2mlayer_fast.json
new file mode 100644
index 0000000000..1acdee3f0f
--- /dev/null
+++ b/cfg/xlayer/texture_2mlayer_fast.json
@@ -0,0 +1,37 @@
+{
+  "comment": "Fast 2-embedded-layer config for debugging. Uses spatial scalability (1/2 + 1:1) with expensive coding tools disabled via codec_controls.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 416,
+      "height": 240,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "scaling_mode": ["1/2", "1:1"],
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_keyframe_filtering", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_2ml_fast.obu"
+}
diff --git a/cfg/xlayer/texture_alpha_depth_3layer.json b/cfg/xlayer/texture_alpha_depth_3layer.json
new file mode 100644
index 0000000000..1683fda8db
--- /dev/null
+++ b/cfg/xlayer/texture_alpha_depth_3layer.json
@@ -0,0 +1,101 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "texture",
+      "view_type": "center",
+      "color_primaries": 1,
+      "transfer_characteristics": 1,
+      "matrix_coefficients": 1,
+      "full_range_flag": 0,
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5
+    },
+    {
+      "xlayer_id": 1,
+      "input": "alpha_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "alpha",
+      "color_primaries": 1,
+      "transfer_characteristics": 13,
+      "matrix_coefficients": 0,
+      "full_range_flag": 1,
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 140,
+      "cpu_used": 5
+    },
+    {
+      "xlayer_id": 2,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "color_primaries": 1,
+      "transfer_characteristics": 1,
+      "matrix_coefficients": 0,
+      "full_range_flag": 1,
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: texture + alpha"
+        },
+        {
+          "intent": 2,
+          "xlayer_map": [0, 1, 2],
+          "comment": "OP2: texture + alpha + depth (all layers)"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_alpha_depth_muxed.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer.json b/cfg/xlayer/texture_depth_2layer.json
new file mode 100644
index 0000000000..978c99b2d4
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer.json
@@ -0,0 +1,73 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "local_lcr": {
+    "enable": true,
+    "mode": "both"
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: texture + depth"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_muxed.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer_3ml.json b/cfg/xlayer/texture_depth_2layer_3ml.json
new file mode 100644
index 0000000000..ba806f45ef
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer_3ml.json
@@ -0,0 +1,65 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture.y4m",
+      "width": 1920,
+      "height": 1080,
+      "num_embedded_layers": 3,
+      "scaling_mode": ["1/4", "1/2", "1:1"],
+      "layer_type": "texture",
+      "color_primaries": 1,
+      "transfer_characteristics": 1,
+      "matrix_coefficients": 1,
+      "full_range_flag": 0,
+      "qp": 128,
+      "cpu_used": 9
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth.y4m",
+      "width": 1920,
+      "height": 1080,
+      "num_embedded_layers": 1,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "color_primaries": 1,
+      "transfer_characteristics": 1,
+      "matrix_coefficients": 0,
+      "full_range_flag": 1,
+      "qp": 160,
+      "cpu_used": 9
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 6,
+    "doh_constraint": true
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "mlayer_count": [1]
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0],
+          "mlayer_count": [3]
+        },
+        {
+          "intent": 2,
+          "xlayer_map": [0, 1],
+          "mlayer_count": [3, 1]
+        }
+      ]
+    }
+  ],
+  "output": "texture_depth_3ml.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer_clk.json b/cfg/xlayer/texture_depth_2layer_clk.json
new file mode 100644
index 0000000000..2ba23b9698
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer_clk.json
@@ -0,0 +1,77 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "kf_max_dist": 8,
+      "gop_mode": "closed"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "kf_max_dist": 8,
+      "gop_mode": "closed"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "local_lcr": {
+    "enable": true,
+    "mode": "both"
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: texture + depth"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_clk_muxed.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer_fast.json b/cfg/xlayer/texture_depth_2layer_fast.json
new file mode 100644
index 0000000000..081c072d49
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer_fast.json
@@ -0,0 +1,70 @@
+{
+  "comment": "Fast texture + depth config for debugging. Disables expensive coding tools via codec_controls to minimize encode time.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "texture",
+      "qp": 128,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_keyframe_filtering", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "qp": 160,
+      "cpu_used": 9,
+      "lag_in_frames": 0,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_restoration", 0],
+        ["enable_tpl_model", 0],
+        ["enable_keyframe_filtering", 0],
+        ["enable_global_motion", 0],
+        ["enable_warped_motion", 0],
+        ["enable_intrabc", 0],
+        ["enable_palette", 0],
+        ["enable_interintra_comp", 0]
+      ]
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        { "intent": 0, "xlayer_map": [0] },
+        { "intent": 1, "xlayer_map": [0, 1] }
+      ]
+    }
+  ],
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_fast.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer_local_only.json b/cfg/xlayer/texture_depth_2layer_local_only.json
new file mode 100644
index 0000000000..7d6178d44d
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer_local_only.json
@@ -0,0 +1,73 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "local_lcr": {
+    "enable": true,
+    "mode": "local_only"
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: texture + depth"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_local_only_muxed.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer_open_leading.json b/cfg/xlayer/texture_depth_2layer_open_leading.json
new file mode 100644
index 0000000000..a11e612885
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer_open_leading.json
@@ -0,0 +1,77 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "kf_max_dist": 16,
+      "gop_mode": "open_leading"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "kf_max_dist": 16,
+      "gop_mode": "open_leading"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "local_lcr": {
+    "enable": true,
+    "mode": "both"
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: texture + depth"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": false,
+  "output": "texture_depth_open_leading_muxed.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2layer_open_sef.json b/cfg/xlayer/texture_depth_2layer_open_sef.json
new file mode 100644
index 0000000000..b9a1914df8
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2layer_open_sef.json
@@ -0,0 +1,77 @@
+{
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "texture",
+      "view_type": "center",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 128,
+      "cpu_used": 5,
+      "kf_max_dist": 16,
+      "gop_mode": "open_sef"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth_1080p.y4m",
+      "width": 1920,
+      "height": 1080,
+      "profile": 3,
+      "level": 16,
+      "tier": 0,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_temporal_layers": 1,
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "kf_max_dist": 16,
+      "gop_mode": "open_sef"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "dependent_xlayers": false,
+    "doh_constraint": true
+  },
+  "local_lcr": {
+    "enable": true,
+    "mode": "local_only"
+  },
+  "msdo": {
+    "enable": false
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        {
+          "intent": 0,
+          "xlayer_map": [0],
+          "comment": "OP0: texture only"
+        },
+        {
+          "intent": 1,
+          "xlayer_map": [0, 1],
+          "comment": "OP1: texture + depth"
+        }
+      ]
+    }
+  ],
+  "atlas": {
+    "enable": false
+  },
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_open_sef_muxed.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2xl_2ml_closed_mono.json b/cfg/xlayer/texture_depth_2xl_2ml_closed_mono.json
new file mode 100644
index 0000000000..2cf9a099aa
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2xl_2ml_closed_mono.json
@@ -0,0 +1,55 @@
+{
+  "comment": "2 xlayers × 2 mlayers, closed GOP, monotonic output. Texture has spatial scalability (1/2, 1:1). Depth is single-layer. Monotonic: hidden frames (ARF, INTNL) are output via SEF in display order.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "scaling_mode": ["1/2", "1:1"],
+      "qp": 128,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "gop_mode": "closed"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "gop_mode": "closed"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        { "intent": 0, "xlayer_map": [0], "mlayer_count": [1],
+          "comment": "OP0: texture at 1/2 resolution" },
+        { "intent": 1, "xlayer_map": [0], "mlayer_count": [2],
+          "comment": "OP1: texture at full resolution (both mlayers)" },
+        { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [2, 1],
+          "comment": "OP2: texture + depth" }
+      ]
+    }
+  ],
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_2xl_2ml_closed_mono.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2xl_2ml_closed_nonmono.json b/cfg/xlayer/texture_depth_2xl_2ml_closed_nonmono.json
new file mode 100644
index 0000000000..e43ffd7012
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2xl_2ml_closed_nonmono.json
@@ -0,0 +1,55 @@
+{
+  "comment": "2 xlayers × 2 mlayers, closed GOP, non-monotonic output. Texture has spatial scalability (1/2, 1:1). Depth is single-layer. Non-monotonic: ARF/INTNL frames are implicit output (decoder reorders).",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "scaling_mode": ["1/2", "1:1"],
+      "qp": 128,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "gop_mode": "closed"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "gop_mode": "closed"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        { "intent": 0, "xlayer_map": [0], "mlayer_count": [1],
+          "comment": "OP0: texture at 1/2 resolution" },
+        { "intent": 1, "xlayer_map": [0], "mlayer_count": [2],
+          "comment": "OP1: texture at full resolution (both mlayers)" },
+        { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [2, 1],
+          "comment": "OP2: texture + depth" }
+      ]
+    }
+  ],
+  "combined_tu": true,
+  "monotonic_output_order": false,
+  "output": "texture_depth_2xl_2ml_closed_nonmono.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2xl_2ml_open_leading.json b/cfg/xlayer/texture_depth_2xl_2ml_open_leading.json
new file mode 100644
index 0000000000..eed1581ca7
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2xl_2ml_open_leading.json
@@ -0,0 +1,57 @@
+{
+  "comment": "2 xlayers × 2 mlayers, open GOP (OLK + leading pictures), non-monotonic. The forward keyframe is a displayed OLK — it is an implicit output frame (decoder reorders). Frames before the OLK in display order are coded as leading pictures after the OLK in coding order. Requires lag_in_frames > 0 and non-monotonic output.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "scaling_mode": ["1/2", "1:1"],
+      "qp": 128,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "kf_max_dist": 9,
+      "gop_mode": "open_leading"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "kf_max_dist": 9,
+      "gop_mode": "open_leading"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        { "intent": 0, "xlayer_map": [0], "mlayer_count": [1],
+          "comment": "OP0: texture at 1/2 resolution" },
+        { "intent": 1, "xlayer_map": [0], "mlayer_count": [2],
+          "comment": "OP1: texture at full resolution" },
+        { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [2, 1],
+          "comment": "OP2: texture + depth" }
+      ]
+    }
+  ],
+  "combined_tu": true,
+  "monotonic_output_order": false,
+  "output": "texture_depth_2xl_2ml_open_leading.obu"
+}
diff --git a/cfg/xlayer/texture_depth_2xl_2ml_open_sef_mono.json b/cfg/xlayer/texture_depth_2xl_2ml_open_sef_mono.json
new file mode 100644
index 0000000000..62b247dfde
--- /dev/null
+++ b/cfg/xlayer/texture_depth_2xl_2ml_open_sef_mono.json
@@ -0,0 +1,57 @@
+{
+  "comment": "2 xlayers × 2 mlayers, open GOP (hidden INTRA_ONLY_FRAME + SEF), monotonic. The forward keyframe is coded as a hidden INTRA_ONLY_FRAME that does NOT reset reference buffers, enabling inter-prediction across the GOP boundary. The hidden frame is shown via SEF in display order. Requires lag_in_frames > 0.",
+  "xlayers": [
+    {
+      "xlayer_id": 0,
+      "input": "texture.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "texture",
+      "num_embedded_layers": 2,
+      "scaling_mode": ["1/2", "1:1"],
+      "qp": 128,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "kf_max_dist": 9,
+      "gop_mode": "open_sef"
+    },
+    {
+      "xlayer_id": 1,
+      "input": "depth.y4m",
+      "width": 1920,
+      "height": 1080,
+      "layer_type": "auxiliary",
+      "auxiliary_type": "depth",
+      "num_embedded_layers": 1,
+      "qp": 160,
+      "cpu_used": 5,
+      "lag_in_frames": 19,
+      "kf_max_dist": 9,
+      "gop_mode": "open_sef"
+    }
+  ],
+  "global_lcr": {
+    "enable": true,
+    "purpose_id": 0,
+    "doh_constraint": true
+  },
+  "ops": [
+    {
+      "ops_id": 0,
+      "priority": 0,
+      "intent_present": true,
+      "ptl_present": true,
+      "operating_points": [
+        { "intent": 0, "xlayer_map": [0], "mlayer_count": [1],
+          "comment": "OP0: texture at 1/2 resolution" },
+        { "intent": 1, "xlayer_map": [0], "mlayer_count": [2],
+          "comment": "OP1: texture at full resolution" },
+        { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [2, 1],
+          "comment": "OP2: texture + depth" }
+      ]
+    }
+  ],
+  "combined_tu": true,
+  "monotonic_output_order": true,
+  "output": "texture_depth_2xl_2ml_open_sef_mono.obu"
+}
diff --git a/cfg/xlayer/texture_depth_embedded_3ml_2ml.json b/cfg/xlayer/texture_depth_embedded_3ml_2ml.json
new file mode 100644
index 0000000000..1a5d238c7a
--- /dev/null
+++ b/cfg/xlayer/texture_depth_embedded_3ml_2ml.json
@@ -0,0 +1,46 @@
+{
+  "inputs": [
+    { "name": "texture", "filename": "texture.yuv", "width": 1920, "height": 1080 },
+    { "name": "depth", "filename": "depth.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "texture", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "texture",
+      "color_primaries": 1, "transfer_characteristics": 1,
+      "matrix_coefficients": 1, "full_range_flag": 0,
+      "num_embedded_layers": 3,
+      "embedded_layers": [
+        { "scaling_mode": "1/4" },
+        { "scaling_mode": "1/2" },
+        { "scaling_mode": "1:1" }
+      ],
+      "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "input_source": "texture", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "color_primaries": 1, "transfer_characteristics": 1,
+      "matrix_coefficients": 1, "full_range_flag": 0,
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1/2", "input_source": "texture",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "depth",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [],
+          "matrix_coefficients": 0, "full_range_flag": 1 }
+      ],
+      "qp": 160, "cpu_used": 5 }
+  ],
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "mlayer_info_idc": 2,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0], "mlayer_count": [1] },
+      { "intent": 1, "xlayer_map": [0], "mlayer_count": [3] },
+      { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [3, 2] }
+    ]
+  }],
+  "output": "texture_depth_embedded.obu"
+}
diff --git a/common/tu_assembler.c b/common/tu_assembler.c
new file mode 100644
index 0000000000..5e6fdd36fd
--- /dev/null
+++ b/common/tu_assembler.c
@@ -0,0 +1,1631 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include "common/tu_assembler.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avm/avm_codec.h"
+#include "avm/avm_integer.h"
+#include "avm/avmcx.h"
+#include "avm_dsp/bitwriter_buffer.h"
+#include "av2/common/level.h"
+#include "av2/common/obu_util.h"
+
+// Grow the buffer for 'needed' more bytes; -1 on overflow or failed realloc.
+static int ensure_capacity(TUAssembler *ta, size_t needed) {
+  const size_t required = ta->size + needed;
+  if (required < needed) return -1;  // unsigned sum wrapped around
+  if (required <= ta->capacity) return 0;
+  size_t new_cap = ta->capacity * 2 < required ? required : ta->capacity * 2;
+  uint8_t *new_buf = (uint8_t *)realloc(ta->buffer, new_cap);
+  if (!new_buf) return -1;  // ta->buffer is still valid and unchanged
+  ta->buffer = new_buf;
+  ta->capacity = new_cap;
+  return 0;
+}
+
+// Append raw bytes to the assembler buffer
+static int append_bytes(TUAssembler *ta, const uint8_t *data, size_t len) {
+  if (ensure_capacity(ta, len) != 0) return -1;
+  memcpy(ta->buffer + ta->size, data, len);
+  ta->size += len;
+  return 0;
+}
+
+// Write a ULEB128-encoded size value to the buffer
+static int append_uleb128(TUAssembler *ta, uint64_t value) {
+  uint8_t coded[10];
+  size_t coded_size = 0;
+  if (avm_uleb_encode(value, sizeof(coded), coded, &coded_size) != 0) return -1;
+  return append_bytes(ta, coded, coded_size);
+}
+
+// Unpack byte 0 of an OBU header: ext flag (1) | type (5) | tlayer id (2).
+static void parse_obu_header_byte(uint8_t byte, ObuHeader *hdr) {
+  hdr->obu_tlayer_id = byte & 0x03;
+  hdr->type = (OBU_TYPE)((byte & 0x7C) >> 2);
+  hdr->obu_header_extension_flag = byte >> 7;
+}
+
+// Unpack the OBU header extension byte: mlayer id (3) | xlayer id (5).
+static void parse_obu_ext_byte(uint8_t byte, ObuHeader *hdr) {
+  hdr->obu_xlayer_id = byte & 0x1F;
+  hdr->obu_mlayer_id = byte >> 5;
+}
+
+// Emit a 2-byte extended OBU header into dst, substituting 'xlayer_id'.
+static void write_obu_header_with_xlayer(uint8_t *dst, const ObuHeader *hdr,
+                                         int xlayer_id) {
+  const int type_bits = (hdr->type & 0x1F) << 2;
+  const int mlayer_bits = (hdr->obu_mlayer_id & 0x7) << 5;
+  // Byte 0 = ext flag (always 1) | type | tlayer; byte 1 = mlayer | xlayer.
+  dst[0] = (uint8_t)(0x80 | type_bits | (hdr->obu_tlayer_id & 0x3));
+  dst[1] = (uint8_t)(mlayer_bits | (xlayer_id & 0x1F));
+}
+
+// Reset 'ta', allocate its output buffer and cache the xlayer ids, LCR, OPS
+// and atlas data derived from 'mcfg'. Returns 0, or -1 if allocation fails.
+int tu_assembler_init(TUAssembler *ta, const MultiXLayerConfig *mcfg) {
+  memset(ta, 0, sizeof(*ta));
+  ta->buffer = (uint8_t *)malloc(TU_ASM_INITIAL_CAPACITY);
+  if (!ta->buffer) return -1;  // ta stays zeroed: no buffer, capacity 0
+  ta->capacity = TU_ASM_INITIAL_CAPACITY;
+
+  ta->num_xlayers = mcfg->num_xlayers;
+  for (int i = 0; i < mcfg->num_xlayers; i++)
+    ta->xlayer_ids[i] = mcfg->xlayers[i].xlayer_id;
+
+  ta->msdo_enabled = mcfg->enable_msdo;
+  ta->num_ops_sets = mcfg->num_ops_sets;
+  ta->config = mcfg;  // the pointer is stored; the config is not copied
+
+  // Populate Global LCR from config
+  if (mcfg->enable_global_lcr || mcfg->enable_local_lcr) {
+    populate_global_lcr_from_config(mcfg, &ta->global_lcr);
+    // In local_only mode, the Global LCR is present for stream detection
+    // and PTL but does not carry per-xlayer payload — Local LCRs are
+    // the authoritative source.
+    if (mcfg->enable_local_lcr && mcfg->local_lcr_mode == 1) {
+      ta->global_lcr.lcr_global_payload_present_flag = 0;
+    }
+  }
+
+  // Populate OPS from config
+  for (int s = 0; s < mcfg->num_ops_sets; s++) {
+    populate_ops_from_config(&mcfg->ops_sets[s], GLOBAL_XLAYER_ID, mcfg,
+                             &ta->ops_list[s]);
+  }
+
+  // Populate Atlas from config
+  if (mcfg->enable_atlas)
+    populate_atlas_from_config(mcfg, &ta->atlas_info);
+
+  return 0;
+}
+
+// Release the output buffer and reset the size/capacity bookkeeping. The
+// TUAssembler object itself is not freed.
+void tu_assembler_free(TUAssembler *ta) {
+  free(ta->buffer);  // free(NULL) is a no-op, so no guard is required
+  ta->buffer = NULL;
+  ta->capacity = 0;
+  ta->size = 0;
+}
+
+// Append a temporal delimiter OBU that consists of its header byte only.
+// Layout: [ULEB128 size = 1][header: ext=0, type=2, tlayer=0 => 0x08].
+int tu_assembler_write_td(TUAssembler *ta) {
+  static const uint8_t kTemporalDelimiter[] = {
+    1,     // ULEB128 size: one byte follows (the OBU header)
+    0x08,  // OBU_TEMPORAL_DELIMITER (2) << 2
+  };
+  return append_bytes(ta, kTemporalDelimiter, sizeof(kTemporalDelimiter));
+}
+
+// Serialize one xlayer's LCR info into wb (mirrors write_lcr_xlayer_info in
+// bitstream_lcr.c); atlas_id_present gates the atlas/priority/rendering bytes.
+static void tu_asm_write_lcr_xlayer_info(LCRXLayerInfo *xinfo,
+                                         int atlas_id_present,
+                                         struct avm_write_bit_buffer *wb) {
+  avm_wb_write_bit(wb, xinfo->lcr_rep_info_present_flag);  // 4 presence flags
+  avm_wb_write_bit(wb, xinfo->lcr_xlayer_purpose_present_flag);
+  avm_wb_write_bit(wb, xinfo->lcr_xlayer_color_info_present_flag);
+  avm_wb_write_bit(wb, xinfo->lcr_embedded_layer_info_present_flag);
+
+  if (xinfo->lcr_rep_info_present_flag) {  // max size, format, crop window
+    avm_wb_write_uvlc(wb, xinfo->rep_params.lcr_max_pic_width);
+    avm_wb_write_uvlc(wb, xinfo->rep_params.lcr_max_pic_height);
+    avm_wb_write_bit(wb, xinfo->rep_params.lcr_format_info_present_flag);
+    avm_wb_write_bit(wb, xinfo->crop_win.crop_window_present_flag);
+    if (xinfo->rep_params.lcr_format_info_present_flag) {
+      avm_wb_write_uvlc(wb, xinfo->rep_params.lcr_bit_depth_idc);
+      avm_wb_write_uvlc(wb, xinfo->rep_params.lcr_chroma_format_idc);
+    }
+    if (xinfo->crop_win.crop_window_present_flag) {
+      avm_wb_write_uvlc(wb, xinfo->crop_win.crop_win_left_offset);
+      avm_wb_write_uvlc(wb, xinfo->crop_win.crop_win_right_offset);
+      avm_wb_write_uvlc(wb, xinfo->crop_win.crop_win_top_offset);
+      avm_wb_write_uvlc(wb, xinfo->crop_win.crop_win_bottom_offset);
+    }
+  }
+
+  if (xinfo->lcr_xlayer_purpose_present_flag)
+    avm_wb_write_literal(wb, xinfo->lcr_xlayer_purpose_id, 7);
+
+  if (xinfo->lcr_xlayer_color_info_present_flag) {
+    struct XLayerColorInfo *col = &xinfo->xlayer_col_params;
+    avm_wb_write_rice_golomb(wb, col->layer_color_description_idc, 2);
+    if (col->layer_color_description_idc == 0) {
+      avm_wb_write_literal(wb, col->layer_color_primaries, 8);
+      avm_wb_write_literal(wb, col->layer_transfer_characteristics, 8);
+      avm_wb_write_literal(wb, col->layer_matrix_coefficients, 8);
+    }
+    avm_wb_write_bit(wb, col->layer_full_range_flag);
+  }
+
+  // Zero-pad to a byte boundary (a zero-width write when already aligned)
+  avm_wb_write_literal(wb, 0, (8 - wb->bit_offset % 8) % 8);
+
+  if (xinfo->lcr_embedded_layer_info_present_flag) {
+    struct EmbeddedLayerInfo *ml = &xinfo->mlayer_params;
+    avm_wb_write_literal(wb, ml->lcr_mlayer_map, MAX_NUM_MLAYERS);
+    for (int m = 0; m < MAX_NUM_MLAYERS; m++) {
+      if (ml->lcr_mlayer_map & (1 << m)) {
+        avm_wb_write_literal(wb, ml->lcr_tlayer_map[m], MAX_NUM_TLAYERS);
+        if (atlas_id_present) {
+          avm_wb_write_literal(wb, ml->lcr_layer_atlas_segment_id[m], 8);
+          avm_wb_write_literal(wb, ml->lcr_priority_order[m], 8);
+          avm_wb_write_literal(wb, ml->lcr_rendering_method[m], 8);
+        }
+        avm_wb_write_literal(wb, ml->lcr_layer_type[m], 8);
+        if (ml->lcr_layer_type[m] == AUX_LAYER) {
+          avm_wb_write_literal(wb, ml->lcr_auxiliary_type[m], 8);
+        }
+        avm_wb_write_literal(wb, ml->lcr_view_type[m], 8);
+        if (ml->lcr_view_type[m] == VIEW_EXPLICIT) {
+          avm_wb_write_literal(wb, ml->lcr_view_id[m], 8);
+        }
+        if (m > 0) {  // the map is m bits wide, so mlayer 0 codes nothing
+          avm_wb_write_literal(wb, ml->lcr_dependent_layer_map[m], m);
+        }
+        avm_wb_write_bit(wb, ml->lcr_same_sh_max_resolution_flag[m]);
+        if (!ml->lcr_same_sh_max_resolution_flag[m]) {
+          avm_wb_write_uvlc(wb, ml->lcr_max_expected_width[m]);
+          avm_wb_write_uvlc(wb, ml->lcr_max_expected_height[m]);
+        }
+        // Zero-pad so every present mlayer record ends on a byte boundary
+        int remaining = wb->bit_offset % 8;
+        if (remaining != 0) avm_wb_write_literal(wb, 0, 8 - remaining);
+      }
+    }
+  } else {  // no embedded-layer info: atlas fields are coded per xlayer
+    if (atlas_id_present) {
+      avm_wb_write_literal(wb, xinfo->lcr_xlayer_atlas_segment_id, 8);
+      avm_wb_write_literal(wb, xinfo->lcr_xlayer_priority_order, 8);
+      avm_wb_write_literal(wb, xinfo->lcr_xlayer_rendering_method, 8);
+    }
+  }
+}
+
+// Dry-run the payload of xlayer entry 'i' into a scratch buffer and return
+// how many whole bytes it occupies (the value coded as lcr_data_size).
+static uint32_t tu_asm_calculate_lcr_data_size(
+    GlobalLayerConfigurationRecord *glcr, int i) {
+  uint8_t scratch[2048];
+  struct avm_write_bit_buffer probe = { scratch, 0 };
+  const int xlayer_id = glcr->LcrXLayerID[i];
+  const int has_dep_map = glcr->lcr_dependent_xlayers_flag && xlayer_id > 0;
+  if (has_dep_map) {
+    avm_wb_write_unsigned_literal(
+        &probe, glcr->lcr_num_dependent_xlayer_map[i], xlayer_id);
+  }
+  tu_asm_write_lcr_xlayer_info(&glcr->xlayer_info[i],
+                               glcr->lcr_global_atlas_id_present_flag, &probe);
+  return (probe.bit_offset + 7) / 8;
+}
+
+int tu_assembler_write_global_lcr(TUAssembler *ta) {
+  // Build the Global LCR OBU (mirrors write_lcr_global_info() in
+  // bitstream_lcr.c) and append it ULEB128-size-prefixed; 0 ok, -1 on error.
+  uint8_t lcr_buf[4096];  // NOTE(review): writes are not bounds-checked
+  struct avm_write_bit_buffer wb = { lcr_buf, 0 };
+  GlobalLayerConfigurationRecord *glcr = &ta->global_lcr;
+
+  // OBU header with extension (xlayer_id = GLOBAL_XLAYER_ID)
+  avm_wb_write_bit(&wb, 1);  // extension flag
+  avm_wb_write_literal(&wb, OBU_LAYER_CONFIGURATION_RECORD, 5);
+  avm_wb_write_literal(&wb, 0, 2);                 // tlayer
+  avm_wb_write_literal(&wb, 0, 3);                 // mlayer
+  avm_wb_write_literal(&wb, GLOBAL_XLAYER_ID, 5);  // xlayer
+
+  // Global LCR payload — matches write_lcr_global_info() exactly
+  avm_wb_write_literal(&wb, glcr->lcr_global_config_record_id, 3);
+  avm_wb_write_literal(&wb, glcr->lcr_xlayer_map, 31);
+  avm_wb_write_bit(&wb, glcr->lcr_aggregate_info_present_flag);
+  avm_wb_write_bit(&wb, glcr->lcr_seq_profile_tier_level_info_present_flag);
+  avm_wb_write_bit(&wb, glcr->lcr_global_payload_present_flag);
+  avm_wb_write_bit(&wb, glcr->lcr_dependent_xlayers_flag);
+  avm_wb_write_bit(&wb, glcr->lcr_global_atlas_id_present_flag);
+  avm_wb_write_literal(&wb, glcr->lcr_global_purpose_id, 7);
+  avm_wb_write_bit(&wb, glcr->lcr_doh_constraint_flag);
+  avm_wb_write_bit(&wb, glcr->lcr_enforce_tile_alignment_flag);
+  if (glcr->lcr_global_atlas_id_present_flag)
+    avm_wb_write_literal(&wb, glcr->lcr_global_atlas_id, 3);
+  else
+    avm_wb_write_literal(&wb, 0, 3);  // reserved
+  avm_wb_write_literal(&wb, 0, 5);    // reserved
+
+  if (glcr->lcr_aggregate_info_present_flag) {
+    avm_wb_write_literal(&wb, glcr->aggregate_ptl.lcr_config_idc, 6);
+    avm_wb_write_literal(&wb, glcr->aggregate_ptl.lcr_aggregate_level_idx, 5);
+    avm_wb_write_bit(&wb, glcr->aggregate_ptl.lcr_max_tier_flag);
+    avm_wb_write_literal(&wb, glcr->aggregate_ptl.lcr_max_interop, 4);
+  }
+
+  if (glcr->lcr_seq_profile_tier_level_info_present_flag) {
+    for (int i = 0; i < glcr->LcrMaxNumXLayerCount; i++) {
+      avm_wb_write_literal(&wb, glcr->seq_ptl[i].lcr_seq_profile_idc, 5);
+      avm_wb_write_literal(&wb, glcr->seq_ptl[i].lcr_max_level_idx, 5);
+      avm_wb_write_bit(&wb, glcr->seq_ptl[i].lcr_tier_flag);
+      avm_wb_write_literal(&wb, glcr->seq_ptl[i].lcr_max_mlayer_count, 3);
+      avm_wb_write_literal(&wb, glcr->seq_ptl[i].lcr_reserved_2bits, 2);
+    }
+  }
+
+  if (glcr->lcr_global_payload_present_flag) {
+    // First pass: dry-run every entry to learn its size in bytes
+    for (int i = 0; i < glcr->LcrMaxNumXLayerCount; i++) {
+      glcr->lcr_data_size[i] = tu_asm_calculate_lcr_data_size(glcr, i);
+    }
+    for (int i = 0; i < glcr->LcrMaxNumXLayerCount; i++) {
+      avm_wb_write_uleb(&wb, glcr->lcr_data_size[i]);
+      // Payload proper: same field order as the sizing dry run above
+      const uint32_t start_position = wb.bit_offset;
+      int n = glcr->LcrXLayerID[i];
+      if (glcr->lcr_dependent_xlayers_flag && n > 0)
+        avm_wb_write_unsigned_literal(&wb,
+                                      glcr->lcr_num_dependent_xlayer_map[i], n);
+      tu_asm_write_lcr_xlayer_info(&glcr->xlayer_info[i],
+                                   glcr->lcr_global_atlas_id_present_flag, &wb);
+      // Pad remaining bits to match lcr_data_size
+      const uint32_t parsed_bits = wb.bit_offset - start_position;
+      const int remaining =
+          (int)(glcr->lcr_data_size[i] * 8) - (int)parsed_bits;
+      for (int j = 0; j < remaining; j++) avm_wb_write_bit(&wb, 0);
+    }
+  }
+
+  // Extension flag, then the trailing 1 bit (a whole 0x80 byte when aligned)
+  avm_wb_write_bit(&wb, 0);  // lcr_extension_present_flag
+  if (avm_wb_is_byte_aligned(&wb))
+    avm_wb_write_literal(&wb, 0x80, 8);
+  else
+    avm_wb_write_bit(&wb, 1);
+
+  uint32_t obu_payload_size = avm_wb_bytes_written(&wb);
+
+  // Append [uleb128 size][obu data]; the size counts the 2 header bytes too
+  if (append_uleb128(ta, (uint64_t)obu_payload_size) != 0) return -1;
+  return append_bytes(ta, lcr_buf, obu_payload_size);
+}
+
+int tu_assembler_write_local_lcr(TUAssembler *ta, int xlayer_idx) {
+  // Build the Local LCR OBU for LCR entry 'xlayer_idx' (mirrors
+  // write_lcr_local_info() in bitstream_lcr.c) and append it; -1 on error.
+  GlobalLayerConfigurationRecord *glcr = &ta->global_lcr;
+
+  if (xlayer_idx < 0 || xlayer_idx >= glcr->LcrMaxNumXLayerCount) return -1;
+
+  int xlayer_id = glcr->LcrXLayerID[xlayer_idx];
+  uint8_t lcr_buf[4096];  // NOTE(review): writes are not bounds-checked
+  struct avm_write_bit_buffer wb = { lcr_buf, 0 };
+
+  // OBU header with extension (xlayer_id = per-xlayer, NOT global)
+  avm_wb_write_bit(&wb, 1);  // extension flag
+  avm_wb_write_literal(&wb, OBU_LAYER_CONFIGURATION_RECORD, 5);
+  avm_wb_write_literal(&wb, 0, 2);                 // tlayer
+  avm_wb_write_literal(&wb, 0, 3);                 // mlayer
+  avm_wb_write_literal(&wb, xlayer_id & 0x1F, 5);  // xlayer
+
+  // Local LCR payload — matches write_lcr_local_info()
+  avm_wb_write_literal(&wb, glcr->lcr_global_config_record_id,
+                       3);          // lcr_global_id
+  avm_wb_write_literal(&wb, 1, 3);  // lcr_local_id (matches encoder.c:938)
+  avm_wb_write_bit(&wb, 1);         // lcr_profile_tier_level_info_present_flag
+  avm_wb_write_bit(&wb, 0);         // lcr_local_atlas_id_present_flag
+
+  // PTL — reuse same data as Global LCR seq_ptl for this xlayer
+  avm_wb_write_literal(&wb, glcr->seq_ptl[xlayer_idx].lcr_seq_profile_idc, 5);
+  avm_wb_write_literal(&wb, glcr->seq_ptl[xlayer_idx].lcr_max_level_idx, 5);
+  avm_wb_write_bit(&wb, glcr->seq_ptl[xlayer_idx].lcr_tier_flag);
+  avm_wb_write_literal(&wb, glcr->seq_ptl[xlayer_idx].lcr_max_mlayer_count, 3);
+  avm_wb_write_literal(&wb, 0, 2);  // lcr_reserved_2bits
+
+  // Reserved bits (atlas_id not present)
+  avm_wb_write_literal(&wb, 0, 3);  // lcr_reserved_zero_3bits
+  avm_wb_write_literal(&wb, 0, 5);  // lcr_reserved_zero_5bits
+
+  // xlayer_info — identical data to Global LCR to pass decoder validation
+  tu_asm_write_lcr_xlayer_info(&glcr->xlayer_info[xlayer_idx], 0, &wb);
+
+  // Extension flag, then the trailing 1 bit (a whole 0x80 byte when aligned)
+  avm_wb_write_bit(&wb, 0);  // lcr_extension_present_flag
+  if (avm_wb_is_byte_aligned(&wb))
+    avm_wb_write_literal(&wb, 0x80, 8);
+  else
+    avm_wb_write_bit(&wb, 1);
+
+  uint32_t obu_payload_size = avm_wb_bytes_written(&wb);
+
+  if (append_uleb128(ta, (uint64_t)obu_payload_size) != 0) return -1;
+  return append_bytes(ta, lcr_buf, obu_payload_size);
+}
+
+// Append the MSDO OBU when MSDO is enabled (a no-op returning 0 otherwise).
+// Returns 0 on success, -1 on an unencodable stream count or append failure.
+int tu_assembler_write_msdo(TUAssembler *ta) {
+  if (!ta->msdo_enabled) return 0;
+  // The stream count is coded as (count - 2) in 3 bits, so only 2..9 fit.
+  if (ta->num_xlayers < 2 || ta->num_xlayers > 9) return -1;
+
+  // Write MSDO OBU — ported from stream_multiplexer.cc
+  uint8_t msdo_buf[128];
+  struct avm_write_bit_buffer wb = { msdo_buf, 0 };
+
+  // OBU header with extension (xlayer_id = GLOBAL_XLAYER_ID)
+  avm_wb_write_bit(&wb, 1);  // extension flag
+  avm_wb_write_literal(&wb, OBU_MULTI_STREAM_DECODER_OPERATION, 5);
+  avm_wb_write_literal(&wb, 0, 2);                 // tlayer
+  avm_wb_write_literal(&wb, 0, 3);                 // mlayer
+  avm_wb_write_literal(&wb, GLOBAL_XLAYER_ID, 5);  // xlayer
+
+  // MSDO payload
+  avm_wb_write_literal(&wb, ta->num_xlayers - 2, 3);  // num_streams - 2
+  avm_wb_write_literal(&wb, MAIN_420_10_IP1, PROFILE_BITS);
+  avm_wb_write_literal(&wb, SEQ_LEVEL_4_0, LEVEL_BITS);
+  avm_wb_write_bit(&wb, 0);  // tier
+
+  // Even allocation flag
+  avm_wb_write_bit(&wb, 1);  // multistream_even_allocation_flag
+
+  // Per-stream info
+  for (int i = 0; i < ta->num_xlayers; i++) {
+    avm_wb_write_literal(&wb, ta->xlayer_ids[i], XLAYER_BITS);
+    avm_wb_write_literal(&wb, 0, PROFILE_BITS);
+    avm_wb_write_literal(&wb, SEQ_LEVEL_4_0, LEVEL_BITS);
+    avm_wb_write_bit(&wb, 0);  // tier
+  }
+
+  // doh_constraint_flag
+  avm_wb_write_bit(&wb, ta->config->lcr_doh_constraint_flag);
+
+  // Trailing bits: a single 1, then zeros up to the next byte boundary
+  avm_wb_write_bit(&wb, 1);
+  while ((wb.bit_offset % 8) != 0) avm_wb_write_bit(&wb, 0);
+
+  uint32_t obu_size = avm_wb_bytes_written(&wb);
+
+  if (append_uleb128(ta, (uint64_t)obu_size) != 0) return -1;
+  return append_bytes(ta, msdo_buf, obu_size);
+}
+
+// Dry-run operating point 'op_index' into a scratch buffer; returns its byte
+// size (ops_data_size). Mirrors calculate_ops_data_size() in bitstream_ops.c.
+static uint32_t tu_asm_calculate_ops_data_size(const OperatingPointSet *ops,
+                                               int obu_xlayer_id,
+                                               int op_index) {
+  uint8_t temp_buffer[1024];
+  struct avm_write_bit_buffer temp_wb = { temp_buffer, 0 };
+  const OperatingPoint *op = &ops->op[op_index];
+
+  if (ops->ops_intent_present_flag)
+    avm_wb_write_literal(&temp_wb, op->ops_intent_op, 7);
+
+  if (ops->ops_ptl_present_flag) {
+    if (obu_xlayer_id == GLOBAL_XLAYER_ID) {
+      avm_wb_write_literal(&temp_wb, op->ops_config_idc, MULTI_SEQ_CONFIG_BITS);
+      avm_wb_write_literal(&temp_wb, op->ops_aggregate_level_idx, LEVEL_BITS);
+      avm_wb_write_bit(&temp_wb, op->ops_max_tier_flag);
+      avm_wb_write_literal(&temp_wb, op->ops_max_interop, INTEROP_BITS);
+    } else {
+      avm_wb_write_literal(&temp_wb, op->ops_seq_profile_idc[obu_xlayer_id],
+                           PROFILE_BITS);
+      avm_wb_write_literal(&temp_wb, op->ops_level_idx[obu_xlayer_id],
+                           LEVEL_BITS);
+      avm_wb_write_bit(&temp_wb, op->ops_tier_flag[obu_xlayer_id]);
+      avm_wb_write_literal(&temp_wb, op->ops_mlayer_count[obu_xlayer_id], 3);
+      avm_wb_write_literal(&temp_wb, 0, 2);
+    }
+  }
+
+  if (ops->ops_color_info_present_flag) {
+    // Description idc; explicit primaries/transfer/matrix follow only for idc 0
+    avm_wb_write_rice_golomb(&temp_wb, op->color_info.ops_color_description_idc,
+                             2);
+    if (op->color_info.ops_color_description_idc == 0) {
+      avm_wb_write_literal(&temp_wb, op->color_info.ops_color_primaries, 8);
+      avm_wb_write_literal(&temp_wb,
+                           op->color_info.ops_transfer_characteristics, 8);
+      avm_wb_write_literal(&temp_wb, op->color_info.ops_matrix_coefficients, 8);
+    }
+    avm_wb_write_bit(&temp_wb, op->color_info.ops_full_range_flag);
+  }
+
+  avm_wb_write_bit(&temp_wb,
+                   op->ops_decoder_model_info_for_this_op_present_flag);
+
+  int ops_initial_display_delay_present_flag =
+      op->ops_initial_display_delay != BUFFER_POOL_MAX_SIZE;  // else omitted
+  avm_wb_write_bit(&temp_wb, ops_initial_display_delay_present_flag);
+  if (ops_initial_display_delay_present_flag) {
+    avm_wb_write_literal(&temp_wb, op->ops_initial_display_delay - 1, 4);
+  }
+
+  if (obu_xlayer_id == GLOBAL_XLAYER_ID) {  // global OPS: per-xlayer records
+    avm_wb_write_literal(&temp_wb, op->ops_xlayer_map, MAX_NUM_XLAYERS - 1);
+    for (int j = 0; j < MAX_NUM_XLAYERS - 1; j++) {
+      if (op->ops_xlayer_map & (1 << j)) {
+        if (ops->ops_ptl_present_flag) {
+          avm_wb_write_literal(&temp_wb, op->ops_seq_profile_idc[j],
+                               PROFILE_BITS);
+          avm_wb_write_literal(&temp_wb, op->ops_level_idx[j], LEVEL_BITS);
+          avm_wb_write_bit(&temp_wb, op->ops_tier_flag[j]);
+          avm_wb_write_literal(&temp_wb, op->ops_mlayer_count[j], 3);
+          avm_wb_write_literal(&temp_wb, 0, 2);
+        }
+        if (ops->ops_mlayer_info_idc == 1) {
+          avm_wb_write_literal(&temp_wb, op->mlayer_info.ops_mlayer_map[j],
+                               MAX_NUM_MLAYERS);
+          for (int m = 0; m < 8; m++) {
+            if (op->mlayer_info.ops_mlayer_map[j] & (1 << m)) {
+              avm_wb_write_literal(&temp_wb,
+                                   op->mlayer_info.ops_tlayer_map[j][m],
+                                   MAX_NUM_TLAYERS);
+            }
+          }
+        } else if (ops->ops_mlayer_info_idc == 2) {
+          avm_wb_write_bit(&temp_wb, op->ops_mlayer_explicit_info_flag[j]);
+          if (op->ops_mlayer_explicit_info_flag[j]) {
+            avm_wb_write_literal(&temp_wb, op->mlayer_info.ops_mlayer_map[j],
+                                 MAX_NUM_MLAYERS);
+            for (int m = 0; m < 8; m++) {
+              if (op->mlayer_info.ops_mlayer_map[j] & (1 << m)) {
+                avm_wb_write_literal(&temp_wb,
+                                     op->mlayer_info.ops_tlayer_map[j][m],
+                                     MAX_NUM_TLAYERS);
+              }
+            }
+          } else {
+            avm_wb_write_literal(&temp_wb, op->ops_embedded_ops_id[j], 4);
+            avm_wb_write_literal(&temp_wb, op->ops_embedded_op_index[j], 3);
+          }
+        }
+      }
+    }
+  } else {  // single-xlayer OPS: only that xlayer's mlayer/tlayer maps
+    avm_wb_write_literal(&temp_wb,
+                         op->mlayer_info.ops_mlayer_map[obu_xlayer_id],
+                         MAX_NUM_MLAYERS);
+    for (int m = 0; m < 8; m++) {
+      if (op->mlayer_info.ops_mlayer_map[obu_xlayer_id] & (1 << m)) {
+        avm_wb_write_literal(&temp_wb,
+                             op->mlayer_info.ops_tlayer_map[obu_xlayer_id][m],
+                             MAX_NUM_TLAYERS);
+      }
+    }
+  }
+
+  // Zero-pad to a byte boundary, then report the size in whole bytes
+  avm_wb_write_literal(&temp_wb, 0, (8 - temp_wb.bit_offset % 8) % 8);
+  return (temp_wb.bit_offset + 7) / 8;
+}
+
+// Serializes every valid operating point set in ta->ops_list as an
+// OBU_OPERATING_POINT_SET and appends each one, prefixed by its ULEB128 size,
+// to the assembler buffer.
+//
+// xlayer_id: when >= 0 it overrides the set's own obu_xlayer_id, both in the
+//            OBU header and when choosing between the global and the
+//            per-xlayer payload layout; a negative value selects
+//            ops->obu_xlayer_id.
+// Returns 0 on success, or -1 as soon as an append to the assembler fails.
+int tu_assembler_write_ops(TUAssembler *ta, int xlayer_id) {
+  // Spec-compliant OPS OBU serialization matching
+  // av2_write_operating_point_set_obu() in bitstream_ops.c.
+
+  for (int s = 0; s < ta->num_ops_sets; s++) {
+    const OperatingPointSet *ops = &ta->ops_list[s];
+    if (!ops->valid) continue;
+
+    int obu_xlayer_id = (xlayer_id >= 0) ? xlayer_id : ops->obu_xlayer_id;
+
+    // NOTE(review): the OBU is built in a fixed 2048-byte stack buffer and no
+    // bounds check is visible in this function — confirm the worst-case OPS
+    // size (ops_cnt operating points, each with up to MAX_NUM_XLAYERS - 1
+    // xlayer entries) fits.
+    uint8_t ops_buf[2048];
+    struct avm_write_bit_buffer wb = { ops_buf, 0 };
+
+    // OBU header with extension
+    avm_wb_write_bit(&wb, 1);  // extension flag
+    avm_wb_write_literal(&wb, OBU_OPERATING_POINT_SET, 5);
+    avm_wb_write_literal(&wb, 0, 2);                     // tlayer
+    avm_wb_write_literal(&wb, 0, 3);                     // mlayer
+    avm_wb_write_literal(&wb, obu_xlayer_id & 0x1F, 5);  // xlayer
+
+    // OPS payload — mirrors av2_write_operating_point_set_obu()
+    avm_wb_write_bit(&wb, ops->ops_reset_flag);
+    avm_wb_write_literal(&wb, ops->ops_id, OPS_ID_BITS);
+    avm_wb_write_literal(&wb, ops->ops_cnt, OPS_COUNT_BITS);
+
+    // Set-level fields are present only when at least one operating point
+    // follows.
+    if (ops->ops_cnt > 0) {
+      avm_wb_write_literal(&wb, ops->ops_priority, 4);
+      avm_wb_write_literal(&wb, ops->ops_intent, 7);
+      avm_wb_write_bit(&wb, ops->ops_intent_present_flag);
+      avm_wb_write_bit(&wb, ops->ops_ptl_present_flag);
+      avm_wb_write_bit(&wb, ops->ops_color_info_present_flag);
+      // ops_mlayer_info_idc is only signalled for the global xlayer; any
+      // other xlayer gets a literal 0 in the same two bits.
+      if (obu_xlayer_id == GLOBAL_XLAYER_ID) {
+        avm_wb_write_literal(&wb, ops->ops_mlayer_info_idc, 2);
+      } else {
+        avm_wb_write_literal(&wb, 0, 2);
+      }
+    }
+
+    for (int p = 0; p < ops->ops_cnt; p++) {
+      // The const qualifier is cast away here, although op is only read in
+      // this loop.
+      OperatingPoint *op = (OperatingPoint *)&ops->op[p];
+
+      // Calculate and write ops_data_size
+      uint32_t data_size =
+          tu_asm_calculate_ops_data_size(ops, obu_xlayer_id, p);
+      avm_wb_write_uleb(&wb, data_size);
+
+      if (ops->ops_intent_present_flag)
+        avm_wb_write_literal(&wb, op->ops_intent_op, 7);
+
+      // Profile/tier/level: aggregate fields for the global xlayer, otherwise
+      // the entry indexed by this xlayer followed by two zero bits.
+      if (ops->ops_ptl_present_flag) {
+        if (obu_xlayer_id == GLOBAL_XLAYER_ID) {
+          avm_wb_write_literal(&wb, op->ops_config_idc, MULTI_SEQ_CONFIG_BITS);
+          avm_wb_write_literal(&wb, op->ops_aggregate_level_idx, LEVEL_BITS);
+          avm_wb_write_bit(&wb, op->ops_max_tier_flag);
+          avm_wb_write_literal(&wb, op->ops_max_interop, INTEROP_BITS);
+        } else {
+          avm_wb_write_literal(&wb, op->ops_seq_profile_idc[obu_xlayer_id],
+                               PROFILE_BITS);
+          avm_wb_write_literal(&wb, op->ops_level_idx[obu_xlayer_id],
+                               LEVEL_BITS);
+          avm_wb_write_bit(&wb, op->ops_tier_flag[obu_xlayer_id]);
+          avm_wb_write_literal(&wb, op->ops_mlayer_count[obu_xlayer_id], 3);
+          avm_wb_write_literal(&wb, 0, 2);
+        }
+      }
+
+      // Color info: the three 8-bit color fields are written only when
+      // ops_color_description_idc is 0; the full-range flag always follows.
+      if (ops->ops_color_info_present_flag) {
+        avm_wb_write_rice_golomb(&wb, op->color_info.ops_color_description_idc,
+                                 2);
+        if (op->color_info.ops_color_description_idc == 0) {
+          avm_wb_write_literal(&wb, op->color_info.ops_color_primaries, 8);
+          avm_wb_write_literal(&wb, op->color_info.ops_transfer_characteristics,
+                               8);
+          avm_wb_write_literal(&wb, op->color_info.ops_matrix_coefficients, 8);
+        }
+        avm_wb_write_bit(&wb, op->color_info.ops_full_range_flag);
+      }
+
+      avm_wb_write_bit(&wb,
+                       op->ops_decoder_model_info_for_this_op_present_flag);
+
+      // An ops_initial_display_delay equal to BUFFER_POOL_MAX_SIZE is treated
+      // as "not signalled"; otherwise the value minus one is coded in 4 bits.
+      int ops_initial_display_delay_present_flag =
+          op->ops_initial_display_delay != BUFFER_POOL_MAX_SIZE;
+      avm_wb_write_bit(&wb, ops_initial_display_delay_present_flag);
+      if (ops_initial_display_delay_present_flag) {
+        avm_wb_write_literal(&wb, op->ops_initial_display_delay - 1, 4);
+      }
+
+      if (obu_xlayer_id == GLOBAL_XLAYER_ID) {
+        // Global layout: an xlayer bitmap, then one entry per set bit.
+        avm_wb_write_literal(&wb, op->ops_xlayer_map, MAX_NUM_XLAYERS - 1);
+        for (int j = 0; j < MAX_NUM_XLAYERS - 1; j++) {
+          if (op->ops_xlayer_map & (1 << j)) {
+            if (ops->ops_ptl_present_flag) {
+              avm_wb_write_literal(&wb, op->ops_seq_profile_idc[j],
+                                   PROFILE_BITS);
+              avm_wb_write_literal(&wb, op->ops_level_idx[j], LEVEL_BITS);
+              avm_wb_write_bit(&wb, op->ops_tier_flag[j]);
+              avm_wb_write_literal(&wb, op->ops_mlayer_count[j], 3);
+              avm_wb_write_literal(&wb, 0, 2);
+            }
+            // idc 1: mlayer/tlayer maps are always written.
+            // idc 2: a flag selects between explicit maps and a reference to
+            //        another OPS (id + operating point index).
+            // Any other idc writes no mlayer information.
+            if (ops->ops_mlayer_info_idc == 1) {
+              avm_wb_write_literal(&wb, op->mlayer_info.ops_mlayer_map[j],
+                                   MAX_NUM_MLAYERS);
+              for (int m = 0; m < 8; m++) {
+                if (op->mlayer_info.ops_mlayer_map[j] & (1 << m)) {
+                  avm_wb_write_literal(&wb,
+                                       op->mlayer_info.ops_tlayer_map[j][m],
+                                       MAX_NUM_TLAYERS);
+                }
+              }
+            } else if (ops->ops_mlayer_info_idc == 2) {
+              avm_wb_write_bit(&wb, op->ops_mlayer_explicit_info_flag[j]);
+              if (op->ops_mlayer_explicit_info_flag[j]) {
+                avm_wb_write_literal(&wb, op->mlayer_info.ops_mlayer_map[j],
+                                     MAX_NUM_MLAYERS);
+                for (int m = 0; m < 8; m++) {
+                  if (op->mlayer_info.ops_mlayer_map[j] & (1 << m)) {
+                    avm_wb_write_literal(&wb,
+                                         op->mlayer_info.ops_tlayer_map[j][m],
+                                         MAX_NUM_TLAYERS);
+                  }
+                }
+              } else {
+                avm_wb_write_literal(&wb, op->ops_embedded_ops_id[j], 4);
+                avm_wb_write_literal(&wb, op->ops_embedded_op_index[j], 3);
+              }
+            }
+          }
+        }
+      } else {
+        // Per-xlayer layout: only this xlayer's mlayer map and the tlayer map
+        // of each mlayer present in it.
+        avm_wb_write_literal(&wb, op->mlayer_info.ops_mlayer_map[obu_xlayer_id],
+                             MAX_NUM_MLAYERS);
+        for (int m = 0; m < 8; m++) {
+          if (op->mlayer_info.ops_mlayer_map[obu_xlayer_id] & (1 << m)) {
+            avm_wb_write_literal(
+                &wb, op->mlayer_info.ops_tlayer_map[obu_xlayer_id][m],
+                MAX_NUM_TLAYERS);
+          }
+        }
+      }
+
+      // Byte alignment at end of each operating point
+      avm_wb_write_literal(&wb, 0, (8 - wb.bit_offset % 8) % 8);
+    }
+
+    // Extension flag
+    avm_wb_write_bit(&wb, 0);
+
+    // Trailing bits: a full 0x80 byte when already aligned, otherwise a
+    // single 1 bit.
+    if (avm_wb_is_byte_aligned(&wb)) {
+      avm_wb_write_literal(&wb, 0x80, 8);
+    } else {
+      avm_wb_write_bit(&wb, 1);
+    }
+
+    // Emit [ULEB128 size][OBU bytes] into the assembler buffer.
+    uint32_t obu_size = avm_wb_bytes_written(&wb);
+    if (append_uleb128(ta, (uint64_t)obu_size) != 0) return -1;
+    if (append_bytes(ta, ops_buf, obu_size) != 0) return -1;
+  }
+
+  return 0;
+}
+
+// Serializes ta->atlas_info as a global-xlayer OBU_ATLAS_SEGMENT and appends
+// it, prefixed by its ULEB128 size, to the assembler buffer.  Nothing is
+// written (and 0 is returned) when atlas output is disabled in the config or
+// the atlas info is not marked valid.  Returns 0 on success and -1 when the
+// size field cannot be appended; otherwise the result of the final
+// append_bytes() call is returned.
+int tu_assembler_write_atlas(TUAssembler *ta) {
+  AtlasSegmentInfo *const info = &ta->atlas_info;
+  if (!ta->config->enable_atlas || !info->valid) return 0;
+
+  uint8_t scratch[4096];
+  struct avm_write_bit_buffer bits = { scratch, 0 };
+
+  // Two-byte OBU header: extension flag, type, tlayer 0, mlayer 0, and the
+  // global xlayer id.
+  avm_wb_write_bit(&bits, 1);
+  avm_wb_write_literal(&bits, OBU_ATLAS_SEGMENT, 5);
+  avm_wb_write_literal(&bits, 0, 2);
+  avm_wb_write_literal(&bits, 0, 3);
+  avm_wb_write_literal(&bits, GLOBAL_XLAYER_ID, 5);
+
+  // Payload — same field order as av2_write_atlas_segment_info_obu().
+  avm_wb_write_literal(&bits, info->atlas_segment_id, 3);
+  avm_wb_write_uvlc(&bits, info->atlas_segment_mode_idc);
+
+  // Number of segments that get a label further down; stays 0 for modes
+  // other than the two handled here.
+  int segment_count = 0;
+
+  if (info->atlas_segment_mode_idc == ENHANCED_ATLAS) {
+    // Region grid description.
+    struct AtlasRegionInfo *grid = &info->ats_reg_params;
+    avm_wb_write_uvlc(&bits, grid->ats_num_region_columns_minus_1);
+    avm_wb_write_uvlc(&bits, grid->ats_num_region_rows_minus_1);
+    avm_wb_write_bit(&bits, grid->ats_uniform_spacing_flag);
+    if (grid->ats_uniform_spacing_flag) {
+      avm_wb_write_uvlc(&bits, grid->ats_region_width_minus_1);
+      avm_wb_write_uvlc(&bits, grid->ats_region_height_minus_1);
+    } else {
+      for (int col = 0; col <= grid->ats_num_region_columns_minus_1; col++) {
+        avm_wb_write_uvlc(&bits, grid->ats_column_width_minus_1[col]);
+      }
+      for (int row = 0; row <= grid->ats_num_region_rows_minus_1; row++) {
+        avm_wb_write_uvlc(&bits, grid->ats_row_height_minus_1[row]);
+      }
+    }
+
+    // Region-to-segment mapping.
+    struct AtlasRegionToSegmentMapping *seg_map = &info->ats_reg_seg_map;
+    avm_wb_write_bit(&bits, seg_map->ats_single_region_per_atlas_segment_flag);
+    if (seg_map->ats_single_region_per_atlas_segment_flag) {
+      // One segment per region: the count is implied, and the derived value
+      // is stored back into the mapping struct.
+      segment_count = grid->NumRegionsInAtlas;
+      seg_map->ats_num_atlas_segments_minus_1 = segment_count - 1;
+    } else {
+      avm_wb_write_uvlc(&bits, seg_map->ats_num_atlas_segments_minus_1);
+      segment_count = seg_map->ats_num_atlas_segments_minus_1 + 1;
+      for (int seg = 0; seg < segment_count; seg++) {
+        avm_wb_write_uvlc(&bits, seg_map->ats_top_left_region_column[seg]);
+        avm_wb_write_uvlc(&bits, seg_map->ats_top_left_region_row[seg]);
+        avm_wb_write_uvlc(&bits,
+                          seg_map->ats_bottom_right_region_column_offset[seg]);
+        avm_wb_write_uvlc(&bits,
+                          seg_map->ats_bottom_right_region_row_offset[seg]);
+      }
+    }
+  } else if (info->atlas_segment_mode_idc == MULTISTREAM_ATLAS) {
+    // Multistream layout: atlas dimensions, then one rectangle per segment.
+    struct AtlasBasicInfo *layout = &info->ats_basic_info_s;
+    avm_wb_write_bit(&bits, layout->ats_stream_id_present);
+    avm_wb_write_uvlc(&bits, layout->ats_atlas_width);
+    avm_wb_write_uvlc(&bits, layout->ats_atlas_height);
+    avm_wb_write_uvlc(&bits, layout->ats_num_atlas_segments_minus_1);
+
+    segment_count = layout->ats_num_atlas_segments_minus_1 + 1;
+    for (int seg = 0; seg < segment_count; seg++) {
+      if (layout->ats_stream_id_present) {
+        avm_wb_write_literal(&bits, layout->ats_input_stream_id[seg], 5);
+      }
+      avm_wb_write_uvlc(&bits, layout->ats_segment_top_left_pos_x[seg]);
+      avm_wb_write_uvlc(&bits, layout->ats_segment_top_left_pos_y[seg]);
+      avm_wb_write_uvlc(&bits, layout->ats_segment_width[seg]);
+      avm_wb_write_uvlc(&bits, layout->ats_segment_height[seg]);
+    }
+  }
+
+  // Optional explicit segment labels.
+  avm_wb_write_bit(&bits,
+                   info->ats_label_seg.ats_signalled_atlas_segment_ids_flag);
+  if (info->ats_label_seg.ats_signalled_atlas_segment_ids_flag) {
+    for (int seg = 0; seg < segment_count; seg++) {
+      avm_wb_write_literal(&bits, info->ats_label_seg.ats_atlas_segment_id[seg],
+                           ATLAS_LABEL_SEG_ID_BITS);
+    }
+  }
+
+  // ats_extension_present_flag = 0, followed by the trailing bits.
+  avm_wb_write_bit(&bits, 0);
+  if (avm_wb_is_byte_aligned(&bits)) {
+    avm_wb_write_literal(&bits, 0x80, 8);
+  } else {
+    avm_wb_write_bit(&bits, 1);
+  }
+
+  // Emit [ULEB128 size][OBU bytes].
+  uint32_t total_bytes = avm_wb_bytes_written(&bits);
+  if (append_uleb128(ta, (uint64_t)total_bytes) != 0) return -1;
+  return append_bytes(ta, scratch, total_bytes);
+}
+
+// Walks the size-prefixed OBUs in a per-xlayer encoder packet, rewrites each
+// OBU header so it carries xlayer_id, and appends the result to the assembler
+// buffer.  Temporal delimiters and structural OBUs (MSDO, LCR, OPS, atlas)
+// are dropped because the assembler writes those globally.
+//
+// data/size: packet bytes as [ULEB128 size][OBU] repeated.
+// Returns 0 on success (parsing stops quietly at a zero-sized or truncated
+// OBU), or -1 when a size field cannot be decoded, an OBU is too small to
+// hold its own header extension byte, or an append to the assembler fails.
+int tu_assembler_append_xlayer_obus(TUAssembler *ta, int xlayer_id,
+                                    const uint8_t *data, size_t size) {
+  size_t consumed = 0;
+
+  while (consumed < size) {
+    size_t remaining = size - consumed;
+    size_t length_field_size = 0;
+    uint64_t obu_total_size = 0;
+
+    // Read OBU total size (ULEB128)
+    if (avm_uleb_decode(data + consumed, remaining, &obu_total_size,
+                        &length_field_size) != 0) {
+      fprintf(stderr, "OBU size parsing failed at offset %zu\n", consumed);
+      return -1;
+    }
+
+    if (obu_total_size == 0 ||
+        consumed + length_field_size + obu_total_size > size) {
+      break;
+    }
+
+    // Parse OBU header
+    const uint8_t *obu_start = data + consumed + length_field_size;
+    ObuHeader hdr;
+    memset(&hdr, 0, sizeof(hdr));
+    parse_obu_header_byte(obu_start[0], &hdr);
+
+    int obu_header_size = 1;
+    if (hdr.obu_header_extension_flag) {
+      // The extension byte must lie inside this OBU.  Without this check a
+      // 1-byte OBU would be read past its end, and the payload size computed
+      // below (obu_total_size - 2) would wrap around to a huge value.
+      if (obu_total_size < 2) {
+        fprintf(stderr,
+                "OBU at offset %zu is too small for its header extension\n",
+                consumed);
+        return -1;
+      }
+      parse_obu_ext_byte(obu_start[1], &hdr);
+      obu_header_size = 2;
+    }
+
+    consumed += length_field_size + (size_t)obu_total_size;
+
+    // Skip TD OBUs — the assembler writes a single global TD
+    if (hdr.type == OBU_TEMPORAL_DELIMITER) continue;
+
+    // Skip structural OBUs — the assembler generates global versions
+    if (hdr.type == OBU_MULTI_STREAM_DECODER_OPERATION) continue;
+    if (hdr.type == OBU_LAYER_CONFIGURATION_RECORD) continue;
+    if (hdr.type == OBU_OPERATING_POINT_SET) continue;
+    if (hdr.type == OBU_ATLAS_SEGMENT) continue;
+
+    // Rewrite OBU header with xlayer_id and recalculate size
+    // New header is always 2 bytes (extension flag set)
+    uint8_t new_header[2];
+    write_obu_header_with_xlayer(new_header, &hdr, xlayer_id);
+
+    // Payload is everything after the original header; obu_total_size is
+    // known to be >= obu_header_size here, so the subtraction cannot wrap.
+    const uint8_t *payload = obu_start + obu_header_size;
+    size_t payload_size = (size_t)obu_total_size - (size_t)obu_header_size;
+
+    // New OBU total size = 2 (header) + payload_size
+    uint64_t new_obu_total_size = 2 + payload_size;
+
+    // Write: [uleb128 new total size][2-byte header][payload]
+    if (append_uleb128(ta, new_obu_total_size) != 0) return -1;
+    if (append_bytes(ta, new_header, 2) != 0) return -1;
+    if (payload_size > 0) {
+      if (append_bytes(ta, payload, payload_size) != 0) return -1;
+    }
+  }
+
+  return 0;
+}
+
+// Writes the bytes accumulated in the assembler buffer to outfile and, on
+// success, empties the buffer.  An empty buffer is a no-op.  Returns 0 on
+// success and -1 (after reporting to stderr, leaving the buffer untouched)
+// when fewer bytes than requested were written.
+int tu_assembler_flush(TUAssembler *ta, FILE *outfile) {
+  const size_t pending = ta->size;
+  if (pending == 0) return 0;
+
+  const size_t written = fwrite(ta->buffer, 1, pending, outfile);
+  if (written == pending) {
+    ta->size = 0;
+    return 0;
+  }
+
+  fprintf(stderr, "Error: failed to write TU (%zu of %zu bytes)\n", written,
+          pending);
+  return -1;
+}
+
+// Emits the global structural OBUs into the assembler buffer, in the order
+// MSDO, global LCR, global OPS, atlas.  Does nothing unless this is the first
+// output (*first_output != 0) or the TU carries a keyframe; *first_output is
+// cleared whenever the OBUs are emitted.  Per the spec ordering, per-xlayer
+// data (each preceded by its local LCR) follows the global configuration;
+// local LCRs are NOT written here — the caller emits them per xlayer.
+void tu_assembler_write_structural_obus(TUAssembler *ta,
+                                        const MultiXLayerConfig *mcfg,
+                                        int *first_output, int has_keyframe) {
+  // Guard: only the first TU and keyframe TUs carry structural OBUs.
+  if (!*first_output && !has_keyframe) return;
+  *first_output = 0;
+
+  if (mcfg->enable_msdo) {
+    tu_assembler_write_msdo(ta);
+  }
+
+  // The global LCR is written when either LCR flavour is enabled.
+  const int wants_lcr = mcfg->enable_global_lcr || mcfg->enable_local_lcr;
+  if (wants_lcr) {
+    tu_assembler_write_global_lcr(ta);
+  }
+
+  // The OPS writer is always invoked.
+  tu_assembler_write_ops(ta, GLOBAL_XLAYER_ID);
+
+  if (mcfg->enable_atlas) {
+    tu_assembler_write_atlas(ta);
+  }
+}
+
+// Rewrite an OBU's header with a new xlayer_id and append it to the assembler
+// as [ULEB128 new size][2-byte header][payload].
+//
+// obu_start points to the OBU data, obu_size is the total OBU size (header +
+// payload), and obu_header_size is 1 or 2 bytes.
+//
+// A malformed OBU (shorter than its header, or flagged as having an extension
+// byte it has no room for) is reported on stderr and skipped, since
+// subtracting the header size would otherwise wrap around to a huge payload
+// length.  Appending stops at the first append that fails, so a failed size
+// field is never followed by a stray header or payload.
+static void rewrite_and_append_obu(TUAssembler *ta, const uint8_t *obu_start,
+                                   size_t obu_size, int obu_header_size,
+                                   int xlayer_id) {
+  if (obu_header_size < 1 || obu_size < (size_t)obu_header_size) {
+    fprintf(stderr, "Skipping malformed OBU (%zu bytes, %d-byte header)\n",
+            obu_size, obu_header_size);
+    return;
+  }
+
+  ObuHeader hdr;
+  memset(&hdr, 0, sizeof(hdr));
+  parse_obu_header_byte(obu_start[0], &hdr);
+  if (hdr.obu_header_extension_flag) {
+    // Only read the extension byte when it is inside the OBU.
+    if (obu_size < 2) {
+      fprintf(stderr, "Skipping malformed OBU (%zu bytes, %d-byte header)\n",
+              obu_size, obu_header_size);
+      return;
+    }
+    parse_obu_ext_byte(obu_start[1], &hdr);
+  }
+
+  // The rewritten header always has the extension byte, hence 2 bytes.
+  uint8_t new_header[2];
+  write_obu_header_with_xlayer(new_header, &hdr, xlayer_id);
+
+  const uint8_t *payload = obu_start + obu_header_size;
+  size_t payload_size = obu_size - (size_t)obu_header_size;
+  uint64_t new_obu_total_size = 2 + payload_size;
+
+  if (append_uleb128(ta, new_obu_total_size) != 0) return;
+  if (append_bytes(ta, new_header, 2) != 0) return;
+  if (payload_size > 0) append_bytes(ta, payload, payload_size);
+}
+
+// Splits one encoder packet into temporal units at the encoder's own TD
+// boundaries, re-tags every retained OBU with xlayer_id, and flushes each
+// assembled TU to outfile.
+//
+// data/size:    packet bytes as [ULEB128 size][OBU] repeated.
+// first_output: in/out flag forwarded to
+//               tu_assembler_write_structural_obus().
+// Returns the number of TUs flushed (0 when no OBU could be parsed).
+//
+// NOTE(review): the result of tu_assembler_flush() is not checked, so a TU
+// whose write failed is still counted — confirm whether callers need to see
+// that error.
+int tu_assembler_write_split_tus(TUAssembler *ta, const MultiXLayerConfig *mcfg,
+                                 int xlayer_id, const uint8_t *data,
+                                 size_t size, int *first_output,
+                                 FILE *outfile) {
+  // Preserve the encoder's frame order exactly to maintain DPB consistency.
+  //
+  // In multi_layers_lag_test mode, the encoder codes hidden frames (ARF,
+  // INTNL_ARF) followed by the displayable frame for each mlayer, then
+  // repeats for the next mlayer.  It inserts a TD before each group of
+  // frames that belong to the same temporal unit.  We respect these TDs
+  // as TU boundaries, bundling hidden frames with their displayable frame
+  // into a single TU.
+
+  // Single-pass: parse all OBUs into a stack-allocated array.
+  // A typical encoder packet contains at most a few dozen OBUs per TU
+  // (TD + SH + MFH + QM + FGM + CI + BRT + frame OBUs per mlayer).
+  // 256 entries is generous for any realistic configuration.
+  typedef struct {
+    size_t data_offset;  // start of OBU data (after length field)
+    size_t data_size;    // OBU total size (header + payload)
+    int type;
+    int mlayer_id;
+    int is_td;
+    int is_structural;
+    int is_keyframe;
+    int obu_header_size;  // 1 or 2 bytes
+  } ObuEntry;
+
+  enum { MAX_OBU_ENTRIES = 256 };
+  ObuEntry obus[MAX_OBU_ENTRIES];
+  int num_obus = 0;
+
+  {
+    size_t consumed = 0;
+    while (consumed < size && num_obus < MAX_OBU_ENTRIES) {
+      size_t length_field_size = 0;
+      uint64_t obu_total_size = 0;
+      if (avm_uleb_decode(data + consumed, size - consumed, &obu_total_size,
+                          &length_field_size) != 0)
+        break;
+      if (obu_total_size == 0 ||
+          consumed + length_field_size + obu_total_size > size)
+        break;
+      const uint8_t *obu_start = data + consumed + length_field_size;
+      ObuHeader hdr;
+      memset(&hdr, 0, sizeof(hdr));
+      parse_obu_header_byte(obu_start[0], &hdr);
+      if (hdr.obu_header_extension_flag) {
+        // A 1-byte OBU has no room for the extension byte: reading it would
+        // run past the OBU, and the later payload-size computation
+        // (data_size - 2) would wrap.  Treat it like the other malformed
+        // cases and stop parsing.
+        if (obu_total_size < 2) break;
+        parse_obu_ext_byte(obu_start[1], &hdr);
+      }
+
+      obus[num_obus].data_offset = consumed + length_field_size;
+      obus[num_obus].data_size = (size_t)obu_total_size;
+      obus[num_obus].type = hdr.type;
+      obus[num_obus].mlayer_id = hdr.obu_mlayer_id;
+      obus[num_obus].is_td = (hdr.type == OBU_TEMPORAL_DELIMITER);
+      obus[num_obus].is_structural =
+          (hdr.type == OBU_MULTI_STREAM_DECODER_OPERATION ||
+           hdr.type == OBU_LAYER_CONFIGURATION_RECORD ||
+           hdr.type == OBU_OPERATING_POINT_SET ||
+           hdr.type == OBU_ATLAS_SEGMENT);
+      obus[num_obus].is_keyframe = (hdr.type == OBU_CLOSED_LOOP_KEY);
+      obus[num_obus].obu_header_size =
+          1 + (hdr.obu_header_extension_flag ? 1 : 0);
+
+      consumed += length_field_size + (size_t)obu_total_size;
+      num_obus++;
+    }
+
+    // Make truncation visible instead of silently dropping the rest of the
+    // packet when the fixed-size table fills up.
+    if (num_obus == MAX_OBU_ENTRIES && consumed < size) {
+      fprintf(stderr,
+              "Warning: OBU table full (%d entries); %zu trailing bytes not "
+              "processed\n",
+              (int)MAX_OBU_ENTRIES, size - consumed);
+    }
+  }
+
+  if (num_obus == 0) return 0;
+
+  // Respect the encoder's TD placement to form TUs.  The encoder inserts a
+  // TD before each group of hidden + displayable frames that belong to the
+  // same temporal unit.  All frames between two consecutive encoder TDs are
+  // bundled into a single output TU, keeping hidden frames together with
+  // their displayable frame as the spec requires.
+  int tu_count = 0;
+  int tu_started = 0;          // 1 once we've written our TD for the current TU
+  int structural_written = 0;  // 1 once structural OBUs written for current TU
+  int pending_start = -1;      // Start of non-frame OBUs preceding a frame
+
+  for (int i = 0; i < num_obus; i++) {
+    if (obus[i].is_td) {
+      // Encoder TD marks start of a new temporal unit.
+      // Flush the previous TU if one was started.
+      if (tu_started) {
+        tu_assembler_flush(ta, outfile);
+        tu_count++;
+      }
+      // Begin new TU with our own TD.
+      ta->size = 0;
+      tu_assembler_write_td(ta);
+      tu_started = 1;
+      structural_written = 0;
+      pending_start = -1;
+      continue;
+    }
+
+    if (obus[i].is_structural) continue;  // skip encoder structural OBUs
+
+    // Everything that is not one of these header-like OBU types is treated
+    // as a frame OBU.
+    int is_frame = (obus[i].type != OBU_SEQUENCE_HEADER) &&
+                   (obus[i].type != OBU_MULTI_FRAME_HEADER) &&
+                   (obus[i].type != OBU_BUFFER_REMOVAL_TIMING) &&
+                   (obus[i].type != OBU_QUANTIZATION_MATRIX) &&
+                   (obus[i].type != OBU_FILM_GRAIN_MODEL) &&
+                   (obus[i].type != OBU_CONTENT_INTERPRETATION);
+
+    if (!is_frame) {
+      // Track where non-frame OBUs start
+      if (pending_start < 0) pending_start = i;
+      continue;
+    }
+
+    // Frame OBU — append to the current TU.
+    if (!tu_started) {
+      // Frame without a preceding encoder TD (e.g. ml>0 frames in a
+      // separate call).  Start a new TU.
+      ta->size = 0;
+      tu_assembler_write_td(ta);
+      tu_started = 1;
+      structural_written = 0;
+    }
+
+    // Write structural OBUs once per TU, before the first frame that
+    // needs them.  tu_assembler_write_structural_obus() has its own
+    // first_output / keyframe guard, so calling it for each frame is safe
+    // — it will only emit once.
+    if (!structural_written) {
+      tu_assembler_write_structural_obus(ta, mcfg, first_output,
+                                         obus[i].is_keyframe);
+      structural_written = 1;
+    }
+
+    // Write any non-frame OBUs that preceded this frame (SH, etc.)
+    for (int j = (pending_start >= 0 ? pending_start : i); j < i; j++) {
+      if (obus[j].is_td || obus[j].is_structural) continue;
+      rewrite_and_append_obu(ta, data + obus[j].data_offset, obus[j].data_size,
+                             obus[j].obu_header_size, xlayer_id);
+    }
+    pending_start = -1;
+
+    // Write the frame OBU itself with xlayer_id
+    rewrite_and_append_obu(ta, data + obus[i].data_offset, obus[i].data_size,
+                           obus[i].obu_header_size, xlayer_id);
+  }
+
+  // Flush the last TU if one is in progress.
+  if (tu_started) {
+    tu_assembler_flush(ta, outfile);
+    tu_count++;
+  }
+
+  return tu_count;
+}
+
+// Splits a buffer of size-prefixed OBUs into temporal-unit segments.  A
+// segment begins at a temporal delimiter and runs up to (not including) the
+// next one, or to the end of the parsed data.  OBUs ahead of the first TD
+// belong to no segment.  Each recorded segment stores its byte offset, its
+// byte length and whether an OBU_CLOSED_LOOP_KEY was seen inside it.
+//
+// At most max_segs entries of segs are filled; segments found after that are
+// not recorded.  Parsing stops at the first undecodable, zero-sized or
+// truncated OBU.  Returns the number of entries written.
+int tu_assembler_parse_tu_segments(const uint8_t *data, size_t size,
+                                   TUSegmentInfo *segs, int max_segs) {
+  int count = 0;        // entries written to segs so far
+  int open = 0;         // non-zero once a TD has started a segment
+  int saw_key = 0;      // keyframe seen since the last TD
+  size_t start = 0;     // byte offset of the TD that opened the segment
+  size_t pos = 0;       // byte offset of the OBU being examined
+
+  for (;;) {
+    if (pos >= size) break;
+
+    uint64_t obu_len = 0;
+    size_t len_bytes = 0;
+    if (avm_uleb_decode(data + pos, size - pos, &obu_len, &len_bytes) != 0) {
+      break;
+    }
+    if (obu_len == 0 || pos + len_bytes + obu_len > size) {
+      break;
+    }
+
+    // Only the first header byte is needed to learn the OBU type.
+    ObuHeader hdr;
+    memset(&hdr, 0, sizeof(hdr));
+    parse_obu_header_byte(data[pos + len_bytes], &hdr);
+
+    if (hdr.type == OBU_CLOSED_LOOP_KEY) {
+      saw_key = 1;
+    } else if (hdr.type == OBU_TEMPORAL_DELIMITER) {
+      // A TD terminates the segment in progress (if any) and opens the next.
+      if (open && count < max_segs) {
+        TUSegmentInfo *seg = &segs[count];
+        seg->offset = start;
+        seg->size = pos - start;
+        seg->has_keyframe = saw_key;
+        count++;
+      }
+      start = pos;
+      saw_key = 0;
+      open = 1;
+    }
+
+    pos += len_bytes + (size_t)obu_len;
+  }
+
+  // Record whatever segment is still open when parsing ends.
+  if (open && pos > start && count < max_segs) {
+    TUSegmentInfo *seg = &segs[count];
+    seg->offset = start;
+    seg->size = pos - start;
+    seg->has_keyframe = saw_key;
+    count++;
+  }
+
+  return count;
+}
+
+// Prints a one-line banner for TU tu_index followed by one line per OBU in
+// the assembler buffer (type name, xlayer/mlayer/tlayer ids, total bytes) to
+// stdout.  Listing stops at the first OBU whose header or size cannot be
+// read.
+void tu_assembler_print_contents(const TUAssembler *ta, int tu_index) {
+  const uint8_t *const base = ta->buffer;
+  const size_t total = ta->size;
+
+  fprintf(stdout, "--- TU %d [%zu bytes] ---\n", tu_index, total);
+
+  for (size_t off = 0; off < total;) {
+    ObuHeader hdr;
+    size_t body_bytes = 0;    // payload size reported by the reader
+    size_t prefix_bytes = 0;  // bytes the reader consumed before the payload
+    if (avm_read_obu_header_and_size(base + off, total - off, &hdr,
+                                     &body_bytes,
+                                     &prefix_bytes) != AVM_CODEC_OK) {
+      break;
+    }
+
+    const size_t obu_bytes = prefix_bytes + body_bytes;
+    fprintf(stdout, "  %-36s xl:%2d ml:%d tl:%d %4zu bytes\n",
+            avm_obu_type_to_string(hdr.type), hdr.obu_xlayer_id,
+            hdr.obu_mlayer_id, hdr.obu_tlayer_id, obu_bytes);
+    off += obu_bytes;
+  }
+}
+
+// --- Structural OBU content population ---
+
+// Picks the configuration_idc that covers the richest chroma format used by
+// the xlayers selected in xlayer_map (bit n set = xlayer id n included).
+// Values follow Table A.1 in annexA.c:
+//   0 = C_MAIN_420_10 (4:0:0, 4:2:0)
+//   1 = C_MAIN_422_10 (4:0:0, 4:2:0, 4:2:2)
+//   2 = C_MAIN_444_10 (4:0:0, 4:2:0, 4:4:4)
+// A selected MAIN_444_10_IP1 xlayer yields 2 immediately; otherwise any
+// selected MAIN_422_10_IP1 xlayer yields 1; otherwise the result is 0.
+static int derive_config_idc_from_profiles(const MultiXLayerConfig *mcfg,
+                                           uint32_t xlayer_map) {
+  int saw_422 = 0;
+  for (int idx = 0; idx < mcfg->num_xlayers; idx++) {
+    const int xid = mcfg->xlayers[idx].xlayer_id;
+    if ((xlayer_map & (1u << xid)) == 0) continue;  // not part of this set
+
+    const unsigned int profile = mcfg->xlayers[idx].profile;
+    if (profile == MAIN_444_10_IP1) return 2;  // highest value, done
+    if (profile == MAIN_422_10_IP1) saw_422 = 1;
+  }
+  return saw_422 ? 1 : 0;
+}
+
+// Computes the aggregate level index for the xlayers selected in xlayer_map
+// (bit n set = xlayer id n included).  The result is the lowest entry of
+// av2_level_defs that
+//   - is not below the highest individual xlayer level,
+//   - has max_picture_size >= the summed width * height of the xlayers, and
+//   - when mcfg->frame_rate > 0, has max_decode_rate >= the summed
+//     (width * height * frame_rate) of the xlayers.
+// With a frame rate of 0 only the picture size is checked.  If no table entry
+// qualifies, the highest individual level is returned unchanged.
+static int derive_aggregate_level(const MultiXLayerConfig *mcfg,
+                                  uint32_t xlayer_map) {
+  const double fps = mcfg->frame_rate;
+  const int check_rate = fps > 0;
+  int64_t sum_pic_size = 0;
+  int64_t sum_decode_rate = 0;
+  int floor_level = 0;  // highest level among the selected xlayers
+
+  for (int idx = 0; idx < mcfg->num_xlayers; idx++) {
+    const int xid = mcfg->xlayers[idx].xlayer_id;
+    if (!(xlayer_map & (1u << xid))) continue;
+
+    const int64_t area =
+        (int64_t)mcfg->xlayers[idx].width * mcfg->xlayers[idx].height;
+    sum_pic_size += area;
+    if (check_rate) sum_decode_rate += (int64_t)(area * fps);
+
+    const int xl_level = (int)mcfg->xlayers[idx].level;
+    if (xl_level > floor_level) floor_level = xl_level;
+  }
+
+  // Scan upward from the floor; the first level that fits wins.
+  for (int lvl = floor_level; lvl < SEQ_LEVELS; lvl++) {
+    const int too_small = av2_level_defs[lvl].max_picture_size < sum_pic_size;
+    const int too_slow =
+        check_rate && av2_level_defs[lvl].max_decode_rate < sum_decode_rate;
+    if (!too_small && !too_slow) return lvl;
+  }
+  return floor_level;
+}
+
+// Fill a GlobalLayerConfigurationRecord from the multi-xlayer encoder
+// configuration.
+void populate_global_lcr_from_config(const MultiXLayerConfig *mcfg,
+                                     GlobalLayerConfigurationRecord *glcr) {
+  memset(glcr, 0, sizeof(*glcr));
+
+  glcr->lcr_global_config_record_id = 1;
+
+  // Build xlayer_map bitmask and xlayer ID list
+  uint32_t xlayer_map = 0;
+  for (int i = 0; i < mcfg->num_xlayers; i++) {
+    int id = mcfg->xlayers[i].xlayer_id;
+    xlayer_map |= (1u << id);
+    glcr->LcrXLayerID[i] = id;
+  }
+  glcr->lcr_xlayer_map = (int)xlayer_map;
+  glcr->LcrMaxNumXLayerCount = mcfg->num_xlayers;
+
+  glcr->lcr_global_payload_present_flag = 1;
+  glcr->lcr_global_purpose_id = mcfg->lcr_purpose_id;
+  glcr->lcr_dependent_xlayers_flag = mcfg->lcr_dependent_xlayers_flag;
+  glcr->lcr_doh_constraint_flag = mcfg->lcr_doh_constraint_flag;
+  glcr->lcr_seq_profile_tier_level_info_present_flag = 1;
+
+  // Derive aggregate configuration_idc from all xlayers
+  glcr->aggregate_ptl.lcr_config_idc =
+      derive_config_idc_from_profiles(mcfg, (uint32_t)xlayer_map);
+
+  // Populate per-xlayer info
+  for (int i = 0; i < mcfg->num_xlayers; i++) {
+    const XLayerEncConfig *xl = &mcfg->xlayers[i];
+    LCRXLayerInfo *xinfo = &glcr->xlayer_info[i];
+
+    // Representation info (resolution)
+    xinfo->lcr_rep_info_present_flag = 1;
+    xinfo->rep_params.lcr_max_pic_width = (int)xl->width;
+    xinfo->rep_params.lcr_max_pic_height = (int)xl->height;
+
+    // Color info
+    if (xl->color_primaries >= 0) {
+      xinfo->lcr_xlayer_color_info_present_flag = 1;
+      xinfo->xlayer_col_params.layer_color_primaries = xl->color_primaries;
+      xinfo->xlayer_col_params.layer_transfer_characteristics =
+          xl->transfer_characteristics;
+      xinfo->xlayer_col_params.layer_matrix_coefficients =
+          xl->matrix_coefficients;
+      xinfo->xlayer_col_params.layer_full_range_flag = xl->full_range_flag;
+    }
+
+    // Embedded layer info
+    if (xl->num_embedded_layers > 1 || xl->num_temporal_layers > 1) {
+      xinfo->lcr_embedded_layer_info_present_flag = 1;
+      struct EmbeddedLayerInfo *ml = &xinfo->mlayer_params;
+      ml->MLayerCount = xl->num_embedded_layers;
+      // mlayer_map: bitmask of embedded layers present
+      ml->lcr_mlayer_map = (1 << xl->num_embedded_layers) - 1;
+      for (int m = 0; m < xl->num_embedded_layers; m++) {
+        ml->LcrMlayerID[m] = m;
+        ml->lcr_layer_type[m] = xl->layer_type;
+        if (xl->layer_type == AUX_LAYER) {
+          ml->lcr_auxiliary_type[m] = xl->auxiliary_type;
+        }
+        ml->lcr_view_type[m] = xl->view_type;
+        ml->TLayerCount[m] = xl->num_temporal_layers;
+        ml->lcr_tlayer_map[m] = (1 << xl->num_temporal_layers) - 1;
+        // Set resolution flag based on scaling mode.
+        // lcr_max_expected_width/height signals the maximum frame dimensions
+        // that can appear for this mlayer.  For scaled layers, this must be
+        // the xlayer's full resolution (not the scaled size) because the
+        // encoder may produce full-res frames (e.g., on keyframes that reset
+        // the resize state).
+        int sm = xl->scaling_mode[m];
+        if (sm != AVME_NORMAL) {
+          ml->lcr_same_sh_max_resolution_flag[m] = 0;
+          ml->lcr_max_expected_width[m] = (int)xl->width;
+          ml->lcr_max_expected_height[m] = (int)xl->height;
+        } else {
+          ml->lcr_same_sh_max_resolution_flag[m] = 1;
+        }
+        // Populate dependency map from config
+        ml->lcr_dependent_layer_map[m] =
+            resolve_mlayer_dep_mask(&xl->mlayer_sources[m], m);
+      }
+    } else {
+      // Single embedded layer, single temporal layer
+      xinfo->lcr_embedded_layer_info_present_flag = 1;
+      struct EmbeddedLayerInfo *ml = &xinfo->mlayer_params;
+      ml->MLayerCount = 1;
+      ml->lcr_mlayer_map = 1;
+      ml->LcrMlayerID[0] = 0;
+      ml->lcr_layer_type[0] = xl->layer_type;
+      if (xl->layer_type == AUX_LAYER) {
+        ml->lcr_auxiliary_type[0] = xl->auxiliary_type;
+      }
+      ml->lcr_view_type[0] = xl->view_type;
+      ml->TLayerCount[0] = xl->num_temporal_layers;
+      ml->lcr_tlayer_map[0] = (1 << xl->num_temporal_layers) - 1;
+      ml->lcr_same_sh_max_resolution_flag[0] = 1;
+    }
+
+    // Seq profile/tier/level info
+    glcr->seq_ptl[i].lcr_seq_profile_idc = xl->profile;
+    glcr->seq_ptl[i].lcr_max_level_idx = xl->level;
+    glcr->seq_ptl[i].lcr_tier_flag = xl->tier;
+  }
+
+  // Derive aggregate level and tier from all xlayers
+  {
+    int max_tier = 0;
+    for (int i = 0; i < mcfg->num_xlayers; i++) {
+      if ((int)mcfg->xlayers[i].tier > max_tier)
+        max_tier = (int)mcfg->xlayers[i].tier;
+    }
+    glcr->aggregate_ptl.lcr_aggregate_level_idx =
+        derive_aggregate_level(mcfg, (uint32_t)xlayer_map);
+    glcr->aggregate_ptl.lcr_max_tier_flag = max_tier;
+  }
+}
+
+// Fill *ops from one OPS set of the configuration.
+//
+// *ops is always cleared first; when ops_cfg->enable is 0 it is left zeroed
+// (ops->valid == 0).  Otherwise, for every configured operating point:
+//  - XCount/OpsxLayerID are derived from the set bits of xlayer_map;
+//  - with ops_mlayer_info_idc >= 1, the mlayer map covers the first
+//    mlayer_count embedded layers and each of them gets all temporal layers
+//    of the matching xlayer in mcfg (1 if the xlayer is not found);
+//  - ops_config_idc is derived from the constituent xlayers' profiles;
+//  - aggregate level and max tier use the configured value when it is >= 0
+//    and are derived from mcfg otherwise.
+//
+// xlayer_id is stored verbatim in ops->obu_xlayer_id.
+void populate_ops_from_config(const OPSConfig *ops_cfg, int xlayer_id,
+                              const MultiXLayerConfig *mcfg,
+                              OperatingPointSet *ops) {
+  memset(ops, 0, sizeof(*ops));
+  if (!ops_cfg->enable) return;
+
+  // ops_cfg->ops[] holds MAX_OPS_COUNT entries; never walk past it even if
+  // the configured count is out of range.
+  int num_ops = ops_cfg->num_operating_points;
+  if (num_ops < 0) num_ops = 0;
+  if (num_ops > (int)MAX_OPS_COUNT) num_ops = (int)MAX_OPS_COUNT;
+
+  ops->valid = 1;
+  ops->obu_xlayer_id = xlayer_id;
+  ops->ops_id = ops_cfg->ops_id;
+  ops->ops_cnt = num_ops;
+  ops->ops_priority = ops_cfg->priority;
+  ops->ops_intent_present_flag = ops_cfg->intent_present_flag;
+  ops->ops_ptl_present_flag = ops_cfg->ptl_present_flag;
+  ops->ops_color_info_present_flag = ops_cfg->color_info_present_flag;
+  ops->ops_mlayer_info_idc = ops_cfg->mlayer_info_idc;
+
+  for (int p = 0; p < num_ops; p++) {
+    const OperatingPointConfig *opc = &ops_cfg->ops[p];
+    OperatingPoint *op = &ops->op[p];
+
+    op->ops_intent_op = opc->intent;
+    op->ops_xlayer_map = (int)opc->xlayer_map;
+    op->ops_initial_display_delay =
+        BUFFER_POOL_MAX_SIZE;  // default: not present
+
+    // Derive XCount and OpsxLayerID from xlayer_map: bit N set means the
+    // xlayer with ID N belongs to this operating point.
+    op->XCount = 0;
+    for (int bit = 0; bit < (int)(MAX_NUM_XLAYERS - 1); bit++) {
+      if (opc->xlayer_map & (1u << bit)) {
+        op->OpsxLayerID[op->XCount] = bit;
+        op->XCount++;
+      }
+    }
+
+    // Per-xlayer mlayer counts and map derivation.
+    // NOTE(review): mlayer_count[] is read by position within the operating
+    // point (x), whereas embedded_ops_id[]/embedded_op_index[] below are
+    // copied index-for-index - confirm the parser fills both as intended.
+    for (int x = 0; x < op->XCount; x++) {
+      int xl = op->OpsxLayerID[x];
+      int ml_count = opc->mlayer_count[x];
+      op->ops_mlayer_count[xl] = ml_count;
+      // Derive ops_mlayer_map: include the first ml_count mlayers
+      if (ml_count > 0 && ops->ops_mlayer_info_idc >= 1) {
+        op->mlayer_info.ops_mlayer_map[xl] = (1 << ml_count) - 1;
+
+        // The temporal layer count depends only on the xlayer, so look it up
+        // once instead of once per mlayer (1 if the xlayer is not configured).
+        int tl_count = 1;
+        for (int j = 0; j < mcfg->num_xlayers; j++) {
+          if (mcfg->xlayers[j].xlayer_id == xl) {
+            tl_count = mcfg->xlayers[j].num_temporal_layers;
+            break;
+          }
+        }
+        // Default: all temporal layers for each included mlayer
+        const int tlayer_map = (1 << tl_count) - 1;
+        for (int m = 0; m < ml_count; m++) {
+          op->mlayer_info.ops_tlayer_map[xl][m] = tlayer_map;
+        }
+
+        // For idc==2, use explicit info (not embedded OPS references)
+        if (ops->ops_mlayer_info_idc == 2) {
+          op->ops_mlayer_explicit_info_flag[xl] = 1;
+        }
+      }
+    }
+
+    // Derive ops_config_idc from the profiles of constituent xlayers
+    op->ops_config_idc = derive_config_idc_from_profiles(mcfg, opc->xlayer_map);
+
+    // Aggregate level/tier
+    if (opc->aggregate_level_idx >= 0) {
+      op->ops_aggregate_level_idx = opc->aggregate_level_idx;
+    } else {
+      // Derive: find smallest level accommodating summed picture sizes
+      op->ops_aggregate_level_idx =
+          derive_aggregate_level(mcfg, opc->xlayer_map);
+    }
+
+    if (opc->max_tier_flag >= 0) {
+      op->ops_max_tier_flag = opc->max_tier_flag;
+    } else {
+      // Derive: max tier across constituent xlayers (starts at 0 from the
+      // memset above).
+      for (int x = 0; x < op->XCount; x++) {
+        int xl_id = op->OpsxLayerID[x];
+        for (int j = 0; j < mcfg->num_xlayers; j++) {
+          if (mcfg->xlayers[j].xlayer_id == xl_id) {
+            if ((int)mcfg->xlayers[j].tier > op->ops_max_tier_flag)
+              op->ops_max_tier_flag = (int)mcfg->xlayers[j].tier;
+            break;
+          }
+        }
+      }
+    }
+
+    // Embedded OPS references
+    for (int x = 0; x < MAX_NUM_XLAYERS; x++) {
+      op->ops_embedded_ops_id[x] = opc->embedded_ops_id[x];
+      op->ops_embedded_op_index[x] = opc->embedded_op_index[x];
+    }
+  }
+}
+
+// Map the "-1 = auto" atlas position sentinel (any negative value) to 0.
+static int atlas_pos_or_origin(int pos) { return pos >= 0 ? pos : 0; }
+
+// Return the index of `start` in starts[0..count), or -1 if it is absent.
+static int atlas_axis_find(const int *starts, int count, int start) {
+  for (int k = 0; k < count; k++) {
+    if (starts[k] == start) return k;
+  }
+  return -1;
+}
+
+// Append (start, extent) to a grid axis unless `start` is already recorded;
+// the extent of the first xlayer seen at a given start is the one kept.
+static void atlas_axis_add_unique(int *starts, int *extents, int *count,
+                                  int start, int extent) {
+  if (atlas_axis_find(starts, *count, start) >= 0) return;
+  starts[*count] = start;
+  extents[*count] = extent;
+  (*count)++;
+}
+
+// Insertion-sort a grid axis by ascending start, keeping extents paired.
+static void atlas_axis_sort(int *starts, int *extents, int count) {
+  for (int i = 1; i < count; i++) {
+    const int key_start = starts[i];
+    const int key_extent = extents[i];
+    int j = i - 1;
+    while (j >= 0 && starts[j] > key_start) {
+      starts[j + 1] = starts[j];
+      extents[j + 1] = extents[j];
+      j--;
+    }
+    starts[j + 1] = key_start;
+    extents[j + 1] = key_extent;
+  }
+}
+
+// Fill *atlas from the multi-xlayer configuration.
+//
+// *atlas is always cleared first and is left zeroed (atlas->valid == 0) when
+// mcfg->enable_atlas is 0 or when there are no xlayers to lay out.
+//
+// ENHANCED_ATLAS:
+//  - atlas_uniform_spacing != 0: an N x 1 grid whose cells all have the
+//    first xlayer's dimensions, one region per segment;
+//  - otherwise: grid columns/rows are the distinct atlas_pos_x/atlas_pos_y
+//    values of the xlayers (negative = 0), and every xlayer is mapped to the
+//    single grid cell at its position.
+// MULTISTREAM_ATLAS:
+//  - one segment per xlayer, placed at its explicit position or, when
+//    atlas_pos_x is negative, tiled left to right on the top row;
+//  - each canvas dimension comes from the config when positive and is
+//    otherwise derived (sum of widths / maximum height).
+void populate_atlas_from_config(const MultiXLayerConfig *mcfg,
+                                AtlasSegmentInfo *atlas) {
+  memset(atlas, 0, sizeof(*atlas));
+  if (!mcfg->enable_atlas) return;
+
+  const int n = mcfg->num_xlayers;
+  // Without xlayers every "*_minus_1" field below would become -1; report
+  // "no atlas" instead of an inconsistent one.
+  if (n <= 0) return;
+
+  atlas->valid = 1;
+  atlas->obu_xlayer_id = GLOBAL_XLAYER_ID;
+  atlas->atlas_segment_id = 1;
+  atlas->atlas_segment_mode_idc = mcfg->atlas_mode;
+
+  if (mcfg->atlas_mode == ENHANCED_ATLAS) {
+    // Enhanced Atlas: region grid from xlayer count/dimensions
+    struct AtlasRegionInfo *reg = &atlas->ats_reg_params;
+
+    if (mcfg->atlas_uniform_spacing) {
+      // Auto-grid: N columns x 1 row, using the first xlayer's dimensions as
+      // the uniform region size.
+      const int cell_w = (int)mcfg->xlayers[0].width;
+      const int cell_h = (int)mcfg->xlayers[0].height;
+
+      reg->ats_uniform_spacing_flag = 1;
+      reg->ats_num_region_columns_minus_1 = n - 1;
+      reg->ats_num_region_rows_minus_1 = 0;
+      reg->ats_region_width_minus_1 = cell_w - 1;
+      reg->ats_region_height_minus_1 = cell_h - 1;
+      reg->NumRegionsInAtlas = n;
+
+      // Derive atlas dimensions
+      reg->AtlasWidth = cell_w * n;
+      reg->AtlasHeight = cell_h;
+
+      // Single region per atlas segment (one xlayer per region)
+      atlas->ats_reg_seg_map.ats_single_region_per_atlas_segment_flag = 1;
+      atlas->ats_reg_seg_map.ats_num_atlas_segments_minus_1 = n - 1;
+    } else {
+      // Explicit positions: derive grid from per-xlayer atlas_pos_x/y.
+      // The distinct X (Y) origins become the grid columns (rows).
+      reg->ats_uniform_spacing_flag = 0;
+
+      int col_x[MAX_NUM_XLAYERS];
+      int col_w[MAX_NUM_XLAYERS];
+      int num_cols = 0;
+      int row_y[MAX_NUM_XLAYERS];
+      int row_h[MAX_NUM_XLAYERS];
+      int num_rows = 0;
+
+      for (int i = 0; i < n; i++) {
+        const XLayerEncConfig *xl = &mcfg->xlayers[i];
+        atlas_axis_add_unique(col_x, col_w, &num_cols,
+                              atlas_pos_or_origin(xl->atlas_pos_x),
+                              (int)xl->width);
+        atlas_axis_add_unique(row_y, row_h, &num_rows,
+                              atlas_pos_or_origin(xl->atlas_pos_y),
+                              (int)xl->height);
+      }
+
+      // Order columns left to right and rows top to bottom
+      atlas_axis_sort(col_x, col_w, num_cols);
+      atlas_axis_sort(row_y, row_h, num_rows);
+
+      reg->ats_num_region_columns_minus_1 = num_cols - 1;
+      reg->ats_num_region_rows_minus_1 = num_rows - 1;
+      for (int c = 0; c < num_cols; c++)
+        reg->ats_column_width_minus_1[c] = col_w[c] - 1;
+      for (int r = 0; r < num_rows; r++)
+        reg->ats_row_height_minus_1[r] = row_h[r] - 1;
+      reg->NumRegionsInAtlas = num_cols * num_rows;
+
+      // Use explicit region-to-segment mapping since not all grid cells
+      // may be occupied (e.g., 3 regions in a 2x2 grid).
+      atlas->ats_reg_seg_map.ats_single_region_per_atlas_segment_flag = 0;
+      atlas->ats_reg_seg_map.ats_num_atlas_segments_minus_1 = n - 1;
+
+      // Map each xlayer to its grid cell
+      for (int i = 0; i < n; i++) {
+        const XLayerEncConfig *xl = &mcfg->xlayers[i];
+        int col_idx = atlas_axis_find(col_x, num_cols,
+                                      atlas_pos_or_origin(xl->atlas_pos_x));
+        int row_idx = atlas_axis_find(row_y, num_rows,
+                                      atlas_pos_or_origin(xl->atlas_pos_y));
+        // Every position was inserted above, so the lookups succeed; fall
+        // back to cell 0 regardless to keep the indices in range.
+        if (col_idx < 0) col_idx = 0;
+        if (row_idx < 0) row_idx = 0;
+
+        atlas->ats_reg_seg_map.ats_top_left_region_column[i] = col_idx;
+        atlas->ats_reg_seg_map.ats_top_left_region_row[i] = row_idx;
+        atlas->ats_reg_seg_map.ats_bottom_right_region_column_offset[i] = 0;
+        atlas->ats_reg_seg_map.ats_bottom_right_region_row_offset[i] = 0;
+        // Derived fields
+        atlas->ats_reg_seg_map.ats_bottom_right_region_column[i] = col_idx;
+        atlas->ats_reg_seg_map.ats_bottom_right_region_row[i] = row_idx;
+      }
+    }
+
+    // No signalled segment IDs
+    atlas->ats_label_seg.ats_signalled_atlas_segment_ids_flag = 0;
+
+  } else if (mcfg->atlas_mode == MULTISTREAM_ATLAS) {
+    // Multistream Atlas: per-segment positions from xlayer config
+    struct AtlasBasicInfo *basic = &atlas->ats_basic_info_s;
+    atlas->ats_basic_info = basic;
+
+    basic->ats_stream_id_present = 1;
+    basic->ats_num_atlas_segments_minus_1 = n - 1;
+
+    // Canvas size.  atlas_width and atlas_height are each "0 = derive", so
+    // resolve them independently; the derived values assume horizontal
+    // tiling (sum of widths, maximum height).
+    int total_w = 0;
+    int max_h = 0;
+    for (int i = 0; i < n; i++) {
+      total_w += (int)mcfg->xlayers[i].width;
+      if ((int)mcfg->xlayers[i].height > max_h)
+        max_h = (int)mcfg->xlayers[i].height;
+    }
+    basic->ats_atlas_width =
+        mcfg->atlas_width > 0 ? mcfg->atlas_width : total_w;
+    basic->ats_atlas_height =
+        mcfg->atlas_height > 0 ? mcfg->atlas_height : max_h;
+    basic->AtlasWidth = basic->ats_atlas_width;
+    basic->AtlasHeight = basic->ats_atlas_height;
+
+    // Per-segment info
+    int auto_x = 0;
+    for (int i = 0; i < n; i++) {
+      const XLayerEncConfig *xl = &mcfg->xlayers[i];
+      basic->ats_input_stream_id[i] = xl->xlayer_id;
+      basic->ats_segment_width[i] = (int)xl->width;
+      basic->ats_segment_height[i] = (int)xl->height;
+
+      if (xl->atlas_pos_x >= 0) {
+        // Explicit placement.  atlas_pos_y may still hold the -1 "auto"
+        // sentinel when only X was configured; treat that as the top row
+        // rather than emitting a negative coordinate.
+        basic->ats_segment_top_left_pos_x[i] = xl->atlas_pos_x;
+        basic->ats_segment_top_left_pos_y[i] =
+            atlas_pos_or_origin(xl->atlas_pos_y);
+      } else {
+        // Auto-place: horizontal tiling
+        basic->ats_segment_top_left_pos_x[i] = auto_x;
+        basic->ats_segment_top_left_pos_y[i] = 0;
+      }
+      // The running X offset advances past every xlayer, including the
+      // explicitly placed ones.
+      auto_x += (int)xl->width;
+    }
+
+    // No signalled segment IDs
+    atlas->ats_label_seg.ats_signalled_atlas_segment_ids_flag = 0;
+  }
+}
diff --git a/common/tu_assembler.h b/common/tu_assembler.h
new file mode 100644
index 0000000000..16eea688c0
--- /dev/null
+++ b/common/tu_assembler.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#ifndef AVM_COMMON_TU_ASSEMBLER_H_
+#define AVM_COMMON_TU_ASSEMBLER_H_
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "av2/common/enums.h"
+#include "av2/common/av2_common_int.h"
+#include "common/xlayer_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// 256 KiB.  NOTE(review): presumably the initial allocation size of
+// TUAssembler::buffer - confirm in tu_assembler_init().
+#define TU_ASM_INITIAL_CAPACITY (256 * 1024)
+
+// State used to assemble one temporal unit (TU): structural OBUs and
+// per-xlayer encoder output are written into `buffer`, which
+// tu_assembler_flush() then writes to the output file.
+typedef struct TUAssembler {
+  uint8_t *buffer;  // assembled TU bytes
+  size_t size;      // bytes currently assembled; reset to 0 by flush
+  size_t capacity;  // NOTE(review): presumably the allocated size of buffer
+  int num_xlayers;  // NOTE(review): presumably the valid entries of xlayer_ids
+  int xlayer_ids[MAX_NUM_XLAYERS - 1];
+  // Structural OBU data populated from config
+  GlobalLayerConfigurationRecord global_lcr;
+  int msdo_enabled;
+  int num_ops_sets;
+  OperatingPointSet ops_list[MAX_NUM_OPS_ID];
+  AtlasSegmentInfo atlas_info;
+  // Reference to the config for OBU population (pointer only; the struct
+  // declares it const and does not own a copy here)
+  const MultiXLayerConfig *config;
+} TUAssembler;
+
+// NOTE(review): except for tu_assembler_write_split_tus(), the int return
+// convention of the functions below is not documented in this header
+// (presumably 0 on success) - confirm against tu_assembler.c.
+
+// Initialize assembler from multi-xlayer config
+int tu_assembler_init(TUAssembler *ta, const MultiXLayerConfig *mcfg);
+
+// Free assembler resources
+void tu_assembler_free(TUAssembler *ta);
+
+// Write a Temporal Delimiter OBU (xlayer_id=31)
+int tu_assembler_write_td(TUAssembler *ta);
+
+// Write a Global LCR OBU
+int tu_assembler_write_global_lcr(TUAssembler *ta);
+
+// Write a Local LCR OBU for the specified xlayer config index.
+// The xlayer_info is copied from the Global LCR to ensure decoder-side
+// consistency validation passes when both Global and Local LCRs are present.
+// Note that xlayer_idx is an index into the config, not an xlayer ID.
+int tu_assembler_write_local_lcr(TUAssembler *ta, int xlayer_idx);
+
+// Write an MSDO OBU
+int tu_assembler_write_msdo(TUAssembler *ta);
+
+// Write an OPS OBU for the specified xlayer_id
+int tu_assembler_write_ops(TUAssembler *ta, int xlayer_id);
+
+// Write an Atlas OBU
+int tu_assembler_write_atlas(TUAssembler *ta);
+
+// Append per-xlayer OBUs from an encoder packet, rewriting OBU headers
+// with the given xlayer_id. Skips per-xlayer TDs and structural OBUs.
+// `data`/`size` describe the encoder packet.
+int tu_assembler_append_xlayer_obus(TUAssembler *ta, int xlayer_id,
+                                    const uint8_t *data, size_t size);
+
+// Flush the assembled buffer to the output file and reset size to 0
+int tu_assembler_flush(TUAssembler *ta, FILE *outfile);
+
+// Write structural OBUs (LCR, OPS, Atlas) into the assembler buffer.
+// Emits once per TU: only when *first_output is set or has_keyframe is true.
+// NOTE(review): first_output is passed by pointer, presumably so it can be
+// cleared after the first emission - confirm against the implementation.
+void tu_assembler_write_structural_obus(TUAssembler *ta,
+                                        const MultiXLayerConfig *mcfg,
+                                        int *first_output, int has_keyframe);
+
+// Split encoder output at internal TD boundaries and write each segment as
+// a separate TU.  This is used for multi_layers_lag_test mode where the
+// encoder inserts TDs between implicit_output frames at different OrderHints
+// to satisfy the DOH constraint.  Each segment gets its own TD, structural
+// OBUs (on first_output or keyframe), and xlayer-rewritten frame data.
+// Returns the number of TUs written, or -1 on error.
+int tu_assembler_write_split_tus(TUAssembler *ta, const MultiXLayerConfig *mcfg,
+                                 int xlayer_id, const uint8_t *data,
+                                 size_t size, int *first_output, FILE *outfile);
+
+// Segment count limit.  NOTE(review): presumably the capacity callers use
+// for the segs[] array passed to tu_assembler_parse_tu_segments() - confirm
+// at the call sites.
+#define MAX_TU_SEGMENTS 64
+
+// A parsed TU segment: a contiguous byte range of OBU data between two
+// consecutive TD boundaries in an encoder's output.
+typedef struct TUSegmentInfo {
+  size_t offset;     // start offset in the source data
+  size_t size;       // byte size of this segment (including the TD)
+  int has_keyframe;  // 1 if segment contains a keyframe OBU
+} TUSegmentInfo;
+
+// Parse encoder output into TU segments split at TD boundaries.
+// Each segment spans from one TD to the next (or end of data).
+// `segs` receives the segments and `max_segs` is its capacity.
+// Returns the number of segments found (stored in segs[]), or -1 on error.
+int tu_assembler_parse_tu_segments(const uint8_t *data, size_t size,
+                                   TUSegmentInfo *segs, int max_segs);
+
+// Print a summary of all OBUs in the current assembled TU buffer to stdout.
+// Must be called before tu_assembler_flush() (which resets the buffer).
+// tu_index is caller-supplied; NOTE(review): presumably only used to label
+// the printed output - confirm against the implementation.
+void tu_assembler_print_contents(const TUAssembler *ta, int tu_index);
+
+// Populate a GlobalLayerConfigurationRecord from config
+void populate_global_lcr_from_config(const MultiXLayerConfig *mcfg,
+                                     GlobalLayerConfigurationRecord *glcr);
+
+// Populate an OperatingPointSet from config.
+// *ops is cleared first and stays zeroed (valid == 0) when ops_cfg->enable
+// is 0; xlayer_id is stored in ops->obu_xlayer_id.
+void populate_ops_from_config(const OPSConfig *ops_cfg, int xlayer_id,
+                              const MultiXLayerConfig *mcfg,
+                              OperatingPointSet *ops);
+
+// Populate AtlasSegmentInfo from config.
+// *atlas is cleared first and stays zeroed (valid == 0) when
+// mcfg->enable_atlas is 0.
+void populate_atlas_from_config(const MultiXLayerConfig *mcfg,
+                                AtlasSegmentInfo *atlas);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AVM_COMMON_TU_ASSEMBLER_H_
diff --git a/common/xlayer_config.h b/common/xlayer_config.h
new file mode 100644
index 0000000000..a72c6f3319
--- /dev/null
+++ b/common/xlayer_config.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#ifndef AVM_COMMON_XLAYER_CONFIG_H_
+#define AVM_COMMON_XLAYER_CONFIG_H_
+
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#include "av2/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Capacity of MultiXLayerConfig::input_sources[]
+#define MAX_INPUT_SOURCES 8
+// Size in bytes of the source-name buffers (name / input_source_name)
+#define MAX_SOURCE_NAME_LEN 64
+// Capacity of XLayerEncConfig::codec_controls[]
+#define MAX_CODEC_CONTROLS 32
+
+// Named input source for multi-source encoding.
+// Other config structs refer to a source by `name` (see input_source_name
+// in XLayerEncConfig / MLayerSourceConfig).
+typedef struct InputSourceConfig {
+  char name[MAX_SOURCE_NAME_LEN];
+  char filename[PATH_MAX];
+  unsigned int width;
+  unsigned int height;
+  int format;          // pixel format: 0=auto, 420, 422, 444
+  int bit_depth;       // 0=auto (detect from file or default 8)
+  int frame_rate_num;  // 0/0=auto (detect from Y4M or use global timebase)
+  int frame_rate_den;  // denominator paired with frame_rate_num
+  int frame_skip;  // resolved: max_fps/this_fps (1=every TU, 2=every other)
+} InputSourceConfig;
+
+// Per-embedded-layer source and dependency configuration.
+// xlayer_config_init() presets every field to its "inherit"/"default"
+// sentinel: -1 for the int fields, 0 for width/height and "" for the name.
+typedef struct MLayerSourceConfig {
+  char input_source_name[MAX_SOURCE_NAME_LEN];  // "" = inherit from xlayer
+  int input_source_idx;  // resolved: -1 = inherit from xlayer
+  int atlas_pos_x;       // crop origin X (-1 = inherit from xlayer)
+  int atlas_pos_y;       // crop origin Y (-1 = inherit from xlayer)
+  unsigned int width;    // crop width (0 = inherit from xlayer)
+  unsigned int height;   // crop height (0 = inherit from xlayer)
+  int dependency_mask;   // bitmask of lower mlayers this depends on
+                         // (-1 = default linear chain); read through
+                         // resolve_mlayer_dep_mask()
+  // Content Interpretation overrides (-1 = inherit from xlayer)
+  int color_primaries;
+  int transfer_characteristics;
+  int matrix_coefficients;
+  int full_range_flag;
+} MLayerSourceConfig;
+
+// Default mlayer dependency mask: linear chain where each mlayer depends on
+// all lower mlayers.  mlayer 0 has mask 0 (no dependencies).
+// Example: m == 3 yields 0b111 (mlayers 0, 1 and 2).
+// The argument is expanded twice, so it must be free of side effects, and it
+// must be small enough for the int shift (1 << m) to be defined.
+#define DEFAULT_MLAYER_DEP_MASK(m) ((m) > 0 ? (1 << (m)) - 1 : 0)
+
+// Effective dependency mask for the mlayer at index m: the configured
+// ms->dependency_mask when it is non-negative, otherwise (negative sentinel,
+// normally -1) the default linear chain DEFAULT_MLAYER_DEP_MASK(m).
+static inline int resolve_mlayer_dep_mask(const MLayerSourceConfig *ms, int m) {
+  const int configured = ms->dependency_mask;
+  if (configured < 0) {
+    return DEFAULT_MLAYER_DEP_MASK(m);
+  }
+  return configured;
+}
+
+// Per extended-layer encoder configuration.
+// xlayer_config_init() presets xlayer_id and every field documented with a
+// "-1 = ..." sentinel to -1, profile to MAIN_420_10_IP1, level to
+// SEQ_LEVEL_4_0, both layer counts to 1 and view_type to VIEW_UNSPECIFIED;
+// all remaining fields start at 0 / empty.
+typedef struct XLayerEncConfig {
+  int xlayer_id;  // 0-30 (-1 until assigned)
+  char input_filename[PATH_MAX];
+  unsigned int width;
+  unsigned int height;
+  unsigned int profile;
+  unsigned int tier;
+  unsigned int level;
+  int layer_type;      // TEXTURE_LAYER, AUX_LAYER, STEREO_LAYER, etc.
+  int auxiliary_type;  // LCR_ALPHA_AUX, LCR_DEPTH_AUX, etc. (if AUX_LAYER)
+  int view_type;       // VIEW_UNSPECIFIED, VIEW_LEFT, VIEW_RIGHT, etc.
+  int num_temporal_layers;
+  int num_embedded_layers;
+  // Color info.
+  // NOTE(review): xlayer_config_init() leaves these at 0, while
+  // populate_global_lcr_from_config() signals color info whenever
+  // color_primaries >= 0 - confirm the parser stores a negative value when
+  // color info is meant to be omitted.
+  int color_primaries;
+  int transfer_characteristics;
+  int matrix_coefficients;
+  int full_range_flag;
+  // Encoder overrides (-1 = use global default)
+  int qp;
+  int bitrate;
+  int cpu_used;
+  int lag_in_frames;
+  int sframe_dist;  // S-Frame interval (-1 = disabled/default)
+  int sframe_mode;  // S-Frame insertion mode (-1 = default)
+  int sframe_type;  // S-Frame type: 0=regular, 1=RAS (-1 = default)
+  // Coding structure (-1 or empty = use global default)
+  int kf_max_dist;                    // keyframe interval (-1 = default)
+  char subgop_config_path[PATH_MAX];  // sub-GOP config file (empty = default)
+  // GOP mode: 0=closed(CLK), 1=open_leading(OLK), 2=open_sef
+  int gop_mode;
+  int fwd_kf_enabled;             // override: -1=derive from gop_mode
+  int enable_keyframe_filtering;  // override: -1=derive from gop_mode
+  int add_sef_for_hidden_frames;  // override: -1=derive from gop_mode
+  // Atlas layout position in composite canvas (-1 = auto)
+  int atlas_pos_x;
+  int atlas_pos_y;
+  // Input source reference (for multi-source encoding)
+  char input_source_name[MAX_SOURCE_NAME_LEN];  // references InputSourceConfig
+  int input_source_idx;  // resolved index into input_sources[] (-1 = own file)
+  // Scaling for embedded layers, one entry per mlayer (compared against
+  // AVME_NORMAL when the Global LCR is populated)
+  int scaling_mode[MAX_NUM_MLAYERS];
+  // Per-embedded-layer source and dependency configuration
+  MLayerSourceConfig mlayer_sources[MAX_NUM_MLAYERS];
+  int has_per_mlayer_sources;   // 1 if any mlayer has its own source/crop
+  int has_mlayer_dependencies;  // 1 if any mlayer has explicit dependency_mask
+  // Generic post-init codec controls from JSON "codec_controls" array;
+  // num_codec_controls counts the used entries of codec_controls[]
+  int num_codec_controls;
+  struct {
+    char name[64];
+    int value;
+  } codec_controls[MAX_CODEC_CONTROLS];
+} XLayerEncConfig;
+
+// Per operating-point configuration within an OPS set.
+// xlayer_config_init() presets aggregate_level_idx, max_tier_flag and both
+// embedded_* arrays to -1; the remaining fields start at 0.
+typedef struct OperatingPointConfig {
+  int intent;           // OPS intent (display, monitoring, etc.)
+  uint32_t xlayer_map;  // bitmask of xlayers included in this OP
+                        // (bit N set = xlayer ID N)
+  // Per-xlayer within this OP.
+  // NOTE(review): populate_ops_from_config() reads mlayer_count[] by the
+  // xlayer's position among the set bits of xlayer_map, not by xlayer ID,
+  // and does not read tlayer_count[] at all; with mlayer_count == 0 it
+  // writes no mlayer map - confirm this matches the intended "0=all".
+  int mlayer_count[MAX_NUM_XLAYERS];  // embedded layers per xlayer (0=all)
+  int tlayer_count[MAX_NUM_XLAYERS];  // temporal layers per xlayer (0=all)
+  // PTL overrides for this OP
+  int aggregate_level_idx;  // -1 = derive from constituent layers
+  int max_tier_flag;        // -1 = derive
+  // Per-xlayer embedded OPS references (copied index-for-index into the
+  // OperatingPoint by populate_ops_from_config())
+  int embedded_ops_id[MAX_NUM_XLAYERS];    // -1 = not set
+  int embedded_op_index[MAX_NUM_XLAYERS];  // -1 = not set
+} OperatingPointConfig;
+
+// OPS set configuration (one per OPS OBU)
+typedef struct OPSConfig {
+  int enable;    // 0 = populate_ops_from_config() leaves the OPS invalid
+  int ops_id;    // OPS ID (0-15)
+  int priority;  // OPS priority
+  int intent_present_flag;
+  int ptl_present_flag;
+  int color_info_present_flag;
+  int mlayer_info_idc;  // 0=no info, 1=same, 2=explicit
+  int num_operating_points;  // used entries of ops[]; at most MAX_OPS_COUNT
+  OperatingPointConfig ops[MAX_OPS_COUNT];
+} OPSConfig;
+
+// Top-level multi-xlayer configuration.
+// Initialize with xlayer_config_init(), which zeroes the struct and then
+// turns on enable_global_lcr, lcr_doh_constraint_flag, atlas_uniform_spacing,
+// combined_tu and monotonic_output_order.
+typedef struct MultiXLayerConfig {
+  int num_xlayers;  // used entries of xlayers[]
+  XLayerEncConfig xlayers[MAX_NUM_XLAYERS - 1];  // up to 31
+  // Global LCR
+  int enable_global_lcr;
+  int lcr_purpose_id;
+  int lcr_dependent_xlayers_flag;
+  int lcr_doh_constraint_flag;
+  // Local LCR
+  int enable_local_lcr;
+  int local_lcr_mode;  // 0 = both (Global+Local, identical xlayer_info)
+                       // 1 = local_only (Global without payload, Local is
+                       //     authoritative)
+  // MSDO
+  int enable_msdo;
+  // Atlas
+  int enable_atlas;           // 0 = populate_atlas_from_config() emits nothing
+  int atlas_mode;             // ENHANCED_ATLAS or MULTISTREAM_ATLAS
+  int atlas_width;            // canvas width (0 = derive from xlayers)
+  int atlas_height;           // canvas height (0 = derive)
+  int atlas_uniform_spacing;  // 1 = auto-grid, 0 = explicit positions
+  // OPS
+  int num_ops_sets;  // used entries of ops_sets[]
+  OPSConfig ops_sets[MAX_NUM_OPS_ID];
+  // Shared source (for subpicture encoding from single input)
+  char source_filename[PATH_MAX];  // shared source file (empty = disabled)
+  unsigned int source_width;       // source resolution (0 = derive from file)
+  unsigned int source_height;
+  // Named input sources (replaces single source for multi-source encoding)
+  int num_input_sources;  // used entries of input_sources[]
+  InputSourceConfig input_sources[MAX_INPUT_SOURCES];
+  // Bitstream
+  int combined_tu;
+  int monotonic_output_order;
+  double frame_rate;  // 0 = use main encoder timebase (default)
+  int limit;          // max frames to encode (0 = unlimited)
+  char output_filename[PATH_MAX];
+} MultiXLayerConfig;
+
+// Reset *cfg to its defaults.
+// The whole struct is zeroed, the top-level switches that default to "on"
+// are set, and every xlayer, mlayer-source and operating-point slot (used or
+// not) receives its "-1 = default / inherit / not set" sentinels plus the
+// default profile, level, layer counts and view type.
+static inline void xlayer_config_init(MultiXLayerConfig *cfg) {
+  memset(cfg, 0, sizeof(*cfg));
+
+  // Top-level switches enabled by default
+  cfg->monotonic_output_order = 1;
+  cfg->combined_tu = 1;
+  cfg->atlas_uniform_spacing = 1;
+  cfg->lcr_doh_constraint_flag = 1;
+  cfg->enable_global_lcr = 1;
+
+  // Per-xlayer defaults
+  XLayerEncConfig *const xlayers_end = cfg->xlayers + (MAX_NUM_XLAYERS - 1);
+  for (XLayerEncConfig *xl = cfg->xlayers; xl != xlayers_end; ++xl) {
+    // Identity and stream description
+    xl->xlayer_id = -1;
+    xl->profile = MAIN_420_10_IP1;
+    xl->level = SEQ_LEVEL_4_0;
+    xl->num_temporal_layers = 1;
+    xl->num_embedded_layers = 1;
+    xl->view_type = VIEW_UNSPECIFIED;
+
+    // Encoder overrides: -1 means "use the global default"
+    xl->qp = -1;
+    xl->bitrate = -1;
+    xl->cpu_used = -1;
+    xl->lag_in_frames = -1;
+    xl->sframe_dist = -1;
+    xl->sframe_mode = -1;
+    xl->sframe_type = -1;
+    xl->kf_max_dist = -1;
+    xl->fwd_kf_enabled = -1;
+    xl->enable_keyframe_filtering = -1;
+    xl->add_sef_for_hidden_frames = -1;
+
+    // Atlas placement and input source: -1 means "auto" / "own file"
+    xl->atlas_pos_x = -1;
+    xl->atlas_pos_y = -1;
+    xl->input_source_idx = -1;
+
+    // Per-mlayer overrides all start out as "inherit from the xlayer"
+    for (int m = 0; m < MAX_NUM_MLAYERS; m++) {
+      MLayerSourceConfig *const ms = &xl->mlayer_sources[m];
+      ms->input_source_idx = -1;
+      ms->atlas_pos_x = -1;
+      ms->atlas_pos_y = -1;
+      ms->width = 0;
+      ms->height = 0;
+      ms->dependency_mask = -1;
+      ms->color_primaries = -1;
+      ms->transfer_characteristics = -1;
+      ms->matrix_coefficients = -1;
+      ms->full_range_flag = -1;
+    }
+  }
+
+  // Operating points: level/tier are derived and embedded references unset
+  for (int s = 0; s < MAX_NUM_OPS_ID; s++) {
+    OperatingPointConfig *const points = cfg->ops_sets[s].ops;
+    for (int p = 0; p < MAX_OPS_COUNT; p++) {
+      OperatingPointConfig *const opc = &points[p];
+      opc->aggregate_level_idx = -1;
+      opc->max_tier_flag = -1;
+      for (int x = 0; x < MAX_NUM_XLAYERS; x++) {
+        opc->embedded_ops_id[x] = -1;
+        opc->embedded_op_index[x] = -1;
+      }
+    }
+  }
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AVM_COMMON_XLAYER_CONFIG_H_
diff --git a/common/xlayer_config_parse.c b/common/xlayer_config_parse.c
new file mode 100644
index 0000000000..d304a8910c
--- /dev/null
+++ b/common/xlayer_config_parse.c
@@ -0,0 +1,1273 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include "common/xlayer_config_parse.h"
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avm/avmcx.h"
+#include "third_party/cJSON/cJSON.h"
+
+// Translate a "layer_type" string into the matching layer-type constant.
+// A NULL string selects TEXTURE_LAYER silently; an unrecognized string also
+// selects TEXTURE_LAYER, after a warning on stderr.
+static int parse_layer_type(const char *str) {
+  static const struct {
+    const char *label;
+    int value;
+  } kLayerTypes[] = {
+    { "texture", TEXTURE_LAYER },
+    { "auxiliary", AUX_LAYER },
+    { "stereo", STEREO_LAYER },
+    { "dependent", DEPENDENT_LAYER },
+  };
+  const size_t count = sizeof(kLayerTypes) / sizeof(kLayerTypes[0]);
+
+  if (str == NULL) return TEXTURE_LAYER;
+  for (size_t t = 0; t < count; t++) {
+    if (strcmp(str, kLayerTypes[t].label) == 0) return kLayerTypes[t].value;
+  }
+  fprintf(stderr, "Warning: unknown layer_type \"%s\", defaulting to texture\n",
+          str);
+  return TEXTURE_LAYER;
+}
+
+// Translate an "auxiliary_type" string into the matching LCR_*_AUX constant.
+// NULL and "alpha" both give LCR_ALPHA_AUX without any diagnostic; any string
+// that is not recognized gives LCR_ALPHA_AUX after a warning on stderr.
+static int parse_auxiliary_type(const char *str) {
+  int aux_type = LCR_ALPHA_AUX;
+
+  if (str == NULL || strcmp(str, "alpha") == 0) {
+    // Keep the alpha default.
+  } else if (strcmp(str, "depth") == 0) {
+    aux_type = LCR_DEPTH_AUX;
+  } else if (strcmp(str, "segmentation") == 0) {
+    aux_type = LCR_SEGMENTATION_AUX;
+  } else if (strcmp(str, "gain_map") == 0) {
+    aux_type = LCR_GAIN_MAP_AUX;
+  } else {
+    fprintf(stderr,
+            "Warning: unknown auxiliary_type \"%s\", defaulting to alpha\n",
+            str);
+  }
+  return aux_type;
+}
+
+// Translate a "view_type" string into the matching VIEW_* constant.
+// NULL gives VIEW_UNSPECIFIED silently; an unrecognized string gives
+// VIEW_UNSPECIFIED after a warning on stderr.
+static int parse_view_type(const char *str) {
+  static const struct {
+    const char *label;
+    int value;
+  } kViewTypes[] = {
+    { "unspecified", VIEW_UNSPECIFIED },
+    { "center", VIEW_CENTER },
+    { "left", VIEW_LEFT },
+    { "right", VIEW_RIGHT },
+    { "explicit", VIEW_EXPLICIT },
+  };
+  const size_t count = sizeof(kViewTypes) / sizeof(kViewTypes[0]);
+
+  if (str == NULL) return VIEW_UNSPECIFIED;
+  for (size_t v = 0; v < count; v++) {
+    if (strcmp(str, kViewTypes[v].label) == 0) return kViewTypes[v].value;
+  }
+  fprintf(stderr,
+          "Warning: unknown view_type \"%s\", defaulting to "
+          "unspecified\n",
+          str);
+  return VIEW_UNSPECIFIED;
+}
+
+// Translate a scaling-mode string ("1:1"/"normal", "4/5", "3/5", "3/4",
+// "1/4", "1/8", "1/2") into the matching AVME_* constant. Returns -1 for a
+// NULL string (silently) and for an unrecognized string (after a warning).
+static int parse_scaling_mode(const char *str) {
+  static const struct {
+    const char *label;
+    int mode;
+  } kScalingModes[] = {
+    { "1:1", AVME_NORMAL },    { "normal", AVME_NORMAL },
+    { "4/5", AVME_FOURFIVE },  { "3/5", AVME_THREEFIVE },
+    { "3/4", AVME_THREEFOUR }, { "1/4", AVME_ONEFOUR },
+    { "1/8", AVME_ONEEIGHT },  { "1/2", AVME_ONETWO },
+  };
+  const size_t count = sizeof(kScalingModes) / sizeof(kScalingModes[0]);
+
+  if (str == NULL) return -1;
+  for (size_t s = 0; s < count; s++) {
+    if (strcmp(str, kScalingModes[s].label) == 0) return kScalingModes[s].mode;
+  }
+  fprintf(stderr, "Warning: unknown scaling_mode \"%s\"\n", str);
+  return -1;
+}
+
+// Translate a "gop_mode" string into its integer code:
+//   "closed" -> 0, "open_leading" -> 1, "open_sef" -> 2.
+// NULL gives 0 silently; an unrecognized string gives 0 after a warning.
+static int parse_gop_mode(const char *str) {
+  // The position of each name in this array is the value returned for it.
+  static const char *const kGopModes[] = { "closed", "open_leading",
+                                           "open_sef" };
+  const int count = (int)(sizeof(kGopModes) / sizeof(kGopModes[0]));
+
+  if (str == NULL) return 0;
+  for (int mode = 0; mode < count; mode++) {
+    if (strcmp(str, kGopModes[mode]) == 0) return mode;
+  }
+  fprintf(stderr, "Warning: unknown gop_mode \"%s\", defaulting to closed\n",
+          str);
+  return 0;
+}
+
+// Translate a chroma format string into 420, 422 or 444. Each format may be
+// spelled with or without a leading "yuv" ("yuv420" or "420"). Returns 0 for
+// a NULL string (silently) and for anything else (after a warning).
+static int parse_chroma_format(const char *str) {
+  if (str == NULL) return 0;
+
+  // Drop one optional "yuv" prefix, then compare the remaining digits.
+  const char *digits = (strncmp(str, "yuv", 3) == 0) ? str + 3 : str;
+  int format = 0;
+  if (strcmp(digits, "420") == 0) {
+    format = 420;
+  } else if (strcmp(digits, "422") == 0) {
+    format = 422;
+  } else if (strcmp(digits, "444") == 0) {
+    format = 444;
+  } else {
+    fprintf(stderr, "Warning: unknown format \"%s\"\n", str);
+  }
+  return format;
+}
+
+// Helper: print a warning on stderr for every member of `obj` whose key is
+// neither "comment" nor one of known[0 .. num_known - 1]. `section` names the
+// enclosing JSON section in the message.
+//
+// Children that carry no key are skipped. This happens when the config holds
+// an array where an object was expected (for example an array used as an
+// "embedded_layers" element): cJSON leaves `string` NULL for array elements,
+// and passing NULL to strcmp() is undefined behavior.
+static void warn_unknown_keys(const cJSON *obj, const char *const known[],
+                              int num_known, const char *section) {
+  const cJSON *item = NULL;
+  cJSON_ArrayForEach(item, obj) {
+    if (item->string == NULL) continue;
+    if (strcmp(item->string, "comment") == 0) continue;
+    int found = 0;
+    for (int i = 0; i < num_known; i++) {
+      if (strcmp(item->string, known[i]) == 0) {
+        found = 1;
+        break;
+      }
+    }
+    if (!found)
+      fprintf(stderr, "Warning: unknown key \"%s\" in %s (ignored)\n",
+              item->string, section);
+  }
+}
+
+// Helper: return member `key` of `obj` as an int (cJSON's `valueint`) when it
+// is a JSON number; otherwise return `default_val`.
+static int json_get_int(const cJSON *obj, const char *key, int default_val) {
+  const cJSON *member = cJSON_GetObjectItemCaseSensitive(obj, key);
+  return cJSON_IsNumber(member) ? member->valueint : default_val;
+}
+
+// Helper: return member `key` of `obj` as 0/1. JSON true/false map to 1/0 and
+// a JSON number maps to 1 when its integer value is non-zero. Any other type,
+// or a missing member, yields `default_val`.
+static int json_get_bool(const cJSON *obj, const char *key, int default_val) {
+  const cJSON *member = cJSON_GetObjectItemCaseSensitive(obj, key);
+  int result = default_val;
+  if (cJSON_IsTrue(member)) {
+    result = 1;
+  } else if (cJSON_IsFalse(member)) {
+    result = 0;
+  } else if (cJSON_IsNumber(member)) {
+    result = (member->valueint != 0);
+  }
+  return result;
+}
+
+// Helper: return the text of member `key` of `obj` when it is a JSON string;
+// otherwise return `default_val`. The returned pointer refers to storage
+// inside the cJSON item, so it must not be used after the tree is deleted.
+static const char *json_get_string(const cJSON *obj, const char *key,
+                                   const char *default_val) {
+  const cJSON *member = cJSON_GetObjectItemCaseSensitive(obj, key);
+  return cJSON_IsString(member) ? member->valuestring : default_val;
+}
+
+// Helper: parse the frame-rate member `key` of `obj` into a num/den rational.
+//
+// Accepted forms:
+//   - JSON number. Whole values become N/1. A fractional value within 0.01
+//     of a multiple of 1/1001 becomes N/1001 (NTSC-style rates such as
+//     29.97); any other fractional value is rounded to N/1000.
+//   - JSON string "N/D" or "N", where N and D are positive integers.
+//
+// *num and *den are left untouched when the member is absent, is a
+// non-positive number, or is neither a number nor a string.
+//
+// Returns 0 on success (or when there was nothing to parse) and -1, after
+// printing an error, when a string is malformed or a number is too large to
+// be represented.
+static int json_parse_frame_rate(const cJSON *obj, const char *key, int *num,
+                                 int *den) {
+  const cJSON *item = cJSON_GetObjectItemCaseSensitive(obj, key);
+  if (!item) return 0;
+  if (cJSON_IsNumber(item)) {
+    const double v = item->valuedouble;
+    if (v <= 0.0) return 0;
+    const int is_whole = (v == floor(v));
+    // Converting an out-of-range double to int is undefined behavior, so
+    // bound the value by the largest numerator each branch can produce.
+    const double limit =
+        is_whole ? (double)INT_MAX : (double)INT_MAX / 1001.0;
+    if (!(v <= limit)) {
+      fprintf(stderr, "Error: frame_rate %g is out of range\n", v);
+      return -1;
+    }
+    if (is_whole) {
+      *num = (int)v;
+      *den = 1;
+    } else {
+      // Common fractional rates: multiply by 1001 to check for NTSC
+      const double v1001 = v * 1001.0;
+      if (fabs(v1001 - round(v1001)) < 0.01) {
+        *num = (int)round(v1001);
+        *den = 1001;
+      } else {
+        // General: use 1000x scale
+        *num = (int)round(v * 1000.0);
+        *den = 1000;
+      }
+    }
+    return 0;
+  }
+  if (cJSON_IsString(item)) {
+    const char *text = item->valuestring;
+    int n = 0, d = 0;
+    const int fields = sscanf(text, "%d/%d", &n, &d);
+    // When a '/' is present the string must be a complete "N/D" pair, so
+    // that inputs such as "30/0" or "30000 /1001" are rejected instead of
+    // being silently read as N/1.
+    const int wants_ratio = (strchr(text, '/') != NULL);
+    if (wants_ratio && fields == 2 && n > 0 && d > 0) {
+      *num = n;
+      *den = d;
+      return 0;
+    }
+    if (!wants_ratio && fields >= 1 && n > 0) {
+      *num = n;
+      *den = 1;
+      return 0;
+    }
+    fprintf(stderr, "Error: invalid frame_rate \"%s\"\n", text);
+    return -1;
+  }
+  return 0;
+}
+
+// Read the whole file at `path` into a freshly malloc'd, NUL-terminated
+// buffer. The caller owns the result and must free() it.
+//
+// Returns NULL, after printing an error to stderr, when the file cannot be
+// opened, its size cannot be determined, it is empty, memory cannot be
+// allocated, or fewer bytes than expected could be read.
+static char *read_file_contents(const char *path) {
+  FILE *f = fopen(path, "rb");
+  if (!f) {
+    fprintf(stderr, "Error: cannot open config file \"%s\"\n", path);
+    return NULL;
+  }
+
+  // Measure the file by seeking to its end; every step can fail (for
+  // example on a pipe), which must not be reported as an empty file.
+  long len = -1;
+  if (fseek(f, 0, SEEK_END) == 0) len = ftell(f);
+  if (len < 0 || fseek(f, 0, SEEK_SET) != 0) {
+    fclose(f);
+    fprintf(stderr, "Error: cannot determine size of config file \"%s\"\n",
+            path);
+    return NULL;
+  }
+  if (len == 0) {
+    fclose(f);
+    fprintf(stderr, "Error: config file \"%s\" is empty\n", path);
+    return NULL;
+  }
+
+  char *buf = (char *)malloc((size_t)len + 1);
+  if (!buf) {
+    fclose(f);
+    fprintf(stderr, "Error: out of memory reading config file \"%s\"\n",
+            path);
+    return NULL;
+  }
+
+  const size_t read_len = fread(buf, 1, (size_t)len, f);
+  const int read_failed = ferror(f);
+  fclose(f);
+  // A short read would hand truncated JSON to the parser; fail instead.
+  if (read_failed || read_len != (size_t)len) {
+    free(buf);
+    fprintf(stderr, "Error: failed to read config file \"%s\"\n", path);
+    return NULL;
+  }
+  buf[read_len] = '\0';
+  return buf;
+}
+
+// Parse one element of the top-level "xlayers" array into *xlcfg.
+//
+// Numeric members that are absent take the default passed to json_get_int()
+// below (mostly -1 or 0). String members ("input", "input_source",
+// "view_type", "subgop_config", "gop_mode") only overwrite *xlcfg when they
+// are present. Unknown keys produce warnings, not errors.
+//
+// Returns 0 on success and -1, after printing an error to stderr, when:
+//   - "xlayer_id" is missing or outside 0-30;
+//   - "num_embedded_layers" is outside 1-MAX_NUM_MLAYERS;
+//   - both a "scaling_mode" array and "embedded_layers" are present;
+//   - "embedded_layers" has a length other than num_embedded_layers, or
+//     contains an element that is not a JSON object;
+//   - a scaling mode string is not recognized;
+//   - a "depends_on" index is not lower than the layer's own index;
+//   - "codec_controls" has too many entries or a malformed entry.
+static int parse_xlayer_entry(const cJSON *entry, XLayerEncConfig *xlcfg) {
+  xlcfg->xlayer_id = json_get_int(entry, "xlayer_id", -1);
+  if (xlcfg->xlayer_id < 0 || xlcfg->xlayer_id > 30) {
+    fprintf(stderr, "Error: xlayer_id must be 0-30, got %d\n",
+            xlcfg->xlayer_id);
+    return -1;
+  }
+
+  const char *input = json_get_string(entry, "input", NULL);
+  if (input) {
+    snprintf(xlcfg->input_filename, PATH_MAX, "%s", input);
+  }
+
+  // Input source reference (for multi-source mode)
+  const char *isrc = json_get_string(entry, "input_source", NULL);
+  if (isrc) {
+    snprintf(xlcfg->input_source_name, MAX_SOURCE_NAME_LEN, "%s", isrc);
+  }
+
+  xlcfg->width = (unsigned int)json_get_int(entry, "width", 0);
+  xlcfg->height = (unsigned int)json_get_int(entry, "height", 0);
+  xlcfg->profile =
+      (unsigned int)json_get_int(entry, "profile", MAIN_420_10_IP1);
+  xlcfg->tier = (unsigned int)json_get_int(entry, "tier", 0);
+  xlcfg->level = (unsigned int)json_get_int(entry, "level", SEQ_LEVEL_4_0);
+
+  const char *lt = json_get_string(entry, "layer_type", "texture");
+  xlcfg->layer_type = parse_layer_type(lt);
+
+  // "auxiliary_type" is only consulted for auxiliary layers.
+  if (xlcfg->layer_type == AUX_LAYER) {
+    const char *at = json_get_string(entry, "auxiliary_type", "alpha");
+    xlcfg->auxiliary_type = parse_auxiliary_type(at);
+  }
+
+  const char *vt = json_get_string(entry, "view_type", NULL);
+  if (vt) xlcfg->view_type = parse_view_type(vt);
+
+  xlcfg->num_temporal_layers = json_get_int(entry, "num_temporal_layers", 1);
+  xlcfg->num_embedded_layers = json_get_int(entry, "num_embedded_layers", 1);
+  // num_embedded_layers bounds every per-embedded-layer loop below, which
+  // index mlayer_sources[] and scaling_mode[]. Reject counts those arrays
+  // cannot hold before any of them is written.
+  if (xlcfg->num_embedded_layers < 1 ||
+      xlcfg->num_embedded_layers > MAX_NUM_MLAYERS) {
+    fprintf(stderr, "Error: xlayer %d num_embedded_layers must be 1-%d, got %d\n",
+            xlcfg->xlayer_id, MAX_NUM_MLAYERS, xlcfg->num_embedded_layers);
+    return -1;
+  }
+
+  // Color info
+  xlcfg->color_primaries = json_get_int(entry, "color_primaries", -1);
+  xlcfg->transfer_characteristics =
+      json_get_int(entry, "transfer_characteristics", -1);
+  xlcfg->matrix_coefficients = json_get_int(entry, "matrix_coefficients", -1);
+  xlcfg->full_range_flag = json_get_int(entry, "full_range_flag", -1);
+
+  // Encoder overrides
+  xlcfg->qp = json_get_int(entry, "qp", -1);
+  xlcfg->bitrate = json_get_int(entry, "bitrate", -1);
+  xlcfg->cpu_used = json_get_int(entry, "cpu_used", -1);
+  xlcfg->lag_in_frames = json_get_int(entry, "lag_in_frames", -1);
+
+  // S-Frame parameters
+  xlcfg->sframe_dist = json_get_int(entry, "sframe_dist", -1);
+  xlcfg->sframe_mode = json_get_int(entry, "sframe_mode", -1);
+  xlcfg->sframe_type = json_get_int(entry, "sframe_type", -1);
+
+  // Coding structure overrides
+  xlcfg->kf_max_dist = json_get_int(entry, "kf_max_dist", -1);
+  const char *subgop = json_get_string(entry, "subgop_config", NULL);
+  if (subgop) {
+    snprintf(xlcfg->subgop_config_path, PATH_MAX, "%s", subgop);
+  }
+
+  // GOP mode and overrides
+  const char *gop = json_get_string(entry, "gop_mode", NULL);
+  if (gop) xlcfg->gop_mode = parse_gop_mode(gop);
+  xlcfg->fwd_kf_enabled = json_get_int(entry, "fwd_kf_enabled", -1);
+  xlcfg->enable_keyframe_filtering =
+      json_get_int(entry, "enable_keyframe_filtering", -1);
+  xlcfg->add_sef_for_hidden_frames =
+      json_get_int(entry, "add_sef_for_hidden_frames", -1);
+
+  // Atlas layout position
+  xlcfg->atlas_pos_x = json_get_int(entry, "atlas_pos_x", -1);
+  xlcfg->atlas_pos_y = json_get_int(entry, "atlas_pos_y", -1);
+
+  // Scaling modes for embedded layers (flat array format)
+  const cJSON *scaling =
+      cJSON_GetObjectItemCaseSensitive(entry, "scaling_mode");
+  const cJSON *el_arr =
+      cJSON_GetObjectItemCaseSensitive(entry, "embedded_layers");
+
+  if (cJSON_IsArray(scaling) && cJSON_IsArray(el_arr)) {
+    fprintf(stderr,
+            "Error: xlayer %d has both \"scaling_mode\" array and "
+            "\"embedded_layers\" — these are mutually exclusive\n",
+            xlcfg->xlayer_id);
+    return -1;
+  }
+
+  // Set once any scaling mode is given explicitly; suppresses the derived
+  // defaults further down.
+  int scaling_modes_explicit = 0;
+
+  if (cJSON_IsArray(scaling)) {
+    scaling_modes_explicit = 1;
+    int n = cJSON_GetArraySize(scaling);
+    // Entries beyond MAX_NUM_MLAYERS are ignored.
+    if (n > MAX_NUM_MLAYERS) n = MAX_NUM_MLAYERS;
+    for (int i = 0; i < n; i++) {
+      const cJSON *s = cJSON_GetArrayItem(scaling, i);
+      if (cJSON_IsNumber(s)) {
+        xlcfg->scaling_mode[i] = s->valueint;
+      } else if (cJSON_IsString(s)) {
+        int mode = parse_scaling_mode(s->valuestring);
+        if (mode < 0) {
+          fprintf(stderr, "Error: invalid scaling_mode \"%s\" for xlayer %d\n",
+                  s->valuestring, xlcfg->xlayer_id);
+          return -1;
+        }
+        xlcfg->scaling_mode[i] = mode;
+      }
+    }
+  }
+
+  // Per-embedded-layer configuration (new format)
+  if (cJSON_IsArray(el_arr)) {
+    int n = cJSON_GetArraySize(el_arr);
+    if (n != xlcfg->num_embedded_layers) {
+      fprintf(stderr,
+              "Error: xlayer %d \"embedded_layers\" array length %d does not "
+              "match num_embedded_layers %d\n",
+              xlcfg->xlayer_id, n, xlcfg->num_embedded_layers);
+      return -1;
+    }
+    // n equals num_embedded_layers, which was range-checked above, so m
+    // stays within mlayer_sources[] and scaling_mode[].
+    for (int m = 0; m < n; m++) {
+      const cJSON *el = cJSON_GetArrayItem(el_arr, m);
+      if (!cJSON_IsObject(el)) {
+        fprintf(stderr,
+                "Error: xlayer %d embedded_layers[%d] must be an object\n",
+                xlcfg->xlayer_id, m);
+        return -1;
+      }
+      MLayerSourceConfig *ms = &xlcfg->mlayer_sources[m];
+
+      // scaling_mode per embedded layer
+      const cJSON *sm_item =
+          cJSON_GetObjectItemCaseSensitive(el, "scaling_mode");
+      if (sm_item != NULL) scaling_modes_explicit = 1;
+      if (cJSON_IsNumber(sm_item)) {
+        xlcfg->scaling_mode[m] = sm_item->valueint;
+      } else if (cJSON_IsString(sm_item)) {
+        int mode = parse_scaling_mode(sm_item->valuestring);
+        if (mode < 0) {
+          fprintf(
+              stderr,
+              "Error: invalid scaling_mode \"%s\" for xlayer %d mlayer %d\n",
+              sm_item->valuestring, xlcfg->xlayer_id, m);
+          return -1;
+        }
+        xlcfg->scaling_mode[m] = mode;
+      }
+
+      // input_source
+      const char *ml_isrc = json_get_string(el, "input_source", NULL);
+      if (ml_isrc)
+        snprintf(ms->input_source_name, MAX_SOURCE_NAME_LEN, "%s", ml_isrc);
+
+      // crop coordinates
+      ms->atlas_pos_x = json_get_int(el, "atlas_pos_x", -1);
+      ms->atlas_pos_y = json_get_int(el, "atlas_pos_y", -1);
+      ms->width = (unsigned int)json_get_int(el, "width", 0);
+      ms->height = (unsigned int)json_get_int(el, "height", 0);
+
+      // depends_on → dependency_mask: one bit per lower-indexed layer this
+      // layer depends on. Entries that are negative or not numbers are
+      // ignored; an index >= m is an error.
+      const cJSON *deps = cJSON_GetObjectItemCaseSensitive(el, "depends_on");
+      if (cJSON_IsArray(deps)) {
+        ms->dependency_mask = 0;
+        const cJSON *dep = NULL;
+        cJSON_ArrayForEach(dep, deps) {
+          if (cJSON_IsNumber(dep) && dep->valueint >= 0 && dep->valueint < m) {
+            ms->dependency_mask |= (1 << dep->valueint);
+          } else if (cJSON_IsNumber(dep) && dep->valueint >= m) {
+            fprintf(stderr,
+                    "Error: xlayer %d mlayer %d depends_on[%d] >= self\n",
+                    xlcfg->xlayer_id, m, dep->valueint);
+            return -1;
+          }
+        }
+        xlcfg->has_mlayer_dependencies = 1;
+      }
+
+      // Content Interpretation overrides (inherit from xlayer if omitted)
+      ms->color_primaries = json_get_int(el, "color_primaries", -1);
+      ms->transfer_characteristics =
+          json_get_int(el, "transfer_characteristics", -1);
+      ms->matrix_coefficients = json_get_int(el, "matrix_coefficients", -1);
+      ms->full_range_flag = json_get_int(el, "full_range_flag", -1);
+
+      // Warn about unknown keys in this embedded layer entry
+      {
+        static const char *const el_known[] = {
+          "scaling_mode",        "input_source",    "atlas_pos_x",
+          "atlas_pos_y",         "width",           "height",
+          "depends_on",          "color_primaries", "transfer_characteristics",
+          "matrix_coefficients", "full_range_flag",
+        };
+        char el_section[64];
+        snprintf(el_section, sizeof(el_section),
+                 "xlayer %d embedded_layers[%d]", xlcfg->xlayer_id, m);
+        warn_unknown_keys(el, el_known, sizeof(el_known) / sizeof(el_known[0]),
+                          el_section);
+      }
+
+      // Any explicit source name, crop origin or width marks this xlayer as
+      // having per-embedded-layer sources.
+      if (ms->input_source_name[0] || ms->atlas_pos_x >= 0 || ms->width > 0)
+        xlcfg->has_per_mlayer_sources = 1;
+    }
+  }
+
+  // Parse generic codec controls array: [["name", value], ...]
+  const cJSON *cc_arr =
+      cJSON_GetObjectItemCaseSensitive(entry, "codec_controls");
+  if (cJSON_IsArray(cc_arr)) {
+    int n = cJSON_GetArraySize(cc_arr);
+    if (n > MAX_CODEC_CONTROLS) {
+      fprintf(stderr, "Error: xlayer %d has %d codec_controls (max %d)\n",
+              xlcfg->xlayer_id, n, MAX_CODEC_CONTROLS);
+      return -1;
+    }
+    xlcfg->num_codec_controls = n;
+    for (int c = 0; c < n; c++) {
+      const cJSON *pair = cJSON_GetArrayItem(cc_arr, c);
+      if (!cJSON_IsArray(pair) || cJSON_GetArraySize(pair) != 2) {
+        fprintf(stderr,
+                "Error: xlayer %d codec_controls[%d] must be [\"name\", "
+                "value]\n",
+                xlcfg->xlayer_id, c);
+        return -1;
+      }
+      const cJSON *name_item = cJSON_GetArrayItem(pair, 0);
+      const cJSON *val_item = cJSON_GetArrayItem(pair, 1);
+      if (!cJSON_IsString(name_item) || !cJSON_IsNumber(val_item)) {
+        fprintf(stderr,
+                "Error: xlayer %d codec_controls[%d] must be [string, "
+                "number]\n",
+                xlcfg->xlayer_id, c);
+        return -1;
+      }
+      snprintf(xlcfg->codec_controls[c].name, 64, "%s", name_item->valuestring);
+      xlcfg->codec_controls[c].value = val_item->valueint;
+    }
+  }
+
+  // Derive default scaling modes when num_embedded_layers > 1 and none
+  // specified (all zeros)
+  if (!scaling_modes_explicit && xlcfg->num_embedded_layers > 1) {
+    int all_zero = 1;
+    for (int i = 0; i < xlcfg->num_embedded_layers; i++) {
+      if (xlcfg->scaling_mode[i] != 0) {
+        all_zero = 0;
+        break;
+      }
+    }
+    if (all_zero) {
+      // Default: smallest to full-res. Last layer is always AVME_NORMAL (0).
+      if (xlcfg->num_embedded_layers == 2) {
+        xlcfg->scaling_mode[0] = AVME_ONETWO;
+      } else if (xlcfg->num_embedded_layers >= 3) {
+        xlcfg->scaling_mode[0] = AVME_ONEFOUR;
+        xlcfg->scaling_mode[1] = AVME_ONETWO;
+      }
+    }
+  }
+
+  // Warn about unknown keys
+  {
+    static const char *const known[] = {
+      "xlayer_id",
+      "input",
+      "input_source",
+      "width",
+      "height",
+      "profile",
+      "tier",
+      "level",
+      "layer_type",
+      "auxiliary_type",
+      "view_type",
+      "num_temporal_layers",
+      "num_embedded_layers",
+      "color_primaries",
+      "transfer_characteristics",
+      "matrix_coefficients",
+      "full_range_flag",
+      "qp",
+      "bitrate",
+      "cpu_used",
+      "lag_in_frames",
+      "sframe_dist",
+      "sframe_mode",
+      "sframe_type",
+      "kf_max_dist",
+      "subgop_config",
+      "gop_mode",
+      "fwd_kf_enabled",
+      "enable_keyframe_filtering",
+      "add_sef_for_hidden_frames",
+      "atlas_pos_x",
+      "atlas_pos_y",
+      "scaling_mode",
+      "embedded_layers",
+      "codec_controls",
+    };
+    char section[64];
+    snprintf(section, sizeof(section), "xlayer %d", xlcfg->xlayer_id);
+    warn_unknown_keys(entry, known, sizeof(known) / sizeof(known[0]), section);
+  }
+
+  return 0;
+}
+
+// Convert a JSON array of xlayer IDs into a bitmask with bit N set for each
+// listed ID N. Elements that are not numbers, or whose value lies outside
+// 0 .. MAX_NUM_XLAYERS - 1, are ignored. A non-array argument gives 0.
+static uint32_t parse_xlayer_map_array(const cJSON *arr) {
+  if (!cJSON_IsArray(arr)) return 0;
+
+  uint32_t mask = 0;
+  for (const cJSON *id = arr->child; id != NULL; id = id->next) {
+    if (!cJSON_IsNumber(id)) continue;
+    if (id->valueint < 0 || id->valueint >= (int)MAX_NUM_XLAYERS) continue;
+    mask |= (1u << (unsigned int)id->valueint);
+  }
+  return mask;
+}
+
+// Parse one element of the top-level "ops" array into *ops_cfg.
+//
+// The set is marked enabled, its scalar fields are read with their defaults,
+// and every element of the required "operating_points" array is parsed into
+// ops_cfg->ops[]. Unknown keys at either level only produce warnings.
+//
+// Returns 0 on success and -1, after printing an error, when
+// "operating_points" is missing / not an array or has more than
+// MAX_OPS_COUNT elements.
+static int parse_ops_entry(const cJSON *entry, OPSConfig *ops_cfg) {
+  static const char *const kSetKeys[] = {
+    "ops_id",           "priority",           "intent_present",
+    "ptl_present",      "color_info_present", "mlayer_info_idc",
+    "operating_points",
+  };
+  static const char *const kPointKeys[] = {
+    "intent",        "xlayer_map", "mlayer_count", "aggregate_level_idx",
+    "max_tier_flag",
+  };
+  char label[64];
+
+  // Set-level scalars.
+  ops_cfg->enable = 1;
+  ops_cfg->ops_id = json_get_int(entry, "ops_id", 0);
+  ops_cfg->priority = json_get_int(entry, "priority", 0);
+  ops_cfg->intent_present_flag = json_get_bool(entry, "intent_present", 1);
+  ops_cfg->ptl_present_flag = json_get_bool(entry, "ptl_present", 1);
+  ops_cfg->color_info_present_flag =
+      json_get_bool(entry, "color_info_present", 0);
+  ops_cfg->mlayer_info_idc = json_get_int(entry, "mlayer_info_idc", 0);
+
+  const cJSON *points =
+      cJSON_GetObjectItemCaseSensitive(entry, "operating_points");
+  if (!cJSON_IsArray(points)) {
+    fprintf(stderr, "Error: OPS %d missing \"operating_points\" array\n",
+            ops_cfg->ops_id);
+    return -1;
+  }
+
+  const int num_points = cJSON_GetArraySize(points);
+  ops_cfg->num_operating_points = num_points;
+  if (num_points > MAX_OPS_COUNT) {
+    fprintf(stderr, "Error: OPS %d has %d operating points (max %d)\n",
+            ops_cfg->ops_id, ops_cfg->num_operating_points, MAX_OPS_COUNT);
+    return -1;
+  }
+
+  // Walk the operating points in array order; `idx` tracks the position.
+  int idx = 0;
+  const cJSON *point = NULL;
+  cJSON_ArrayForEach(point, points) {
+    OperatingPointConfig *dst = &ops_cfg->ops[idx];
+
+    dst->intent = json_get_int(point, "intent", 0);
+    dst->xlayer_map = parse_xlayer_map_array(
+        cJSON_GetObjectItemCaseSensitive(point, "xlayer_map"));
+
+    // Per-xlayer overrides within this OP: only the first MAX_NUM_XLAYERS
+    // elements are considered, and only numeric ones are stored.
+    const cJSON *counts =
+        cJSON_GetObjectItemCaseSensitive(point, "mlayer_count");
+    if (cJSON_IsArray(counts)) {
+      int slot = 0;
+      const cJSON *count = NULL;
+      cJSON_ArrayForEach(count, counts) {
+        if (slot >= MAX_NUM_XLAYERS) break;
+        if (cJSON_IsNumber(count)) dst->mlayer_count[slot] = count->valueint;
+        slot++;
+      }
+    }
+
+    dst->aggregate_level_idx = json_get_int(point, "aggregate_level_idx", -1);
+    dst->max_tier_flag = json_get_int(point, "max_tier_flag", -1);
+
+    // Warn about unknown keys in this operating point
+    snprintf(label, sizeof(label), "ops %d operating_points[%d]",
+             ops_cfg->ops_id, idx);
+    warn_unknown_keys(point, kPointKeys,
+                      sizeof(kPointKeys) / sizeof(kPointKeys[0]), label);
+    idx++;
+  }
+
+  // Warn about unknown keys in this OPS entry
+  snprintf(label, sizeof(label), "ops %d", ops_cfg->ops_id);
+  warn_unknown_keys(entry, kSetKeys, sizeof(kSetKeys) / sizeof(kSetKeys[0]),
+                    label);
+
+  return 0;
+}
+
+// Fill *cfg from the already-parsed JSON document `root`. Sections are
+// handled in this order: xlayers, global_lcr, local_lcr, msdo, ops, atlas,
+// inputs, source, then the scalar root options. Returns 0 on success and -1
+// (after printing an error) on failure. `root` is not freed here; the caller
+// keeps ownership.
+static int parse_multi_xlayer_root(const cJSON *root, MultiXLayerConfig *cfg) {
+  static const char *const kRootKeys[] = {
+    "xlayers",     "global_lcr",
+    "local_lcr",   "msdo",
+    "ops",         "atlas",
+    "inputs",      "source",
+    "combined_tu", "monotonic_output_order",
+    "frame_rate",  "limit",
+    "output",
+  };
+  static const char *const kGlobalLcrKeys[] = {
+    "enable",
+    "purpose_id",
+    "dependent_xlayers",
+    "doh_constraint",
+  };
+  static const char *const kLocalLcrKeys[] = { "enable", "mode" };
+  static const char *const kMsdoKeys[] = { "enable" };
+  static const char *const kAtlasKeys[] = {
+    "enable", "mode", "width", "height", "uniform_spacing",
+  };
+  static const char *const kInputKeys[] = {
+    "name",   "filename",  "width",      "height",
+    "format", "bit_depth", "frame_rate",
+  };
+  static const char *const kSourceKeys[] = { "filename", "width", "height" };
+
+  // xlayers: required array with 1 .. MAX_NUM_XLAYERS - 1 entries.
+  const cJSON *xlayer_list = cJSON_GetObjectItemCaseSensitive(root, "xlayers");
+  if (!cJSON_IsArray(xlayer_list)) {
+    fprintf(stderr, "Error: config missing \"xlayers\" array\n");
+    return -1;
+  }
+  cfg->num_xlayers = cJSON_GetArraySize(xlayer_list);
+  if (cfg->num_xlayers < 1 || cfg->num_xlayers > MAX_NUM_XLAYERS - 1) {
+    fprintf(stderr, "Error: num_xlayers %d out of range (1-%d)\n",
+            cfg->num_xlayers, MAX_NUM_XLAYERS - 1);
+    return -1;
+  }
+  for (int x = 0; x < cfg->num_xlayers; x++) {
+    const cJSON *xl = cJSON_GetArrayItem(xlayer_list, x);
+    if (parse_xlayer_entry(xl, &cfg->xlayers[x]) != 0) return -1;
+  }
+
+  // global_lcr (optional object)
+  const cJSON *global_lcr = cJSON_GetObjectItemCaseSensitive(root, "global_lcr");
+  if (cJSON_IsObject(global_lcr)) {
+    cfg->enable_global_lcr = json_get_bool(global_lcr, "enable", 1);
+    cfg->lcr_purpose_id = json_get_int(global_lcr, "purpose_id", 0);
+    cfg->lcr_dependent_xlayers_flag =
+        json_get_bool(global_lcr, "dependent_xlayers", 0);
+    cfg->lcr_doh_constraint_flag =
+        json_get_bool(global_lcr, "doh_constraint", 1);
+    warn_unknown_keys(global_lcr, kGlobalLcrKeys,
+                      sizeof(kGlobalLcrKeys) / sizeof(kGlobalLcrKeys[0]),
+                      "global_lcr");
+  }
+
+  // local_lcr (optional object); "mode" is 1 for "local_only", else 0.
+  const cJSON *local_lcr = cJSON_GetObjectItemCaseSensitive(root, "local_lcr");
+  if (cJSON_IsObject(local_lcr)) {
+    cfg->enable_local_lcr = json_get_bool(local_lcr, "enable", 0);
+    const char *lcr_mode = json_get_string(local_lcr, "mode", "both");
+    cfg->local_lcr_mode = (strcmp(lcr_mode, "local_only") == 0) ? 1 : 0;
+    warn_unknown_keys(local_lcr, kLocalLcrKeys,
+                      sizeof(kLocalLcrKeys) / sizeof(kLocalLcrKeys[0]),
+                      "local_lcr");
+  }
+
+  // msdo (optional object)
+  const cJSON *msdo = cJSON_GetObjectItemCaseSensitive(root, "msdo");
+  if (cJSON_IsObject(msdo)) {
+    cfg->enable_msdo = json_get_bool(msdo, "enable", 0);
+    warn_unknown_keys(msdo, kMsdoKeys,
+                      sizeof(kMsdoKeys) / sizeof(kMsdoKeys[0]), "msdo");
+  }
+
+  // ops (optional array); entries beyond MAX_NUM_OPS_ID are ignored.
+  const cJSON *ops_list = cJSON_GetObjectItemCaseSensitive(root, "ops");
+  if (cJSON_IsArray(ops_list)) {
+    cfg->num_ops_sets = cJSON_GetArraySize(ops_list);
+    if (cfg->num_ops_sets > MAX_NUM_OPS_ID) cfg->num_ops_sets = MAX_NUM_OPS_ID;
+    for (int o = 0; o < cfg->num_ops_sets; o++) {
+      const cJSON *ops_entry = cJSON_GetArrayItem(ops_list, o);
+      if (parse_ops_entry(ops_entry, &cfg->ops_sets[o]) != 0) return -1;
+    }
+  }
+
+  // atlas (optional object)
+  const cJSON *atlas = cJSON_GetObjectItemCaseSensitive(root, "atlas");
+  if (cJSON_IsObject(atlas)) {
+    cfg->enable_atlas = json_get_bool(atlas, "enable", 0);
+    cfg->atlas_mode = json_get_int(atlas, "mode", 0);
+    cfg->atlas_width = json_get_int(atlas, "width", 0);
+    cfg->atlas_height = json_get_int(atlas, "height", 0);
+    cfg->atlas_uniform_spacing = json_get_bool(atlas, "uniform_spacing", 1);
+    warn_unknown_keys(atlas, kAtlasKeys,
+                      sizeof(kAtlasKeys) / sizeof(kAtlasKeys[0]), "atlas");
+  }
+
+  // inputs: named input sources array (new format)
+  const cJSON *input_list = cJSON_GetObjectItemCaseSensitive(root, "inputs");
+  if (cJSON_IsArray(input_list)) {
+    const int num_inputs = cJSON_GetArraySize(input_list);
+    if (num_inputs > MAX_INPUT_SOURCES) {
+      fprintf(stderr, "Error: too many input sources (%d > %d)\n", num_inputs,
+              MAX_INPUT_SOURCES);
+      return -1;
+    }
+    cfg->num_input_sources = num_inputs;
+    for (int s = 0; s < num_inputs; s++) {
+      const cJSON *in = cJSON_GetArrayItem(input_list, s);
+      InputSourceConfig *dst = &cfg->input_sources[s];
+
+      const char *in_name = json_get_string(in, "name", NULL);
+      if (in_name) snprintf(dst->name, MAX_SOURCE_NAME_LEN, "%s", in_name);
+      const char *in_file = json_get_string(in, "filename", NULL);
+      if (in_file) snprintf(dst->filename, PATH_MAX, "%s", in_file);
+
+      dst->width = (unsigned int)json_get_int(in, "width", 0);
+      dst->height = (unsigned int)json_get_int(in, "height", 0);
+      dst->format = parse_chroma_format(json_get_string(in, "format", NULL));
+      dst->bit_depth = json_get_int(in, "bit_depth", 0);
+      if (json_parse_frame_rate(in, "frame_rate", &dst->frame_rate_num,
+                                &dst->frame_rate_den) != 0) {
+        return -1;
+      }
+
+      char in_label[64];
+      snprintf(in_label, sizeof(in_label), "inputs[%d]", s);
+      warn_unknown_keys(in, kInputKeys,
+                        sizeof(kInputKeys) / sizeof(kInputKeys[0]), in_label);
+    }
+  }
+
+  // source: shared source (legacy single-source format). It is mirrored into
+  // input_sources[0] under the name "default" for unified handling.
+  const cJSON *legacy = cJSON_GetObjectItemCaseSensitive(root, "source");
+  if (cJSON_IsObject(legacy)) {
+    if (cfg->num_input_sources > 0) {
+      fprintf(stderr, "Error: cannot specify both \"inputs\" and \"source\"\n");
+      return -1;
+    }
+    const char *legacy_file = json_get_string(legacy, "filename", NULL);
+    if (legacy_file) {
+      snprintf(cfg->source_filename, PATH_MAX, "%s", legacy_file);
+    }
+    cfg->source_width = (unsigned int)json_get_int(legacy, "width", 0);
+    cfg->source_height = (unsigned int)json_get_int(legacy, "height", 0);
+
+    cfg->num_input_sources = 1;
+    InputSourceConfig *first = &cfg->input_sources[0];
+    snprintf(first->name, MAX_SOURCE_NAME_LEN, "default");
+    if (legacy_file) snprintf(first->filename, PATH_MAX, "%s", legacy_file);
+    first->width = cfg->source_width;
+    first->height = cfg->source_height;
+    warn_unknown_keys(legacy, kSourceKeys,
+                      sizeof(kSourceKeys) / sizeof(kSourceKeys[0]), "source");
+  }
+
+  // Bitstream options
+  cfg->combined_tu = json_get_bool(root, "combined_tu", 1);
+  cfg->monotonic_output_order =
+      json_get_bool(root, "monotonic_output_order", 0);
+
+  // Root-level frame rate (used for aggregate level derivation); only a JSON
+  // number is honoured here.
+  const cJSON *root_fps = cJSON_GetObjectItemCaseSensitive(root, "frame_rate");
+  if (cJSON_IsNumber(root_fps)) cfg->frame_rate = root_fps->valuedouble;
+
+  // Limit (max frames to encode)
+  cfg->limit = json_get_int(root, "limit", 0);
+
+  const char *out_path = json_get_string(root, "output", NULL);
+  if (out_path) {
+    snprintf(cfg->output_filename, PATH_MAX, "%s", out_path);
+  }
+
+  // Warn about unknown root-level keys
+  warn_unknown_keys(root, kRootKeys, sizeof(kRootKeys) / sizeof(kRootKeys[0]),
+                    "root");
+  return 0;
+}
+
+// Load the multi-xlayer JSON configuration at `json_path` into *cfg.
+//
+// *cfg is first reset with xlayer_config_init(), then the file is read,
+// parsed with cJSON and converted section by section. Returns 0 on success
+// and -1 on any failure (unreadable file, invalid JSON, or a section that
+// fails validation); an error message is printed to stderr in each case.
+// The cJSON tree is always released before returning.
+int parse_multi_xlayer_config(const char *json_path, MultiXLayerConfig *cfg) {
+  xlayer_config_init(cfg);
+
+  char *text = read_file_contents(json_path);
+  if (text == NULL) return -1;
+
+  cJSON *root = cJSON_Parse(text);
+  free(text);
+  if (root == NULL) {
+    fprintf(stderr, "Error: failed to parse JSON in \"%s\"\n", json_path);
+    return -1;
+  }
+
+  const int status = parse_multi_xlayer_root(root, cfg);
+  cJSON_Delete(root);
+  return status;
+}
+
+// Return the index of the first entry in cfg->input_sources[] whose name
+// equals `name` exactly, or -1 when no entry matches.
+static int find_input_source_by_name(const MultiXLayerConfig *cfg,
+                                     const char *name) {
+  int idx = 0;
+  while (idx < cfg->num_input_sources) {
+    if (!strcmp(name, cfg->input_sources[idx].name)) return idx;
+    idx++;
+  }
+  return -1;
+}
+
+// Resolves, in order: each xlayer's input_source_idx, each mlayer's input
+// source and crop defaults, and every input source's frame_skip. Runs between
+// parse and validate. Returns 0 on success, -1 after printing to stderr.
+int resolve_input_sources(MultiXLayerConfig *cfg) {
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    XLayerEncConfig *xl = &cfg->xlayers[i];
+    xl->input_source_idx = -1;  // default: own file
+
+    // Skip xlayers with their own input file
+    if (xl->input_filename[0] != '\0') continue;
+
+    if (xl->input_source_name[0] != '\0') {
+      // Explicit source reference — look up by name
+      xl->input_source_idx =
+          find_input_source_by_name(cfg, xl->input_source_name);
+      if (xl->input_source_idx < 0) {
+        fprintf(stderr,
+                "Error: xlayer %d references unknown input_source \"%s\"\n",
+                xl->xlayer_id, xl->input_source_name);
+        return -1;
+      }
+    } else if (cfg->num_input_sources == 1) {
+      // Single input source — all unassigned xlayers use it
+      xl->input_source_idx = 0;
+    } else if (cfg->num_input_sources > 1) {
+      fprintf(stderr,
+              "Error: xlayer %d has no input or input_source, and multiple "
+              "input sources are defined\n",
+              xl->xlayer_id);
+      return -1;
+    }
+  }
+
+  // Resolve per-mlayer input sources. This runs before validation, so cap the
+  // index at MAX_NUM_MLAYERS to stay inside the range validate later enforces.
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    XLayerEncConfig *xl = &cfg->xlayers[i];
+    if (!xl->has_per_mlayer_sources) continue;
+    for (int m = 0; m < xl->num_embedded_layers && m < MAX_NUM_MLAYERS; m++) {
+      MLayerSourceConfig *ms = &xl->mlayer_sources[m];
+      if (ms->input_source_name[0] == '\0') {
+        // Inherit from xlayer
+        ms->input_source_idx = xl->input_source_idx;
+        if (ms->atlas_pos_x < 0) ms->atlas_pos_x = xl->atlas_pos_x;
+        if (ms->atlas_pos_y < 0) ms->atlas_pos_y = xl->atlas_pos_y;
+        if (ms->width == 0) ms->width = xl->width;
+        if (ms->height == 0) ms->height = xl->height;
+        continue;
+      }
+      // Look up source by name
+      ms->input_source_idx =
+          find_input_source_by_name(cfg, ms->input_source_name);
+      if (ms->input_source_idx < 0) {
+        fprintf(stderr,
+                "Error: xlayer %d mlayer %d references unknown input_source "
+                "\"%s\"\n",
+                xl->xlayer_id, m, ms->input_source_name);
+        return -1;
+      }
+      // Fill in crop defaults if not specified
+      if (ms->atlas_pos_x < 0) ms->atlas_pos_x = 0;
+      if (ms->atlas_pos_y < 0) ms->atlas_pos_y = 0;
+    }
+  }
+
+  // Resolve frame_skip for each input source based on frame rates.
+  // The master rate is the highest frame rate among all sources.
+  // Each source's rate must be an exact integer divisor of the master rate.
+  // Frame rates are rational (num/den) for exact arithmetic.
+  int max_idx = -1;
+  for (int s = 0; s < cfg->num_input_sources; s++) {
+    const int num = cfg->input_sources[s].frame_rate_num;
+    const int den = cfg->input_sources[s].frame_rate_den;
+    if (num <= 0) continue;  // unspecified rate
+    // A zero or negative denominator would corrupt the cross-multiplied
+    // comparisons below and blame the wrong source, so reject it up front.
+    if (den <= 0) {
+      fprintf(stderr,
+              "Error: input source \"%s\" has invalid frame_rate %d/%d\n",
+              cfg->input_sources[s].name, num, den);
+      return -1;
+    }
+    // Rational comparison: a/b > c/d iff a*d > c*b (denominators positive)
+    if (max_idx < 0 ||
+        (int64_t)num * cfg->input_sources[max_idx].frame_rate_den >
+            (int64_t)cfg->input_sources[max_idx].frame_rate_num * den) {
+      max_idx = s;
+    }
+  }
+
+  for (int s = 0; s < cfg->num_input_sources; s++) {
+    const int src_n = cfg->input_sources[s].frame_rate_num;
+    const int src_d = cfg->input_sources[s].frame_rate_den;
+    // Unspecified rate (or no rates at all): run at the master rate
+    cfg->input_sources[s].frame_skip = 1;
+    if (src_n <= 0) continue;
+    // A specified rate passed the loop above, so max_idx is valid here
+    const int max_n = cfg->input_sources[max_idx].frame_rate_num;
+    const int max_d = cfg->input_sources[max_idx].frame_rate_den;
+    // Ratio = (max_n/max_d) / (src_n/src_d) = (max_n * src_d) /
+    //         (max_d * src_n); ratio_den > 0 since max_d and src_n are > 0
+    const int64_t ratio_num = (int64_t)max_n * src_d;
+    const int64_t ratio_den = (int64_t)max_d * src_n;
+    // Must be an exact integer (ratio_den divides ratio_num evenly)
+    if (ratio_num % ratio_den != 0) {
+      fprintf(stderr,
+              "Error: input source \"%s\" frame_rate %d/%d is not an "
+              "exact divisor of the max frame_rate %d/%d\n",
+              cfg->input_sources[s].name, src_n, src_d, max_n, max_d);
+      return -1;
+    }
+    const int skip = (int)(ratio_num / ratio_den);
+    // Defensive: master >= source, so only int narrowing could yield < 1
+    if (skip < 1) {
+      fprintf(stderr,
+              "Error: input source \"%s\" frame_rate %d/%d exceeds the "
+              "max frame_rate %d/%d\n",
+              cfg->input_sources[s].name, src_n, src_d, max_n, max_d);
+      return -1;
+    }
+    cfg->input_sources[s].frame_skip = skip;
+  }
+
+  return 0;
+}
+
+// Resolve per-mlayer CI inheritance (-1 => parent xlayer's value). Call after
+// resolve_input_sources(). Loop capped in case the count is unvalidated.
+void resolve_mlayer_ci(MultiXLayerConfig *cfg) {
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    XLayerEncConfig *xl = &cfg->xlayers[i];
+    for (int m = 0; m < xl->num_embedded_layers && m < MAX_NUM_MLAYERS; m++) {
+      MLayerSourceConfig *ms = &xl->mlayer_sources[m];
+      if (ms->color_primaries == -1) ms->color_primaries = xl->color_primaries;
+      if (ms->transfer_characteristics == -1)
+        ms->transfer_characteristics = xl->transfer_characteristics;
+      if (ms->matrix_coefficients == -1)
+        ms->matrix_coefficients = xl->matrix_coefficients;
+      if (ms->full_range_flag == -1) ms->full_range_flag = xl->full_range_flag;
+    }
+  }
+}
+
+int validate_multi_xlayer_config(const MultiXLayerConfig *cfg) {
+  if (cfg->num_xlayers < 1) {
+    fprintf(stderr, "Error: must have at least 1 xlayer\n");
+    return -1;
+  }
+
+  // Check xlayer_ids are unique and in 0-30; seen[] is reused by the OPS check
+  int seen[MAX_NUM_XLAYERS] = { 0 };
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    int id = cfg->xlayers[i].xlayer_id;
+    if (id < 0 || id > 30) {
+      fprintf(stderr, "Error: xlayer %d has invalid xlayer_id %d\n", i, id);
+      return -1;
+    }
+    if (seen[id]) {
+      fprintf(stderr, "Error: duplicate xlayer_id %d\n", id);
+      return -1;
+    }
+    seen[id] = 1;
+  }
+
+  // Validate input source names are non-empty and pairwise distinct
+  for (int i = 0; i < cfg->num_input_sources; i++) {
+    if (cfg->input_sources[i].name[0] == '\0') {
+      fprintf(stderr, "Error: input source %d has no name\n", i);
+      return -1;
+    }
+    for (int j = i + 1; j < cfg->num_input_sources; j++) {
+      if (strcmp(cfg->input_sources[i].name, cfg->input_sources[j].name) == 0) {
+        fprintf(stderr, "Error: duplicate input source name \"%s\"\n",
+                cfg->input_sources[i].name);
+        return -1;
+      }
+    }
+  }
+
+  // Validate each xlayer has input (or input source) and dimensions
+  int has_shared_source =
+      (cfg->source_filename[0] != '\0' || cfg->num_input_sources > 0);
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    const XLayerEncConfig *xl = &cfg->xlayers[i];
+    if (xl->input_filename[0] == '\0' && xl->input_source_idx < 0 &&
+        !has_shared_source) {
+      fprintf(stderr,
+              "Error: xlayer %d missing input filename and no shared source\n",
+              xl->xlayer_id);
+      return -1;
+    }
+    // When the xlayer resolved to an input source (input_source_idx >= 0),
+    // non-negative atlas positions and non-zero dimensions are required
+    if (xl->input_source_idx >= 0) {
+      if (xl->atlas_pos_x < 0 || xl->atlas_pos_y < 0) {
+        fprintf(stderr,
+                "Error: xlayer %d requires atlas_pos_x/y when using input "
+                "source\n",
+                xl->xlayer_id);
+        return -1;
+      }
+      if (xl->width == 0 || xl->height == 0) {
+        fprintf(stderr,
+                "Error: xlayer %d requires width/height when using input "
+                "source\n",
+                xl->xlayer_id);
+        return -1;
+      }
+    }
+  }
+
+  // Per-source-group chroma check: xlayers sharing an input source must map to
+  // the same chroma class (<= MAIN_420_10_IP1: 0, MAIN_422_10_IP1: 1, else 2)
+  for (int s = 0; s < cfg->num_input_sources; s++) {
+    int ref_chroma = -1;
+    int ref_xlayer_id = -1;
+    unsigned int ref_profile = 0;
+    for (int i = 0; i < cfg->num_xlayers; i++) {
+      if (cfg->xlayers[i].input_source_idx != s) continue;
+      int chroma = (cfg->xlayers[i].profile <= MAIN_420_10_IP1)   ? 0
+                   : (cfg->xlayers[i].profile == MAIN_422_10_IP1) ? 1
+                                                                  : 2;
+      if (ref_chroma < 0) {
+        ref_chroma = chroma;
+        ref_xlayer_id = cfg->xlayers[i].xlayer_id;
+        ref_profile = cfg->xlayers[i].profile;
+      } else if (chroma != ref_chroma) {
+        fprintf(stderr,
+                "Error: xlayers sharing input source \"%s\" must use the "
+                "same chroma format (xlayer %d profile %u vs xlayer %d "
+                "profile %u)\n",
+                cfg->input_sources[s].name, ref_xlayer_id, ref_profile,
+                cfg->xlayers[i].xlayer_id, cfg->xlayers[i].profile);
+        return -1;
+      }
+    }
+  }
+
+  // Validate enabled OPS: every xlayer_map bit (0-30) must be a configured id
+  for (int s = 0; s < cfg->num_ops_sets; s++) {
+    const OPSConfig *ops = &cfg->ops_sets[s];
+    if (!ops->enable) continue;
+    for (int p = 0; p < ops->num_operating_points; p++) {
+      uint32_t xmap = ops->ops[p].xlayer_map;
+      for (int bit = 0; bit < 31; bit++) {
+        if (xmap & (1u << bit)) {
+          if (!seen[bit]) {
+            fprintf(stderr,
+                    "Error: OPS %d OP %d references xlayer_id %d which is "
+                    "not in the config\n",
+                    ops->ops_id, p, bit);
+            return -1;
+          }
+        }
+      }
+    }
+  }
+
+  // When monotonic_output_order is disabled, all xlayers must use the same
+  // coding structure (temporal layers, lag-in-frames, keyframe interval,
+  // sub-GOP config, and GOP mode) so that their output ordering is
+  // synchronized. Every xlayer is compared against xlayers[0].
+  if (!cfg->monotonic_output_order && cfg->num_xlayers > 1) {
+    const XLayerEncConfig *ref = &cfg->xlayers[0];
+    for (int i = 1; i < cfg->num_xlayers; i++) {
+      const XLayerEncConfig *xl = &cfg->xlayers[i];
+      if (xl->num_temporal_layers != ref->num_temporal_layers) {
+        fprintf(stderr,
+                "Error: monotonic_output_order=0 requires all xlayers to use "
+                "the same num_temporal_layers (xlayer %d has %d, xlayer %d has "
+                "%d)\n",
+                ref->xlayer_id, ref->num_temporal_layers, xl->xlayer_id,
+                xl->num_temporal_layers);
+        return -1;
+      }
+      if (xl->lag_in_frames != ref->lag_in_frames) {
+        fprintf(stderr,
+                "Error: monotonic_output_order=0 requires all xlayers to use "
+                "the same lag_in_frames (xlayer %d has %d, xlayer %d has %d)\n",
+                ref->xlayer_id, ref->lag_in_frames, xl->xlayer_id,
+                xl->lag_in_frames);
+        return -1;
+      }
+      if (xl->kf_max_dist != ref->kf_max_dist) {
+        fprintf(stderr,
+                "Error: monotonic_output_order=0 requires all xlayers to use "
+                "the same kf_max_dist (xlayer %d has %d, xlayer %d has %d)\n",
+                ref->xlayer_id, ref->kf_max_dist, xl->xlayer_id,
+                xl->kf_max_dist);
+        return -1;
+      }
+      if (strcmp(xl->subgop_config_path, ref->subgop_config_path) != 0) {
+        fprintf(stderr,
+                "Error: monotonic_output_order=0 requires all xlayers to use "
+                "the same subgop_config (xlayer %d has \"%s\", xlayer %d has "
+                "\"%s\")\n",
+                ref->xlayer_id, ref->subgop_config_path, xl->xlayer_id,
+                xl->subgop_config_path);
+        return -1;
+      }
+      if (xl->gop_mode != ref->gop_mode) {
+        fprintf(stderr,
+                "Error: monotonic_output_order=0 requires all xlayers to use "
+                "the same gop_mode (xlayer %d has %d, xlayer %d has %d)\n",
+                ref->xlayer_id, ref->gop_mode, xl->xlayer_id, xl->gop_mode);
+        return -1;
+      }
+    }
+  }
+
+  // Validate embedded layer count and (for multi-layer xlayers) scaling modes
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    const XLayerEncConfig *xl = &cfg->xlayers[i];
+    if (xl->num_embedded_layers < 1 ||
+        xl->num_embedded_layers > MAX_NUM_MLAYERS) {
+      fprintf(stderr,
+              "Error: xlayer %d num_embedded_layers %d out of range (1-%d)\n",
+              xl->xlayer_id, xl->num_embedded_layers, MAX_NUM_MLAYERS);
+      return -1;
+    }
+    if (xl->num_embedded_layers > 1) {
+      // Last layer must be full-resolution (AVME_NORMAL = 0)
+      if (xl->scaling_mode[xl->num_embedded_layers - 1] != AVME_NORMAL) {
+        fprintf(stderr,
+                "Error: xlayer %d scaling_mode[%d] must be 0 (full-res) for "
+                "the last embedded layer\n",
+                xl->xlayer_id, xl->num_embedded_layers - 1);
+        return -1;
+      }
+      // Validate all scaling mode values are in [AVME_NORMAL, AVME_ONETWO]
+      for (int m = 0; m < xl->num_embedded_layers; m++) {
+        if (xl->scaling_mode[m] < AVME_NORMAL ||
+            xl->scaling_mode[m] > AVME_ONETWO) {
+          fprintf(stderr,
+                  "Error: xlayer %d scaling_mode[%d]=%d out of range (0-%d)\n",
+                  xl->xlayer_id, m, xl->scaling_mode[m], AVME_ONETWO);
+          return -1;
+        }
+      }
+    }
+  }
+
+  // Validate per-embedded-layer source configuration (opted-in xlayers only)
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    const XLayerEncConfig *xl = &cfg->xlayers[i];
+    if (!xl->has_per_mlayer_sources && !xl->has_mlayer_dependencies) continue;
+    for (int m = 0; m < xl->num_embedded_layers; m++) {
+      const MLayerSourceConfig *ms = &xl->mlayer_sources[m];
+      // Per-mlayer source requires width, height, and crop coordinates
+      if (ms->input_source_name[0] != '\0' || ms->input_source_idx >= 0) {
+        if (ms->width == 0 || ms->height == 0) {
+          fprintf(stderr,
+                  "Error: xlayer %d mlayer %d requires width/height when "
+                  "using per-mlayer input source\n",
+                  xl->xlayer_id, m);
+          return -1;
+        }
+        if (ms->atlas_pos_x < 0 || ms->atlas_pos_y < 0) {
+          fprintf(stderr,
+                  "Error: xlayer %d mlayer %d requires atlas_pos_x/y when "
+                  "using per-mlayer input source\n",
+                  xl->xlayer_id, m);
+          return -1;
+        }
+      }
+      // dependency_mask validation (a negative mask is skipped entirely)
+      if (ms->dependency_mask >= 0) {
+        if (m == 0 && ms->dependency_mask != 0) {
+          fprintf(stderr,
+                  "Error: xlayer %d mlayer 0 cannot depend on any lower "
+                  "mlayer (depends_on must be empty)\n",
+                  xl->xlayer_id);
+          return -1;
+        }
+        // Reject any set bit j >= m: mlayer m may reference lower mlayers only
+        for (int j = m; j < MAX_NUM_MLAYERS; j++) {
+          if (ms->dependency_mask & (1 << j)) {
+            fprintf(stderr,
+                    "Error: xlayer %d mlayer %d depends_on references "
+                    "mlayer %d (must be < %d)\n",
+                    xl->xlayer_id, m, j, m);
+            return -1;
+          }
+        }
+      }
+    }
+  }
+
+  // Validate per-mlayer CI values are in valid CICP ranges (-1 is accepted)
+  for (int i = 0; i < cfg->num_xlayers; i++) {
+    const XLayerEncConfig *xl = &cfg->xlayers[i];
+    for (int m = 0; m < xl->num_embedded_layers; m++) {
+      const MLayerSourceConfig *ms = &xl->mlayer_sources[m];
+      if (ms->color_primaries != -1 &&
+          (ms->color_primaries < 0 || ms->color_primaries > 255)) {
+        fprintf(stderr,
+                "Error: xlayer %d mlayer %d color_primaries %d out of range "
+                "(0-255)\n",
+                xl->xlayer_id, m, ms->color_primaries);
+        return -1;
+      }
+      if (ms->transfer_characteristics != -1 &&
+          (ms->transfer_characteristics < 0 ||
+           ms->transfer_characteristics > 255)) {
+        fprintf(stderr,
+                "Error: xlayer %d mlayer %d transfer_characteristics %d out "
+                "of range (0-255)\n",
+                xl->xlayer_id, m, ms->transfer_characteristics);
+        return -1;
+      }
+      if (ms->matrix_coefficients != -1 &&
+          (ms->matrix_coefficients < 0 || ms->matrix_coefficients > 255)) {
+        fprintf(stderr,
+                "Error: xlayer %d mlayer %d matrix_coefficients %d out of "
+                "range (0-255)\n",
+                xl->xlayer_id, m, ms->matrix_coefficients);
+        return -1;
+      }
+      if (ms->full_range_flag != -1 &&
+          (ms->full_range_flag < 0 || ms->full_range_flag > 1)) {
+        fprintf(stderr,
+                "Error: xlayer %d mlayer %d full_range_flag %d must be 0 or "
+                "1\n",
+                xl->xlayer_id, m, ms->full_range_flag);
+        return -1;
+      }
+    }
+  }
+
+  // GOP mode constraint: gop_mode 1 (open_leading) needs non-monotonic output
+  if (cfg->monotonic_output_order) {
+    for (int i = 0; i < cfg->num_xlayers; i++) {
+      if (cfg->xlayers[i].gop_mode == 1) {
+        fprintf(stderr,
+                "Error: gop_mode \"open_leading\" is not allowed with "
+                "monotonic_output_order=1 (xlayer %d). Leading OBUs require "
+                "non-monotonic output.\n",
+                cfg->xlayers[i].xlayer_id);
+        return -1;
+      }
+    }
+  }
+
+  return 0;
+}
diff --git a/common/xlayer_config_parse.h b/common/xlayer_config_parse.h
new file mode 100644
index 0000000000..325d830a20
--- /dev/null
+++ b/common/xlayer_config_parse.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#ifndef AVM_COMMON_XLAYER_CONFIG_PARSE_H_
+#define AVM_COMMON_XLAYER_CONFIG_PARSE_H_
+
+#include "common/xlayer_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Parse a JSON configuration file for multi-xlayer encoding.
+// Returns 0 on success, -1 on error (with message printed to stderr).
+int parse_multi_xlayer_config(const char *json_path, MultiXLayerConfig *cfg);
+
+// Resolve named input sources to indices for each xlayer and mlayer, and
+// derive each source's frame_skip. Must be called between parse and validate.
+// Returns 0 on success, -1 on error (with message printed to stderr).
+int resolve_input_sources(MultiXLayerConfig *cfg);
+
+// Resolve per-mlayer CI inheritance: any mlayer CI field (color_primaries,
+// transfer_characteristics, matrix_coefficients, full_range_flag) left at -1
+// takes the parent xlayer's value.  Call after resolve_input_sources().
+void resolve_mlayer_ci(MultiXLayerConfig *cfg);
+
+// Validate a parsed multi-xlayer configuration; the config is not modified.
+// Returns 0 if valid, -1 on the first error (with message printed to stderr).
+int validate_multi_xlayer_config(const MultiXLayerConfig *cfg);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AVM_COMMON_XLAYER_CONFIG_PARSE_H_
diff --git a/doc/multi_xlayer_encoding.md b/doc/multi_xlayer_encoding.md
new file mode 100644
index 0000000000..98a3454d78
--- /dev/null
+++ b/doc/multi_xlayer_encoding.md
@@ -0,0 +1,1255 @@
+# Multi-XLayer Encoding Guide
+
+This document describes how to use AVM's multi-xlayer encoding framework
+to encode multiple extended layers (xlayers) into a single combined
+bitstream. Xlayers enable use cases such as texture+depth, stereo video,
+subpicture tiling, and spatially scalable encoding with embedded layers.
+
+## Table of Contents
+
+- [Quick Start](#quick-start)
+- [CLI Usage](#cli-usage)
+- [JSON Configuration Reference](#json-configuration-reference)
+  - [Top-Level Fields](#top-level-fields)
+  - [XLayer Entry Fields](#xlayer-entry-fields)
+  - [Embedded Layers (MLayers)](#embedded-layers-mlayers)
+  - [Per-Embedded-Layer Configuration](#per-embedded-layer-configuration)
+  - [Global LCR](#global-lcr)
+  - [Local LCR](#local-lcr)
+  - [OPS (Operating Point Set)](#ops-operating-point-set)
+  - [Atlas](#atlas)
+  - [Input Sources](#input-sources)
+  - [Codec Controls](#codec-controls)
+  - [GOP Modes](#gop-modes)
+- [Use Cases and Examples](#use-cases-and-examples)
+  - [Texture + Depth](#texture--depth)
+  - [Stereo Video](#stereo-video)
+  - [Subpicture Tiling](#subpicture-tiling)
+  - [Subpicture with Auxiliary Layers](#subpicture-with-auxiliary-layers)
+  - [Spatial Scalability with Embedded Layers](#spatial-scalability-with-embedded-layers)
+  - [Mixed Embedded Layer Counts](#mixed-embedded-layer-counts)
+  - [Stereo via Embedded Layers](#stereo-via-embedded-layers)
+  - [Subpicture Tiling via Embedded Layers](#subpicture-tiling-via-embedded-layers)
+  - [Texture + Depth via Embedded Layers with XLayers](#texture--depth-via-embedded-layers-with-xlayers)
+- [GOP Mode and Output Order](#gop-mode-and-output-order)
+  - [Compatibility Matrix](#compatibility-matrix)
+  - [Closed GOP, Non-Monotonic (Multi-XLayer + Multi-MLayer)](#closed-gop-non-monotonic-multi-xlayer--multi-mlayer)
+  - [Closed GOP, Monotonic (Multi-XLayer + Multi-MLayer)](#closed-gop-monotonic-multi-xlayer--multi-mlayer)
+  - [Open Leading, Non-Monotonic (Multi-XLayer + Multi-MLayer)](#open-leading-non-monotonic-multi-xlayer--multi-mlayer)
+  - [Open SEF, Monotonic (Multi-XLayer + Multi-MLayer)](#open-sef-monotonic-multi-xlayer--multi-mlayer)
+- [Decoding](#decoding)
+- [Stream Demuxing](#stream-demuxing)
+- [Constraints and Validation](#constraints-and-validation)
+
+---
+
+## Quick Start
+
+1. Create a JSON configuration file describing your xlayers.
+2. Encode with `avmenc --xlayer-config config.json`.
+3. Decode with `avmdec --all-layers` to get all layers.
+
+Minimal two-layer example:
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5 }
+  ],
+  "output": "combined.obu"
+}
+```
+
+```bash
+avmenc --xlayer-config two_layer.json --limit=30
+avmdec --all-layers -o decoded.y4m combined.obu
+```
+
+---
+
+## CLI Usage
+
+### Encoder
+
+Multi-xlayer encoding is triggered by passing `--xlayer-config`:
+
+```bash
+avmenc --xlayer-config <path-to-json> [--limit=N] [--framerate=N/D]
+```
+
+When `--xlayer-config` is provided, the encoder ignores the normal
+single-stream arguments (input file, `--width`, `--height`, etc.) and
+reads all configuration from the JSON file. Standard arguments that are
+still honored:
+
+| Argument | Effect |
+|----------|--------|
+| `--limit=N` | Encode at most N source frames |
+| `--framerate=N/D` | Override timebase for all xlayers |
+
+### Decoder
+
+```bash
+avmdec --all-layers -o output.y4m input.obu
+avmdec --all-layers --num-streams=N -o output_%d.y4m input.obu
+avmdec --all-layers --atlas-composite --xlayer-config config.json -o composite.y4m input.obu
+```
+
+| Flag | Purpose |
+|------|---------|
+| `--all-layers` | Output all decoded frames (all xlayers, all mlayers) |
+| `--num-streams=N` | Split output into N separate files (`output_0.y4m`, `output_1.y4m`, ...) |
+| `--xlayer-config` | Provide atlas layout for `--atlas-composite` |
+| `--atlas-composite` | Composite decoded xlayers onto an atlas canvas using the layout from the config |
+
+---
+
+## JSON Configuration Reference
+
+### Top-Level Fields
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `xlayers` | array | *required* | Array of xlayer entries (1-31) |
+| `inputs` | array | `[]` | Named input sources (see [Input Sources](#input-sources)) |
+| `source` | object | | Legacy single shared source (converted to `inputs[0]` internally) |
+| `output` | string | `""` | Output bitstream path |
+| `combined_tu` | bool | `true` | Combine all xlayer OBUs into shared TUs |
+| `monotonic_output_order` | bool | `false` | Encoder outputs frames in monotonic order |
+| `frame_rate` | number | `0` | Frame rate for aggregate level derivation (0 = use encoder timebase) |
+
+### XLayer Entry Fields
+
+Each entry in the `xlayers` array configures one extended layer:
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `xlayer_id` | int | *required* | Unique ID, 0-30 |
+| `input` | string | `""` | Input file path (Y4M or raw YUV). Not needed if using `input_source` or single `inputs` entry. |
+| `input_source` | string | `""` | Reference to a named input source from `inputs` array. |
+| `width` | int | 0 | Frame width (required for raw YUV or shared source) |
+| `height` | int | 0 | Frame height |
+| `profile` | int | 3 | AV2 profile (0-3 = Main 4:2:0 10-bit variants, 4 = Main 4:2:2 10-bit, 5 = Main 4:4:4 10-bit) |
+| `tier` | int | 0 | Tier |
+| `level` | int | 16 | Level index (e.g. 16 = Level 4.0) |
+| `layer_type` | string | `"texture"` | `"texture"`, `"auxiliary"`, `"stereo"`, or `"dependent"` |
+| `auxiliary_type` | string | `"alpha"` | Only when `layer_type` is `"auxiliary"`: `"alpha"`, `"depth"`, `"segmentation"`, `"gain_map"` |
+| `view_type` | string | `"unspecified"` | `"unspecified"`, `"center"`, `"left"`, `"right"`, `"explicit"` |
+| `qp` | int | -1 | Fixed QP (0-255). -1 = use global default. |
+| `bitrate` | int | -1 | Target bitrate in kbps. -1 = use QP mode. |
+| `cpu_used` | int | -1 | Encoder speed preset (0=slowest, 9=fastest). -1 = default (5). |
+| `lag_in_frames` | int | -1 | Lookahead buffer size. -1 = default. |
+| `kf_max_dist` | int | -1 | Maximum keyframe interval. -1 = default. |
+| `subgop_config` | string | `""` | Path to sub-GOP JSON config file |
+| `gop_mode` | string | `"closed"` | `"closed"`, `"open_leading"`, or `"open_sef"` |
+| `fwd_kf_enabled` | int | -1 | Forward keyframe override. -1 = derive from `gop_mode`. |
+| `enable_keyframe_filtering` | int | -1 | KF filtering override. -1 = derive. |
+| `add_sef_for_hidden_frames` | int | -1 | SEF for hidden frames override. -1 = derive. |
+| `num_temporal_layers` | int | 1 | Number of temporal layers (1-8) |
+| `num_embedded_layers` | int | 1 | Number of spatial embedded layers (1-8) |
+| `scaling_mode` | array | auto | Scaling mode per embedded layer (see [Embedded Layers](#embedded-layers-mlayers)) |
+| `embedded_layers` | array | | Per-embedded-layer configuration (see [Per-Embedded-Layer Configuration](#per-embedded-layer-configuration)) |
+| `color_primaries` | int | -1 | Color primaries. -1 = not signaled. |
+| `transfer_characteristics` | int | -1 | Transfer characteristics. -1 = not signaled. |
+| `matrix_coefficients` | int | -1 | Matrix coefficients. -1 = not signaled. |
+| `full_range_flag` | int | -1 | Full range flag. -1 = not signaled. |
+| `atlas_pos_x` | int | -1 | X position in atlas canvas (required for shared source mode) |
+| `atlas_pos_y` | int | -1 | Y position in atlas canvas |
+| `codec_controls` | array | `[]` | Generic codec controls applied after encoder init (see [Codec Controls](#codec-controls)) |
+
+### Embedded Layers (MLayers)
+
+Each xlayer can independently encode multiple spatial embedded layers
+(mlayers). The encoder is called once per mlayer for each source frame,
+with the appropriate scaling mode and mlayer ID set before each call.
+The encoder internally rescales the source image.
+
+**Configuration:**
+
+```json
+{
+  "xlayer_id": 0,
+  "num_embedded_layers": 3,
+  "scaling_mode": ["1/4", "1/2", "1:1"]
+}
+```
+
+The `scaling_mode` array specifies the spatial scale for each embedded
+layer, from smallest to largest. The last entry must always be `"1:1"`
+(full resolution).
+
+**Scaling mode values:**
+
+| String | Integer | Scale Factor |
+|--------|---------|-------------|
+| `"1:1"` or `"normal"` | 0 | Full resolution |
+| `"4/5"` | 1 | 4/5 scale |
+| `"3/5"` | 2 | 3/5 scale |
+| `"3/4"` | 3 | 3/4 scale |
+| `"1/4"` | 4 | 1/4 scale |
+| `"1/8"` | 5 | 1/8 scale |
+| `"1/2"` | 6 | 1/2 scale |
+
+Both string and integer values are accepted in JSON.
+
+**Default derivation:** When `num_embedded_layers > 1` and `scaling_mode`
+is omitted, defaults are derived automatically:
+
+| Layers | Default `scaling_mode` |
+|--------|----------------------|
+| 2 | `["1/2", "1:1"]` |
+| 3 | `["1/4", "1/2", "1:1"]` |
+
+**LCR signaling:** For each non-full-resolution mlayer, the LCR OBU
+signals `lcr_same_sh_max_resolution_flag = 0` with
+`lcr_max_expected_width/height` set to the xlayer's full resolution
+(not the scaled size). This is because the encoder may produce
+full-resolution frames (e.g., on keyframes), so the LCR must signal
+the maximum possible dimensions. Full-resolution mlayers signal
+`lcr_same_sh_max_resolution_flag = 1`.
+
+#### Per-Embedded-Layer Configuration
+
+When different embedded layers need genuinely different input content
+(e.g., stereo views, subpicture tiles, overlay+base), use the
+`"embedded_layers"` array to configure each mlayer independently.
+
+```json
+{
+  "xlayer_id": 0,
+  "num_embedded_layers": 2,
+  "embedded_layers": [
+    { "scaling_mode": "1/2", "input_source": "left",
+      "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+      "depends_on": [] },
+    { "scaling_mode": "1:1", "input_source": "right",
+      "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+      "depends_on": [0] }
+  ]
+}
+```
+
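+Each entry in `"embedded_layers"` corresponds to one mlayer (in order). All fields are optional — omitted fields inherit from the parent xlayer,
+except that an entry naming its own `input_source` must specify `width` and `height`, and its omitted `atlas_pos_x`/`atlas_pos_y` default to 0.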
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `scaling_mode` | string/int | inherit | Encoder-internal scaling for this mlayer |
+| `input_source` | string | inherit | Named input source for this mlayer |
+| `atlas_pos_x` | int | inherit | Crop origin X within source |
+| `atlas_pos_y` | int | inherit | Crop origin Y within source |
+| `width` | int | inherit | Crop width |
+| `height` | int | inherit | Crop height |
+| `depends_on` | array of int | default linear | Which lower mlayer indices this depends on for inter-layer prediction |
+| `color_primaries` | int | inherit | CICP color primaries (0-255), inherits from xlayer if omitted |
+| `transfer_characteristics` | int | inherit | CICP transfer characteristics (0-255), inherits from xlayer if omitted |
+| `matrix_coefficients` | int | inherit | CICP matrix coefficients (0-255), inherits from xlayer if omitted |
+| `full_range_flag` | int | inherit | 0=limited range, 1=full range, inherits from xlayer if omitted |
+
+**`depends_on` semantics:**
+- Absent: default linear chain (each mlayer depends on all lower mlayers)
+- `[]`: independent (no inter-layer prediction)
+- `[0]`: depends only on mlayer 0
+- `[0, 1]`: depends on mlayers 0 and 1
+
+**Mutual exclusion:** `"embedded_layers"` and the flat `"scaling_mode"`
+array cannot both be present on the same xlayer entry. Use one or the
+other.
+
+**Backward compatibility:** When `"embedded_layers"` is absent, all
+behavior is unchanged — the existing flat `"scaling_mode"` array and
+default scaling derivation work as before.
+
+#### Content Interpretation (CI) Per MLayer
+
+Each embedded layer can have its own Content Interpretation (CI) OBU with
+distinct CICP color properties. This is useful when different mlayers carry
+content with different color characteristics (e.g., HDR base layer with
+SDR enhancement, or depth with different matrix coefficients).
+
+**Inheritance rules:**
+1. If an mlayer omits a CI field (or sets it to `-1`), it inherits
+   from the parent xlayer's value.
+2. At the bitstream level, if an mlayer's resolved CI is identical to the CI
+   of the first layer it depends on (via `depends_on`), the CI OBU is omitted —
+   the decoder inherits automatically.
+3. CI is written at every random access point (CLK) for each mlayer that
+   has distinct CI.
+4. CI must not change within a coded video sequence (CVS).
+
+**Example:** Stereo with different color primaries per view:
+```json
+{
+  "xlayer_id": 0,
+  "color_primaries": 1,
+  "transfer_characteristics": 1,
+  "matrix_coefficients": 1,
+  "num_embedded_layers": 2,
+  "embedded_layers": [
+    { "scaling_mode": "1/2", "color_primaries": 9,
+      "transfer_characteristics": 16, "matrix_coefficients": 9 },
+    { "scaling_mode": "1:1" }
+  ]
+}
+```
+
+In this example, mlayer 0 uses BT.2020/PQ (CICP 9/16/9) while mlayer 1
+inherits BT.709 (CICP 1/1/1) from the xlayer. Each gets its own CI OBU
+in the bitstream with the correct `obu_layer` value.
+
+### Global LCR
+
+The Global Layer Configuration Record describes the overall multi-layer
+structure in the bitstream.
+
+```json
+"global_lcr": {
+  "enable": true,
+  "purpose_id": 0,
+  "dependent_xlayers": false,
+  "doh_constraint": true
+}
+```
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enable` | bool | `true` | Write a Global LCR OBU |
+| `purpose_id` | int | 0 | LCR purpose (0=unspecified, 6=multiview, etc.) |
+| `dependent_xlayers` | bool | `false` | Signal dependent xlayers |
+| `doh_constraint` | bool | `true` | Decode order hint constraint |
+
+### Local LCR
+
+Local LCR OBUs provide per-xlayer layer configuration.
+
+```json
+"local_lcr": {
+  "enable": true,
+  "mode": "both"
+}
+```
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enable` | bool | `false` | Write Local LCR OBUs |
+| `mode` | string | `"both"` | `"both"` = Global + Local with identical xlayer_info; `"local_only"` = Global without payload, Local is authoritative |
+
+### OPS (Operating Point Set)
+
+Operating points define subsets of the bitstream that can be
+independently decoded. Each operating point specifies which xlayers
+(and optionally how many mlayers per xlayer) are included.
+
+```json
+"ops": [
+  {
+    "ops_id": 0,
+    "priority": 0,
+    "intent_present": true,
+    "ptl_present": true,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0], "mlayer_count": [1] },
+      { "intent": 1, "xlayer_map": [0], "mlayer_count": [3] },
+      { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [3, 1] }
+    ]
+  }
+]
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `ops_id` | int | OPS identifier (0-15) |
+| `priority` | int | OPS priority |
+| `intent_present` | bool | Signal intent per operating point |
+| `ptl_present` | bool | Signal profile/tier/level per operating point |
+| `color_info_present` | bool | Signal color info per operating point |
+| `mlayer_info_idc` | int | Mlayer info mode (0=none, 1=same, 2=explicit) |
+| `operating_points` | array | Array of operating point definitions |
+
+Each operating point entry:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `intent` | int | Display intent |
+| `xlayer_map` | array | List of xlayer IDs included in this OP |
+| `mlayer_count` | array | Number of embedded layers per xlayer in this OP (0=all) |
+
+### Atlas
+
+Atlas signaling describes how xlayers are spatially composed into a
+single canvas.
+
+```json
+"atlas": {
+  "enable": true,
+  "mode": 0,
+  "width": 1920,
+  "height": 1080,
+  "uniform_spacing": false
+}
+```
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enable` | bool | `false` | Write Atlas OBU |
+| `mode` | int | 0 | Atlas mode |
+| `width` | int | 0 | Canvas width (0 = derive from xlayers) |
+| `height` | int | 0 | Canvas height (0 = derive from xlayers) |
+| `uniform_spacing` | bool | `true` | Auto-grid (`true`) or explicit positions (`false`) |
+
+When `uniform_spacing` is `false`, each xlayer must specify `atlas_pos_x`
+and `atlas_pos_y`.
+
+### Input Sources
+
+Define multiple named input sequences. Each xlayer references one by
+name and specifies crop coordinates within that input. The same input
+can feed multiple xlayers with different crop regions.
+
+```json
+"inputs": [
+  { "name": "texture", "filename": "video.yuv", "width": 1920, "height": 1080 },
+  { "name": "alpha", "filename": "alpha.yuv", "width": 1920, "height": 1080,
+    "format": "yuv420", "bit_depth": 8 }
+],
+"xlayers": [
+  { "xlayer_id": 0, "input_source": "texture", "width": 960, "height": 540,
+    "atlas_pos_x": 0, "atlas_pos_y": 0, ... },
+  { "xlayer_id": 1, "input_source": "texture", "width": 960, "height": 540,
+    "atlas_pos_x": 960, "atlas_pos_y": 0, ... },
+  { "xlayer_id": 2, "input_source": "alpha", "width": 960, "height": 540,
+    "atlas_pos_x": 0, "atlas_pos_y": 0, "layer_type": "auxiliary",
+    "auxiliary_type": "alpha", ... }
+]
+```
+
+Each input source entry:
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `name` | string | *required* | Unique name to reference from xlayers |
+| `filename` | string | *required* | Input file path (Y4M or raw YUV) |
+| `width` | int | 0 | Frame width (required for raw YUV, 0 = auto-detect from Y4M) |
+| `height` | int | 0 | Frame height (required for raw YUV, 0 = auto-detect from Y4M) |
+| `format` | string | `""` | Chroma format: `"yuv420"`, `"yuv422"`, `"yuv444"` (auto-detected for Y4M) |
+| `bit_depth` | int | 0 | Input bit depth (0 = auto-detect from Y4M, or default 8 for raw) |
+| `frame_rate` | number or string | 0 | Frame rate as a number (e.g. `30`, `29.97`) or rational string (e.g. `"30000/1001"`). 0 = auto-detect from Y4M or use global timebase. |
+
+**Input selection priority** (how each xlayer's input is resolved, checked in order):
+1. `"input"` — xlayer reads its own file, no shared source
+2. `"input_source"` — references a named input from `"inputs"`, uses
+   `atlas_pos_x/y` as crop origin within that input
+3. If neither is set and `"inputs"` has exactly 1 entry, all xlayers
+   use that input
+4. If neither is set and `"inputs"` has multiple entries, validation error
+
+When an xlayer uses an input source:
+- `atlas_pos_x` / `atlas_pos_y` are required (used as the crop origin)
+- `width` / `height` are required (used as the crop size)
+- All xlayers sharing the same input source must use the same chroma
+  format (profile)
+
+**Mixed mode:** Some xlayers can use `input_source` (shared sources)
+while others use `input` (own files) in the same config.
+
+**Backward compatibility:** The old single `"source"` object is
+internally converted to `"inputs"` with a single entry named
+`"default"`:
+
+```json
+{ "source": { "filename": "v.yuv", "width": 1920, "height": 1080 } }
+```
+
+is equivalent to:
+
+```json
+{ "inputs": [{ "name": "default", "filename": "v.yuv", "width": 1920, "height": 1080 }] }
+```
+
+`"inputs"` and `"source"` cannot both be present.
+
+### Codec Controls
+
+Generic codec controls allow per-xlayer override of encoder settings
+that are normally only accessible via CLI flags. Controls are applied
+after encoder initialization via `avm_codec_control()`.
+
+```json
+{
+  "xlayer_id": 0,
+  "codec_controls": [
+    ["enable_deblocking", 0],
+    ["enable_cdef", 0],
+    ["enable_restoration", 0],
+    ["enable_tpl_model", 0],
+    ["enable_keyframe_filtering", 0],
+    ["enable_global_motion", 0],
+    ["enable_warped_motion", 0]
+  ]
+}
+```
+
+Each entry is a `[name, value]` pair where `name` is a string matching
+the codec control name and `value` is an integer. Supported control
+names map directly to the `AV2E_SET_*` codec control IDs:
+
+| Control Name | CLI Equivalent | Description |
+|-------------|----------------|-------------|
+| `enable_deblocking` | `--enable-deblocking` | Deblocking filter |
+| `enable_cdef` | `--enable-cdef` | CDEF filter |
+| `enable_restoration` | `--enable-restoration` | Loop restoration |
+| `enable_tpl_model` | `--enable-tpl-model` | Temporal dependency model |
+| `enable_keyframe_filtering` | `--enable-keyframe-filtering` | Keyframe filtering |
+| `enable_global_motion` | `--enable-global-motion` | Global motion estimation |
+| `enable_warped_motion` | `--enable-warped-motion` | Warped motion compensation |
+| `enable_intrabc` | `--enable-intrabc` | Intra block copy |
+| `enable_palette` | `--enable-palette` | Palette mode |
+| `enable_interintra_comp` | `--enable-interintra-comp` | Inter-intra compound |
+
+This is particularly useful for creating fast debug configurations
+that disable expensive coding tools. See the `*_fast.json` configs
+in `cfg/xlayer/` for examples.
+
+### GOP Modes
+
+The `gop_mode` field controls the Group of Pictures structure, which
+determines how keyframes and reference frames are managed across GOP
+boundaries. Three modes are available:
+
+#### `"closed"` (default)
+
+Closed GOP: each GOP begins with a Closed Loop Key (CLK) frame that
+resets all reference buffers. No inter-prediction is possible across
+GOP boundaries. Works with both monotonic and non-monotonic output.
+
+```json
+{ "gop_mode": "closed" }
+```
+
+Derived settings: `fwd_kf_enabled = 0`, `enable_keyframe_filtering = 0`,
+`add_sef_for_hidden_frames = 0`.
+
+#### `"open_leading"` (non-monotonic only)
+
+Open GOP with Open Loop Key (OLK) and leading pictures. The forward
+keyframe is coded as a KEY_FRAME (OLK OBU) at the GOP boundary. An OLK
+can be either **displayed** (implicit output — the decoder reorders it
+to the correct display position) or **hidden** (followed by an overlay
+or SEF in the same temporal unit). By default in this mode, the OLK is
+displayed; setting `enable_keyframe_filtering` to 2 makes it hidden
+with a filtered overlay. Frames before the OLK in display order but
+after it in coding order are "leading pictures" (LEADING_TILE_GROUP
+OBUs).
+
+This mode requires `lag_in_frames > 0` (for the lookahead needed to
+code the forward keyframe) and is incompatible with
+`monotonic_output_order: true` (leading OBUs require non-monotonic output).
+
+```json
+{ "gop_mode": "open_leading", "lag_in_frames": 19, "kf_max_dist": 9 }
+```
+
+**Important constraints:**
+- OLK OBUs cannot be in the same temporal unit as leading OBUs. TUs
+  with leading OBUs contain only leading VCL OBUs.
+- The OLK designation is at the **temporal unit level**, not the frame
+  level. Higher embedded layers in an OLK TU can be inter OBUs.
+- `enable_keyframe_filtering` is independent of GOP mode and defaults
+  to 0. When set to 2, the OLK is hidden and a filtered overlay frame
+  is produced in the same TU. When 0, the OLK is displayed directly.
+
+Derived settings: `fwd_kf_enabled = 1`, `enable_keyframe_filtering = 0`,
+`add_sef_for_hidden_frames = 0`.
+
+#### `"open_sef"` (monotonic compatible)
+
+Open GOP with hidden intra frame and SEF output. When combined with
+`monotonic_output_order: true`, the forward keyframe is coded as a
+hidden INTRA_ONLY_FRAME instead of KEY_FRAME. This preserves reference
+buffers across the GOP boundary (no reset), enabling inter-prediction
+from frames before the boundary. The hidden intra frame is later shown
+via the Show Existing Frame (SEF) mechanism.
+
+This mode requires `lag_in_frames > 0` for the lookahead. When
+monotonic output is enabled, the `intra_only_fwd_kf` control is
+automatically set.
+
+```json
+{
+  "gop_mode": "open_sef",
+  "lag_in_frames": 19,
+  "kf_max_dist": 9
+}
+```
+
+Derived settings: `fwd_kf_enabled = 1`, `enable_keyframe_filtering = 0`,
+`add_sef_for_hidden_frames = 1`. When `monotonic_output_order` is also
+`true`: `intra_only_fwd_kf = 1`.
+
+**INTRA_ONLY_FRAME vs KEY_FRAME:** An INTRA_ONLY_FRAME is intra-coded
+(no inter-prediction within the frame) but does NOT reset reference
+buffers, frame number, or reference frame mappings. Subsequent frames
+can still reference frames from before the GOP boundary. A KEY_FRAME,
+in contrast, resets all reference state, creating a clean random access
+point.
+
+#### Multi-Mlayer Keyframe Management
+
+For xlayers with multiple embedded layers (mlayers):
+
+- **With `lag_in_frames = 0`:** The encoder-internal keyframe placement
+  is disabled (`kf_mode = AVM_KF_DISABLED`) because the encoder's
+  keyframe counter advances per encode call, not per temporal unit. The
+  xlayer encode loop manages keyframes externally via `AVM_EFLAG_FORCE_KF`
+  on independent mlayers (those with `depends_on: []`, or mlayer 0 when
+  `depends_on` is absent).
+
+- **With `lag_in_frames > 0`:** The encoder uses `multi_layers_lag_test`
+  which fixes the per-encode-call keyframe counter and enables internal
+  forward keyframe support for multi-mlayer encoding. This is required
+  for `gop_mode: "open_leading"` and `gop_mode: "open_sef"` with
+  multiple embedded layers.
+
+**Multi-rate encoding:** Input sources can have different frame rates.
+The encoder uses the highest frame rate as the master rate and encodes
+at that cadence. Lower-rate sources must have frame rates that are exact
+integer divisors of the master rate. On temporal units where a source is
+not active, its xlayers are skipped.
+
+Frame rates are stored internally as rational numbers (`num/den`) to
+avoid floating-point precision issues. The JSON accepts both numeric
+values (e.g. `30`, `29.97`) and rational strings (e.g. `"30000/1001"`).
+Common conversions:
+
+| JSON value | Internal `num/den` |
+|------------|-------------------|
+| `60` | 60/1 |
+| `30` | 30/1 |
+| `29.97` | 30000/1001 |
+| `23.976` | 24000/1001 |
+| `"30000/1001"` | 30000/1001 |
+
+Example with 60 fps texture and 15 fps depth (depth encodes every 4th TU):
+
+```json
+{
+  "inputs": [
+    { "name": "texture", "filename": "video.yuv", "width": 1920, "height": 1080,
+      "frame_rate": 60 },
+    { "name": "depth", "filename": "depth.yuv", "width": 1920, "height": 1080,
+      "frame_rate": 15 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "texture", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0, "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "input_source": "depth", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5 }
+  ],
+  "output": "multi_rate.obu"
+}
+```
+
+---
+
+### Texture + Depth
+
+Encode a texture layer and a depth map as two independent xlayers:
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "layer_type": "texture", "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5 }
+  ],
+  "global_lcr": { "enable": true, "purpose_id": 0, "doh_constraint": true },
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0] },
+      { "intent": 1, "xlayer_map": [0, 1] }
+    ]
+  }],
+  "output": "texture_depth.obu"
+}
+```
+
+See: `cfg/xlayer/texture_depth_2layer.json`
+
+### Stereo Video
+
+Encode left and right views as separate xlayers (simulcast). Each
+view is encoded independently — there is no inter-layer prediction
+between views. For stereo with inter-layer prediction, see
+[Stereo via Embedded Layers](#stereo-via-embedded-layers).
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "left.y4m", "width": 1920, "height": 1080,
+      "layer_type": "stereo", "view_type": "left", "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "input": "right.y4m", "width": 1920, "height": 1080,
+      "layer_type": "stereo", "view_type": "right", "qp": 128, "cpu_used": 5 }
+  ],
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0] },
+      { "intent": 1, "xlayer_map": [0, 1] }
+    ]
+  }],
+  "output": "stereo.obu"
+}
+```
+
+See: `cfg/xlayer/stereo_2layer.json`
+
+### Subpicture Tiling
+
+Tile a 1920x1080 frame into 4 quadrants from a single input source:
+
+```json
+{
+  "inputs": [
+    { "name": "default", "filename": "video.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "width": 960, "height": 540,
+      "atlas_pos_x": 0, "atlas_pos_y": 0, "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "width": 960, "height": 540,
+      "atlas_pos_x": 960, "atlas_pos_y": 0, "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 2, "width": 960, "height": 540,
+      "atlas_pos_x": 0, "atlas_pos_y": 540, "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 3, "width": 960, "height": 540,
+      "atlas_pos_x": 960, "atlas_pos_y": 540, "qp": 128, "cpu_used": 5 }
+  ],
+  "atlas": { "enable": true, "width": 1920, "height": 1080,
+             "uniform_spacing": false },
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0] },
+      { "intent": 1, "xlayer_map": [0, 1, 2, 3] }
+    ]
+  }],
+  "output": "subpicture_4q.obu"
+}
+```
+
+See: `cfg/xlayer/subpicture_4quadrant.json`
+
+### Subpicture with Auxiliary Layers
+
+Encode texture and alpha from separate source files, each tiled into
+subpictures. The texture tiles crop from the texture source, and the
+alpha tiles crop from the alpha source:
+
+```json
+{
+  "inputs": [
+    { "name": "texture", "filename": "video.yuv", "width": 1920, "height": 1080 },
+    { "name": "alpha", "filename": "alpha.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "texture", "width": 960, "height": 540,
+      "atlas_pos_x": 0, "atlas_pos_y": 0, "layer_type": "texture", ... },
+    { "xlayer_id": 1, "input_source": "texture", "width": 960, "height": 540,
+      "atlas_pos_x": 960, "atlas_pos_y": 0, "layer_type": "texture", ... },
+    { "xlayer_id": 2, "input_source": "alpha", "width": 960, "height": 540,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "auxiliary", "auxiliary_type": "alpha", ... },
+    { "xlayer_id": 3, "input_source": "alpha", "width": 960, "height": 540,
+      "atlas_pos_x": 960, "atlas_pos_y": 0,
+      "layer_type": "auxiliary", "auxiliary_type": "alpha", ... }
+  ],
+  "output": "subpic_tex_alpha.obu"
+}
+```
+
+See: `cfg/xlayer/subpicture_texture_alpha_4q.json`
+
+### Spatial Scalability with Embedded Layers
+
+Encode a texture layer with 3 spatial scales (1/4, 1/2, full) and a
+depth layer at full resolution only:
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 3,
+      "scaling_mode": ["1/4", "1/2", "1:1"],
+      "layer_type": "texture", "qp": 128, "cpu_used": 9 },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 1,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 9 }
+  ],
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0], "mlayer_count": [1] },
+      { "intent": 1, "xlayer_map": [0], "mlayer_count": [3] },
+      { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [3, 1] }
+    ]
+  }],
+  "output": "scalable_texture_depth.obu"
+}
+```
+
+This produces three operating points:
+- OP0: texture at 1/4 resolution (480x270) — lowest bandwidth
+- OP1: texture at all 3 scales (480x270, 960x540, 1920x1080) — full quality
+- OP2: texture at all scales + depth — complete bitstream
+
+See: `cfg/xlayer/texture_depth_2layer_3ml.json`
+
+### Mixed Embedded Layer Counts
+
+Different xlayers can have different numbers of embedded layers. For
+example, a main texture layer could use 3 embedded layers for spatial
+scalability while an auxiliary depth layer uses only 1. The constraint
+is that output frames within a TU must have matching order hints and
+synchronized random access points — NOT that embedded layer counts
+match across xlayers.
+
+### Stereo via Embedded Layers
+
+Encode left and right views as two embedded layers within a single
+xlayer, each reading from a different input source. This allows
+inter-layer prediction between views when `depends_on` is set:
+
+```json
+{
+  "inputs": [
+    { "name": "left", "filename": "left.yuv", "width": 1920, "height": 1080 },
+    { "name": "right", "filename": "right.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "left", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "stereo", "view_type": "left",
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1:1", "input_source": "left",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "right",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [0] }
+      ],
+      "qp": 128, "cpu_used": 5 }
+  ],
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "mlayer_info_idc": 2,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0], "mlayer_count": [1] },
+      { "intent": 1, "xlayer_map": [0], "mlayer_count": [2] }
+    ]
+  }],
+  "output": "stereo_embedded.obu"
+}
+```
+
+This produces two operating points:
+- OP0: left view only (mlayer 0)
+- OP1: both views (mlayers 0 and 1)
+
+See: `cfg/xlayer/stereo_embedded_2ml.json`
+
+### Subpicture Tiling via Embedded Layers
+
+Tile a 1920x1080 frame into 4 quadrants using 4 embedded layers within
+a single xlayer, each cropping from a different region of the same
+input source. This avoids needing 4 separate xlayers:
+
+```json
+{
+  "inputs": [
+    { "name": "video", "filename": "video.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "video", "width": 960, "height": 540,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "num_embedded_layers": 4,
+      "embedded_layers": [
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 960, "height": 540,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 960, "atlas_pos_y": 0, "width": 960, "height": 540,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 0, "atlas_pos_y": 540, "width": 960, "height": 540,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "video",
+          "atlas_pos_x": 960, "atlas_pos_y": 540, "width": 960, "height": 540,
+          "depends_on": [] }
+      ],
+      "qp": 128, "cpu_used": 5 }
+  ],
+  "output": "subpic_embedded.obu"
+}
+```
+
+Note `depends_on: []` on each mlayer — the quadrants are spatially
+independent so inter-layer prediction is disabled.
+
+See: `cfg/xlayer/subpicture_embedded_4q.json`
+
+### Texture + Depth via Embedded Layers with XLayers
+
+Combine xlayers and per-mlayer embedded layers. One xlayer uses 3
+embedded layers for spatial scalability (1/4, 1/2, full), while a
+second xlayer has 2 embedded layers reading from texture and depth
+sources separately:
+
+```json
+{
+  "inputs": [
+    { "name": "texture", "filename": "texture.yuv", "width": 1920, "height": 1080 },
+    { "name": "depth", "filename": "depth.yuv", "width": 1920, "height": 1080 }
+  ],
+  "xlayers": [
+    { "xlayer_id": 0, "input_source": "texture", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "texture",
+      "num_embedded_layers": 3,
+      "embedded_layers": [
+        { "scaling_mode": "1/4" },
+        { "scaling_mode": "1/2" },
+        { "scaling_mode": "1:1" }
+      ],
+      "qp": 128, "cpu_used": 5 },
+    { "xlayer_id": 1, "input_source": "texture", "width": 1920, "height": 1080,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "num_embedded_layers": 2,
+      "embedded_layers": [
+        { "scaling_mode": "1/2", "input_source": "texture",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [] },
+        { "scaling_mode": "1:1", "input_source": "depth",
+          "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+          "depends_on": [] }
+      ],
+      "qp": 160, "cpu_used": 5 }
+  ],
+  "ops": [{
+    "ops_id": 0, "priority": 0, "intent_present": true, "ptl_present": true,
+    "mlayer_info_idc": 2,
+    "operating_points": [
+      { "intent": 0, "xlayer_map": [0], "mlayer_count": [1] },
+      { "intent": 1, "xlayer_map": [0], "mlayer_count": [3] },
+      { "intent": 2, "xlayer_map": [0, 1], "mlayer_count": [3, 2] }
+    ]
+  }],
+  "output": "texture_depth_embedded.obu"
+}
+```
+
+This produces three operating points:
+- OP0: texture at 1/4 resolution
+- OP1: texture at all 3 scales
+- OP2: texture at all scales + depth via independent embedded layers
+
+See: `cfg/xlayer/texture_depth_embedded_3ml_2ml.json`
+
+---
+
+## GOP Mode and Output Order
+
+The `gop_mode` and `monotonic_output_order` settings interact to control
+how keyframes, reference frames, and hidden frames are managed. This
+section covers the valid combinations with multi-xlayer + multi-mlayer
+examples.
+
+### Compatibility Matrix
+
+| GOP Mode | Non-Monotonic (`false`) | Monotonic (`true`) |
+|----------|:-----------------------:|:------------------:|
+| `closed` | Yes | Yes |
+| `open_leading` | Yes | **No** |
+| `open_sef` | Yes* | Yes |
+
+\* `open_sef` with non-monotonic is valid but uses KEY_FRAME (not
+INTRA_ONLY_FRAME) as the forward keyframe, which resets reference
+buffers. With monotonic output, `open_sef` uses INTRA_ONLY_FRAME to
+preserve references across the GOP boundary.
+
+**Key differences:**
+
+- **Non-monotonic**: ARF and INTNL_ARF frames are implicit output
+  frames (the decoder reorders them to display order). No SEF OBUs are
+  needed for these. Zero overhead.
+- **Monotonic**: ARF and INTNL_ARF frames are genuinely hidden. SEF
+  OBUs are inserted at the correct monotonic position to display them.
+  SEFs have zero coding cost.
+- **`open_leading`**: The forward keyframe is an OLK. By default it is
+  **displayed** (implicit output), but it can be made hidden with
+  `enable_keyframe_filtering = 2` (producing a filtered overlay in the
+  same TU). The OLK designation is at the TU level; higher embedded
+  layers in the OLK TU can be inter OBUs.
+- **`open_sef` + monotonic**: The forward keyframe is a **hidden
+  INTRA_ONLY_FRAME** that does NOT reset reference buffers.
+  Inter-prediction across the GOP boundary is possible.
+
+### Closed GOP, Non-Monotonic (Multi-XLayer + Multi-MLayer)
+
+Each GOP begins with a CLK that resets all reference buffers. ARF and
+INTNL_ARF frames are implicit output (decoder reorders). This is the
+simplest and most robust configuration.
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 2, "scaling_mode": ["1/2", "1:1"],
+      "qp": 128, "cpu_used": 5, "lag_in_frames": 19,
+      "gop_mode": "closed" },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 1,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5, "lag_in_frames": 19,
+      "gop_mode": "closed" }
+  ],
+  "monotonic_output_order": false,
+  "output": "closed_nonmono.obu"
+}
+```
+
+See: `cfg/xlayer/texture_depth_2xl_2ml_closed_nonmono.json`
+
+### Closed GOP, Monotonic (Multi-XLayer + Multi-MLayer)
+
+Same as above but with monotonic output. Hidden frames (ARF, INTNL_ARF)
+are output via SEF at the correct display position. This is required
+when the application needs frames in strict display order (e.g.,
+low-delay playback without reordering).
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 2, "scaling_mode": ["1/2", "1:1"],
+      "qp": 128, "cpu_used": 5, "lag_in_frames": 19,
+      "gop_mode": "closed" },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 1,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5, "lag_in_frames": 19,
+      "gop_mode": "closed" }
+  ],
+  "monotonic_output_order": true,
+  "output": "closed_mono.obu"
+}
+```
+
+See: `cfg/xlayer/texture_depth_2xl_2ml_closed_mono.json`
+
+### Open Leading, Non-Monotonic (Multi-XLayer + Multi-MLayer)
+
+The forward keyframe is an OLK at each GOP boundary. By default it is
+displayed (implicit output), but `enable_keyframe_filtering` can make
+it hidden with a filtered overlay. Frames before the OLK in display
+order are coded as leading pictures after the OLK in coding order. The
+OLK allows random access while preserving some coding efficiency
+through leading-picture prediction.
+
+Requires `lag_in_frames > 0` and `monotonic_output_order: false`.
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 2, "scaling_mode": ["1/2", "1:1"],
+      "qp": 128, "cpu_used": 5,
+      "lag_in_frames": 19, "kf_max_dist": 9,
+      "gop_mode": "open_leading" },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 1,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5,
+      "lag_in_frames": 19, "kf_max_dist": 9,
+      "gop_mode": "open_leading" }
+  ],
+  "monotonic_output_order": false,
+  "output": "open_leading_nonmono.obu"
+}
+```
+
+See: `cfg/xlayer/texture_depth_2xl_2ml_open_leading.json`
+
+### Open SEF, Monotonic (Multi-XLayer + Multi-MLayer)
+
+The forward keyframe is a hidden INTRA_ONLY_FRAME that does not reset
+reference buffers. Inter-prediction from frames before the GOP boundary
+is preserved. The hidden frame is output via SEF in monotonic display
+order. This gives the best coding efficiency at GOP boundaries while
+maintaining strict display-order output.
+
+Requires `lag_in_frames > 0`.
+
+```json
+{
+  "xlayers": [
+    { "xlayer_id": 0, "input": "texture.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 2, "scaling_mode": ["1/2", "1:1"],
+      "qp": 128, "cpu_used": 5,
+      "lag_in_frames": 19, "kf_max_dist": 9,
+      "gop_mode": "open_sef" },
+    { "xlayer_id": 1, "input": "depth.y4m", "width": 1920, "height": 1080,
+      "num_embedded_layers": 1,
+      "layer_type": "auxiliary", "auxiliary_type": "depth",
+      "qp": 160, "cpu_used": 5,
+      "lag_in_frames": 19, "kf_max_dist": 9,
+      "gop_mode": "open_sef" }
+  ],
+  "monotonic_output_order": true,
+  "output": "open_sef_mono.obu"
+}
+```
+
+See: `cfg/xlayer/texture_depth_2xl_2ml_open_sef_mono.json`
+
+---
+
+## Decoding
+
+### Basic multi-layer decode
+
+```bash
+# Decode all layers into a single interleaved y4m
+avmdec --all-layers -o decoded.y4m combined.obu
+
+# Decode all layers into separate per-stream files
+avmdec --all-layers --num-streams=2 -o decoded_%d.y4m combined.obu
+```
+
+### Atlas composite decode
+
+Reconstruct the original composite canvas from subpicture tiles:
+
+```bash
+avmdec --all-layers --atlas-composite \
+  --xlayer-config subpicture_4quadrant.json \
+  -o composite.y4m subpicture_4q.obu
+```
+
+This reads the atlas layout from the JSON config and composites each
+decoded xlayer back into its position on the canvas.
+
+---
+
+## Stream Demuxing
+
+The `stream_demuxer` tool (built alongside `avmenc` and `avmdec`) can
+extract individual xlayer bitstreams from a combined multi-xlayer OBU
+file:
+
+```bash
+stream_demuxer input.obu output_prefix
+```
+
+This produces separate `.obu` files for each xlayer discovered in the
+Global LCR: `output_prefix_0.obu`, `output_prefix_1.obu`, etc. Each
+extracted stream can be decoded independently with the standard decoder.
+
+---
+
+## Constraints and Validation
+
+The JSON config is validated before encoding. The following constraints
+are enforced:
+
+1. **xlayer_id** must be unique and in range 0-30.
+2. Each xlayer must specify an `input` file or an `input_source` reference,
+   unless exactly one `inputs` entry is defined (it is then used by default).
+3. **Input source names** must be unique and non-empty.
+4. `"inputs"` and `"source"` cannot both be present.
+5. When multiple `inputs` are defined, each xlayer without its own
+   `input` file must have an explicit `input_source`.
+6. **num_embedded_layers** must be 1-8.
+7. When `num_embedded_layers > 1`:
+   - The last entry in `scaling_mode` must be `"1:1"` (full resolution).
+   - All scaling mode values must be valid (0-6).
+8. **Input source** mode requires `atlas_pos_x`, `atlas_pos_y`,
+   `width`, and `height` for every xlayer using that source. Xlayers
+   sharing the same input source must use the same chroma format.
+9. **OPS** operating points may only reference xlayer IDs that exist
+   in the config.
+10. When **`monotonic_output_order` is `false`**, all xlayers must use
+   the same coding structure: `num_temporal_layers`, `lag_in_frames`,
+   `kf_max_dist`, `subgop_config`, and `gop_mode`. The value of
+   `num_embedded_layers` may differ between xlayers.
+11. **`gop_mode: "open_leading"`** is not allowed when
+   `monotonic_output_order` is `true` (leading OBUs require
+   non-monotonic output).
+12. **Input source frame rates** must be exact integer divisors of the
+   highest frame rate among all input sources (e.g. 60/30/15 is valid,
+   but 30/24 is not).
+13. **`embedded_layers`** and the flat `scaling_mode` array are mutually
+   exclusive on the same xlayer entry.
+14. **`embedded_layers`** array length must match `num_embedded_layers`.
+15. Per-mlayer **`input_source`** requires `width`, `height`,
+   `atlas_pos_x`, and `atlas_pos_y`.
+16. **`depends_on`** entries must reference mlayer indices strictly less
+   than the current mlayer index. mlayer 0 cannot depend on anything.
+17. **CLK/OLK alignment:** When a CLK (Closed Layer Key) OBU appears
+   in a temporal unit, the first embedded layer (mlayer 0) and all
+   independent embedded layers (those with `depends_on: []`) must
+   also have CLK OBUs. The same rule applies to OLK (Open Layer Key)
+   OBUs. The encoder enforces this automatically.
+18. **Monotonic output order and hidden frames:** When
+   `monotonic_output_order` is `true`, implicit output frames are not
+   allowed. All hidden frames (ARFs, forward keyframes) must be output
+   via SEF (Show Existing Frame) instead. The encoder automatically
+   enables `add_sef_for_hidden_frames` when monotonic output is
+   requested. This precludes `gop_mode: "open_leading"` (which uses
+   implicit output for OLK overlays and leading frames).
+19. **Open GOP with monotonic output:** When `gop_mode: "open_sef"` and
+   `monotonic_output_order` is `true`, the forward keyframe is coded as
+   INTRA_ONLY_FRAME (not KEY_FRAME). This preserves reference buffers
+   across the GOP boundary, enabling inter-prediction from pre-boundary
+   frames. The hidden intra frame is later shown via SEF.
+
+---
+
+## Reference Configs
+
+The `cfg/xlayer/` directory contains ready-to-use configuration files:
+
+| Config | Description |
+|--------|-------------|
+| `texture_depth_2layer.json` | Texture + depth, 2 xlayers |
+| `texture_depth_2layer_3ml.json` | Texture (3 embedded layers) + depth |
+| `texture_depth_2layer_clk.json` | Texture + depth, closed GOP |
+| `texture_depth_2layer_open_leading.json` | Texture + depth, open leading GOP (1 mlayer each) |
+| `texture_depth_2layer_open_sef.json` | Texture + depth, open SEF GOP (1 mlayer each) |
+| `texture_depth_2xl_2ml_closed_nonmono.json` | **2 xlayers × 2 mlayers, closed GOP, non-monotonic** |
+| `texture_depth_2xl_2ml_closed_mono.json` | **2 xlayers × 2 mlayers, closed GOP, monotonic** |
+| `texture_depth_2xl_2ml_open_leading.json` | **2 xlayers × 2 mlayers, open leading, non-monotonic** |
+| `texture_depth_2xl_2ml_open_sef_mono.json` | **2 xlayers × 2 mlayers, open SEF, monotonic** |
+| `texture_depth_2layer_local_only.json` | Texture + depth, local-only LCR |
+| `texture_depth_2layer_fast.json` | Texture + depth, fast debug settings (coding tools disabled) |
+| `texture_2mlayer_fast.json` | Single xlayer with 2 embedded layers, fast debug settings |
+| `texture_alpha_depth_3layer.json` | Texture + alpha + depth, 3 xlayers |
+| `stereo_2layer.json` | Stereo simulcast: left + right as separate xlayers (no inter-layer prediction) |
+| `subpicture_3region.json` | 3-region subpicture tiling |
+| `subpicture_4quadrant.json` | 4-quadrant subpicture tiling (single input source) |
+| `subpicture_texture_alpha_4q.json` | 4-quadrant with separate texture + alpha input sources |
+| `annexG2_360degree_9xlayer.json` | 360-degree video, 9 subpictures with 3 embedded layers each |
+| `annexG3_videoconf_3xlayer.json` | Video conferencing, 3 participants |
+| `annexG4_roi_scalable_2xlayer.json` | ROI scalable, base + enhancement |
+| `stereo_embedded_2ml.json` | Stereo views via 2 embedded layers with inter-layer prediction |
+| `subpicture_embedded_4q.json` | 4-quadrant subpicture via 4 embedded layers |
+| `texture_depth_embedded_3ml_2ml.json` | Texture (3 mlayers) + depth via embedded layers with xlayers |
diff --git a/test/test.cmake b/test/test.cmake
index 49c61873c7..5be36c8f1d 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -45,7 +45,11 @@ list(
   "${AVM_ROOT}/test/test_vectors.h"
   "${AVM_ROOT}/test/transform_test_base.h"
   "${AVM_ROOT}/test/util.h"
-  "${AVM_ROOT}/test/video_source.h")
+  "${AVM_ROOT}/test/video_source.h"
+  "${AVM_ROOT}/test/xlayer_config_test.cc"
+  "${AVM_ROOT}/test/tu_assembler_test.cc"
+  "${AVM_ROOT}/common/xlayer_config_parse.c"
+  "${AVM_ROOT}/common/tu_assembler.c")
 
 list(
   APPEND
@@ -358,6 +362,7 @@ function(setup_avm_test_targets)
     endif()
   endif()
 
+  target_sources(test_libavm PRIVATE $<TARGET_OBJECTS:avm_cjson>)
   target_link_libraries(test_libavm ${AVM_LIB_LINK_TYPE} avm avm_gtest)
 
   if(CONFIG_LIBYUV)
diff --git a/test/tu_assembler_test.cc b/test/tu_assembler_test.cc
new file mode 100644
index 0000000000..21a81d4673
--- /dev/null
+++ b/test/tu_assembler_test.cc
@@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include <string.h>
+
+#include <string>
+
+#include "avm/avm_integer.h"
+#include "av2/common/enums.h"
+#include "common/tu_assembler.h"
+#include "common/xlayer_config.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Helper: run xlayer_config_init on *cfg, then describe 1920x1080 xlayers.
+static void MakeMinimalConfig(MultiXLayerConfig *cfg, int num_xlayers,
+                              const int *xlayer_ids) {
+  xlayer_config_init(cfg);
+  snprintf(cfg->output_filename, PATH_MAX, "test_out.obu");
+  cfg->enable_global_lcr = 1;
+  cfg->num_xlayers = num_xlayers;
+  for (int idx = 0; idx < num_xlayers; ++idx) {
+    cfg->xlayers[idx].height = 1080;
+    cfg->xlayers[idx].width = 1920;
+    snprintf(cfg->xlayers[idx].input_filename, PATH_MAX, "input_%d", idx);
+    cfg->xlayers[idx].xlayer_id = xlayer_ids[idx];
+  }
+}
+
+// Helper: parse the size-prefixed OBU at `offset` and report its header info.
+// Returns the total consumed bytes (length_field + obu_total_size), or -1 when
+// the OBU is malformed or does not fit entirely inside the buffer.
+static int ParseObuAt(const uint8_t *buf, size_t buf_size, size_t offset,
+                      int *out_type, int *out_ext_flag, int *out_xlayer_id,
+                      size_t *out_payload_size) {
+  if (offset >= buf_size) return -1;
+
+  uint64_t obu_total_size = 0;
+  size_t length_field_size = 0;
+  if (avm_uleb_decode(buf + offset, buf_size - offset, &obu_total_size,
+                      &length_field_size) != 0) {
+    return -1;
+  }
+
+  // Reject empty or truncated OBUs before touching any header byte.
+  const size_t remaining = buf_size - offset - length_field_size;
+  if (obu_total_size == 0 || obu_total_size > remaining) return -1;
+
+  const uint8_t *hdr = buf + offset + length_field_size;
+  *out_ext_flag = (hdr[0] >> 7) & 1;
+  *out_type = (hdr[0] >> 2) & 0x1F;
+
+  // The extension byte, when flagged, must also lie inside the OBU.
+  const size_t hdr_size = *out_ext_flag ? 2 : 1;
+  if (obu_total_size < hdr_size) return -1;
+  *out_xlayer_id = *out_ext_flag ? (hdr[1] & 0x1F) : 0;
+  *out_payload_size = (size_t)obu_total_size - hdr_size;
+
+  return (int)(length_field_size + (size_t)obu_total_size);
+}
+
+// --- Init / Free Tests ---
+
+TEST(TUAssembler, InitAndFree) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  EXPECT_NE(ta.buffer, nullptr);
+  EXPECT_EQ(ta.size, 0u);
+  EXPECT_GE(ta.capacity, (size_t)TU_ASM_INITIAL_CAPACITY);
+  EXPECT_EQ(ta.num_xlayers, 2);
+  EXPECT_EQ(ta.xlayer_ids[0], 0);
+  EXPECT_EQ(ta.xlayer_ids[1], 1);
+
+  tu_assembler_free(&ta);
+  EXPECT_EQ(ta.buffer, nullptr);
+  EXPECT_EQ(ta.size, 0u);
+}
+
+// --- TD Write Test ---
+
+TEST(TUAssembler, WriteTD) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_td(&ta), 0);
+
+  // TD should be 2 bytes: [size=1][header_byte=0x08]
+  ASSERT_EQ(ta.size, 2u);
+  EXPECT_EQ(ta.buffer[0], 1);     // ULEB128 size = 1
+  EXPECT_EQ(ta.buffer[1], 0x08);  // OBU_TEMPORAL_DELIMITER << 2
+
+  // Parse it back
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  EXPECT_EQ(consumed, 2);
+  EXPECT_EQ(type, OBU_TEMPORAL_DELIMITER);
+  EXPECT_EQ(ext_flag, 0);
+  EXPECT_EQ(payload_size, 0u);
+
+  tu_assembler_free(&ta);
+}
+
+// --- OBU Header Rewriting Test ---
+
+TEST(TUAssembler, AppendXLayerObusRewritesHeaders) {
+  int ids[] = { 0, 3 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+
+  // Construct a fake per-xlayer OBU: a Sequence Header with no extension.
+  // Format: [uleb128 size][header_byte][payload...]
+  // OBU_SEQUENCE_HEADER = 1, header byte: ext=0, type=1, tlayer=0 => (1<<2)=4
+  const uint8_t fake_payload[] = { 0xAA, 0xBB, 0xCC };
+  uint8_t input_obu[16];
+  size_t input_size = 0;
+
+  // ULEB128 size = 1 (header) + 3 (payload) = 4
+  uint8_t size_buf[4];
+  size_t size_len = 0;
+  avm_uleb_encode(4, sizeof(size_buf), size_buf, &size_len);
+  memcpy(input_obu + input_size, size_buf, size_len);
+  input_size += size_len;
+
+  // Header byte: ext=0, type=OBU_SEQUENCE_HEADER(1), tlayer=0
+  input_obu[input_size++] = (uint8_t)((1 << 2));  // 0x04
+
+  // Payload
+  memcpy(input_obu + input_size, fake_payload, sizeof(fake_payload));
+  input_size += sizeof(fake_payload);
+
+  // Append with xlayer_id = 3
+  ASSERT_EQ(tu_assembler_append_xlayer_obus(&ta, 3, input_obu, input_size), 0);
+
+  // Parse the rewritten OBU
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_SEQUENCE_HEADER);
+  EXPECT_EQ(ext_flag, 1);       // Should now have extension
+  EXPECT_EQ(xlayer_id, 3);      // Rewritten xlayer_id
+  EXPECT_EQ(payload_size, 3u);  // Original payload preserved
+
+  // Verify payload content
+  size_t hdr_offset = 0;
+  uint64_t obu_total = 0;
+  size_t lfs = 0;
+  avm_uleb_decode(ta.buffer, ta.size, &obu_total, &lfs);
+  hdr_offset = lfs + 2;  // Skip size field and 2-byte header
+  EXPECT_EQ(ta.buffer[hdr_offset], 0xAA);
+  EXPECT_EQ(ta.buffer[hdr_offset + 1], 0xBB);
+  EXPECT_EQ(ta.buffer[hdr_offset + 2], 0xCC);
+
+  tu_assembler_free(&ta);
+}
+
+TEST(TUAssembler, AppendXLayerObusSkipsTD) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+
+  // Construct a TD OBU (should be skipped by append)
+  // OBU_TEMPORAL_DELIMITER = 2, header: ext=0, type=2, tlayer=0 => 0x08
+  uint8_t td_obu[] = { 1, 0x08 };  // size=1, header
+
+  ASSERT_EQ(tu_assembler_append_xlayer_obus(&ta, 0, td_obu, sizeof(td_obu)), 0);
+
+  // No output — TD should be filtered
+  EXPECT_EQ(ta.size, 0u);
+
+  tu_assembler_free(&ta);
+}
+
+TEST(TUAssembler, AppendXLayerObusPreservesExtension) {
+  int ids[] = { 0, 2 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+
+  // Construct an OBU that already has an extension byte
+  // OBU_MULTI_FRAME_HEADER = 3
+  // Header: ext=1, type=3, tlayer=1 => (1<<7)|(3<<2)|1 = 0x8D
+  // Extension: mlayer=2, xlayer=0 => (2<<5)|0 = 0x40
+  const uint8_t payload[] = { 0xDE, 0xAD };
+  uint8_t input_obu[16];
+  size_t input_size = 0;
+
+  // ULEB128 size = 2 (header+ext) + 2 (payload) = 4
+  uint8_t size_buf[4];
+  size_t size_len = 0;
+  avm_uleb_encode(4, sizeof(size_buf), size_buf, &size_len);
+  memcpy(input_obu + input_size, size_buf, size_len);
+  input_size += size_len;
+
+  input_obu[input_size++] = 0x8D;  // Header: ext=1, type=3, tlayer=1
+  input_obu[input_size++] = 0x40;  // Extension: mlayer=2, xlayer=0
+  memcpy(input_obu + input_size, payload, sizeof(payload));
+  input_size += sizeof(payload);
+
+  // Append with xlayer_id = 2
+  ASSERT_EQ(tu_assembler_append_xlayer_obus(&ta, 2, input_obu, input_size), 0);
+
+  // Parse rewritten OBU
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_MULTI_FRAME_HEADER);
+  EXPECT_EQ(ext_flag, 1);
+  EXPECT_EQ(xlayer_id, 2);      // Rewritten to target xlayer
+  EXPECT_EQ(payload_size, 2u);  // Payload size unchanged
+
+  // Verify mlayer_id is preserved in extension byte
+  uint64_t obu_total = 0;
+  size_t lfs = 0;
+  avm_uleb_decode(ta.buffer, ta.size, &obu_total, &lfs);
+  uint8_t ext_byte = ta.buffer[lfs + 1];
+  int mlayer_id = (ext_byte >> 5) & 0x7;
+  EXPECT_EQ(mlayer_id, 2);  // mlayer preserved
+
+  tu_assembler_free(&ta);
+}
+
+// --- Multiple OBU Append Test ---
+
+TEST(TUAssembler, AppendMultipleObus) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+
+  // Build a packet with: TD + SH + Frame data
+  uint8_t packet[64];
+  size_t pkt_size = 0;
+
+  // OBU 1: TD (should be skipped)
+  packet[pkt_size++] = 1;     // size=1
+  packet[pkt_size++] = 0x08;  // TD header
+
+  // OBU 2: Sequence Header (type=1), 2 bytes payload
+  uint8_t sb[4];
+  size_t sl = 0;
+  avm_uleb_encode(3, sizeof(sb), sb, &sl);  // size = 1 hdr + 2 payload
+  memcpy(packet + pkt_size, sb, sl);
+  pkt_size += sl;
+  packet[pkt_size++] = 0x04;  // SH header, no ext
+  packet[pkt_size++] = 0x11;  // payload byte 1
+  packet[pkt_size++] = 0x22;  // payload byte 2
+
+  // OBU 3: Leading Tile Group (type=6), 3 bytes payload
+  avm_uleb_encode(4, sizeof(sb), sb, &sl);  // size = 1 hdr + 3 payload
+  memcpy(packet + pkt_size, sb, sl);
+  pkt_size += sl;
+  packet[pkt_size++] = (uint8_t)(6 << 2);  // Leading TG header, no ext
+  packet[pkt_size++] = 0x33;
+  packet[pkt_size++] = 0x44;
+  packet[pkt_size++] = 0x55;
+
+  ASSERT_EQ(tu_assembler_append_xlayer_obus(&ta, 1, packet, pkt_size), 0);
+
+  // Should have 2 OBUs output (TD skipped)
+  size_t offset = 0;
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+
+  // First OBU: Sequence Header
+  int consumed = ParseObuAt(ta.buffer, ta.size, offset, &type, &ext_flag,
+                            &xlayer_id, &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_SEQUENCE_HEADER);
+  EXPECT_EQ(xlayer_id, 1);
+  EXPECT_EQ(payload_size, 2u);
+  offset += consumed;
+
+  // Second OBU: Leading Tile Group
+  consumed = ParseObuAt(ta.buffer, ta.size, offset, &type, &ext_flag,
+                        &xlayer_id, &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_LEADING_TILE_GROUP);
+  EXPECT_EQ(xlayer_id, 1);
+  EXPECT_EQ(payload_size, 3u);
+  offset += consumed;
+
+  // Should have consumed all output
+  EXPECT_EQ(offset, ta.size);
+
+  tu_assembler_free(&ta);
+}
+
+// --- Flush Test ---
+
+TEST(TUAssembler, FlushWritesToFile) {
+  int ids[] = { 0 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 1, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_td(&ta), 0);
+
+  const std::string path = testing::TempDir() + "tu_asm_flush_test.obu";
+  FILE *f = fopen(path.c_str(), "wb");
+  ASSERT_NE(f, nullptr);
+
+  // Snapshot the pending bytes; flush is expected to reset the buffer.
+  const std::string expected((const char *)ta.buffer, ta.size);
+  const int flush_status = tu_assembler_flush(&ta, f);
+  fclose(f);  // Close before asserting so a failed flush cannot leak f.
+  ASSERT_EQ(flush_status, 0);
+  EXPECT_EQ(ta.size, 0u);
+
+  // Read the file back and compare it byte-for-byte with the snapshot.
+  f = fopen(path.c_str(), "rb");
+  ASSERT_NE(f, nullptr);
+  std::string actual(expected.size() + 1, '\0');  // +1 exposes extra bytes.
+  actual.resize(fread(&actual[0], 1, actual.size(), f));
+  fclose(f);
+  EXPECT_EQ(actual, expected);
+
+  remove(path.c_str());  // Do not leave the temp file behind.
+  tu_assembler_free(&ta);
+}
+
+// --- Global LCR Population Test ---
+
+TEST(TUAssembler, PopulateGlobalLcr) {
+  int ids[] = { 0, 5 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.lcr_purpose_id = 2;
+  cfg.lcr_dependent_xlayers_flag = 1;
+  cfg.lcr_doh_constraint_flag = 1;
+  cfg.xlayers[0].layer_type = TEXTURE_LAYER;
+  cfg.xlayers[1].layer_type = AUX_LAYER;
+  cfg.xlayers[1].auxiliary_type = LCR_DEPTH_AUX;
+
+  GlobalLayerConfigurationRecord glcr;
+  populate_global_lcr_from_config(&cfg, &glcr);
+
+  EXPECT_EQ(glcr.LcrMaxNumXLayerCount, 2);
+  EXPECT_EQ(glcr.LcrXLayerID[0], 0);
+  EXPECT_EQ(glcr.LcrXLayerID[1], 5);
+
+  // xlayer_map should have bits 0 and 5 set
+  uint32_t expected_map = (1u << 0) | (1u << 5);
+  EXPECT_EQ((uint32_t)glcr.lcr_xlayer_map, expected_map);
+
+  EXPECT_EQ(glcr.lcr_global_purpose_id, 2);
+  EXPECT_EQ(glcr.lcr_dependent_xlayers_flag, 1);
+  EXPECT_EQ(glcr.lcr_doh_constraint_flag, 1);
+  EXPECT_EQ(glcr.lcr_global_payload_present_flag, 1);
+
+  // Per-xlayer info: xlayer 0
+  EXPECT_EQ(glcr.xlayer_info[0].lcr_rep_info_present_flag, 1);
+  EXPECT_EQ(glcr.xlayer_info[0].rep_params.lcr_max_pic_width, 1920);
+  EXPECT_EQ(glcr.xlayer_info[0].rep_params.lcr_max_pic_height, 1080);
+
+  // Per-xlayer info: xlayer 5 is at positional index 1
+  EXPECT_EQ(glcr.xlayer_info[1].lcr_rep_info_present_flag, 1);
+  EXPECT_EQ(glcr.xlayer_info[1].rep_params.lcr_max_pic_width, 1920);
+  EXPECT_EQ(glcr.xlayer_info[1].rep_params.lcr_max_pic_height, 1080);
+
+  // Embedded layer type info
+  EXPECT_EQ(glcr.xlayer_info[0].mlayer_params.lcr_layer_type[0], TEXTURE_LAYER);
+  EXPECT_EQ(glcr.xlayer_info[1].mlayer_params.lcr_layer_type[0], AUX_LAYER);
+  EXPECT_EQ(glcr.xlayer_info[1].mlayer_params.lcr_auxiliary_type[0],
+            LCR_DEPTH_AUX);
+}
+
+// --- OPS Population Test ---
+
+TEST(TUAssembler, PopulateOps) {
+  OPSConfig ops_cfg;
+  memset(&ops_cfg, 0, sizeof(ops_cfg));
+  ops_cfg.enable = 1;
+  ops_cfg.ops_id = 0;
+  ops_cfg.priority = 1;
+  ops_cfg.intent_present_flag = 1;
+  ops_cfg.ptl_present_flag = 1;
+  ops_cfg.num_operating_points = 2;
+
+  // OP0: xlayer 0 only
+  ops_cfg.ops[0].intent = 0;
+  ops_cfg.ops[0].xlayer_map = (1u << 0);
+
+  // OP1: xlayers 0 and 3
+  ops_cfg.ops[1].intent = 1;
+  ops_cfg.ops[1].xlayer_map = (1u << 0) | (1u << 3);
+
+  // Set up a minimal MultiXLayerConfig for derivation
+  int ids[] = { 0, 3 };
+  MultiXLayerConfig mcfg;
+  MakeMinimalConfig(&mcfg, 2, ids);
+  mcfg.xlayers[0].width = 960;
+  mcfg.xlayers[0].height = 540;
+  mcfg.xlayers[0].level = SEQ_LEVEL_4_0;
+  mcfg.xlayers[1].width = 960;
+  mcfg.xlayers[1].height = 540;
+  mcfg.xlayers[1].level = SEQ_LEVEL_4_0;
+
+  OperatingPointSet ops;
+  populate_ops_from_config(&ops_cfg, GLOBAL_XLAYER_ID, &mcfg, &ops);
+
+  EXPECT_EQ(ops.valid, 1);
+  EXPECT_EQ(ops.ops_id, 0);
+  EXPECT_EQ(ops.ops_cnt, 2);
+  EXPECT_EQ(ops.ops_priority, 1);
+  EXPECT_EQ(ops.ops_intent_present_flag, 1);
+
+  // OP0: single xlayer
+  EXPECT_EQ(ops.op[0].ops_intent_op, 0);
+  EXPECT_EQ(ops.op[0].ops_xlayer_map, 1);
+  EXPECT_EQ(ops.op[0].XCount, 1);
+  EXPECT_EQ(ops.op[0].OpsxLayerID[0], 0);
+
+  // OP1: two xlayers
+  EXPECT_EQ(ops.op[1].ops_intent_op, 1);
+  EXPECT_EQ(ops.op[1].ops_xlayer_map, (int)((1u << 0) | (1u << 3)));
+  EXPECT_EQ(ops.op[1].XCount, 2);
+  EXPECT_EQ(ops.op[1].OpsxLayerID[0], 0);
+  EXPECT_EQ(ops.op[1].OpsxLayerID[1], 3);
+}
+
+// --- Global LCR OBU Write Test ---
+
+TEST(TUAssembler, WriteGlobalLcrObu) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_global_lcr(&ta), 0);
+
+  // Should have produced some output
+  EXPECT_GT(ta.size, 0u);
+
+  // Parse the OBU header
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_LAYER_CONFIGURATION_RECORD);
+  EXPECT_EQ(ext_flag, 1);
+  EXPECT_EQ(xlayer_id, GLOBAL_XLAYER_ID);
+
+  tu_assembler_free(&ta);
+}
+
+// --- MSDO OBU Write Test ---
+
+TEST(TUAssembler, WriteMsdoObu) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_msdo = 1;
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_msdo(&ta), 0);
+
+  EXPECT_GT(ta.size, 0u);
+
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_MULTI_STREAM_DECODER_OPERATION);
+  EXPECT_EQ(ext_flag, 1);
+  EXPECT_EQ(xlayer_id, GLOBAL_XLAYER_ID);
+
+  tu_assembler_free(&ta);
+}
+
+TEST(TUAssembler, MsdoSkippedWhenDisabled) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_msdo = 0;
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_msdo(&ta), 0);
+
+  // Should produce no output when disabled
+  EXPECT_EQ(ta.size, 0u);
+
+  tu_assembler_free(&ta);
+}
+
+// --- Full TU Assembly Test ---
+
+TEST(TUAssembler, FullTuAssembly) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_msdo = 1;
+
+  // Add an OPS
+  cfg.num_ops_sets = 1;
+  cfg.ops_sets[0].enable = 1;
+  cfg.ops_sets[0].ops_id = 0;
+  cfg.ops_sets[0].intent_present_flag = 1;
+  cfg.ops_sets[0].ptl_present_flag = 1;
+  cfg.ops_sets[0].num_operating_points = 1;
+  cfg.ops_sets[0].ops[0].xlayer_map = 0x3;
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+
+  // Write TD
+  ASSERT_EQ(tu_assembler_write_td(&ta), 0);
+
+  // Write structural OBUs
+  ASSERT_EQ(tu_assembler_write_global_lcr(&ta), 0);
+  ASSERT_EQ(tu_assembler_write_msdo(&ta), 0);
+  ASSERT_EQ(tu_assembler_write_ops(&ta, GLOBAL_XLAYER_ID), 0);
+
+  // Fake per-xlayer data for xlayer 0
+  uint8_t xl0_data[8];
+  size_t xl0_size = 0;
+  uint8_t sb[4];
+  size_t sl = 0;
+  avm_uleb_encode(3, sizeof(sb), sb, &sl);  // SH: 1 hdr + 2 payload
+  memcpy(xl0_data + xl0_size, sb, sl);
+  xl0_size += sl;
+  xl0_data[xl0_size++] = 0x04;  // SH header
+  xl0_data[xl0_size++] = 0xAA;
+  xl0_data[xl0_size++] = 0xBB;
+
+  ASSERT_EQ(tu_assembler_append_xlayer_obus(&ta, 0, xl0_data, xl0_size), 0);
+
+  // Fake per-xlayer data for xlayer 1
+  uint8_t xl1_data[8];
+  size_t xl1_size = 0;
+  avm_uleb_encode(3, sizeof(sb), sb, &sl);
+  memcpy(xl1_data + xl1_size, sb, sl);
+  xl1_size += sl;
+  xl1_data[xl1_size++] = 0x04;
+  xl1_data[xl1_size++] = 0xCC;
+  xl1_data[xl1_size++] = 0xDD;
+
+  ASSERT_EQ(tu_assembler_append_xlayer_obus(&ta, 1, xl1_data, xl1_size), 0);
+
+  // Verify total output is non-empty and can be parsed
+  EXPECT_GT(ta.size, 10u);
+
+  // Walk through OBUs to verify ordering: TD, LCR, MSDO, OPS, xl0 SH, xl1 SH
+  size_t offset = 0;
+  int obu_count = 0;
+  int types[16] = {};
+  int xlayer_ids[16] = {};
+
+  while (offset < ta.size && obu_count < 16) {
+    int type, ext_flag, xlayer_id;
+    size_t payload_size;
+    int consumed = ParseObuAt(ta.buffer, ta.size, offset, &type, &ext_flag,
+                              &xlayer_id, &payload_size);
+    if (consumed <= 0) break;
+    types[obu_count] = type;
+    xlayer_ids[obu_count] = xlayer_id;
+    obu_count++;
+    offset += consumed;
+  }
+
+  // Should have at least 6 OBUs
+  ASSERT_GE(obu_count, 6);
+
+  // First OBU should be TD
+  EXPECT_EQ(types[0], OBU_TEMPORAL_DELIMITER);
+
+  // LCR should follow
+  EXPECT_EQ(types[1], OBU_LAYER_CONFIGURATION_RECORD);
+  EXPECT_EQ(xlayer_ids[1], GLOBAL_XLAYER_ID);
+
+  // MSDO next
+  EXPECT_EQ(types[2], OBU_MULTI_STREAM_DECODER_OPERATION);
+  EXPECT_EQ(xlayer_ids[2], GLOBAL_XLAYER_ID);
+
+  // OPS
+  EXPECT_EQ(types[3], OBU_OPERATING_POINT_SET);
+  EXPECT_EQ(xlayer_ids[3], GLOBAL_XLAYER_ID);
+
+  // Per-xlayer OBUs: xlayer 0 then xlayer 1
+  EXPECT_EQ(types[4], OBU_SEQUENCE_HEADER);
+  EXPECT_EQ(xlayer_ids[4], 0);
+
+  EXPECT_EQ(types[5], OBU_SEQUENCE_HEADER);
+  EXPECT_EQ(xlayer_ids[5], 1);
+
+  // Should have consumed all output
+  EXPECT_EQ(offset, ta.size);
+
+  tu_assembler_free(&ta);
+}
+
+// --- Atlas Population Tests ---
+
+TEST(TUAssembler, PopulateAtlasEnhancedUniform) {
+  int ids[] = { 0, 1, 2 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 3, ids);
+  cfg.enable_atlas = 1;
+  cfg.atlas_mode = ENHANCED_ATLAS;
+  cfg.atlas_uniform_spacing = 1;
+  // All xlayers same size
+  for (int i = 0; i < 3; i++) {
+    cfg.xlayers[i].width = 640;
+    cfg.xlayers[i].height = 480;
+  }
+
+  AtlasSegmentInfo atlas;
+  populate_atlas_from_config(&cfg, &atlas);
+
+  EXPECT_EQ(atlas.valid, 1);
+  EXPECT_EQ(atlas.atlas_segment_mode_idc, ENHANCED_ATLAS);
+  EXPECT_EQ(atlas.atlas_segment_id, 1);
+
+  // Region info: 3 columns x 1 row, uniform spacing
+  EXPECT_EQ(atlas.ats_reg_params.ats_uniform_spacing_flag, 1);
+  EXPECT_EQ(atlas.ats_reg_params.ats_num_region_columns_minus_1, 2);
+  EXPECT_EQ(atlas.ats_reg_params.ats_num_region_rows_minus_1, 0);
+  EXPECT_EQ(atlas.ats_reg_params.ats_region_width_minus_1, 639);
+  EXPECT_EQ(atlas.ats_reg_params.ats_region_height_minus_1, 479);
+  EXPECT_EQ(atlas.ats_reg_params.NumRegionsInAtlas, 3);
+
+  // Segment mapping: single_region_per_segment
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_single_region_per_atlas_segment_flag, 1);
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_num_atlas_segments_minus_1, 2);
+}
+
+TEST(TUAssembler, PopulateAtlasEnhancedExplicit2x2) {
+  // 3 regions in a 2x2 grid (bottom-right empty)
+  int ids[] = { 0, 1, 2 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 3, ids);
+  cfg.enable_atlas = 1;
+  cfg.atlas_mode = ENHANCED_ATLAS;
+  cfg.atlas_uniform_spacing = 0;
+
+  cfg.xlayers[0].width = 960;
+  cfg.xlayers[0].height = 540;
+  cfg.xlayers[0].atlas_pos_x = 0;
+  cfg.xlayers[0].atlas_pos_y = 0;
+
+  cfg.xlayers[1].width = 960;
+  cfg.xlayers[1].height = 540;
+  cfg.xlayers[1].atlas_pos_x = 960;
+  cfg.xlayers[1].atlas_pos_y = 0;
+
+  cfg.xlayers[2].width = 960;
+  cfg.xlayers[2].height = 540;
+  cfg.xlayers[2].atlas_pos_x = 0;
+  cfg.xlayers[2].atlas_pos_y = 540;
+
+  AtlasSegmentInfo atlas;
+  populate_atlas_from_config(&cfg, &atlas);
+
+  EXPECT_EQ(atlas.valid, 1);
+  EXPECT_EQ(atlas.atlas_segment_mode_idc, ENHANCED_ATLAS);
+
+  // Grid should be 2 columns x 2 rows
+  EXPECT_EQ(atlas.ats_reg_params.ats_uniform_spacing_flag, 0);
+  EXPECT_EQ(atlas.ats_reg_params.ats_num_region_columns_minus_1, 1);
+  EXPECT_EQ(atlas.ats_reg_params.ats_num_region_rows_minus_1, 1);
+  EXPECT_EQ(atlas.ats_reg_params.ats_column_width_minus_1[0], 959);
+  EXPECT_EQ(atlas.ats_reg_params.ats_column_width_minus_1[1], 959);
+  EXPECT_EQ(atlas.ats_reg_params.ats_row_height_minus_1[0], 539);
+  EXPECT_EQ(atlas.ats_reg_params.ats_row_height_minus_1[1], 539);
+  EXPECT_EQ(atlas.ats_reg_params.NumRegionsInAtlas, 4);
+
+  // Explicit segment mapping (not single_region_per_segment)
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_single_region_per_atlas_segment_flag, 0);
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_num_atlas_segments_minus_1, 2);
+
+  // Segment 0 at col=0,row=0
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_top_left_region_column[0], 0);
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_top_left_region_row[0], 0);
+
+  // Segment 1 at col=1,row=0
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_top_left_region_column[1], 1);
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_top_left_region_row[1], 0);
+
+  // Segment 2 at col=0,row=1
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_top_left_region_column[2], 0);
+  EXPECT_EQ(atlas.ats_reg_seg_map.ats_top_left_region_row[2], 1);
+}
+
+TEST(TUAssembler, PopulateAtlasMultistream) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_atlas = 1;
+  cfg.atlas_mode = MULTISTREAM_ATLAS;
+  cfg.atlas_width = 1920;
+  cfg.atlas_height = 1080;
+  cfg.xlayers[0].width = 960;
+  cfg.xlayers[0].height = 1080;
+  cfg.xlayers[0].atlas_pos_x = 0;
+  cfg.xlayers[0].atlas_pos_y = 0;
+  cfg.xlayers[1].width = 960;
+  cfg.xlayers[1].height = 1080;
+  cfg.xlayers[1].atlas_pos_x = 960;
+  cfg.xlayers[1].atlas_pos_y = 0;
+
+  AtlasSegmentInfo atlas;
+  populate_atlas_from_config(&cfg, &atlas);
+
+  EXPECT_EQ(atlas.valid, 1);
+  EXPECT_EQ(atlas.atlas_segment_mode_idc, MULTISTREAM_ATLAS);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_stream_id_present, 1);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_atlas_width, 1920);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_atlas_height, 1080);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_num_atlas_segments_minus_1, 1);
+
+  // Segment 0: xlayer_id=0, pos (0,0), 960x1080
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_input_stream_id[0], 0);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_segment_top_left_pos_x[0], 0);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_segment_top_left_pos_y[0], 0);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_segment_width[0], 960);
+
+  // Segment 1: xlayer_id=1, pos (960,0), 960x1080
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_input_stream_id[1], 1);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_segment_top_left_pos_x[1], 960);
+  EXPECT_EQ(atlas.ats_basic_info_s.ats_segment_width[1], 960);
+}
+
+// --- Atlas OBU Write Tests ---
+
+TEST(TUAssembler, WriteAtlasEnhancedObu) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_atlas = 1;
+  cfg.atlas_mode = ENHANCED_ATLAS;
+  cfg.atlas_uniform_spacing = 1;
+  cfg.xlayers[0].width = 960;
+  cfg.xlayers[0].height = 540;
+  cfg.xlayers[1].width = 960;
+  cfg.xlayers[1].height = 540;
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_atlas(&ta), 0);
+
+  EXPECT_GT(ta.size, 0u);
+
+  // Parse OBU header
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_ATLAS_SEGMENT);
+  EXPECT_EQ(ext_flag, 1);
+  EXPECT_EQ(xlayer_id, GLOBAL_XLAYER_ID);
+
+  tu_assembler_free(&ta);
+}
+
+TEST(TUAssembler, WriteAtlasMultistreamObu) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_atlas = 1;
+  cfg.atlas_mode = MULTISTREAM_ATLAS;
+  cfg.atlas_width = 1920;
+  cfg.atlas_height = 1080;
+  cfg.xlayers[0].width = 960;
+  cfg.xlayers[0].height = 1080;
+  cfg.xlayers[0].atlas_pos_x = 0;
+  cfg.xlayers[0].atlas_pos_y = 0;
+  cfg.xlayers[1].width = 960;
+  cfg.xlayers[1].height = 1080;
+  cfg.xlayers[1].atlas_pos_x = 960;
+  cfg.xlayers[1].atlas_pos_y = 0;
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_atlas(&ta), 0);
+
+  EXPECT_GT(ta.size, 0u);
+
+  int type, ext_flag, xlayer_id;
+  size_t payload_size;
+  int consumed = ParseObuAt(ta.buffer, ta.size, 0, &type, &ext_flag, &xlayer_id,
+                            &payload_size);
+  ASSERT_GT(consumed, 0);
+  EXPECT_EQ(type, OBU_ATLAS_SEGMENT);
+  EXPECT_EQ(ext_flag, 1);
+  EXPECT_EQ(xlayer_id, GLOBAL_XLAYER_ID);
+
+  tu_assembler_free(&ta);
+}
+
+TEST(TUAssembler, AtlasSkippedWhenDisabled) {
+  int ids[] = { 0, 1 };
+  MultiXLayerConfig cfg;
+  MakeMinimalConfig(&cfg, 2, ids);
+  cfg.enable_atlas = 0;
+
+  TUAssembler ta;
+  ASSERT_EQ(tu_assembler_init(&ta, &cfg), 0);
+  ASSERT_EQ(tu_assembler_write_atlas(&ta), 0);
+
+  EXPECT_EQ(ta.size, 0u);
+
+  tu_assembler_free(&ta);
+}
+
+}  // namespace
diff --git a/test/xlayer_config_test.cc b/test/xlayer_config_test.cc
new file mode 100644
index 0000000000..956936534a
--- /dev/null
+++ b/test/xlayer_config_test.cc
@@ -0,0 +1,1973 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+
+#include "common/xlayer_config.h"
+#include "common/xlayer_config_parse.h"
+#include "avm/avmcx.h"
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+namespace {
+
+// Helper: write a string to a temporary file and return the path.
+// Uses a static buffer — only one temp file at a time.
+static char temp_path[256];
+
+const char *WriteTempJson(const char *json_str) {
+  snprintf(temp_path, sizeof(temp_path), "%s",
+           testing::TempDir().append("xlayer_test.json").c_str());
+  FILE *f = fopen(temp_path, "w");
+  EXPECT_NE(f, nullptr);
+  if (!f) return nullptr;
+  fputs(json_str, f);
+  fclose(f);
+  return temp_path;
+}
+
+// --- Config Init Tests ---
+
+TEST(XLayerConfig, InitDefaults) {
+  MultiXLayerConfig defaults;
+  xlayer_config_init(&defaults);
+
+  EXPECT_EQ(defaults.num_xlayers, 0);
+  EXPECT_EQ(defaults.num_ops_sets, 0);
+  EXPECT_EQ(defaults.limit, 0);
+  EXPECT_EQ(defaults.enable_msdo, 0);
+  EXPECT_EQ(defaults.enable_atlas, 0);
+  EXPECT_EQ(defaults.enable_global_lcr, 1);
+  EXPECT_EQ(defaults.lcr_doh_constraint_flag, 1);
+  EXPECT_EQ(defaults.combined_tu, 1);
+  EXPECT_EQ(defaults.monotonic_output_order, 1);
+  // Each of the first MAX_NUM_XLAYERS - 1 slots carries the same defaults.
+  for (int idx = 0; idx < MAX_NUM_XLAYERS - 1; ++idx) {
+    const auto &layer = defaults.xlayers[idx];
+    EXPECT_EQ(layer.xlayer_id, -1);
+    EXPECT_EQ(layer.qp, -1);
+    EXPECT_EQ(layer.bitrate, -1);
+    EXPECT_EQ(layer.cpu_used, -1);
+    EXPECT_EQ(layer.lag_in_frames, -1);
+    EXPECT_EQ(layer.profile, (unsigned int)MAIN_420_10_IP1);
+    EXPECT_EQ(layer.level, (unsigned int)SEQ_LEVEL_4_0);
+    EXPECT_EQ(layer.num_temporal_layers, 1);
+    EXPECT_EQ(layer.num_embedded_layers, 1);
+    EXPECT_EQ(layer.view_type, VIEW_UNSPECIFIED);
+  }
+}
+
+// --- JSON Parsing Tests ---
+
+TEST(XLayerConfigParse, MinimalTwoLayer) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 1920, "height": 1080,
+        "qp": 128, "cpu_used": 5 },
+      { "xlayer_id": 1, "input": "b.raw", "width": 1920, "height": 1080,
+        "qp": 160, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // Values spelled out in the JSON.
+  EXPECT_EQ(parsed.num_xlayers, 2);
+  EXPECT_STREQ(parsed.output_filename, "out.obu");
+  EXPECT_EQ(parsed.xlayers[0].xlayer_id, 0);
+  EXPECT_STREQ(parsed.xlayers[0].input_filename, "a.raw");
+  EXPECT_EQ(parsed.xlayers[0].width, 1920u);
+  EXPECT_EQ(parsed.xlayers[0].height, 1080u);
+  EXPECT_EQ(parsed.xlayers[0].qp, 128);
+  EXPECT_EQ(parsed.xlayers[1].xlayer_id, 1);
+  EXPECT_STREQ(parsed.xlayers[1].input_filename, "b.raw");
+  EXPECT_EQ(parsed.xlayers[1].qp, 160);
+
+  // Settings absent from the JSON keep their default of 1.
+  EXPECT_EQ(parsed.enable_global_lcr, 1);
+  EXPECT_EQ(parsed.combined_tu, 1);
+}
+
+TEST(XLayerConfigParse, LayerTypes) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "layer_type": "texture",
+        "view_type": "left" },
+      { "xlayer_id": 1, "input": "b.raw", "layer_type": "auxiliary",
+        "auxiliary_type": "depth" },
+      { "xlayer_id": 2, "input": "c.raw", "layer_type": "stereo",
+        "view_type": "right" }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // The layer/view/auxiliary type strings map onto their enum values.
+  EXPECT_EQ(parsed.xlayers[0].layer_type, TEXTURE_LAYER);
+  EXPECT_EQ(parsed.xlayers[1].layer_type, AUX_LAYER);
+  EXPECT_EQ(parsed.xlayers[2].layer_type, STEREO_LAYER);
+  EXPECT_EQ(parsed.xlayers[0].view_type, VIEW_LEFT);
+  EXPECT_EQ(parsed.xlayers[2].view_type, VIEW_RIGHT);
+  EXPECT_EQ(parsed.xlayers[1].auxiliary_type, LCR_DEPTH_AUX);
+}
+
+TEST(XLayerConfigParse, AllAuxiliaryTypes) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "layer_type": "auxiliary",
+        "auxiliary_type": "alpha" },
+      { "xlayer_id": 1, "input": "b.raw", "layer_type": "auxiliary",
+        "auxiliary_type": "depth" },
+      { "xlayer_id": 2, "input": "c.raw", "layer_type": "auxiliary",
+        "auxiliary_type": "segmentation" },
+      { "xlayer_id": 3, "input": "d.raw", "layer_type": "auxiliary",
+        "auxiliary_type": "gain_map" }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // One xlayer per auxiliary_type string, checked in declaration order.
+  EXPECT_EQ(parsed.xlayers[0].auxiliary_type, LCR_ALPHA_AUX);
+  EXPECT_EQ(parsed.xlayers[1].auxiliary_type, LCR_DEPTH_AUX);
+  EXPECT_EQ(parsed.xlayers[2].auxiliary_type, LCR_SEGMENTATION_AUX);
+  EXPECT_EQ(parsed.xlayers[3].auxiliary_type, LCR_GAIN_MAP_AUX);
+}
+
+TEST(XLayerConfigParse, GlobalLcrSection) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw" }
+    ],
+    "global_lcr": {
+      "enable": true,
+      "purpose_id": 3,
+      "dependent_xlayers": true,
+      "doh_constraint": false
+    }
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // JSON booleans land in the int flags as 1 / 0.
+  EXPECT_EQ(parsed.lcr_purpose_id, 3);
+  EXPECT_EQ(parsed.enable_global_lcr, 1);
+  EXPECT_EQ(parsed.lcr_dependent_xlayers_flag, 1);
+  EXPECT_EQ(parsed.lcr_doh_constraint_flag, 0);
+}
+
+TEST(XLayerConfigParse, MsdoSection) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw" }
+    ],
+    "msdo": { "enable": true }
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // "msdo.enable": true must switch enable_msdo on.
+  EXPECT_EQ(parsed.enable_msdo, 1);
+}
+
+TEST(XLayerConfigParse, OpsSection) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw" },
+      { "xlayer_id": 1, "input": "b.raw" }
+    ],
+    "ops": [
+      {
+        "ops_id": 0,
+        "priority": 2,
+        "intent_present": true,
+        "ptl_present": true,
+        "operating_points": [
+          { "intent": 0, "xlayer_map": [0] },
+          { "intent": 1, "xlayer_map": [0, 1] }
+        ]
+      }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  const auto &ops_set = parsed.ops_sets[0];
+  EXPECT_EQ(parsed.num_ops_sets, 1);
+  EXPECT_EQ(ops_set.ops_id, 0);
+  EXPECT_EQ(ops_set.priority, 2);
+  EXPECT_EQ(ops_set.intent_present_flag, 1);
+  EXPECT_EQ(ops_set.ptl_present_flag, 1);
+  EXPECT_EQ(ops_set.num_operating_points, 2);
+
+  // First operating point lists xlayer 0 alone, so the map is 0x1.
+  EXPECT_EQ(ops_set.ops[0].intent, 0);
+  EXPECT_EQ(ops_set.ops[0].xlayer_map, 1u);
+
+  // Second operating point lists xlayers 0 and 1, so the map is 0x3.
+  EXPECT_EQ(ops_set.ops[1].intent, 1);
+  EXPECT_EQ(ops_set.ops[1].xlayer_map, 3u);
+}
+
+TEST(XLayerConfigParse, EncoderOverrideDefaults) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 5, "input": "a.raw" }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  const auto &layer = parsed.xlayers[0];
+  // Encoder overrides that the JSON omits stay at -1.
+  EXPECT_EQ(layer.qp, -1);
+  EXPECT_EQ(layer.bitrate, -1);
+  EXPECT_EQ(layer.cpu_used, -1);
+  EXPECT_EQ(layer.lag_in_frames, -1);
+
+  // Remaining fields keep the values set by init.
+  EXPECT_EQ(layer.profile, (unsigned int)MAIN_420_10_IP1);
+  EXPECT_EQ(layer.num_temporal_layers, 1);
+  EXPECT_EQ(layer.num_embedded_layers, 1);
+}
+
+// --- Error / Invalid Input Tests ---
+
+TEST(XLayerConfigParse, NonexistentFile) {
+  MultiXLayerConfig scratch;  // output target; the parse is expected to fail
+  EXPECT_NE(0, parse_multi_xlayer_config("/nonexistent/path.json", &scratch));
+}
+
+TEST(XLayerConfigParse, InvalidJson) {
+  // Malformed JSON text must make the parser return an error.
+  const char *json_path = WriteTempJson("{ this is not valid json }}}");
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig scratch;
+  EXPECT_NE(parse_multi_xlayer_config(json_path, &scratch), 0);
+}
+
+TEST(XLayerConfigParse, MissingXlayersArray) {
+  // Well-formed JSON without an "xlayers" array must be rejected.
+  const char *json_path = WriteTempJson(R"({ "output": "test.obu" })");
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig scratch;
+  EXPECT_NE(parse_multi_xlayer_config(json_path, &scratch), 0);
+}
+
+TEST(XLayerConfigParse, XlayerIdOutOfRange) {
+  // An xlayer_id of 31 must be rejected at parse time.
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 31, "input": "a.raw" }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  MultiXLayerConfig scratch;
+  EXPECT_NE(parse_multi_xlayer_config(json_path, &scratch), 0);
+}
+
+TEST(XLayerConfigParse, MissingInputField) {
+  const char *json = R"({
+    "xlayers": [
+      { "xlayer_id": 0 }
+    ]
+  })";
+  const char *path = WriteTempJson(json);
+  ASSERT_NE(path, nullptr);
+  MultiXLayerConfig cfg;
+  // Parse succeeds (input is optional when shared source is used). This is an
+  // ASSERT so that validation never runs on a config that failed to parse.
+  ASSERT_EQ(parse_multi_xlayer_config(path, &cfg), 0);
+  // But validation fails (no input and no shared source)
+  EXPECT_NE(validate_multi_xlayer_config(&cfg), 0);
+}
+
+// --- Validation Tests ---
+
+TEST(XLayerConfigValidate, ValidTwoLayers) {
+  // Two xlayers with distinct IDs and their own input files validate.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[1].xlayer_id = 1;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, DuplicateXlayerId) {
+  // Both xlayers claim ID 3, which validation must reject.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.xlayers[0].xlayer_id = 3;
+  config.xlayers[1].xlayer_id = 3;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, MissingInputFilename) {
+  // input_filename is deliberately never set on the only xlayer.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.xlayers[0].xlayer_id = 0;
+  config.num_xlayers = 1;
+
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, OpsReferencesInvalidXlayer) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 1;
+  config.xlayers[0].xlayer_id = 0;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+
+  // The single operating point selects xlayer 5, which is not configured.
+  config.num_ops_sets = 1;
+  auto &ops_set = config.ops_sets[0];
+  ops_set.enable = 1;
+  ops_set.num_operating_points = 1;
+  ops_set.ops[0].xlayer_map = (1u << 5);
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, ZeroXlayers) {
+  // A configuration without any xlayers must not validate.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 0;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, NonMonotonicRequiresSameCodingStructure) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.monotonic_output_order = 0;
+  auto &first = config.xlayers[0];
+  auto &second = config.xlayers[1];
+  first.xlayer_id = 0;
+  second.xlayer_id = 1;
+  snprintf(first.input_filename, PATH_MAX, "a.raw");
+  snprintf(second.input_filename, PATH_MAX, "b.raw");
+  first.num_temporal_layers = second.num_temporal_layers = 1;
+  // Baseline: identical coding structures validate.
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+
+  // A num_temporal_layers mismatch is rejected.
+  second.num_temporal_layers = 3;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+  second.num_temporal_layers = 1;  // undo
+
+  // A lag_in_frames mismatch is rejected.
+  first.lag_in_frames = 19;
+  second.lag_in_frames = 35;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+  second.lag_in_frames = 19;  // undo
+
+  // A kf_max_dist mismatch is rejected.
+  first.kf_max_dist = 150;
+  second.kf_max_dist = 300;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+  second.kf_max_dist = 150;  // undo
+
+  // A subgop_config_path mismatch is rejected.
+  snprintf(first.subgop_config_path, PATH_MAX, "low_delay.json");
+  snprintf(second.subgop_config_path, PATH_MAX, "random_access.json");
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, MonotonicAllowsDifferentCodingStructure) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.monotonic_output_order = 1;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[1].xlayer_id = 1;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  config.xlayers[0].num_temporal_layers = 1;
+  config.xlayers[1].num_temporal_layers = 3;
+  config.xlayers[0].lag_in_frames = 19;
+  config.xlayers[1].lag_in_frames = 35;
+  config.xlayers[0].kf_max_dist = 150;
+  config.xlayers[1].kf_max_dist = 300;
+
+  // With monotonic=1 the per-xlayer coding structures are free to differ.
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigParse, CodingStructureFields) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "kf_max_dist": 150,
+        "subgop_config": "low_delay.json" },
+      { "xlayer_id": 1, "input": "b.raw",
+        "kf_max_dist": 300 }
+    ],
+    "monotonic_output_order": true
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // An omitted "subgop_config" leaves subgop_config_path empty.
+  EXPECT_EQ(parsed.monotonic_output_order, 1);
+  EXPECT_EQ(parsed.xlayers[0].kf_max_dist, 150);
+  EXPECT_EQ(parsed.xlayers[1].kf_max_dist, 300);
+  EXPECT_STREQ(parsed.xlayers[0].subgop_config_path, "low_delay.json");
+  EXPECT_STREQ(parsed.xlayers[1].subgop_config_path, "");
+}
+
+TEST(XLayerConfigParse, NonMonotonicRejectsMismatch) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "num_temporal_layers": 1 },
+      { "xlayer_id": 1, "input": "b.raw", "num_temporal_layers": 3 }
+    ],
+    "monotonic_output_order": false
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+
+  // The parser accepts the file; the validator must reject the mismatch.
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+  EXPECT_NE(validate_multi_xlayer_config(&parsed), 0);
+}
+
+// --- Annex G Config File Parsing Tests ---
+
+// Locates a file under cfg/xlayer/ by probing a few directories relative to
+// the current working directory (expected: the build dir or the repo root).
+static std::string CfgPath(const char *relative) {
+  // Candidate locations of cfg/xlayer/, probed in this order.
+  static const char *const kPrefixes[] = {
+    "../avm/cfg/xlayer/",     // build dir is sibling of avm/
+    "../../avm/cfg/xlayer/",  // one level deeper
+    "../cfg/xlayer/",         // build dir inside avm/
+    "cfg/xlayer/",            // running from repo root
+  };
+  const size_t count = sizeof(kPrefixes) / sizeof(kPrefixes[0]);
+  for (size_t i = 0; i < count; ++i) {
+    const std::string candidate = std::string(kPrefixes[i]) + relative;
+    FILE *probe = fopen(candidate.c_str(), "r");
+    if (probe == nullptr) continue;
+    fclose(probe);
+    return candidate;
+  }
+  // Nothing readable: return the last candidate so the caller's open fails.
+  return std::string(kPrefixes[count - 1]) + relative;
+}
+
+TEST(XLayerConfigAnnexG, G2_360Degree9Xlayer) {
+  // Skip only when the file is absent. A config that exists but fails to parse
+  // must fail the test instead of being silently skipped.
+  std::string path = CfgPath("annexG2_360degree_9xlayer.json");
+  FILE *probe = fopen(path.c_str(), "r");
+  if (!probe) GTEST_SKIP() << "Config file not found: " << path;
+  fclose(probe);
+  MultiXLayerConfig cfg;
+  ASSERT_EQ(parse_multi_xlayer_config(path.c_str(), &cfg), 0) << path;
+
+  EXPECT_EQ(cfg.num_xlayers, 9);
+
+  // Verify xlayer IDs are 0-8
+  for (int i = 0; i < 9; i++) EXPECT_EQ(cfg.xlayers[i].xlayer_id, i);
+
+  // All subpictures are 1280x640
+  for (int i = 0; i < 9; i++) {
+    EXPECT_EQ(cfg.xlayers[i].width, 1280u);
+    EXPECT_EQ(cfg.xlayers[i].height, 640u);
+  }
+
+  // Center viewport (xlayer 4) should have lowest QP (highest quality)
+  EXPECT_LT(cfg.xlayers[4].qp, cfg.xlayers[0].qp);
+
+  // 3 embedded layers per xlayer
+  for (int i = 0; i < 9; i++) {
+    EXPECT_EQ(cfg.xlayers[i].num_embedded_layers, 3);
+  }
+
+  EXPECT_EQ(cfg.enable_global_lcr, 1);
+  EXPECT_EQ(cfg.enable_msdo, 0);  // MSDO disabled (>4 streams)
+
+  // OPS: 3 operating points
+  EXPECT_EQ(cfg.num_ops_sets, 1);
+  EXPECT_EQ(cfg.ops_sets[0].num_operating_points, 3);
+
+  // OP0: center only (xlayer 4)
+  EXPECT_EQ(cfg.ops_sets[0].ops[0].xlayer_map, (1u << 4));
+
+  // OP2: all 9 subpictures
+  uint32_t all9 = (1u << 9) - 1;  // bits 0-8
+  EXPECT_EQ(cfg.ops_sets[0].ops[2].xlayer_map, all9);
+
+  EXPECT_EQ(cfg.enable_atlas, 1);
+  EXPECT_EQ(validate_multi_xlayer_config(&cfg), 0);
+}
+
+TEST(XLayerConfigAnnexG, G3_VideoConf3Xlayer) {
+  // Skip only when the file is absent. A config that exists but fails to parse
+  // must fail the test instead of being silently skipped.
+  std::string path = CfgPath("annexG3_videoconf_3xlayer.json");
+  FILE *probe = fopen(path.c_str(), "r");
+  if (!probe) GTEST_SKIP() << "Config file not found: " << path;
+  fclose(probe);
+  MultiXLayerConfig cfg;
+  ASSERT_EQ(parse_multi_xlayer_config(path.c_str(), &cfg), 0) << path;
+
+  EXPECT_EQ(cfg.num_xlayers, 3);
+  for (int i = 0; i < 3; i++) EXPECT_EQ(cfg.xlayers[i].xlayer_id, i);
+
+  // Main speaker: 1280x1080
+  EXPECT_EQ(cfg.xlayers[0].width, 1280u);
+  EXPECT_EQ(cfg.xlayers[0].height, 1080u);
+
+  // Participant 2: 480x360 (encoded small, upsampled by atlas)
+  EXPECT_EQ(cfg.xlayers[1].width, 480u);
+  EXPECT_EQ(cfg.xlayers[1].height, 360u);
+
+  // Participant 3: 640x540
+  EXPECT_EQ(cfg.xlayers[2].width, 640u);
+  EXPECT_EQ(cfg.xlayers[2].height, 540u);
+
+  EXPECT_EQ(cfg.enable_global_lcr, 1);
+  EXPECT_EQ(cfg.lcr_purpose_id, 6);  // Multiview Playback
+
+  // OPS: 3 operating points
+  EXPECT_EQ(cfg.num_ops_sets, 1);
+  EXPECT_EQ(cfg.ops_sets[0].num_operating_points, 3);
+
+  // OP0: main speaker only
+  EXPECT_EQ(cfg.ops_sets[0].ops[0].xlayer_map, (1u << 0));
+
+  // OP2: all 3 participants
+  EXPECT_EQ(cfg.ops_sets[0].ops[2].xlayer_map,
+            (1u << 0) | (1u << 1) | (1u << 2));
+
+  EXPECT_EQ(cfg.enable_atlas, 1);
+  EXPECT_EQ(validate_multi_xlayer_config(&cfg), 0);
+}
+
+TEST(XLayerConfigAnnexG, G4_RoiScalable2Xlayer) {
+  // Skip only when the file is absent. A config that exists but fails to parse
+  // must fail the test instead of being silently skipped.
+  std::string path = CfgPath("annexG4_roi_scalable_2xlayer.json");
+  FILE *probe = fopen(path.c_str(), "r");
+  if (!probe) GTEST_SKIP() << "Config file not found: " << path;
+  fclose(probe);
+  MultiXLayerConfig cfg;
+  ASSERT_EQ(parse_multi_xlayer_config(path.c_str(), &cfg), 0) << path;
+
+  EXPECT_EQ(cfg.num_xlayers, 2);
+  EXPECT_EQ(cfg.xlayers[0].xlayer_id, 0);
+  EXPECT_EQ(cfg.xlayers[1].xlayer_id, 1);
+
+  // Base layer: full stadium 1920x1080
+  EXPECT_EQ(cfg.xlayers[0].width, 1920u);
+  EXPECT_EQ(cfg.xlayers[0].height, 1080u);
+
+  // Enhancement: field-of-play 1280x720
+  EXPECT_EQ(cfg.xlayers[1].width, 1280u);
+  EXPECT_EQ(cfg.xlayers[1].height, 720u);
+
+  // Enhancement should have better quality (lower QP)
+  EXPECT_LT(cfg.xlayers[1].qp, cfg.xlayers[0].qp);
+
+  EXPECT_EQ(cfg.enable_global_lcr, 1);
+  EXPECT_EQ(cfg.enable_msdo, 0);
+
+  // OPS: 3 operating points
+  EXPECT_EQ(cfg.num_ops_sets, 1);
+  EXPECT_EQ(cfg.ops_sets[0].num_operating_points, 3);
+
+  // OP0: base only
+  EXPECT_EQ(cfg.ops_sets[0].ops[0].xlayer_map, (1u << 0));
+  // OP1: enhancement only
+  EXPECT_EQ(cfg.ops_sets[0].ops[1].xlayer_map, (1u << 1));
+  // OP2: both layers
+  EXPECT_EQ(cfg.ops_sets[0].ops[2].xlayer_map, (1u << 0) | (1u << 1));
+
+  EXPECT_EQ(cfg.enable_atlas, 1);
+  EXPECT_EQ(validate_multi_xlayer_config(&cfg), 0);
+}
+
+// --- GOP Config Tests ---
+
+TEST(XLayerConfigParse, GopModeFields) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "gop_mode": "closed", "fwd_kf_enabled": 1 },
+      { "xlayer_id": 1, "input": "b.raw",
+        "gop_mode": "open_leading", "enable_keyframe_filtering": 2 },
+      { "xlayer_id": 2, "input": "c.raw",
+        "gop_mode": "open_sef", "add_sef_for_hidden_frames": 1 }
+    ],
+    "monotonic_output_order": false
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // "closed" / "open_leading" / "open_sef" are expected as 0 / 1 / 2.
+  EXPECT_EQ(parsed.xlayers[0].gop_mode, 0);
+  EXPECT_EQ(parsed.xlayers[1].gop_mode, 1);
+  EXPECT_EQ(parsed.xlayers[2].gop_mode, 2);
+  EXPECT_EQ(parsed.xlayers[0].fwd_kf_enabled, 1);
+  EXPECT_EQ(parsed.xlayers[1].enable_keyframe_filtering, 2);
+  EXPECT_EQ(parsed.xlayers[2].add_sef_for_hidden_frames, 1);
+}
+
+TEST(XLayerConfigValidate, OpenLeadingRejectedWithMonotonic) {
+  // gop_mode 1 combined with monotonic output order must not validate.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.monotonic_output_order = 1;
+  config.num_xlayers = 1;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[0].gop_mode = 1;  // open_leading
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, OpenLeadingAllowedWithNonMonotonic) {
+  // Both xlayers use gop_mode 1 and output order is non-monotonic: valid.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.monotonic_output_order = 0;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[1].xlayer_id = 1;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  config.xlayers[0].gop_mode = config.xlayers[1].gop_mode = 1;
+
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, NonMonotonicRequiresSameGopMode) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.monotonic_output_order = 0;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[1].xlayer_id = 1;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  // Differing gop_mode values (0 vs 2) must be rejected.
+  config.xlayers[0].gop_mode = 0;
+  config.xlayers[1].gop_mode = 2;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigParse, GopModeDefaults) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw" }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // With no GOP keys given: gop_mode is 0 (closed), the overrides are -1.
+  const auto &layer = parsed.xlayers[0];
+  EXPECT_EQ(layer.gop_mode, 0);
+  EXPECT_EQ(layer.fwd_kf_enabled, -1);
+  EXPECT_EQ(layer.enable_keyframe_filtering, -1);
+  EXPECT_EQ(layer.add_sef_for_hidden_frames, -1);
+}
+
+// --- Atlas Config Tests ---
+
+TEST(XLayerConfigParse, AtlasLayoutFields) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 960, "height": 540,
+        "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input": "b.raw", "width": 960, "height": 540,
+        "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "atlas": {
+      "enable": true,
+      "mode": 0,
+      "width": 1920,
+      "height": 540,
+      "uniform_spacing": false
+    }
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // Atlas-level settings first, then each xlayer's position in the atlas.
+  EXPECT_EQ(parsed.enable_atlas, 1);
+  EXPECT_EQ(parsed.atlas_mode, 0);
+  EXPECT_EQ(parsed.atlas_uniform_spacing, 0);
+  EXPECT_EQ(parsed.atlas_width, 1920);
+  EXPECT_EQ(parsed.atlas_height, 540);
+  EXPECT_EQ(parsed.xlayers[0].atlas_pos_x, 0);
+  EXPECT_EQ(parsed.xlayers[0].atlas_pos_y, 0);
+  EXPECT_EQ(parsed.xlayers[1].atlas_pos_x, 960);
+  EXPECT_EQ(parsed.xlayers[1].atlas_pos_y, 0);
+}
+
+// --- Scaling Mode / Embedded Layer Tests ---
+
+TEST(XLayerConfigParse, ScalingModeInteger) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 3,
+        "scaling_mode": [4, 6, 0] }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // Integer scaling modes are stored exactly as written.
+  EXPECT_EQ(parsed.xlayers[0].num_embedded_layers, 3);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[0], 4);  // AVME_ONEFOUR
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[1], 6);  // AVME_ONETWO
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[2], 0);  // AVME_NORMAL
+}
+
+TEST(XLayerConfigParse, ScalingModeString) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 3,
+        "scaling_mode": ["1/4", "1/2", "1:1"] }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // Ratio strings are translated to the matching AVME_* constants.
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[0], AVME_ONEFOUR);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[1], AVME_ONETWO);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[2], AVME_NORMAL);
+}
+
+TEST(XLayerConfigParse, ScalingModeAllStringVariants) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "num_embedded_layers": 7,
+        "scaling_mode": ["1/8", "1/4", "1/2", "3/5", "3/4", "4/5", "1:1"] }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // Seven ratio strings, each checked against its AVME_* constant.
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[0], AVME_ONEEIGHT);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[1], AVME_ONEFOUR);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[2], AVME_ONETWO);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[3], AVME_THREEFIVE);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[4], AVME_THREEFOUR);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[5], AVME_FOURFIVE);
+  EXPECT_EQ(parsed.xlayers[0].scaling_mode[6], AVME_NORMAL);
+}
+
+TEST(XLayerConfigParse, ScalingModeInvalidString) {
+  // An unrecognised scaling-mode string ("bogus") must fail the parse.
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "num_embedded_layers": 2,
+        "scaling_mode": ["bogus", "1:1"] }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  MultiXLayerConfig scratch;
+  EXPECT_NE(parse_multi_xlayer_config(json_path, &scratch), 0);
+}
+
+TEST(XLayerConfigParse, ScalingModeDefaultDerivation2Layers) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "num_embedded_layers": 2 }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // No "scaling_mode" given: two layers are expected as [1/2, 1:1].
+  const auto &modes = parsed.xlayers[0].scaling_mode;
+  EXPECT_EQ(modes[0], AVME_ONETWO);
+  EXPECT_EQ(modes[1], AVME_NORMAL);
+}
+
+TEST(XLayerConfigParse, ScalingModeDefaultDerivation3Layers) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "num_embedded_layers": 3 }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // No "scaling_mode" given: three layers are expected as [1/4, 1/2, 1:1].
+  const auto &modes = parsed.xlayers[0].scaling_mode;
+  EXPECT_EQ(modes[0], AVME_ONEFOUR);
+  EXPECT_EQ(modes[1], AVME_ONETWO);
+  EXPECT_EQ(modes[2], AVME_NORMAL);
+}
+
+TEST(XLayerConfigParse, ScalingModeExplicitOverridesDefault) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw",
+        "num_embedded_layers": 2,
+        "scaling_mode": ["3/4", "1:1"] }
+    ]
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // The listed modes win over the derived two-layer defaults.
+  const auto &modes = parsed.xlayers[0].scaling_mode;
+  EXPECT_EQ(modes[0], AVME_THREEFOUR);
+  EXPECT_EQ(modes[1], AVME_NORMAL);
+}
+
+TEST(XLayerConfigValidate, EmbeddedLayerLastMustBeFullRes) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 1;
+  auto &layer = config.xlayers[0];
+  layer.xlayer_id = 0;
+  snprintf(layer.input_filename, PATH_MAX, "a.raw");
+  layer.num_embedded_layers = 2;
+  layer.scaling_mode[0] = AVME_ONETWO;
+  layer.scaling_mode[1] = AVME_ONETWO;  // last layer not full-res: invalid
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+
+  // Making the last layer full resolution turns the config valid.
+  layer.scaling_mode[1] = AVME_NORMAL;
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, EmbeddedLayerOutOfRange) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 1;
+  auto &layer = config.xlayers[0];
+  layer.xlayer_id = 0;
+  snprintf(layer.input_filename, PATH_MAX, "a.raw");
+  layer.num_embedded_layers = 0;  // zero embedded layers must be rejected
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, EmbeddedLayerInvalidScalingMode) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 1;
+  auto &layer = config.xlayers[0];
+  layer.xlayer_id = 0;
+  snprintf(layer.input_filename, PATH_MAX, "a.raw");
+  layer.num_embedded_layers = 2;
+  layer.scaling_mode[1] = AVME_NORMAL;
+  layer.scaling_mode[0] = 99;  // the test expects 99 to be rejected
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, NonMonotonicAllowsDifferentEmbeddedLayers) {
+  // Xlayers may differ in num_embedded_layers. What must line up is the
+  // order hints and synchronized RAPs of the output frames within a TU,
+  // not the embedded layer count of each xlayer.
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.monotonic_output_order = 0;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[1].xlayer_id = 1;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  config.xlayers[0].num_embedded_layers = 3;
+  config.xlayers[1].num_embedded_layers = 1;
+  config.xlayers[0].scaling_mode[0] = AVME_ONEFOUR;
+  config.xlayers[0].scaling_mode[1] = AVME_ONETWO;
+
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+TEST(XLayerConfigValidate, MonotonicAllowsDifferentEmbeddedLayers) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 2;
+  config.monotonic_output_order = 1;
+  config.xlayers[0].xlayer_id = 0;
+  config.xlayers[1].xlayer_id = 1;
+  snprintf(config.xlayers[0].input_filename, PATH_MAX, "a.raw");
+  snprintf(config.xlayers[1].input_filename, PATH_MAX, "b.raw");
+  config.xlayers[0].num_embedded_layers = 3;
+  config.xlayers[1].num_embedded_layers = 1;
+  config.xlayers[0].scaling_mode[0] = AVME_ONEFOUR;
+  config.xlayers[0].scaling_mode[1] = AVME_ONETWO;
+  // Three embedded layers next to one is valid under monotonic output order.
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+// --- Multi-Source Input Tests ---
+
+TEST(XLayerConfigParse, MultiSourceParsing) {
+  MultiXLayerConfig parsed;
+  const char *json_text = R"({
+    "inputs": [
+      { "name": "texture", "filename": "video.raw", "width": 1920,
+        "height": 1080 },
+      { "name": "alpha", "filename": "alpha.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "texture", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "alpha", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(json_text);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &parsed), 0);
+
+  // Named sources are recorded, and each xlayer keeps its source's name.
+  EXPECT_EQ(parsed.num_input_sources, 2);
+  EXPECT_STREQ(parsed.input_sources[0].name, "texture");
+  EXPECT_STREQ(parsed.input_sources[1].name, "alpha");
+  EXPECT_STREQ(parsed.input_sources[0].filename, "video.raw");
+  EXPECT_STREQ(parsed.input_sources[1].filename, "alpha.raw");
+  EXPECT_EQ(parsed.input_sources[0].width, 1920u);
+  EXPECT_EQ(parsed.input_sources[0].height, 1080u);
+  EXPECT_STREQ(parsed.xlayers[0].input_source_name, "texture");
+  EXPECT_STREQ(parsed.xlayers[1].input_source_name, "alpha");
+}
+
+// Three xlayers reference two named inputs; after resolve_input_sources()
+// each xlayer's input_source_idx must be the index of the input it named.
+TEST(XLayerConfigParse, MultiSourceResolution) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "texture", "filename": "video.raw", "width": 1920,
+        "height": 1080 },
+      { "name": "alpha", "filename": "alpha.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "texture", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "texture", "width": 960,
+        "height": 540, "atlas_pos_x": 960, "atlas_pos_y": 0 },
+      { "xlayer_id": 2, "input_source": "alpha", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  // "texture" is inputs[0] and "alpha" is inputs[1]; xlayers 0..2 name
+  // texture, texture and alpha respectively.
+  const int expected_idx[] = { 0, 0, 1 };
+  for (int i = 0; i < 3; ++i) {
+    EXPECT_EQ(config.xlayers[i].input_source_idx, expected_idx[i]);
+  }
+}
+
+// A legacy top-level "source" object must still parse: the test expects it to
+// appear as a single input source named "default", with the legacy
+// source_filename field populated and every xlayer resolving to index 0.
+TEST(XLayerConfigParse, MultiSourceBackwardCompat) {
+  const char *config_json = R"({
+    "source": {
+      "filename": "video.raw",
+      "width": 1920,
+      "height": 1080
+    },
+    "xlayers": [
+      { "xlayer_id": 0, "width": 960, "height": 540,
+        "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "width": 960, "height": 540,
+        "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  // The legacy source shows up as input_sources[0] under the name "default".
+  EXPECT_EQ(config.num_input_sources, 1);
+  const auto &legacy_src = config.input_sources[0];
+  EXPECT_STREQ(legacy_src.name, "default");
+  EXPECT_STREQ(legacy_src.filename, "video.raw");
+  EXPECT_EQ(legacy_src.width, 1920u);
+  EXPECT_EQ(legacy_src.height, 1080u);
+  // The pre-existing top-level field is filled in as well.
+  EXPECT_STREQ(config.source_filename, "video.raw");
+
+  // Neither xlayer names a source; resolution must map both to source 0.
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+  for (int i = 0; i < 2; ++i) {
+    EXPECT_EQ(config.xlayers[i].input_source_idx, 0);
+  }
+}
+
+// An xlayer that names an input absent from "inputs" is accepted by the
+// parser but must be rejected by resolve_input_sources().
+TEST(XLayerConfigParse, MultiSourceUnknownName) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "texture", "filename": "video.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "nonexistent", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  // Parsing alone succeeds; the bad reference is only caught on resolve.
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  const int resolve_status = resolve_input_sources(&config);
+  EXPECT_NE(resolve_status, 0);
+}
+
+// Two input sources that share the name "texture" must make validation fail.
+TEST(XLayerConfigValidate, MultiSourceDuplicateName) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+
+  // Both sources are 1920x1080 and differ only in their file name.
+  config.num_input_sources = 2;
+  auto &src_a = config.input_sources[0];
+  snprintf(src_a.name, MAX_SOURCE_NAME_LEN, "texture");
+  snprintf(src_a.filename, PATH_MAX, "a.raw");
+  src_a.width = 1920;
+  src_a.height = 1080;
+  auto &src_b = config.input_sources[1];
+  snprintf(src_b.name, MAX_SOURCE_NAME_LEN, "texture");
+  snprintf(src_b.filename, PATH_MAX, "b.raw");
+  src_b.width = 1920;
+  src_b.height = 1080;
+
+  // One 960x540 xlayer per source, at x offsets 0 and 960.
+  config.num_xlayers = 2;
+  for (int i = 0; i < 2; ++i) {
+    auto &xl = config.xlayers[i];
+    xl.xlayer_id = i;
+    xl.input_source_idx = i;
+    xl.width = 960;
+    xl.height = 540;
+    xl.atlas_pos_x = 960 * i;
+    xl.atlas_pos_y = 0;
+  }
+
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+}
+
+// Two xlayers share one input source. Validation must fail while their
+// profiles differ (MAIN_420_10_IP1 vs MAIN_444_10_IP1) and succeed once both
+// use MAIN_420_10_IP1.
+TEST(XLayerConfigValidate, MultiSourceChromaValidation) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+
+  config.num_input_sources = 1;
+  auto &shared_src = config.input_sources[0];
+  snprintf(shared_src.name, MAX_SOURCE_NAME_LEN, "default");
+  snprintf(shared_src.filename, PATH_MAX, "v.raw");
+  shared_src.width = 1920;
+  shared_src.height = 1080;
+
+  // Two 960x540 xlayers on source 0, at x offsets 0 and 960.
+  config.num_xlayers = 2;
+  for (int i = 0; i < 2; ++i) {
+    auto &xl = config.xlayers[i];
+    xl.xlayer_id = i;
+    xl.input_source_idx = 0;
+    xl.width = 960;
+    xl.height = 540;
+    xl.atlas_pos_x = 960 * i;
+    xl.atlas_pos_y = 0;
+  }
+  config.xlayers[0].profile = MAIN_420_10_IP1;
+  config.xlayers[1].profile = MAIN_444_10_IP1;  // Deliberate mismatch.
+
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+
+  // Positive control: align the second profile with the first.
+  config.xlayers[1].profile = MAIN_420_10_IP1;
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+// Mixes both input styles in one config: xlayer 0 references a named input,
+// xlayer 1 supplies its own "input" file. After resolution the first must
+// point at source 0 and the second must keep index -1 and its own filename.
+TEST(XLayerConfigParse, MultiSourceMixedMode) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "texture", "filename": "video.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "texture", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input": "depth.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  // Shared-source xlayer.
+  const auto &shared_xl = config.xlayers[0];
+  EXPECT_EQ(shared_xl.input_source_idx, 0);
+
+  // Stand-alone xlayer reading its own file.
+  const auto &own_file_xl = config.xlayers[1];
+  EXPECT_EQ(own_file_xl.input_source_idx, -1);
+  EXPECT_STREQ(own_file_xl.input_filename, "depth.raw");
+}
+
+// With exactly one entry in "inputs", xlayers that omit "input_source" are
+// expected to resolve to that sole source (index 0).
+TEST(XLayerConfigParse, MultiSourceSingleDefault) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "main", "filename": "video.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "width": 960, "height": 540,
+        "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "width": 960, "height": 540,
+        "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  // Both unassigned xlayers fall back to the only available source.
+  for (int i = 0; i < 2; ++i) {
+    EXPECT_EQ(config.xlayers[i].input_source_idx, 0);
+  }
+}
+
+// A config carrying both the "inputs" array and the legacy "source" object
+// must be rejected at parse time.
+TEST(XLayerConfigParse, InputsAndSourceMutuallyExclusive) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "texture", "filename": "video.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "source": {
+      "filename": "video.raw",
+      "width": 1920,
+      "height": 1080
+    },
+    "xlayers": [
+      { "xlayer_id": 0, "width": 960, "height": 540,
+        "atlas_pos_x": 0, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  const int parse_status = parse_multi_xlayer_config(json_path, &config);
+  EXPECT_NE(parse_status, 0);
+}
+
+// With two inputs declared and an xlayer that names neither, the choice is
+// ambiguous: parsing succeeds but resolve_input_sources() must fail.
+TEST(XLayerConfigParse, MultiSourceAmbiguousNoInputSource) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "texture", "filename": "video.raw", "width": 1920,
+        "height": 1080 },
+      { "name": "alpha", "filename": "alpha.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "width": 960, "height": 540,
+        "atlas_pos_x": 0, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  // No explicit "input_source" and more than one candidate: expect an error.
+  const int resolve_status = resolve_input_sources(&config);
+  EXPECT_NE(resolve_status, 0);
+}
+
+// --- Frame Rate Tests ---
+
+// Integer "frame_rate" values must parse to num/1, and after resolution each
+// source's frame_skip must equal the fastest rate divided by its own rate.
+TEST(XLayerConfigParse, FrameRateIntegerParsing) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "fast", "filename": "a.raw", "width": 1920, "height": 1080,
+        "frame_rate": 60 },
+      { "name": "slow", "filename": "b.raw", "width": 1920, "height": 1080,
+        "frame_rate": 15 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "fast", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "slow", "width": 960,
+        "height": 540, "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  // Parsed rates: 60/1 for "fast", 15/1 for "slow".
+  const auto &fast_src = config.input_sources[0];
+  const auto &slow_src = config.input_sources[1];
+  EXPECT_EQ(fast_src.frame_rate_num, 60);
+  EXPECT_EQ(fast_src.frame_rate_den, 1);
+  EXPECT_EQ(slow_src.frame_rate_num, 15);
+  EXPECT_EQ(slow_src.frame_rate_den, 1);
+
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+  EXPECT_EQ(fast_src.frame_skip, 1);  // 60 / 60
+  EXPECT_EQ(slow_src.frame_skip, 4);  // 60 / 15
+}
+
+// "frame_rate" given as a "num/den" string must parse into separate numerator
+// and denominator fields, and the skip factor must come from the rate ratio.
+TEST(XLayerConfigParse, FrameRateRationalString) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "ntsc", "filename": "a.raw", "width": 1920, "height": 1080,
+        "frame_rate": "30000/1001" },
+      { "name": "half", "filename": "b.raw", "width": 1920, "height": 1080,
+        "frame_rate": "15000/1001" }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "ntsc", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "half", "width": 960,
+        "height": 540, "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  const auto &ntsc_src = config.input_sources[0];
+  const auto &half_src = config.input_sources[1];
+  EXPECT_EQ(ntsc_src.frame_rate_num, 30000);
+  EXPECT_EQ(ntsc_src.frame_rate_den, 1001);
+  EXPECT_EQ(half_src.frame_rate_num, 15000);
+  EXPECT_EQ(half_src.frame_rate_den, 1001);
+
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+  // (30000/1001) / (15000/1001) = (30000 * 1001) / (1001 * 15000) = 2, so the
+  // slower source is expected to have a skip of 2 and the faster a skip of 1.
+  EXPECT_EQ(ntsc_src.frame_skip, 1);
+  EXPECT_EQ(half_src.frame_skip, 2);
+}
+
+// Source rates of 30 and 24 do not divide evenly (30 / 24 = 1.25), so
+// resolve_input_sources() is expected to report an error.
+TEST(XLayerConfigParse, FrameRateNonDivisorFails) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "a", "filename": "a.raw", "width": 1920, "height": 1080,
+        "frame_rate": 30 },
+      { "name": "b", "filename": "b.raw", "width": 1920, "height": 1080,
+        "frame_rate": 24 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "a", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "b", "width": 960,
+        "height": 540, "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  // The rates themselves parse fine; only the ratio check should fail.
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  const int resolve_status = resolve_input_sources(&config);
+  EXPECT_NE(resolve_status, 0);
+}
+
+// One input sets "frame_rate" to 60 and the other omits it; after resolution
+// both are expected to have frame_skip == 1, i.e. the source without a rate
+// is treated as running at the master rate.
+TEST(XLayerConfigParse, FrameRateUnspecifiedAssumesMax) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "fast", "filename": "a.raw", "width": 1920, "height": 1080,
+        "frame_rate": 60 },
+      { "name": "auto", "filename": "b.raw", "width": 1920,
+        "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "fast", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "auto", "width": 960,
+        "height": 540, "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  // Index 0 is the explicit 60 fps source, index 1 the unspecified one.
+  for (int i = 0; i < 2; ++i) {
+    EXPECT_EQ(config.input_sources[i].frame_skip, 1);
+  }
+}
+
+// When every input declares the same frame rate (30), no source should be
+// decimated: both frame_skip values are expected to be 1.
+TEST(XLayerConfigParse, FrameRateAllSameNoSkip) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "a", "filename": "a.raw", "width": 1920, "height": 1080,
+        "frame_rate": 30 },
+      { "name": "b", "filename": "b.raw", "width": 1920, "height": 1080,
+        "frame_rate": 30 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "a", "width": 960,
+        "height": 540, "atlas_pos_x": 0, "atlas_pos_y": 0 },
+      { "xlayer_id": 1, "input_source": "b", "width": 960,
+        "height": 540, "atlas_pos_x": 960, "atlas_pos_y": 0 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  for (int i = 0; i < 2; ++i) {
+    EXPECT_EQ(config.input_sources[i].frame_skip, 1);
+  }
+}
+
+// --- Embedded Layers (Per-MLlayer Source) Tests ---
+
+// End-to-end check (parse, resolve, validate) of an xlayer whose
+// "embedded_layers" entries each carry a scaling mode, a named input source
+// and a "depends_on" list.
+TEST(XLayerConfigParse, EmbeddedLayersParsing) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "left", "filename": "left.raw", "width": 1920, "height": 1080 },
+      { "name": "right", "filename": "right.raw", "width": 1920, "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "left", "width": 1920, "height": 1080,
+        "atlas_pos_x": 0, "atlas_pos_y": 0,
+        "num_embedded_layers": 2,
+        "embedded_layers": [
+          { "scaling_mode": "1/2", "input_source": "left",
+            "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+            "depends_on": [] },
+          { "scaling_mode": "1:1", "input_source": "right",
+            "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080,
+            "depends_on": [0] }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "stereo_ml.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+  ASSERT_EQ(validate_multi_xlayer_config(&config), 0);
+
+  // xlayer-level summary: two layers, "1/2" then "1:1", and both the
+  // per-mlayer-source and dependency flags raised.
+  const auto &xl = config.xlayers[0];
+  EXPECT_EQ(xl.num_embedded_layers, 2);
+  EXPECT_EQ(xl.scaling_mode[0], AVME_ONETWO);
+  EXPECT_EQ(xl.scaling_mode[1], AVME_NORMAL);
+  EXPECT_EQ(xl.has_per_mlayer_sources, 1);
+  EXPECT_EQ(xl.has_mlayer_dependencies, 1);
+
+  // mlayer 0 reads "left" (inputs[0]) and has an empty dependency list.
+  const auto &base_layer = xl.mlayer_sources[0];
+  EXPECT_EQ(base_layer.input_source_idx, 0);
+  EXPECT_EQ(base_layer.dependency_mask, 0);
+
+  // mlayer 1 reads "right" (inputs[1]) and depends on mlayer 0 (bit 0 set).
+  const auto &top_layer = xl.mlayer_sources[1];
+  EXPECT_EQ(top_layer.input_source_idx, 1);
+  EXPECT_EQ(top_layer.dependency_mask, 1);
+}
+
+// "embedded_layers" entries that carry only a scaling mode must fill the
+// xlayer's scaling_mode array and leave the per-mlayer source and dependency
+// flags cleared.
+TEST(XLayerConfigParse, EmbeddedLayersScalingModeOnly) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "test.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 3,
+        "embedded_layers": [
+          { "scaling_mode": "1/4" },
+          { "scaling_mode": "1/2" },
+          { "scaling_mode": "1:1" }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  // "1/4", "1/2" and "1:1" map to these enum values, in layer order.
+  const auto &xl = config.xlayers[0];
+  EXPECT_EQ(xl.scaling_mode[0], AVME_ONEFOUR);
+  EXPECT_EQ(xl.scaling_mode[1], AVME_ONETWO);
+  EXPECT_EQ(xl.scaling_mode[2], AVME_NORMAL);
+  EXPECT_EQ(xl.has_per_mlayer_sources, 0);
+  EXPECT_EQ(xl.has_mlayer_dependencies, 0);
+}
+
+// An xlayer that supplies both the flat "scaling_mode" array and an
+// "embedded_layers" array must be rejected by the parser.
+TEST(XLayerConfigParse, EmbeddedLayersAndScalingModeMutualExclusion) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "test.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 2,
+        "scaling_mode": ["1/2", "1:1"],
+        "embedded_layers": [
+          { "scaling_mode": "1/2" },
+          { "scaling_mode": "1:1" }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  const int parse_status = parse_multi_xlayer_config(json_path, &config);
+  EXPECT_NE(parse_status, 0);
+}
+
+// "num_embedded_layers" says 3 but "embedded_layers" holds only 2 entries;
+// the parser is expected to fail on the mismatch.
+TEST(XLayerConfigParse, EmbeddedLayersCountMismatch) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "test.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 3,
+        "embedded_layers": [
+          { "scaling_mode": "1/2" },
+          { "scaling_mode": "1:1" }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  const int parse_status = parse_multi_xlayer_config(json_path, &config);
+  EXPECT_NE(parse_status, 0);
+}
+
+// Checks how "depends_on" is turned into dependency_mask: an empty list gives
+// 0, [0] gives bit 0, and an omitted key leaves the mask at -1.
+TEST(XLayerConfigParse, EmbeddedLayersDependsOnParsing) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "test.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 3,
+        "embedded_layers": [
+          { "scaling_mode": "1/4", "depends_on": [] },
+          { "scaling_mode": "1/2", "depends_on": [0] },
+          { "scaling_mode": "1:1" }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  // Expected masks, indexed by mlayer: [] -> 0, [0] -> 1, absent -> -1.
+  const auto &xl = config.xlayers[0];
+  const int expected_mask[] = { 0, 1, -1 };
+  for (int i = 0; i < 3; ++i) {
+    EXPECT_EQ(xl.mlayer_sources[i].dependency_mask, expected_mask[i]);
+  }
+  EXPECT_EQ(xl.has_mlayer_dependencies, 1);
+}
+
+// Each embedded layer names its own input; after resolution the per-mlayer
+// input_source_idx must match the position of that input in "inputs".
+TEST(XLayerConfigResolve, EmbeddedLayersSourceResolution) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "main", "filename": "main.raw", "width": 1920, "height": 1080 },
+      { "name": "aux", "filename": "aux.raw", "width": 1920, "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "main", "width": 1920, "height": 1080,
+        "atlas_pos_x": 0, "atlas_pos_y": 0,
+        "num_embedded_layers": 2,
+        "embedded_layers": [
+          { "scaling_mode": "1/2", "input_source": "main",
+            "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080 },
+          { "scaling_mode": "1:1", "input_source": "aux",
+            "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080 }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  // mlayer 0 names "main" (inputs[0]); mlayer 1 names "aux" (inputs[1]).
+  const auto &layers = config.xlayers[0].mlayer_sources;
+  for (int i = 0; i < 2; ++i) {
+    EXPECT_EQ(layers[i].input_source_idx, i);
+  }
+}
+
+// Embedded layers that give only a scaling mode must not be assigned a source
+// of their own: has_per_mlayer_sources stays 0 and each per-mlayer
+// input_source_idx stays at -1 after resolution.
+TEST(XLayerConfigResolve, EmbeddedLayersInheritance) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "main", "filename": "main.raw", "width": 3840, "height": 2160 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "main", "width": 1920, "height": 1080,
+        "atlas_pos_x": 100, "atlas_pos_y": 200,
+        "num_embedded_layers": 2,
+        "embedded_layers": [
+          { "scaling_mode": "1/2" },
+          { "scaling_mode": "1:1" }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+
+  // No mlayer names a source, so the per-mlayer records keep their defaults.
+  // NOTE(review): the original author states the encoder then uses the
+  // xlayer's own source for every mlayer; that is not exercised here.
+  const auto &xl = config.xlayers[0];
+  EXPECT_EQ(xl.has_per_mlayer_sources, 0);
+  for (int i = 0; i < 2; ++i) {
+    EXPECT_EQ(xl.mlayer_sources[i].input_source_idx, -1);
+  }
+}
+
+// An embedded layer that references an input name missing from "inputs"
+// parses successfully but must make resolve_input_sources() fail.
+TEST(XLayerConfigResolve, EmbeddedLayersUnknownSource) {
+  const char *config_json = R"({
+    "inputs": [
+      { "name": "main", "filename": "main.raw", "width": 1920, "height": 1080 }
+    ],
+    "xlayers": [
+      { "xlayer_id": 0, "input_source": "main", "width": 1920, "height": 1080,
+        "atlas_pos_x": 0, "atlas_pos_y": 0,
+        "num_embedded_layers": 2,
+        "embedded_layers": [
+          { "scaling_mode": "1/2" },
+          { "scaling_mode": "1:1", "input_source": "nonexistent",
+            "atlas_pos_x": 0, "atlas_pos_y": 0, "width": 1920, "height": 1080 }
+        ],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  const int resolve_status = resolve_input_sources(&config);
+  EXPECT_NE(resolve_status, 0);
+}
+
+// When per-mlayer sources are enabled, an mlayer that has a source but zero
+// width/height must fail validation; supplying the dimensions makes the same
+// configuration pass.
+TEST(XLayerConfigValidate, EmbeddedLayersRequireDimensions) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+
+  // Two 1920x1080 inputs, "left" and "right".
+  config.num_input_sources = 2;
+  auto &left_src = config.input_sources[0];
+  snprintf(left_src.name, MAX_SOURCE_NAME_LEN, "left");
+  snprintf(left_src.filename, PATH_MAX, "left.raw");
+  left_src.width = 1920;
+  left_src.height = 1080;
+  auto &right_src = config.input_sources[1];
+  snprintf(right_src.name, MAX_SOURCE_NAME_LEN, "right");
+  snprintf(right_src.filename, PATH_MAX, "right.raw");
+  right_src.width = 1920;
+  right_src.height = 1080;
+
+  // A single xlayer with two embedded layers and per-mlayer sources enabled.
+  config.num_xlayers = 1;
+  auto &xl = config.xlayers[0];
+  xl.xlayer_id = 0;
+  xl.input_source_idx = 0;
+  xl.width = 1920;
+  xl.height = 1080;
+  xl.atlas_pos_x = 0;
+  xl.atlas_pos_y = 0;
+  xl.num_embedded_layers = 2;
+  xl.scaling_mode[0] = AVME_ONETWO;
+  xl.scaling_mode[1] = AVME_NORMAL;
+  xl.has_per_mlayer_sources = 1;
+
+  // mlayer 0 is fully specified.
+  auto &ml0 = xl.mlayer_sources[0];
+  ml0.input_source_idx = 0;
+  ml0.atlas_pos_x = 0;
+  ml0.atlas_pos_y = 0;
+  ml0.width = 1920;
+  ml0.height = 1080;
+
+  // mlayer 1 has a source but its dimensions are left at zero.
+  auto &ml1 = xl.mlayer_sources[1];
+  ml1.input_source_idx = 1;
+  ml1.atlas_pos_x = 0;
+  ml1.atlas_pos_y = 0;
+  ml1.width = 0;   // Missing!
+  ml1.height = 0;  // Missing!
+
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+
+  // Positive control: with dimensions filled in, validation succeeds.
+  ml1.width = 1920;
+  ml1.height = 1080;
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+// A dependency mask may only reference lower-numbered mlayers: mlayer 0
+// depending on mlayer 1 must fail validation, while mlayer 1 depending on
+// mlayer 0 must pass.
+TEST(XLayerConfigValidate, EmbeddedLayersDependsOnRange) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 1;
+
+  auto &xl = config.xlayers[0];
+  xl.xlayer_id = 0;
+  xl.width = 1920;
+  xl.height = 1080;
+  xl.num_embedded_layers = 2;
+  xl.scaling_mode[0] = AVME_ONETWO;
+  xl.scaling_mode[1] = AVME_NORMAL;
+  xl.has_mlayer_dependencies = 1;
+  snprintf(xl.input_filename, PATH_MAX, "test.raw");
+
+  // Invalid: bit 1 set on mlayer 0, i.e. a dependency on a layer >= itself.
+  xl.mlayer_sources[0].dependency_mask = 0x02;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+
+  // Valid: mlayer 0 depends on nothing, mlayer 1 depends on mlayer 0.
+  xl.mlayer_sources[0].dependency_mask = 0;
+  xl.mlayer_sources[1].dependency_mask = 1;
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+// The older flat "scaling_mode" array (no "embedded_layers") must keep
+// working and must leave every per-mlayer field at its default.
+TEST(XLayerConfigParse, EmbeddedLayersBackwardCompat) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "test.raw", "width": 1920, "height": 1080,
+        "num_embedded_layers": 3,
+        "scaling_mode": ["1/4", "1/2", "1:1"],
+        "qp": 128, "cpu_used": 5 }
+    ],
+    "output": "out.obu"
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  const auto &xl = config.xlayers[0];
+  EXPECT_EQ(xl.scaling_mode[0], AVME_ONEFOUR);
+  EXPECT_EQ(xl.scaling_mode[1], AVME_ONETWO);
+  EXPECT_EQ(xl.scaling_mode[2], AVME_NORMAL);
+  EXPECT_EQ(xl.has_per_mlayer_sources, 0);
+  EXPECT_EQ(xl.has_mlayer_dependencies, 0);
+
+  // Without an "embedded_layers" array the first mlayer record is untouched:
+  // both its source index and its dependency mask remain -1.
+  const auto &first_layer = xl.mlayer_sources[0];
+  EXPECT_EQ(first_layer.input_source_idx, -1);
+  EXPECT_EQ(first_layer.dependency_mask, -1);
+}
+
+// --- Codec Controls Tests ---
+
+// Parses an xlayer whose "codec_controls" array holds three [name, value]
+// pairs and checks that the count, names and values are stored in order.
+TEST(XLayerConfig, CodecControlsParsing) {
+  const char *json = R"({
+    "inputs": [{ "name": "src", "filename": "test.raw", "width": 64, "height": 64 }],
+    "xlayers": [{
+      "xlayer_id": 0, "input_source": "src",
+      "width": 64, "height": 64,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "codec_controls": [
+        ["enable_deblocking", 0],
+        ["enable_cdef", 0],
+        ["enable_intrabc", 0]
+      ]
+    }],
+    "ops": [{ "ops_id": 0, "priority": 0, "intent_present": true,
+              "ptl_present": true,
+              "operating_points": [{ "intent": 0, "xlayer_map": [0] }] }],
+    "output": "/tmp/test_cc.obu"
+  })";
+  // Check the temp-file path before handing it to the parser, as the other
+  // parse tests in this file do, so a failed write is reported as such
+  // instead of a null path reaching parse_multi_xlayer_config().
+  const char *path = WriteTempJson(json);
+  ASSERT_NE(path, nullptr);
+
+  MultiXLayerConfig cfg;
+  ASSERT_EQ(parse_multi_xlayer_config(path, &cfg), 0);
+  EXPECT_EQ(cfg.xlayers[0].num_codec_controls, 3);
+  EXPECT_STREQ(cfg.xlayers[0].codec_controls[0].name, "enable_deblocking");
+  EXPECT_EQ(cfg.xlayers[0].codec_controls[0].value, 0);
+  EXPECT_STREQ(cfg.xlayers[0].codec_controls[1].name, "enable_cdef");
+  EXPECT_EQ(cfg.xlayers[0].codec_controls[1].value, 0);
+  EXPECT_STREQ(cfg.xlayers[0].codec_controls[2].name, "enable_intrabc");
+  EXPECT_EQ(cfg.xlayers[0].codec_controls[2].value, 0);
+}
+
+// A "codec_controls" entry that is not a [name, value] pair (here a
+// one-element array) must make the parser fail.
+TEST(XLayerConfig, CodecControlsInvalidFormat) {
+  // codec_controls entry is not a [name, value] pair
+  const char *json = R"({
+    "inputs": [{ "name": "src", "filename": "test.raw", "width": 64, "height": 64 }],
+    "xlayers": [{
+      "xlayer_id": 0, "input_source": "src",
+      "width": 64, "height": 64,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "codec_controls": [
+        ["enable_deblocking"]
+      ]
+    }],
+    "ops": [{ "ops_id": 0, "priority": 0, "intent_present": true,
+              "ptl_present": true,
+              "operating_points": [{ "intent": 0, "xlayer_map": [0] }] }],
+    "output": "/tmp/test_cc.obu"
+  })";
+  // Assert the temp file was written before parsing. This is a negative
+  // test, so a missing file must not be allowed to stand in for the
+  // malformed-entry failure that is actually under test.
+  const char *path = WriteTempJson(json);
+  ASSERT_NE(path, nullptr);
+
+  MultiXLayerConfig cfg;
+  EXPECT_NE(parse_multi_xlayer_config(path, &cfg), 0);
+}
+
+// An empty "codec_controls" array is accepted and yields zero stored
+// controls for the xlayer.
+TEST(XLayerConfig, CodecControlsEmpty) {
+  // Empty codec_controls array is valid
+  const char *json = R"({
+    "inputs": [{ "name": "src", "filename": "test.raw", "width": 64, "height": 64 }],
+    "xlayers": [{
+      "xlayer_id": 0, "input_source": "src",
+      "width": 64, "height": 64,
+      "atlas_pos_x": 0, "atlas_pos_y": 0,
+      "codec_controls": []
+    }],
+    "ops": [{ "ops_id": 0, "priority": 0, "intent_present": true,
+              "ptl_present": true,
+              "operating_points": [{ "intent": 0, "xlayer_map": [0] }] }],
+    "output": "/tmp/test_cc.obu"
+  })";
+  // Check the temp-file path before handing it to the parser, as the other
+  // parse tests in this file do.
+  const char *path = WriteTempJson(json);
+  ASSERT_NE(path, nullptr);
+
+  MultiXLayerConfig cfg;
+  ASSERT_EQ(parse_multi_xlayer_config(path, &cfg), 0);
+  EXPECT_EQ(cfg.xlayers[0].num_codec_controls, 0);
+}
+
+// --- Per-MLayer Content Interpretation Tests ---
+
+// Per-mlayer colour fields: an embedded layer that sets them keeps the given
+// values after parsing, while one that omits them is left at -1.
+TEST(XLayerConfigParse, EmbeddedLayersCIParsing) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 1920, "height": 1080,
+        "color_primaries": 1,
+        "transfer_characteristics": 1,
+        "matrix_coefficients": 1,
+        "full_range_flag": 0,
+        "num_embedded_layers": 2,
+        "embedded_layers": [
+          { "scaling_mode": "1/2",
+            "color_primaries": 9,
+            "transfer_characteristics": 16,
+            "matrix_coefficients": 9,
+            "full_range_flag": 1 },
+          { "scaling_mode": "1:1" }
+        ]
+      }
+    ]
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  // mlayer 0 carries its own explicit values.
+  const auto &explicit_ci = config.xlayers[0].mlayer_sources[0];
+  EXPECT_EQ(explicit_ci.color_primaries, 9);
+  EXPECT_EQ(explicit_ci.transfer_characteristics, 16);
+  EXPECT_EQ(explicit_ci.matrix_coefficients, 9);
+  EXPECT_EQ(explicit_ci.full_range_flag, 1);
+
+  // mlayer 1 specifies nothing, so each field is -1 (inherit from xlayer).
+  const auto &unset_ci = config.xlayers[0].mlayer_sources[1];
+  EXPECT_EQ(unset_ci.color_primaries, -1);
+  EXPECT_EQ(unset_ci.transfer_characteristics, -1);
+  EXPECT_EQ(unset_ci.matrix_coefficients, -1);
+  EXPECT_EQ(unset_ci.full_range_flag, -1);
+}
+
+// After resolve_mlayer_ci(), every colour field an embedded layer left unset
+// must hold the xlayer's value, while explicitly set fields keep their own.
+TEST(XLayerConfigResolve, EmbeddedLayersCIInheritance) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 1920, "height": 1080,
+        "color_primaries": 1,
+        "transfer_characteristics": 13,
+        "matrix_coefficients": 6,
+        "full_range_flag": 0,
+        "num_embedded_layers": 3,
+        "embedded_layers": [
+          { "scaling_mode": "1/4",
+            "color_primaries": 9 },
+          { "scaling_mode": "1/2" },
+          { "scaling_mode": "1:1",
+            "full_range_flag": 1 }
+        ]
+      }
+    ]
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  ASSERT_EQ(resolve_input_sources(&config), 0);
+  resolve_mlayer_ci(&config);
+
+  // Expected {primaries, transfer, matrix, full_range} per mlayer. The
+  // xlayer defaults are {1, 13, 6, 0}; mlayer 0 overrides primaries with 9,
+  // mlayer 1 overrides nothing, mlayer 2 overrides full_range with 1.
+  const int expected_ci[3][4] = {
+    { 9, 13, 6, 0 },
+    { 1, 13, 6, 0 },
+    { 1, 13, 6, 1 },
+  };
+  const auto &layers = config.xlayers[0].mlayer_sources;
+  for (int i = 0; i < 3; ++i) {
+    EXPECT_EQ(layers[i].color_primaries, expected_ci[i][0]);
+    EXPECT_EQ(layers[i].transfer_characteristics, expected_ci[i][1]);
+    EXPECT_EQ(layers[i].matrix_coefficients, expected_ci[i][2]);
+    EXPECT_EQ(layers[i].full_range_flag, expected_ci[i][3]);
+  }
+}
+
+// xlayer-level colour fields: values given in the JSON are stored as-is, and
+// an xlayer that gives none has all four fields at -1 after parsing.
+TEST(XLayerConfigParse, XLayerColorPropagation) {
+  const char *config_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "width": 1920, "height": 1080,
+        "color_primaries": 9,
+        "transfer_characteristics": 16,
+        "matrix_coefficients": 9,
+        "full_range_flag": 1 },
+      { "xlayer_id": 1, "input": "b.raw", "width": 1920, "height": 1080 }
+    ]
+  })";
+  const char *json_path = WriteTempJson(config_json);
+  ASSERT_NE(json_path, nullptr);
+
+  MultiXLayerConfig config;
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+
+  // xlayer 0 declares all four colour fields.
+  const auto &with_color = config.xlayers[0];
+  EXPECT_EQ(with_color.color_primaries, 9);
+  EXPECT_EQ(with_color.transfer_characteristics, 16);
+  EXPECT_EQ(with_color.matrix_coefficients, 9);
+  EXPECT_EQ(with_color.full_range_flag, 1);
+
+  // xlayer 1 declares none; -1 marks "not specified" (use codec defaults).
+  const auto &without_color = config.xlayers[1];
+  EXPECT_EQ(without_color.color_primaries, -1);
+  EXPECT_EQ(without_color.transfer_characteristics, -1);
+  EXPECT_EQ(without_color.matrix_coefficients, -1);
+  EXPECT_EQ(without_color.full_range_flag, -1);
+}
+
+// Range checks on per-mlayer colour fields: unset (-1) values validate, while
+// color_primaries = 300 and full_range_flag = 2 are each rejected.
+TEST(XLayerConfigValidate, EmbeddedLayersCIRangeValidation) {
+  MultiXLayerConfig config;
+  xlayer_config_init(&config);
+  config.num_xlayers = 1;
+
+  auto &xl = config.xlayers[0];
+  xl.xlayer_id = 0;
+  xl.width = 416;
+  xl.height = 240;
+  xl.num_embedded_layers = 1;
+  snprintf(xl.input_filename, PATH_MAX, "a.raw");
+
+  // Baseline: nothing specified on the mlayer, so validation passes.
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+
+  auto &layer_ci = xl.mlayer_sources[0];
+
+  // Out-of-range colour primaries; restored to -1 afterwards.
+  layer_ci.color_primaries = 300;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+  layer_ci.color_primaries = -1;
+
+  // Out-of-range full-range flag; restored to -1 afterwards.
+  layer_ci.full_range_flag = 2;
+  EXPECT_NE(validate_multi_xlayer_config(&config), 0);
+  layer_ci.full_range_flag = -1;
+
+  // With both fields back at -1 the config validates again.
+  EXPECT_EQ(validate_multi_xlayer_config(&config), 0);
+}
+
+// The top-level "limit" key: absent means cfg.limit == 0 (unlimited per the
+// original author's note); when present its value is stored verbatim. The
+// same config object is reused for both parses.
+TEST(XLayerConfigParse, LimitField) {
+  MultiXLayerConfig config;
+
+  // Case 1: no "limit" key.
+  const char *without_limit_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "qp": 100 },
+      { "xlayer_id": 1, "input": "b.raw", "qp": 160 }
+    ]
+  })";
+  const char *json_path = WriteTempJson(without_limit_json);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  EXPECT_EQ(config.limit, 0);
+
+  // Case 2: explicit "limit": 5.
+  const char *with_limit_json = R"({
+    "xlayers": [
+      { "xlayer_id": 0, "input": "a.raw", "qp": 100 },
+      { "xlayer_id": 1, "input": "b.raw", "qp": 160 }
+    ],
+    "limit": 5
+  })";
+  json_path = WriteTempJson(with_limit_json);
+  ASSERT_NE(json_path, nullptr);
+  ASSERT_EQ(parse_multi_xlayer_config(json_path, &config), 0);
+  EXPECT_EQ(config.limit, 5);
+}
+
+}  // namespace
diff --git a/third_party/cJSON/LICENSE b/third_party/cJSON/LICENSE
new file mode 100644
index 0000000000..47234478c1
--- /dev/null
+++ b/third_party/cJSON/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/cJSON/cJSON.c b/third_party/cJSON/cJSON.c
new file mode 100644
index 0000000000..cd3ff5206e
--- /dev/null
+++ b/third_party/cJSON/cJSON.c
@@ -0,0 +1,362 @@
+/*
+  Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+/* Minimal cJSON implementation for AVM xlayer config parsing. */
+
+#include "third_party/cJSON/cJSON.h"
+
+#include <ctype.h>
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* --- Internal helpers --- */
+
+static cJSON *cJSON_New_Item(void) {
+  /* calloc() zero-fills the node, so type starts as cJSON_Invalid (0). */
+  return (cJSON *)calloc(1, sizeof(cJSON));
+}
+
+/* Advance past bytes <= ' ' and `//` line comments. NULL in gives NULL out;
+ * otherwise returns the first remaining byte (possibly the terminator). */
+static const char *skip_whitespace(const char *in) {
+  if (!in) return NULL;
+  for (;;) {
+    while (*in != '\0' && (unsigned char)*in <= ' ') in++;
+    if (in[0] != '/' || in[1] != '/') return in;
+    /* Drop the comment body; its newline is eaten by the next iteration. */
+    while (*in != '\0' && *in != '\n') in++;
+  }
+}
+
+/* Forward declarations */
+static const char *parse_value(cJSON *item, const char *value);
+static const char *parse_string(cJSON *item, const char *str);
+static const char *parse_number(cJSON *item, const char *num);
+static const char *parse_array(cJSON *item, const char *value);
+static const char *parse_object(cJSON *item, const char *value);
+
+/* --- Parse string --- */
+static unsigned parse_hex4(const char *str) {
+  unsigned value = 0; /* one nibble shifted in per digit; 0 doubles as error */
+  for (int left = 4; left > 0; left--, str++) {
+    unsigned digit;
+    if (*str >= '0' && *str <= '9')
+      digit = (unsigned)(*str - '0');
+    else if (*str >= 'A' && *str <= 'F')
+      digit = (unsigned)(10 + *str - 'A');
+    else if (*str >= 'a' && *str <= 'f')
+      digit = (unsigned)(10 + *str - 'a');
+    else
+      return 0; /* non-hex byte */
+    value = (value << 4) | digit;
+  }
+  return value;
+}
+
+/* Parse the quoted JSON string at `str` into item->valuestring (malloc'd,
+ * escapes decoded). Returns the byte after the closing quote, or NULL on a
+ * missing quote, a truncated or non-hex escape, or allocation failure. */
+static const char *parse_string(cJSON *item, const char *str) {
+  if (*str != '\"') return NULL;
+  const char *const start = ++str;
+  size_t len = 0;
+  /* Pass 1: validate escapes and compute an upper bound on output size. */
+  while (*str && *str != '\"') {
+    if (*str == '\\') {
+      str++;
+      if (*str == '\0') return NULL; /* lone backslash at end of input */
+      if (*str == 'u') {
+        /* Bail at the first non-hex byte so str += 4 cannot pass the NUL. */
+        for (int k = 1; k <= 4; k++)
+          if (!isxdigit((unsigned char)str[k])) return NULL;
+        str += 4;
+        len += 3; /* plus the len++ below = 4: UTF-8 worst case, simplified */
+      }
+    }
+    len++;
+    str++;
+  }
+  if (*str != '\"') return NULL;
+
+  char *out = (char *)malloc(len + 1);
+  if (!out) return NULL;
+
+  str = start; /* Pass 2: decode; every escape was bounds-checked above. */
+  char *ptr = out;
+  while (*str && *str != '\"') {
+    if (*str != '\\') {
+      *ptr++ = *str++;
+    } else {
+      str++;
+      switch (*str) {
+        case 'b': *ptr++ = '\b'; break;
+        case 'f': *ptr++ = '\f'; break;
+        case 'n': *ptr++ = '\n'; break;
+        case 'r': *ptr++ = '\r'; break;
+        case 't': *ptr++ = '\t'; break;
+        case 'u': {
+          unsigned uc = parse_hex4(str + 1);
+          str += 4;
+          /* Simple UTF-8 encoding */
+          if (uc < 0x80) {
+            *ptr++ = (char)uc;
+          } else if (uc < 0x800) {
+            *ptr++ = (char)(0xC0 | (uc >> 6));
+            *ptr++ = (char)(0x80 | (uc & 0x3F));
+          } else {
+            *ptr++ = (char)(0xE0 | (uc >> 12));
+            *ptr++ = (char)(0x80 | ((uc >> 6) & 0x3F));
+            *ptr++ = (char)(0x80 | (uc & 0x3F));
+          }
+          break;
+        }
+        default: *ptr++ = *str; break;
+      }
+      str++;
+    }
+  }
+  *ptr = '\0';
+  item->valuestring = out;
+  item->type = cJSON_String;
+  return str + 1; /* skip closing quote */
+}
+
+/* --- Parse number --- */
+/* Parse the JSON number at `num` into item (valuedouble, and valueint clamped
+ * to [INT_MIN, INT_MAX]). Returns the first byte after the number text. */
+static const char *parse_number(cJSON *item, const char *num) {
+  double n = 0;
+  double sign = 1;
+  int scale = 0;        /* minus the count of fraction digits consumed */
+  int subscale = 0;     /* magnitude of the explicit exponent */
+  int signsubscale = 1; /* sign of the explicit exponent */
+
+  if (*num == '-') {
+    sign = -1;
+    num++;
+  }
+  if (*num == '0') {
+    num++; /* a leading zero is consumed on its own */
+  } else {
+    for (; *num >= '0' && *num <= '9'; num++) n = n * 10.0 + (*num - '0');
+  }
+  if (*num == '.' && num[1] >= '0' && num[1] <= '9') {
+    for (num++; *num >= '0' && *num <= '9'; num++, scale--)
+      n = n * 10.0 + (*num - '0');
+  }
+  if (*num == 'e' || *num == 'E') {
+    num++;
+    if (*num == '+') {
+      num++;
+    } else if (*num == '-') {
+      signsubscale = -1;
+      num++;
+    }
+    /* Stop accumulating once the exponent is huge: the value is already far
+     * outside double range, and more digits would overflow int (undefined). */
+    for (; *num >= '0' && *num <= '9'; num++)
+      if (subscale < 100000) subscale = subscale * 10 + (*num - '0');
+  }
+
+  n = sign * n * pow(10.0, scale + subscale * signsubscale);
+  item->valuedouble = n;
+  /* (int) of an out-of-range or NaN double is undefined, so clamp first. */
+  if (n >= INT_MAX)
+    item->valueint = INT_MAX;
+  else if (n <= INT_MIN)
+    item->valueint = INT_MIN;
+  else
+    item->valueint = isnan(n) ? 0 : (int)n;
+  item->type = cJSON_Number;
+  return num;
+}
+
+/* --- Parse array --- */
+/* Parse the JSON array at `value`, chaining its elements under item->child.
+ * Returns the byte after ']' or NULL on malformed input / failed allocation. */
+static const char *parse_array(cJSON *item, const char *value) {
+  if (*value != '[') return NULL;
+  item->type = cJSON_Array;
+  value = skip_whitespace(value + 1);
+  if (*value == ']') return value + 1; /* empty array */
+
+  cJSON *tail = NULL; /* most recently linked element; NULL before the first */
+  do {
+    cJSON *elem = cJSON_New_Item();
+    if (!elem) return NULL;
+    if (tail == NULL) {
+      item->child = elem;
+    } else {
+      tail->next = elem;
+      elem->prev = tail;
+      value++; /* step over the ',' that ended the previous element */
+    }
+    tail = elem;
+    value = skip_whitespace(parse_value(elem, skip_whitespace(value)));
+    if (!value) return NULL;
+  } while (*value == ',');
+  return (*value == ']') ? value + 1 : NULL; /* anything else is malformed */
+}
+
+/* --- Parse object --- */
+/* Parse the JSON object at `value`, chaining its members under item->child;
+ * each member keeps its key in ->string. Returns the byte after '}' or NULL
+ * on malformed input / failed allocation. Members built before a failure
+ * stay linked to `item`, so deleting `item` releases them. */
+static const char *parse_object(cJSON *item, const char *value) {
+  if (*value != '{') return NULL;
+  item->type = cJSON_Object;
+  value = skip_whitespace(value + 1);
+  if (*value == '}') return value + 1; /* empty object */
+
+  cJSON *child = cJSON_New_Item();
+  if (!child) return NULL;
+  item->child = child;
+
+  for (;;) {
+    /* Key: parsed like a string value, then moved into ->string. The
+     * whitespace after it is skipped so `"key" : value` is accepted. */
+    value = skip_whitespace(parse_string(child, skip_whitespace(value)));
+    if (!value) return NULL;
+    child->string = child->valuestring;
+    child->valuestring = NULL;
+    child->type = cJSON_Invalid;
+
+    /* Separator, then the member's value. */
+    if (*value != ':') return NULL;
+    value = skip_whitespace(parse_value(child, skip_whitespace(value + 1)));
+    if (!value) return NULL;
+    if (*value != ',') break;
+
+    /* Another member follows: link a fresh node and step over the ','. */
+    cJSON *new_item = cJSON_New_Item();
+    if (!new_item) return NULL;
+    child->next = new_item;
+    new_item->prev = child;
+    child = new_item;
+    value++;
+  }
+
+  /* The member list must be closed by '}'. */
+  if (*value == '}') return value + 1;
+  return NULL; /* malformed */
+}
+
+/* --- Parse value --- */
+/* Dispatch on the leading byte(s) of `value`. Returns the byte after the
+ * parsed value, or NULL when `value` is NULL or starts no known JSON value. */
+static const char *parse_value(cJSON *item, const char *value) {
+  if (value == NULL) return NULL;
+  if (*value == '{') return parse_object(item, value);
+  if (*value == '[') return parse_array(item, value);
+  if (*value == '\"') return parse_string(item, value);
+  if (*value == '-' || (*value >= '0' && *value <= '9'))
+    return parse_number(item, value);
+
+  /* Literals: only the leading characters are compared. */
+  if (strncmp(value, "null", 4) == 0) {
+    item->type = cJSON_NULL;
+    return value + 4;
+  }
+  const int is_true = (strncmp(value, "true", 4) == 0);
+  if (is_true || strncmp(value, "false", 5) == 0) {
+    item->type = is_true ? cJSON_True : cJSON_False;
+    item->valueint = is_true; /* 1 for true, 0 for false */
+    return value + (is_true ? 4 : 5);
+  }
+  return NULL; /* failure */
+}
+
+/* --- Public API --- */
+
+/* Parse JSON text into a new tree; NULL on failure. Trailing bytes ignored. */
+cJSON *cJSON_Parse(const char *value) {
+  cJSON *root = cJSON_New_Item();
+  if (root == NULL) return NULL;
+  if (parse_value(root, skip_whitespace(value)) == NULL) {
+    cJSON_Delete(root); /* also releases any partially built children */
+    root = NULL;
+  }
+  return root;
+}
+
+/* Free `item`, its following siblings and their subtrees. cJSON_IsReference
+ * spares child/valuestring; cJSON_StringIsConst spares the key. */
+void cJSON_Delete(cJSON *item) {
+  while (item != NULL) {
+    cJSON *const following = item->next; /* fetched before item is freed */
+    const int owns_payload = (item->type & cJSON_IsReference) == 0;
+    const int owns_key = (item->type & cJSON_StringIsConst) == 0;
+    if (owns_payload && item->child != NULL) cJSON_Delete(item->child);
+    if (owns_payload && item->valuestring != NULL) free(item->valuestring);
+    if (owns_key && item->string != NULL) free(item->string);
+    free(item);
+    item = following;
+  }
+}
+
+/* Count the direct children of `array` (0 when `array` is NULL). */
+int cJSON_GetArraySize(const cJSON *array) {
+  int count = 0;
+  if (array != NULL) {
+    const cJSON *it;
+    for (it = array->child; it != NULL; it = it->next) {
+      count++;
+    }
+  }
+  return count;
+}
+
+/* Child number `index` (0-based) of `array`, or NULL if there is none. */
+cJSON *cJSON_GetArrayItem(const cJSON *array, int index) {
+  if (array == NULL || index < 0) return NULL;
+  cJSON *it = array->child;
+  for (int remaining = index; remaining > 0 && it != NULL; remaining--) {
+    it = it->next;
+  }
+  return it;
+}
+
+/* First child of `object` whose key equals `string` exactly, else NULL. */
+cJSON *cJSON_GetObjectItemCaseSensitive(const cJSON *object,
+                                        const char *string) {
+  if (object == NULL || string == NULL) return NULL;
+  cJSON *it = object->child;
+  /* Children that carry no key are stepped over. */
+  while (it != NULL && (it->string == NULL || strcmp(it->string, string) != 0))
+    it = it->next;
+  return it;
+}
+
+char *cJSON_GetStringValue(const cJSON *item) {
+  /* Non-string (or NULL) items have no string payload to hand out. */
+  return cJSON_IsString(item) ? item->valuestring : NULL;
+}
+
+double cJSON_GetNumberValue(const cJSON *item) {
+  /* Non-number (or NULL) items report 0.0. */
+  return cJSON_IsNumber(item) ? item->valuedouble : 0.0;
+}
diff --git a/third_party/cJSON/cJSON.h b/third_party/cJSON/cJSON.h
new file mode 100644
index 0000000000..6432be13d1
--- /dev/null
+++ b/third_party/cJSON/cJSON.h
@@ -0,0 +1,116 @@
+/*
+  Copyright (c) 2009-2017 Dave Gamble and cJSON contributors
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+/* Minimal cJSON implementation for AVM xlayer config parsing.
+ * Supports: objects, arrays, strings, numbers, booleans, null.
+ * Based on the cJSON API by Dave Gamble. */
+
+#ifndef CJSON_H
+#define CJSON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* cJSON Types: */
+#define cJSON_Invalid (0)
+#define cJSON_False (1 << 0)
+#define cJSON_True (1 << 1)
+#define cJSON_NULL (1 << 2)
+#define cJSON_Number (1 << 3)
+#define cJSON_String (1 << 4)
+#define cJSON_Array (1 << 5)
+#define cJSON_Object (1 << 6)
+#define cJSON_Raw (1 << 7)
+
+#define cJSON_IsReference 256
+#define cJSON_StringIsConst 512
+
+/* The cJSON structure: one node of the parsed tree. */
+typedef struct cJSON {
+  struct cJSON *next;  /* following sibling in the parent's child list */
+  struct cJSON *prev;  /* preceding sibling; NULL on a first child */
+  struct cJSON *child; /* first child of an array or object */
+
+  int type;            /* a cJSON_* type bit, possibly OR'ed with flag bits */
+  char *valuestring;   /* decoded text when type is cJSON_String */
+  int valueint;        /* integer view of a number; 0/1 for false/true */
+  double valuedouble;  /* numeric value when type is cJSON_Number */
+  char *string;        /* member key when the node sits inside an object */
+} cJSON;
+
+/* Supply a block of JSON, and this returns a cJSON object you can
+ * interrogate. */
+cJSON *cJSON_Parse(const char *value);
+
+/* Delete a cJSON entity and all subentities. */
+void cJSON_Delete(cJSON *item);
+
+/* Returns the number of items in an array (or object). */
+int cJSON_GetArraySize(const cJSON *array);
+
+/* Retrieve item number "index" from array "array". Returns NULL if
+ * unsuccessful. */
+cJSON *cJSON_GetArrayItem(const cJSON *array, int index);
+
+/* Get item "string" from object. Case sensitive. */
+cJSON *cJSON_GetObjectItemCaseSensitive(const cJSON *object,
+                                        const char *string);
+
+/* Type checks: NULL-safe, yield 0 or 1, ignore flag bits above 0xFF. */
+#define cJSON_IsInvalid(item) \
+  ((item) == NULL || ((item)->type & 0xFF) == cJSON_Invalid)
+#define cJSON_IsFalse(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_False)
+#define cJSON_IsTrue(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_True)
+#define cJSON_IsBool(item) \
+  ((item) != NULL && ((item)->type & (cJSON_True | cJSON_False)) != 0)
+#define cJSON_IsNull(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_NULL)
+#define cJSON_IsNumber(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_Number)
+#define cJSON_IsString(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_String)
+#define cJSON_IsArray(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_Array)
+#define cJSON_IsObject(item) \
+  ((item) != NULL && ((item)->type & 0xFF) == cJSON_Object)
+
+/* Return string value, or NULL */
+char *cJSON_GetStringValue(const cJSON *item);
+
+/* Return number value, or 0 */
+double cJSON_GetNumberValue(const cJSON *item);
+
+/* Iterate `element` over the children of `array` (array or object; NULL ok). */
+#define cJSON_ArrayForEach(element, array)                     \
+  for ((element) = ((array) != NULL) ? (array)->child : NULL;  \
+       (element) != NULL; (element) = (element)->next)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CJSON_H */
diff --git a/tools/stream_demuxer.cc b/tools/stream_demuxer.cc
index d28f0197da..2f9dbfdfb7 100644
--- a/tools/stream_demuxer.cc
+++ b/tools/stream_demuxer.cc
@@ -11,6 +11,45 @@
  */
 
 #include "tools/stream_mux.h"
+
+// Parse a Global LCR OBU to discover xlayer IDs.
+// Returns the number of xlayers found, or -1 on error.
+// stream_ids[] is populated with the xlayer IDs found in the LCR xlayer_map.
+static int read_global_lcr_xlayer_map(struct avm_read_bit_buffer *rb,
+                                      int *stream_ids) {
+  // lcr_is_global_flag: anything but a global LCR is reported as -1.
+  const int is_global = avm_rb_read_bit(rb);
+  if (!is_global) return -1;
+
+  // lcr_global_config_record_id (LCR_ID_BITS wide) is read and discarded.
+  avm_rb_read_literal(rb, LCR_ID_BITS);
+
+  // lcr_xlayer_map: MAX_NUM_XLAYERS - 1 bits, one per candidate xlayer ID.
+  const uint32_t xlayer_map =
+      (uint32_t)avm_rb_read_literal(rb, MAX_NUM_XLAYERS - 1);
+
+  // Record the position of each set bit, lowest first, stopping once
+  // AVM_MAX_NUM_STREAMS IDs have been stored.
+  int found = 0;
+  for (int bit = 0; bit < (int)(MAX_NUM_XLAYERS - 1); ++bit) {
+    const int bit_set = (int)((xlayer_map >> bit) & 1u);
+    if (!bit_set) continue;
+    if (found >= AVM_MAX_NUM_STREAMS) break;
+    stream_ids[found++] = bit;
+  }
+
+#if PRINT_TU_INFO
+  printf("\n==Parse Global LCR xlayer_map==\n");
+  printf("--xlayer_map: 0x%x\n", xlayer_map);
+  printf("--num_xlayers: %d\n", found);
+  for (int idx = 0; idx < found; ++idx) {
+    printf("--xlayer_id[%d]: %d\n", idx, stream_ids[idx]);
+  }
+#endif  // PRINT_TU_INFO
+
+  return found;
+}
+
 // This function read a multi-stream decoder operation OBU.
 static int read_multi_stream_decoder_operation(struct avm_read_bit_buffer *rb,
                                                int *stream_ids) {
@@ -210,6 +249,20 @@ void ExtractTU(const uint8_t *data, int length, int *obu_overhead_bytes,
           data_ptr + obu_total_size + length_field_size);
       *num_streams = read_multi_stream_decoder_operation(&rb, stream_ids);
       per_stream_obus.resize(*num_streams);
+    } else if (obu_header.type == OBU_LAYER_CONFIGURATION_RECORD &&
+               obu_header.obu_header_extension_flag &&
+               obu_header.obu_xlayer_id == GLOBAL_XLAYER_ID &&
+               *num_streams <= 1) {
+      // Parse Global LCR to discover xlayer IDs (only if MSDO hasn't already
+      // set up streams — MSDO takes priority when both are present).
+      init_read_bit_buffer(
+          &rb, data_ptr + obu_header_size + static_cast<int>(length_field_size),
+          data_ptr + obu_total_size + length_field_size);
+      int lcr_num_streams = read_global_lcr_xlayer_map(&rb, stream_ids);
+      if (lcr_num_streams > 1) {
+        *num_streams = lcr_num_streams;
+        per_stream_obus.resize(*num_streams);
+      }
     } else {
       // Determine which stream this OBU belongs to.
       int xlayer_id = 0;