mudler · mudler · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/include/parakeet_capi.h b/include/parakeet_capi.h
@@ -19,9 +19,11 @@ typedef struct parakeet_ctx parakeet_ctx;
 // function signatures or semantics below.
 //
 // v3: added the target_lang variants (parakeet_capi_transcribe_path_lang,
-//     parakeet_capi_transcribe_pcm_lang, parakeet_capi_stream_begin_lang) for
-//     multilingual prompt-conditioned (nemotron) models. The original non-lang
-//     entry points are unchanged and delegate with the model default language.
+//     parakeet_capi_transcribe_pcm_lang, parakeet_capi_stream_begin_lang,
+//     parakeet_capi_transcribe_pcm_batch_json_lang,
+//     parakeet_capi_transcribe_pcm_batch_lang) for multilingual
+//     prompt-conditioned (nemotron) models. The original non-lang entry points
+//     are unchanged and delegate with the model default language.
 int parakeet_capi_abi_version(void);
 
 // Load a GGUF model. Returns an owning context, or NULL on failure.
@@ -79,6 +81,20 @@ int parakeet_capi_transcribe_pcm_batch(parakeet_ctx* ctx,
                                        int sample_rate, int decoder,
                                        char** out);
 
+// Like parakeet_capi_transcribe_pcm_batch but selects the language prompt for
+// multilingual (nemotron) models. ONE `target_lang` applies to the whole batch:
+// a locale string (e.g. "en", "de", "auto"); NULL or "" uses the model's
+// default ("auto"). Ignored by non-prompt models. On an unknown locale (for a
+// prompt model) returns nonzero, sets the context's last error, and leaves
+// every out[] entry NULL. parakeet_capi_transcribe_pcm_batch delegates here
+// with the model default.
+int parakeet_capi_transcribe_pcm_batch_lang(parakeet_ctx* ctx,
+                                            const float* const* samples,
+                                            const int* n_samples, int n_clips,
+                                            int sample_rate, int decoder,
+                                            const char* target_lang,
+                                            char** out);
+
 // Transcribe a WAV file returning a malloc'd UTF-8 JSON document with per-word
 // and per-token timestamps + confidence (matching NeMo timestamps=True and the
 // 'max_prob' confidence method). `decoder` is as in
@@ -111,6 +127,18 @@ char* parakeet_capi_transcribe_pcm_batch_json(parakeet_ctx* ctx,
                                               const int* n_samples, int n_clips,
                                               int sample_rate, int decoder);
 
+// Like parakeet_capi_transcribe_pcm_batch_json but selects the language prompt
+// for multilingual (nemotron) models. ONE `target_lang` applies to the whole
+// batch: a locale string (e.g. "en", "de", "auto"); NULL or "" uses the model's
+// default ("auto"). Ignored by non-prompt models. On an unknown locale (for a
+// prompt model) returns NULL and sets the context's last error.
+// parakeet_capi_transcribe_pcm_batch_json delegates here with the model default.
+char* parakeet_capi_transcribe_pcm_batch_json_lang(parakeet_ctx* ctx,
+                                                   const float* samples_concat,
+                                                   const int* n_samples, int n_clips,
+                                                   int sample_rate, int decoder,
+                                                   const char* target_lang);
+
 // ---------------------------------------------------------------------------
 // Streaming API (cache-aware streaming RNN-T, e.g. the EOU model
 // nvidia/parakeet_realtime_eou_120m-v1). The stream session buffers incoming

diff --git a/src/parakeet_capi.cpp b/src/parakeet_capi.cpp
@@ -17,7 +17,9 @@
 
 // ABI version. Bump on breaking changes.
 // v3: target_lang variants (transcribe_path_lang / transcribe_pcm_lang /
-//     stream_begin_lang) for multilingual prompt-conditioned (nemotron) models.
+//     stream_begin_lang / transcribe_pcm_batch_json_lang /
+//     transcribe_pcm_batch_lang) for multilingual prompt-conditioned (nemotron)
+//     models.
 #define PARAKEET_CAPI_ABI_VERSION 3
 
 // The opaque context: a loaded model plus a buffer for the last error message.
@@ -268,17 +270,20 @@ extern "C" char* parakeet_capi_transcribe_pcm(parakeet_ctx* ctx, const float* sa
                                              decoder, nullptr);
 }
 
-extern "C" int parakeet_capi_transcribe_pcm_batch(parakeet_ctx* ctx,
-                                                  const float* const* samples,
-                                                  const int* n_samples, int n_clips,
-                                                  int sample_rate, int decoder,
-                                                  char** out) {
+extern "C" int parakeet_capi_transcribe_pcm_batch_lang(parakeet_ctx* ctx,
+                                                       const float* const* samples,
+                                                       const int* n_samples, int n_clips,
+                                                       int sample_rate, int decoder,
+                                                       const char* target_lang,
+                                                       char** out) {
     if (!ctx) return 1;
     if (!ctx->model) { ctx->last_error = "context has no loaded model"; return 1; }
     if (!samples || !n_samples || !out || n_clips < 0) {
         ctx->last_error = "invalid batch arguments";
         return 1;
     }
+    // NULL / "" -> model default language (ignored by non-prompt models).
+    const std::string lang = target_lang ? target_lang : "";
     // Contract: on any error path (validation, exception, OOM) every out[]
     // entry is left NULL, so the caller owns nothing and frees nothing.
     for (int i = 0; i < n_clips; ++i) out[i] = nullptr;
@@ -292,7 +297,7 @@ extern "C" int parakeet_capi_transcribe_pcm_batch(parakeet_ctx* ctx,
             pcms[i].assign(samples[i], samples[i] + n_samples[i]);
         }
         std::vector<std::string> texts =
-            ctx->model->transcribe_pcm_batch(pcms, sample_rate, to_decoder(decoder));
+            ctx->model->transcribe_pcm_batch(pcms, sample_rate, to_decoder(decoder), lang);
         ctx->last_error.clear();
         for (int i = 0; i < n_clips; ++i) {
             char* s = dup_to_c(texts[i]);
@@ -315,6 +320,16 @@ extern "C" int parakeet_capi_transcribe_pcm_batch(parakeet_ctx* ctx,
     }
 }
 
+extern "C" int parakeet_capi_transcribe_pcm_batch(parakeet_ctx* ctx,
+                                                  const float* const* samples,
+                                                  const int* n_samples, int n_clips,
+                                                  int sample_rate, int decoder,
+                                                  char** out) {
+    // Non-lang entry point: a nullptr target_lang selects the model default language.
+    return parakeet_capi_transcribe_pcm_batch_lang(ctx, samples, n_samples, n_clips,
+                                                   sample_rate, decoder, nullptr, out);
+}
+
 extern "C" char* parakeet_capi_transcribe_path_json(parakeet_ctx* ctx,
                                                     const char* wav_path,
                                                     int decoder) {
@@ -342,14 +357,16 @@ extern "C" char* parakeet_capi_transcribe_path_json(parakeet_ctx* ctx,
     }
 }
 
-extern "C" char* parakeet_capi_transcribe_pcm_batch_json(parakeet_ctx* ctx,
+extern "C" char* parakeet_capi_transcribe_pcm_batch_json_lang(parakeet_ctx* ctx,
         const float* samples_concat, const int* n_samples, int n_clips,
-        int sample_rate, int decoder) {
+        int sample_rate, int decoder, const char* target_lang) {
     if (!ctx) return nullptr;
     if (!ctx->model) { ctx->last_error = "context has no loaded model"; return nullptr; }
     if (!samples_concat || !n_samples || n_clips < 0) {
         ctx->last_error = "invalid batch arguments"; return nullptr;
     }
+    // NULL / "" -> model default language (ignored by non-prompt models).
+    const std::string lang = target_lang ? target_lang : "";
     try {
         std::vector<std::vector<float>> pcms(n_clips);
         size_t off = 0;
@@ -360,7 +377,7 @@ extern "C" char* parakeet_capi_transcribe_pcm_batch_json(parakeet_ctx* ctx,
         }
         std::vector<pk::Transcription> trs =
             ctx->model->transcribe_pcm_batch_with_timestamps(pcms, sample_rate,
-                                                             to_decoder(decoder));
+                                                             to_decoder(decoder), lang);
         const pk::ParakeetConfig& cfg = ctx->model->config();
         const float frame_sec =
             (float)cfg.hop_length * (float)cfg.subsampling_factor / (float)cfg.sample_rate;
@@ -381,6 +398,15 @@ extern "C" char* parakeet_capi_transcribe_pcm_batch_json(parakeet_ctx* ctx,
     }
 }
 
+extern "C" char* parakeet_capi_transcribe_pcm_batch_json(parakeet_ctx* ctx,
+        const float* samples_concat, const int* n_samples, int n_clips,
+        int sample_rate, int decoder) {
+    // Non-lang entry point: a nullptr target_lang selects the model default language.
+    return parakeet_capi_transcribe_pcm_batch_json_lang(ctx, samples_concat, n_samples,
+                                                        n_clips, sample_rate, decoder,
+                                                        nullptr);
+}
+
 // ---------------------------------------------------------------------------
 // Streaming API
 // ---------------------------------------------------------------------------

diff --git a/src/subsampling.cpp b/src/subsampling.cpp
@@ -54,10 +54,12 @@ ggml_tensor* Subsampling::build_graph_batched(ggml_context* ctx,
     const int F = n_mels;            // feature dim (80)
     const ModelLoader& ml = ml_;
 
-    // This task targets the NON-causal (offline) model only. Batched causal
-    // subsampling (per-stage time masking with a batch axis) is out of scope:
-    // the causal branch below still operates on the single-item assumption.
-    GGML_ASSERT(!(causal_ && B > 1) && "batched causal subsampling not supported");
+    // Batched causal subsampling IS supported: the causal branch below applies
+    // the leading ggml_pad_ext (lp1=2/rp1=1 on time) uniformly across the batch,
+    // and the per-item trailing-pad time masking (mask_time on the batch axis)
+    // plus the all_paddings=3 valid-length recurrence reproduce, per item, the
+    // exact standalone causal boundary. The output for a clip in a B>1 batch is
+    // byte-identical to its standalone run (see test_subsampling_batch_causal).
 
     // --- Input (host-side): ggml conv data layout is [W=feat, H=T, IC=1, N=B].
     // NeMo conv input is [B,1,T,feat] (H=T, W=feat). We must feed

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ pk_add_test(test_mel)
 pk_add_test(test_mel_gpu)
 pk_add_test(test_subsampling)
 pk_add_test(test_subsampling_batch)
+pk_add_test(test_subsampling_batch_causal)
 pk_add_test(test_relpos_attention)
 pk_add_test(test_relpos_attention_local)
 pk_add_test(test_relpos_attention_local_chunked)
@@ -58,7 +59,7 @@ pk_add_test(test_capi_batch)
 pk_add_test(test_capi_stream)
 pk_add_test(test_capi_timestamps)
 pk_add_test(test_capi_batch_json)
-set_tests_properties(test_model_loader test_mel test_mel_gpu test_subsampling test_subsampling_batch test_relpos_attention test_relpos_attention_batch test_relpos_attention_local_chunked
+set_tests_properties(test_model_loader test_mel test_mel_gpu test_subsampling test_subsampling_batch test_subsampling_batch_causal test_relpos_attention test_relpos_attention_batch test_relpos_attention_local_chunked
                      test_conformer test_conformer_batch test_conv_eou test_encoder test_encoder_batch test_encoder_batch_local test_encoder_eou
                      test_streaming_encoder test_ctc test_prediction
                      test_prediction_step test_prediction_step_batch
@@ -71,7 +72,7 @@ set_tests_properties(test_model_loader test_mel test_mel_gpu test_subsampling te
                      test_capi_timestamps test_capi_batch_json
                      PROPERTIES LABELS "model")
 # These tests read fixtures/baselines via paths relative to the project root.
-set_tests_properties(test_mel test_mel_gpu test_subsampling test_subsampling_batch test_relpos_attention test_relpos_attention_batch test_conformer test_conformer_batch
+set_tests_properties(test_mel test_mel_gpu test_subsampling test_subsampling_batch test_subsampling_batch_causal test_relpos_attention test_relpos_attention_batch test_conformer test_conformer_batch
                      test_conv_eou test_encoder test_encoder_batch test_encoder_batch_local test_encoder_eou test_streaming_encoder
                      test_ctc test_prediction test_prediction_step test_prediction_step_batch
                      test_joint test_joint_step_batch test_prompt_kernel