diff --git a/tools/kokoro/CMakeLists.txt b/tools/kokoro/CMakeLists.txt index f276e7ce9..394f7f946 100644 --- a/tools/kokoro/CMakeLists.txt +++ b/tools/kokoro/CMakeLists.txt @@ -17,6 +17,7 @@ set(KOKORO_PUBLIC_HEADERS include/kokoro-istft.h include/kokoro-phonemes.h include/kokoro-server-mount.h + include/kokoro-tensor-names.h include/kokoro-layers.h include/kokoro-predictor.h) diff --git a/tools/kokoro/convert_kokoro_pth_to_gguf.py b/tools/kokoro/convert_kokoro_pth_to_gguf.py index 34303fd77..8232c070f 100644 --- a/tools/kokoro/convert_kokoro_pth_to_gguf.py +++ b/tools/kokoro/convert_kokoro_pth_to_gguf.py @@ -95,11 +95,20 @@ def _add_tensor(writer: gguf.GGUFWriter, name: str, data: np.ndarray) -> None: - """Add a tensor as fp32 (cast from fp64/fp16 if needed; ensure c-contig).""" - if data.dtype != np.float32: + """Add tensors with the dtype layout the Kokoro forward pass expects. + + Weight matrices and convolution kernels (ndim >= 2) are emitted as F16; + biases, norms, and other vectors stay F32. All-F32 GGUFs can load but + synthesize noise in the fused runtime path. 
+ """ + if data.dtype not in (np.float32, np.float16): data = data.astype(np.float32) if not data.flags["C_CONTIGUOUS"]: data = np.ascontiguousarray(data) + if data.ndim >= 2: + data = data.astype(np.float16) + elif data.dtype != np.float32: + data = data.astype(np.float32) writer.add_tensor(name, data) @@ -281,6 +290,8 @@ def emit_stub(out_path: str, hp: dict) -> None: _add_tensor(writer, "kokoro.predictor.F0_proj.bias", np.zeros((1,), dtype=np.float32)) _add_tensor(writer, "kokoro.predictor.N_proj.weight", rng.standard_normal((1, hid//2, 1), dtype=np.float32) * scale) _add_tensor(writer, "kokoro.predictor.N_proj.bias", np.zeros((1,), dtype=np.float32)) + _add_tensor(writer, "kokoro.gen.conv_post.weight", rng.standard_normal((22, 128, 7), dtype=np.float32) * scale) + _add_tensor(writer, "kokoro.gen.conv_post.bias", np.zeros((22,), dtype=np.float32)) writer.write_header_to_file() writer.write_kv_data_to_file() diff --git a/tools/kokoro/include/kokoro-tensor-names.h b/tools/kokoro/include/kokoro-tensor-names.h new file mode 100644 index 000000000..50de4c7d5 --- /dev/null +++ b/tools/kokoro/include/kokoro-tensor-names.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: MIT +// +// kokoro-tensor-names.h — tensor-name compatibility for Kokoro GGUFs. +// +// The published elizaOS Kokoro bundles use the `kokoro.*` namespace emitted by +// tools/kokoro/convert_kokoro_pth_to_gguf.py. Older local/dev GGUFs used short +// unprefixed names. Keep the aliases centralized so desktop and iOS builds load +// the same schema through the same kokoro_lib code path. 
#pragma once

namespace eliza_kokoro {

// Alias tables: one nullptr-terminated list of accepted tensor names per
// logical tensor. kokoro_pick_tensor_name() walks a table front to back, so
// earlier entries take precedence over later ones.

inline constexpr const char * KOKORO_TENSOR_BERT_TOKEN_EMBD[] = {
    "kokoro.bert.token_embd.weight",
    "kokoro.bert.embd.tok.weight",
    "bert.embd.tok.weight",
    "kokoro.token_embd.weight",
    nullptr,
};

inline constexpr const char * KOKORO_TENSOR_BERT_ATTN_Q[] = {
    "kokoro.bert.layer.attn_q.weight",
    "kokoro.bert.attn_q.weight",
    "bert.layer.attn_q.weight",
    "bert.attn_q.weight",
    nullptr,
};

inline constexpr const char * KOKORO_TENSOR_DURATION_PROJ[] = {
    "kokoro.predictor.duration_proj.weight",
    "kokoro.predictor.duration.weight",
    "predictor.duration_proj.weight",
    "pred.duration_proj.weight",
    "pred.duration.weight",
    nullptr,
};

inline constexpr const char * KOKORO_TENSOR_F0_PROJ[] = {
    "kokoro.predictor.F0_proj.weight",
    "predictor.F0_proj.weight",
    "pred.F0_proj.weight",
    nullptr,
};

inline constexpr const char * KOKORO_TENSOR_N_PROJ[] = {
    "kokoro.predictor.N_proj.weight",
    "predictor.N_proj.weight",
    "pred.N_proj.weight",
    nullptr,
};

inline constexpr const char * KOKORO_TENSOR_GEN_CONV_POST[] = {
    "kokoro.gen.conv_post.weight",
    "kokoro.decoder.gen.conv_post.weight",
    "decoder.gen.conv_post.weight",
    "dec.gen.conv_post.weight",
    nullptr,
};

// Returns the first entry of `aliases` for which `has_tensor` reports true.
//
//   aliases    nullptr-terminated list of candidate names.
//   has_tensor predicate called as has_tensor(candidate, user_data).
//   user_data  opaque pointer handed to `has_tensor` unchanged.
//
// Returns nullptr when `aliases` or `has_tensor` is null, or when no candidate
// is accepted. The returned pointer is the table entry itself, not a copy.
inline const char * kokoro_pick_tensor_name(
        const char * const * aliases,
        bool (*has_tensor)(const char * name, void * user_data),
        void * user_data) {
    if (aliases == nullptr || has_tensor == nullptr) {
        return nullptr;
    }

    const char * const * cursor = aliases;
    while (*cursor != nullptr) {
        const char * candidate = *cursor;
        if (has_tensor(candidate, user_data)) {
            return candidate;
        }
        ++cursor;
    }
    return nullptr;
}

} // namespace eliza_kokoro
ggml_tensor * find_tensor(ggml_context * ctx, const std::string & name) { return ggml_get_tensor(ctx, name.c_str()); } +static bool has_tensor_alias(const char * name, void * user_data) { + return name && ggml_get_tensor((ggml_context *) user_data, name) != nullptr; +} + +static ggml_tensor * find_tensor_any(ggml_context * ctx, const char * const * aliases) { + const char * name = kokoro_pick_tensor_name(aliases, has_tensor_alias, ctx); + return name ? ggml_get_tensor(ctx, name) : nullptr; +} + +static std::string format_aliases(const char * const * aliases) { + std::string out; + for (const char * const * p = aliases; p && *p; ++p) { + if (!out.empty()) out += ", "; + out += "'"; + out += *p; + out += "'"; + } + return out; +} + +static ggml_tensor * require_tensor_any( + ggml_context * ctx, + const char * const * aliases, + const char * label, + std::string & err_out) { + ggml_tensor * t = find_tensor_any(ctx, aliases); + if (!t) { + err_out = std::string("required tensor missing for ") + label + + " (accepted names: " + format_aliases(aliases) + ")"; + } + return t; +} + } // namespace // --------------------------------------------------------------------------- @@ -259,14 +293,38 @@ kokoro_model_ptr kokoro_load_model( } } - // Bind canonical tensors. Missing tensors are non-fatal during the J2 - // ship phase — the synthesis path treats absent tensors as zero, which - // produces shape-correct but acoustically degraded output. See the - // J2-kokoro-port-notes.md gap log. - model->tok_embd = find_tensor(model->ctx, "kokoro.token_embd.weight"); + // Bind the published Kokoro GGUF schema, while accepting the older + // unprefixed dev names from pre-publication GGUFs. Missing required + // tensors are a hard load error: otherwise the synth path can appear to + // work while silently skipping the real model weights. 
+ model->tok_embd = require_tensor_any( + model->ctx, + KOKORO_TENSOR_BERT_TOKEN_EMBD, + "BERT token embedding", + err_out); + if (!model->tok_embd) return {nullptr, kokoro_model_deleter{}}; + + if (!require_tensor_any(model->ctx, KOKORO_TENSOR_BERT_ATTN_Q, "BERT attention Q", err_out)) { + return {nullptr, kokoro_model_deleter{}}; + } + if (!require_tensor_any(model->ctx, KOKORO_TENSOR_F0_PROJ, "F0 projection", err_out)) { + return {nullptr, kokoro_model_deleter{}}; + } + if (!require_tensor_any(model->ctx, KOKORO_TENSOR_N_PROJ, "noise projection", err_out)) { + return {nullptr, kokoro_model_deleter{}}; + } + if (!require_tensor_any(model->ctx, KOKORO_TENSOR_GEN_CONV_POST, "generator post convolution", err_out)) { + return {nullptr, kokoro_model_deleter{}}; + } + model->mel_proj = find_tensor(model->ctx, "kokoro.decoder.mel_proj.weight"); model->phase_proj = find_tensor(model->ctx, "kokoro.decoder.phase_proj.weight"); - model->dur_proj = find_tensor(model->ctx, "kokoro.predictor.duration.weight"); + model->dur_proj = require_tensor_any( + model->ctx, + KOKORO_TENSOR_DURATION_PROJ, + "duration projection", + err_out); + if (!model->dur_proj) return {nullptr, kokoro_model_deleter{}}; model->style_proj = find_tensor(model->ctx, "kokoro.style.proj.weight"); model->out_norm = find_tensor(model->ctx, "kokoro.text.out_norm.weight"); diff --git a/tools/kokoro/tests/CMakeLists.txt b/tools/kokoro/tests/CMakeLists.txt index 81cae041c..1c97a4627 100644 --- a/tools/kokoro/tests/CMakeLists.txt +++ b/tools/kokoro/tests/CMakeLists.txt @@ -10,3 +10,7 @@ add_test(NAME test-kokoro-phonemes COMMAND test-kokoro-phonemes) add_executable(test-kokoro-istft test_kokoro_istft.cpp) target_link_libraries(test-kokoro-istft PRIVATE kokoro_lib) add_test(NAME test-kokoro-istft COMMAND test-kokoro-istft) + +add_executable(test-kokoro-tensor-names test_kokoro_tensor_names.cpp) +target_link_libraries(test-kokoro-tensor-names PRIVATE kokoro_lib) +add_test(NAME test-kokoro-tensor-names COMMAND 
test-kokoro-tensor-names) diff --git a/tools/kokoro/tests/test_kokoro_tensor_names.cpp b/tools/kokoro/tests/test_kokoro_tensor_names.cpp new file mode 100644 index 000000000..c510db91e --- /dev/null +++ b/tools/kokoro/tests/test_kokoro_tensor_names.cpp @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MIT +// +// test_kokoro_tensor_names.cpp — regression coverage for issue #9588. +// +// macOS and iOS both link kokoro_lib into the fused libelizainference target. +// If these aliases drift from the published GGUF schema, both platforms can +// export Kokoro symbols yet fail or silently skip the real weights at load time. + +#include "kokoro-tensor-names.h" + +#include +#include +#include +#include +#include + +namespace { + +bool has_name(const char * name, void * user_data) { + const auto * names = static_cast *>(user_data); + return names->find(name) != names->end(); +} + +void expect_pick( + const char * const * aliases, + const std::set & available, + const char * expected) { + const char * actual = eliza_kokoro::kokoro_pick_tensor_name( + aliases, + has_name, + (void *) &available); + assert(actual != nullptr); + assert(std::strcmp(actual, expected) == 0); +} + +} // namespace + +int main() { + using namespace eliza_kokoro; + + const std::set published_schema = { + "kokoro.bert.token_embd.weight", + "kokoro.bert.layer.attn_q.weight", + "kokoro.predictor.duration_proj.weight", + "kokoro.predictor.F0_proj.weight", + "kokoro.predictor.N_proj.weight", + "kokoro.gen.conv_post.weight", + }; + + expect_pick(KOKORO_TENSOR_BERT_TOKEN_EMBD, published_schema, "kokoro.bert.token_embd.weight"); + expect_pick(KOKORO_TENSOR_BERT_ATTN_Q, published_schema, "kokoro.bert.layer.attn_q.weight"); + expect_pick(KOKORO_TENSOR_DURATION_PROJ, published_schema, "kokoro.predictor.duration_proj.weight"); + expect_pick(KOKORO_TENSOR_F0_PROJ, published_schema, "kokoro.predictor.F0_proj.weight"); + expect_pick(KOKORO_TENSOR_N_PROJ, published_schema, "kokoro.predictor.N_proj.weight"); + 
expect_pick(KOKORO_TENSOR_GEN_CONV_POST, published_schema, "kokoro.gen.conv_post.weight"); + + const std::set legacy_schema = { + "bert.embd.tok.weight", + "bert.layer.attn_q.weight", + "pred.duration_proj.weight", + "pred.F0_proj.weight", + "pred.N_proj.weight", + "dec.gen.conv_post.weight", + }; + + expect_pick(KOKORO_TENSOR_BERT_TOKEN_EMBD, legacy_schema, "bert.embd.tok.weight"); + expect_pick(KOKORO_TENSOR_BERT_ATTN_Q, legacy_schema, "bert.layer.attn_q.weight"); + expect_pick(KOKORO_TENSOR_DURATION_PROJ, legacy_schema, "pred.duration_proj.weight"); + expect_pick(KOKORO_TENSOR_F0_PROJ, legacy_schema, "pred.F0_proj.weight"); + expect_pick(KOKORO_TENSOR_N_PROJ, legacy_schema, "pred.N_proj.weight"); + expect_pick(KOKORO_TENSOR_GEN_CONV_POST, legacy_schema, "dec.gen.conv_post.weight"); + + const std::set empty_schema; + assert(kokoro_pick_tensor_name(KOKORO_TENSOR_BERT_TOKEN_EMBD, has_name, (void *) &empty_schema) == nullptr); + + std::printf("test_kokoro_tensor_names: OK\n"); + return 0; +}