mudler · mudler · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -9,6 +9,7 @@ endif()
 
 option(PARAKEET_BUILD_TESTS "Build ctest targets" OFF)
 option(PARAKEET_BUILD_CLI   "Build parakeet-cli" ON)
+option(PARAKEET_BUILD_SERVER "Build parakeet-server (OpenAI-compatible example)" ON)
 option(PARAKEET_SHARED      "Build libparakeet as shared" OFF)
 option(PARAKEET_GGML_CUDA   "Forward GGML_CUDA" OFF)
 option(PARAKEET_GGML_METAL  "Forward GGML_METAL" OFF)
@@ -92,6 +93,9 @@ target_link_libraries(parakeet PUBLIC ggml)
 if(PARAKEET_BUILD_CLI)
   add_subdirectory(examples/cli)
 endif()
+if(PARAKEET_BUILD_SERVER)
+  add_subdirectory(examples/server)
+endif()
 if(PARAKEET_BUILD_TESTS)
   enable_testing()
   add_subdirectory(tests)

diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_executable(parakeet-server
+    main.cpp
+    model_fetch.cpp
+    openai_format.cpp)
+target_link_libraries(parakeet-server PRIVATE parakeet)
+target_include_directories(parakeet-server PRIVATE
+    ${CMAKE_SOURCE_DIR}/src
+    ${CMAKE_SOURCE_DIR}/third_party)
+find_package(Threads REQUIRED)
+target_link_libraries(parakeet-server PRIVATE Threads::Threads)
diff --git a/examples/server/README.md b/examples/server/README.md
@@ -0,0 +1,85 @@
+# parakeet-server
+
+A small OpenAI-drop-in HTTP server for transcription, built on parakeet.cpp.
+Point any OpenAI client's `base_url` at it and call
+`POST /v1/audio/transcriptions`.
+
+This is an example, not a production service. It serves one model, runs one
+transcription at a time, and accepts WAV uploads only.
+
+## Build
+
+Built by default with the rest of the project (`PARAKEET_BUILD_SERVER=ON`):
+
+```sh
+cmake -B build && cmake --build build --target parakeet-server -j
+```
+
+## Run
+
+With a local model:
+
+```sh
+./build/examples/server/parakeet-server --model path/to/model.gguf --port 8080
+```
+
+With a published model by alias (downloaded once and cached under
+`${XDG_CACHE_HOME:-$HOME/.cache}/parakeet.cpp/models`, override with
+`--cache-dir` or `PARAKEET_CACHE_DIR`):
+
+```sh
+./build/examples/server/parakeet-server --model tdt_ctc-110m-q4_k
+```
+
+`--model` accepts a local `.gguf` path, an `http(s)://` URL, a `<name>.gguf`
+filename in the `mudler/parakeet-cpp-gguf` repo, or one of these aliases:
+
+| Alias                | Model                                   |
+|----------------------|-----------------------------------------|
+| `tdt_ctc-110m`       | hybrid TDT+CTC 110M (f16)               |
+| `tdt_ctc-110m-q4_k`  | hybrid TDT+CTC 110M (q4_k, smallest)    |
+| `tdt_ctc-1.1b`       | hybrid TDT+CTC 1.1B (f16)               |
+| `tdt-0.6b-v2`        | TDT 0.6B v2 (f16)                       |
+| `tdt-0.6b-v3`        | TDT 0.6B v3, multilingual (f16)         |
+| `tdt-1.1b`           | TDT 1.1B (f16)                          |
+| `ctc-0.6b`           | CTC 0.6B (f16)                          |
+| `ctc-1.1b`           | CTC 1.1B (f16)                          |
+| `rnnt-0.6b`          | RNN-T 0.6B (f16)                        |
+| `rnnt-1.1b`          | RNN-T 1.1B (f16)                        |
+| `eou-120m`           | realtime EOU 120M (f16)                 |
+
+Downloads use `curl` (or `wget`). If neither is on `PATH`, download the `.gguf`
+yourself and pass the local path.
+
+## Call it
+
+```sh
+curl -F file=@audio.wav -F response_format=verbose_json \
+  http://localhost:8080/v1/audio/transcriptions
+```
+
+With the OpenAI Python client:
+
+```python
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
+with open("audio.wav", "rb") as f:
+    print(client.audio.transcriptions.create(model="parakeet", file=f).text)
+```
+
+## Supported
+
+- `response_format`: `json` (default), `text`, `verbose_json`.
+- `timestamp_granularities[]=word` adds a `words` array to `verbose_json`.
+
+## Known simplifications
+
+- WAV uploads only. Other formats return 400. Convert with ffmpeg first.
+- `verbose_json` emits a single `segment` spanning the whole transcript;
+  Parakeet has no native segmentation. Word timestamps are real.
+- `language` in `verbose_json` is a fixed `en` placeholder.
+- `model` in the request is accepted but ignored; the process serves the one
+  model given to `--model`.
+- `temperature` and `prompt` are accepted and ignored (greedy decode).
+- Inference is serialized by a mutex. For real parallelism, hold a pool of
+  `pk::Model` contexts instead of one.