Merged
59 changes: 0 additions & 59 deletions inference-platforms/vllm/Dockerfile

This file was deleted.

13 changes: 8 additions & 5 deletions inference-platforms/vllm/README.md
@@ -1,7 +1,7 @@
# vLLM

This shows how to use the [vLLM OpenTelemetry POC][otel-poc] to export
OpenTelemetry traces from vLLM requests to its OpenAI compatible endpoint.
This shows how to export OpenTelemetry traces from [vLLM][vllm] requests to
its OpenAI compatible endpoint.

## Prerequisites

@@ -28,13 +28,16 @@ Once vLLM is running, use [uv][uv] to make an OpenAI request via
uv run --exact -q --env-file env.local ../chat.py
```

Or, for the OpenAI Responses API:

```bash
uv run --exact -q --env-file env.local ../chat.py --use-responses-api
```
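The `env.local` file referenced by `--env-file` is not shown in this diff. Based on the variables the Compose command expands, it presumably defines at least the following; the model name and endpoint here are placeholder assumptions, not values from the repository:

```shell
# Hypothetical env.local contents -- the real file may differ.
CHAT_MODEL=Qwen/Qwen2.5-0.5B-Instruct                    # assumed model name
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://localhost:4317 # assumed OTLP endpoint
```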

## Notes

* This does not yet support metrics, and there is no GitHub issue tracking it.
* This does not yet support logs, and there is no GitHub issue tracking it.
* Until [this issue][openai-responses] is resolved, don't use `--use-responses-api`.

---
[otel-poc]: https://github.com/vllm-project/vllm/blob/main/examples/online_serving/opentelemetry/README.md
[vllm]: https://docs.vllm.ai/en/latest/features/opentelemetry.html
[uv]: https://docs.astral.sh/uv/getting-started/installation/
[openai-responses]: https://github.com/vllm-project/vllm/issues/14721
14 changes: 12 additions & 2 deletions inference-platforms/vllm/docker-compose.yml
@@ -1,8 +1,18 @@
services:
  vllm:
    container_name: vllm
    build:
      context: .
    image: vllm/vllm-openai-cpu:v0.17.0
    entrypoint: []
    # Serve args from the prior Dockerfile CMD:
    # https://github.com/elastic/observability-examples/blob/139feb0f/inference-platforms/vllm/Dockerfile#L59
    command:
      - sh
      - -c
      - >
        vllm serve $$CHAT_MODEL
        --max-model-len=8192
        --enforce-eager
        --otlp-traces-endpoint=$$OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
    env_file:
      - env.local
    ports:
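Note the `$$` in the Compose `command`: Compose reduces `$$` to a literal `$`, so the variables are expanded by `sh` inside the container at runtime rather than interpolated by Compose. A quick way to see the resulting invocation, using placeholder values for the model and endpoint:

```shell
# Simulate what the container's shell receives: Compose turns $$VAR into $VAR,
# and sh expands it from the container environment (placeholder values here).
export CHAT_MODEL=my-model
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://otel-collector:4317
sh -c 'echo vllm serve $CHAT_MODEL --otlp-traces-endpoint=$OTEL_EXPORTER_OTLP_TRACES_ENDPOINT'
# prints: vllm serve my-model --otlp-traces-endpoint=grpc://otel-collector:4317
```

Without the doubled `$`, Compose would try to interpolate the variables itself at `docker compose up` time and warn if they were unset in the host environment.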