tnm · andriyadi · Mar 27, 2026 · Mar 27, 2026 · Mar 28, 2026
diff --git a/README.md b/README.md
@@ -75,7 +75,7 @@ Non-interactive install:
 - USB local admin console for recovery, safe mode, and pre-network bring-up
 - Persistent memory across reboots
 - Persona options: `neutral`, `friendly`, `technical`, `witty`
-- Provider support for Anthropic, OpenAI, OpenRouter, and Ollama (custom endpoint)
+- Provider support for Anthropic, OpenAI, Azure OpenAI, OpenRouter, and Ollama (custom endpoint)
 
 ## Hardware
 

diff --git a/docs-site/reference/README_COMPLETE.md b/docs-site/reference/README_COMPLETE.md
@@ -35,7 +35,7 @@ Agent: Done. GPIO2 is now off.
 - **Built-in and custom tools** - Ships with a pre-built set of tools, easy to extend
 - **GPIO control** — Read sensors, toggle relays, control LEDs
 - **Persistent memory** — Remembers things across reboots
-- **Any LLM backend** — Anthropic, OpenAI, OpenRouter, or Ollama (custom endpoint)
+- **Any LLM backend** — Anthropic, OpenAI, Azure OpenAI, OpenRouter, or Ollama (custom endpoint)
 - **$5 hardware** — Just an ESP32 dev board and WiFi
 - **~888 KiB guaranteed max binary** — Fits in dual OTA partitions with ~40% free
 
@@ -626,10 +626,17 @@ export ANTHROPIC_API_KEY=...
 # OpenAI
 export OPENAI_API_KEY=...
 ./scripts/emulate.sh --live-api --live-api-provider openai
+
+# Azure OpenAI
+export AZURE_OPENAI_API_KEY=...
+export AZURE_OPENAI_API_URL="https://<resource>.openai.azure.com/openai/responses?api-version=2025-04-01-preview"
+export AZURE_OPENAI_MODEL="<deployment-name>"
+./scripts/emulate.sh --live-api --live-api-provider azure-openai
 ```
 
 `--live-api` keeps QEMU offline but proxies LLM requests over UART to a host bridge process.
-`--live-api-provider auto` (default) infers provider from request format.
+`--live-api-provider auto` (default) seeds the emulator's runtime backend and model from the available host environment variables, then infers the provider from the emitted request format.
+Set `ANTHROPIC_MODEL`, `OPENAI_MODEL`, or `AZURE_OPENAI_MODEL` to override the seeded model/deployment when needed.
 Use `--live-api-logs` only when debugging bridge timing/forwarding.
 Set `OPENAI_API_URL` to target an OpenAI-compatible endpoint other than the default.
 

diff --git a/main/agent.c b/main/agent.c
@@ -44,6 +44,7 @@ static char s_test_persona_value[16] = {0};
 // Conversation history (rolling message buffer)
 static conversation_msg_t s_history[MAX_HISTORY_TURNS * 2];
 static int s_history_len = 0;
+static char s_responses_previous_response_id[128] = {0};
 
 // Buffers (static to avoid stack overflow)
 static char s_response_buf[LLM_RESPONSE_BUF_SIZE];
@@ -108,7 +109,7 @@ static void history_rollback_to(int marker, const char *reason)
 
 // Add a message to history
 static void history_add(const char *role, const char *content,
-                        bool is_tool_use, bool is_tool_result,
+                        bool is_tool_use, bool is_tool_result, bool is_response_item,
                         const char *tool_id, const char *tool_name)
 {
     // Drop one oldest message when full.
@@ -125,6 +126,7 @@ static void history_add(const char *role, const char *content,
     msg->content[sizeof(msg->content) - 1] = '\0';
     msg->is_tool_use = is_tool_use;
     msg->is_tool_result = is_tool_result;
+    msg->is_response_item = is_response_item;
 
     if (tool_id) {
         strncpy(msg->tool_id, tool_id, sizeof(msg->tool_id) - 1);
@@ -141,6 +143,23 @@ static void history_add(const char *role, const char *content,
     }
 }
 
+static void history_add_response_item(const char *item_json)
+{
+    history_add("assistant", item_json, false, false, true, NULL, NULL);
+}
+
+static void responses_set_previous_response_id(const char *response_id)
+{
+    if (!response_id) {
+        s_responses_previous_response_id[0] = '\0';
+        return;
+    }
+
+    strncpy(s_responses_previous_response_id, response_id,
+            sizeof(s_responses_previous_response_id) - 1);
+    s_responses_previous_response_id[sizeof(s_responses_previous_response_id) - 1] = '\0';
+}
+
 static void queue_channel_response(const char *text)
 {
     if (!s_channel_output_queue) {
@@ -401,6 +420,7 @@ static void process_message(const char *user_message, message_source_t source, i
 {
     ESP_LOGI(TAG, "Processing: %s", user_message);
     int history_turn_start = s_history_len;
+    char previous_response_id_turn_start[sizeof(s_responses_previous_response_id)] = {0};
     bool is_non_command_message = !agent_is_slash_command(user_message);
     bool is_cron_trigger = agent_is_cron_trigger_message(user_message);
     bool telegram_polling_paused = false;
@@ -413,6 +433,10 @@ static void process_message(const char *user_message, message_source_t source, i
         .rounds = 0,
     };
 
+    strncpy(previous_response_id_turn_start, s_responses_previous_response_id,
+            sizeof(previous_response_id_turn_start) - 1);
+    previous_response_id_turn_start[sizeof(previous_response_id_turn_start) - 1] = '\0';
+
     if (agent_is_command(user_message, "resume")) {
         if (!s_messages_paused) {
             send_response("zclaw is already active.", reply_chat_id);
@@ -502,7 +526,7 @@ static void process_message(const char *user_message, message_source_t source, i
     telegram_polling_paused = true;
 
     // Add user message to history
-    history_add("user", user_message, false, false, NULL, NULL);
+    history_add("user", user_message, false, false, false, NULL, NULL);
 
     int rounds = 0;
     bool done = false;
@@ -511,19 +535,32 @@ static void process_message(const char *user_message, message_source_t source, i
         rounds++;
         metrics.rounds = rounds;
 
+        // Azure/OpenAI Responses requests can chain from the server-side response id,
+        // so once a previous response id is stored, send only the newest history item.
+        const conversation_msg_t *request_history = s_history;
+        int request_history_len = s_history_len;
+        const char *previous_response_id = NULL;
+        if (llm_uses_responses_api() && s_responses_previous_response_id[0] != '\0') {
+            previous_response_id = s_responses_previous_response_id;
+            request_history = &s_history[s_history_len - 1];
+            request_history_len = 1;
+        }
+
         // Build request JSON (user message already in history)
         char *request = json_build_request(
             agent_build_system_prompt(s_persona, s_system_prompt_buf, sizeof(s_system_prompt_buf)),
-            s_history,
-            s_history_len,
+            request_history,
+            request_history_len,
             NULL,  // User message already in history
             tools,
-            tool_count
+            tool_count,
+            previous_response_id
         );
 
         if (!request) {
             ESP_LOGE(TAG, "Failed to build request JSON");
             history_rollback_to(history_turn_start, "request build failed");
+            responses_set_previous_response_id(previous_response_id_turn_start);
             send_response("Error: Failed to build request", reply_chat_id);
             telegram_resume_polling();
             telegram_polling_paused = false;
@@ -538,6 +575,7 @@ static void process_message(const char *user_message, message_source_t source, i
         if (!ratelimit_check(rate_reason, sizeof(rate_reason))) {
             free(request);
             history_rollback_to(history_turn_start, "rate limited");
+            responses_set_previous_response_id(previous_response_id_turn_start);
             send_response(rate_reason, reply_chat_id);
             telegram_resume_polling();
             telegram_polling_paused = false;
@@ -609,6 +647,7 @@ static void process_message(const char *user_message, message_source_t source, i
         if (err != ESP_OK) {
             ESP_LOGE(TAG, "LLM request failed after %d retries", LLM_MAX_RETRIES);
             history_rollback_to(history_turn_start, "llm request failed");
+            responses_set_previous_response_id(previous_response_id_turn_start);
             send_response("Error: Failed to contact LLM API after retries", reply_chat_id);
             telegram_resume_polling();
             telegram_polling_paused = false;
@@ -631,6 +670,7 @@ static void process_message(const char *user_message, message_source_t source, i
                                   &tool_input)) {
             ESP_LOGE(TAG, "Failed to parse response");
             history_rollback_to(history_turn_start, "llm response parse failed");
+            responses_set_previous_response_id(previous_response_id_turn_start);
             send_response("Error: Failed to parse LLM response", reply_chat_id);
             json_free_parsed_response();
             telegram_resume_polling();
@@ -639,16 +679,57 @@ static void process_message(const char *user_message, message_source_t source, i
             return;
         }
 
+        if (llm_uses_responses_api()) {
+            const cJSON *parsed = json_get_parsed_response();
+            const cJSON *response_id = parsed ? cJSON_GetObjectItem((cJSON *)parsed, "id") : NULL;
+            if (response_id && cJSON_IsString((cJSON *)response_id) &&
+                response_id->valuestring[0] != '\0') {
+                responses_set_previous_response_id(response_id->valuestring);
+            }
+        }
+
         // Check if it's a tool use
         if (tool_name[0] != '\0' && tool_input) {
             ESP_LOGI(TAG, "Tool call: %s (round %d)", tool_name, rounds);
 
             // Store the tool_input as JSON string for history
             char *input_str = cJSON_PrintUnformatted(tool_input);
 
-            // Add tool_use to history
-            history_add("assistant", input_str ? input_str : "{}",
-                        true, false, tool_id, tool_name);
+            if (llm_uses_responses_api()) {
+                const cJSON *parsed = json_get_parsed_response();
+                const cJSON *output = parsed ? cJSON_GetObjectItem((cJSON *)parsed, "output") : NULL;
+                const cJSON *response_id = parsed ? cJSON_GetObjectItem((cJSON *)parsed, "id") : NULL;
+                if (!(response_id && cJSON_IsString((cJSON *)response_id) &&
+                      response_id->valuestring[0] != '\0')) {
+                    const cJSON *item = NULL;
+                    if (output && cJSON_IsArray(output)) {
+                        cJSON_ArrayForEach(item, output) {
+                            if (!cJSON_IsObject((cJSON *)item)) {
+                                continue;
+                            }
+
+                            // Fallback for Responses payloads that do not provide a
+                            // top-level response id. In that case we still need to replay
+                            // the prior raw output items to preserve reasoning state.
+                            cJSON *copy = cJSON_Duplicate((cJSON *)item, 1);
+                            char *item_json = NULL;
+                            if (copy) {
+                                cJSON_DeleteItemFromObject(copy, "_parsed_arguments");
+                                item_json = cJSON_PrintUnformatted(copy);
+                                cJSON_Delete(copy);
+                            }
+                            if (item_json) {
+                                history_add_response_item(item_json);
+                                free(item_json);
+                            }
+                        }
+                    }
+                }
+            } else {
+                // Add tool_use to history
+                history_add("assistant", input_str ? input_str : "{}",
+                            true, false, false, tool_id, tool_name);
+            }
             free(input_str);
 
             // Check if it's a user-defined tool
@@ -687,17 +768,17 @@ static void process_message(const char *user_message, message_source_t source, i
             }
 
             // Add tool_result to history
-            history_add("user", s_tool_result_buf, false, true, tool_id, NULL);
+            history_add("user", s_tool_result_buf, false, true, false, tool_id, NULL);
 
             json_free_parsed_response();
             // Continue loop to let Claude see the result
         } else {
             // Text response - we're done
             if (text_out[0] != '\0') {
-                history_add("assistant", text_out, false, false, NULL, NULL);
+                history_add("assistant", text_out, false, false, false, NULL, NULL);
                 send_response(text_out, reply_chat_id);
             } else {
-                history_add("assistant", "(No response from Claude)", false, false, NULL, NULL);
+                history_add("assistant", "(No response from Claude)", false, false, false, NULL, NULL);
                 send_response("(No response from Claude)", reply_chat_id);
             }
             json_free_parsed_response();
@@ -707,7 +788,8 @@ static void process_message(const char *user_message, message_source_t source, i
 
     if (!done) {
         ESP_LOGW(TAG, "Max tool rounds reached");
-        history_add("assistant", "(Reached max tool iterations)", false, false, NULL, NULL);
+        responses_set_previous_response_id(previous_response_id_turn_start);
+        history_add("assistant", "(Reached max tool iterations)", false, false, false, NULL, NULL);
         send_response("(Reached max tool iterations)", reply_chat_id);
         telegram_resume_polling();
         telegram_polling_paused = false;
@@ -733,6 +815,7 @@ void agent_test_reset(void)
 {
     memset(s_history, 0, sizeof(s_history));
     s_history_len = 0;
+    memset(s_responses_previous_response_id, 0, sizeof(s_responses_previous_response_id));
     memset(s_response_buf, 0, sizeof(s_response_buf));
     memset(s_tool_result_buf, 0, sizeof(s_tool_result_buf));
     s_channel_output_queue = NULL;

diff --git a/main/config.h b/main/config.h
@@ -5,7 +5,7 @@
 // Buffer Sizes
 // -----------------------------------------------------------------------------
 #define LLM_REQUEST_BUF_SIZE    12288   // 12KB for outgoing JSON
-#define LLM_RESPONSE_BUF_SIZE   16384   // 16KB for incoming JSON
+#define LLM_RESPONSE_BUF_SIZE   32768   // 32KB for incoming JSON
 #define CHANNEL_RX_BUF_SIZE     512     // Input line buffer
 #define CHANNEL_TX_BUF_SIZE     1024    // Output response buffer for serial/web relay
 #define TOOL_RESULT_BUF_SIZE    512     // Tool execution result
@@ -45,8 +45,9 @@
 typedef enum {
     LLM_BACKEND_ANTHROPIC = 0,
     LLM_BACKEND_OPENAI = 1,
-    LLM_BACKEND_OPENROUTER = 2,
-    LLM_BACKEND_OLLAMA = 3,
+    LLM_BACKEND_AZURE_OPENAI = 2,
+    LLM_BACKEND_OPENROUTER = 3,
+    LLM_BACKEND_OLLAMA = 4,
 } llm_backend_t;
 
 #define LLM_API_URL_ANTHROPIC   "https://api.anthropic.com/v1/messages"
@@ -57,6 +58,7 @@ typedef enum {
 
 #define LLM_DEFAULT_MODEL_ANTHROPIC   "claude-sonnet-4-6"
 #define LLM_DEFAULT_MODEL_OPENAI      "gpt-5.4"
+#define LLM_DEFAULT_MODEL_AZURE_OPENAI ""
 #define LLM_DEFAULT_MODEL_OPENROUTER  "openrouter/auto"
 #define LLM_DEFAULT_MODEL_OLLAMA      "qwen3:8b"