Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ https://github.com/user-attachments/assets/5dd8a80b-d9d8-4150-a4a5-7bdca75738cc
### 🛠️ Developer & Power User Tools
- **Responses API Native**: Built to use the latest OpenAI Responses API with automatic fallback to Chat Completions.
- **Request Transparency**: Peek under the hood with a live-updating payload preview before you send requests.
- **Session Request Logging (Optional)**: Save request/response attempt logs for the current session and download as JSON (media base64 is omitted; filenames are kept).
- **Provider Presets**: Easily switch between OpenAI, Gemini, OpenRouter, BytePlus, or local endpoints via `config.toml`.
- **Reasoning Stream**: View the model's "thought process" in real-time for reasoning-capable models.

Expand Down Expand Up @@ -201,5 +202,6 @@ Built with ❤️ by **Taruma Sakti** · Vibecoding with GPT-5.3-Codex






4 changes: 4 additions & 0 deletions app_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
IS_PROCESSING = "is_processing"
PENDING_ACTION = "pending_action"
LAST_ERROR = "last_error"
REQUEST_LOGGING_ENABLED = "request_logging_enabled"
REQUEST_LOGS = "request_logs"


def init_state() -> None:
Expand All @@ -33,6 +35,8 @@ def init_state() -> None:
IS_PROCESSING: False,
PENDING_ACTION: None,
LAST_ERROR: "",
REQUEST_LOGGING_ENABLED: False,
REQUEST_LOGS: [],
}
for key, value in defaults.items():
if key not in st.session_state:
Expand Down
174 changes: 151 additions & 23 deletions llm_streaming.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Tuple
from typing import Any, Callable, Tuple

import streamlit as st
from openai import OpenAI
Expand Down Expand Up @@ -145,25 +145,55 @@ def extract_response_deltas(event: Any) -> Tuple[str, str]:
return text_delta, reasoning_delta


def build_responses_request_kwargs(
    model: str,
    messages: list[dict[str, Any]],
    reasoning_effort: str | None = None,
) -> dict[str, Any]:
    """Assemble the keyword arguments for a streaming Responses API call.

    Args:
        model: Model identifier to send to the provider.
        messages: Chat-style message dicts; converted to Responses-API
            input items via ``messages_to_responses_input``.
        reasoning_effort: Optional effort level; when truthy it is attached
            as ``{"reasoning": {"effort": ...}}``, otherwise omitted.

    Returns:
        A dict suitable for ``client.responses.create(**kwargs)`` with
        ``stream`` always enabled.
    """
    reasoning_part: dict[str, Any] = (
        {"reasoning": {"effort": reasoning_effort}} if reasoning_effort else {}
    )
    return {
        "model": model,
        "input": messages_to_responses_input(messages),
        "stream": True,
        **reasoning_part,
    }


def build_chat_request_kwargs(
model: str,
messages: list[dict[str, Any]],
reasoning_effort: str | None = None,
) -> dict[str, Any]:
request_kwargs: dict[str, Any] = {
"model": model,
"messages": messages,
"stream": True,
"stream_options": {"include_usage": True},
}
if reasoning_effort:
request_kwargs["reasoning_effort"] = reasoning_effort
return request_kwargs


def stream_via_responses_api(
client: OpenAI,
model: str,
messages: list[dict[str, Any]],
thought_placeholder: Any,
answer_placeholder: Any,
reasoning_effort: str | None = None,
request_kwargs: dict[str, Any] | None = None,
) -> Tuple[str, str, dict[str, int] | None]:
answer = ""
thought = ""
usage: dict[str, int] | None = None

responses_request_kwargs: dict[str, Any] = {
"model": model,
"input": messages_to_responses_input(messages),
"stream": True,
}
if reasoning_effort:
responses_request_kwargs["reasoning"] = {"effort": reasoning_effort}
responses_request_kwargs = request_kwargs or build_responses_request_kwargs(
model,
messages,
reasoning_effort=reasoning_effort,
)

stream = client.responses.create(**responses_request_kwargs)

Expand Down Expand Up @@ -195,19 +225,17 @@ def stream_via_chat_completions(
thought_placeholder: Any,
answer_placeholder: Any,
reasoning_effort: str | None = None,
request_kwargs: dict[str, Any] | None = None,
) -> Tuple[str, str, dict[str, int] | None]:
answer = ""
thought = ""
usage: dict[str, int] | None = None

request_kwargs: dict[str, Any] = {
"model": model,
"messages": messages,
"stream": True,
"stream_options": {"include_usage": True},
}
if reasoning_effort:
request_kwargs["reasoning_effort"] = reasoning_effort
request_kwargs = request_kwargs or build_chat_request_kwargs(
model,
messages,
reasoning_effort=reasoning_effort,
)

stream = client.chat.completions.create(**request_kwargs)

Expand Down Expand Up @@ -240,19 +268,60 @@ def stream_response(
answer_placeholder: Any,
reasoning_effort: str | None = None,
prefer_responses_api: bool = True,
attempt_logger: Callable[[dict[str, Any]], None] | None = None,
) -> Tuple[str, str, dict[str, int] | None, bool]:
if not prefer_responses_api:
answer, thought, usage = stream_via_chat_completions(
client,
chat_request_kwargs = build_chat_request_kwargs(
model,
messages,
thought_placeholder,
answer_placeholder,
reasoning_effort=reasoning_effort,
)
st.caption("Transport: Chat Completions (Responses API disabled for this session)")
return answer, thought, usage, False

try:
answer, thought, usage = stream_via_chat_completions(
client,
model,
messages,
thought_placeholder,
answer_placeholder,
reasoning_effort=reasoning_effort,
request_kwargs=chat_request_kwargs,
)
if attempt_logger:
attempt_logger(
{
"transport": "chat_completions",
"request": chat_request_kwargs,
"response": {
"answer": answer,
"reasoning": thought,
"usage": usage,
"error": None,
},
}
)
st.caption("Transport: Chat Completions (Responses API disabled for this session)")
return answer, thought, usage, False
except Exception as chat_exc:
if attempt_logger:
attempt_logger(
{
"transport": "chat_completions",
"request": chat_request_kwargs,
"response": {
"answer": "",
"reasoning": "",
"usage": None,
"error": format_exception(chat_exc),
},
}
)
raise

responses_request_kwargs = build_responses_request_kwargs(
model,
messages,
reasoning_effort=reasoning_effort,
)
try:
answer, thought, usage = stream_via_responses_api(
client,
Expand All @@ -261,10 +330,37 @@ def stream_response(
thought_placeholder,
answer_placeholder,
reasoning_effort=reasoning_effort,
request_kwargs=responses_request_kwargs,
)
if attempt_logger:
attempt_logger(
{
"transport": "responses_api",
"request": responses_request_kwargs,
"response": {
"answer": answer,
"reasoning": thought,
"usage": usage,
"error": None,
},
}
)
st.caption("Transport: Responses API")
return answer, thought, usage, True
except Exception as responses_exc:
if attempt_logger:
attempt_logger(
{
"transport": "responses_api",
"request": responses_request_kwargs,
"response": {
"answer": "",
"reasoning": "",
"usage": None,
"error": format_exception(responses_exc),
},
}
)
thought_placeholder.empty()
answer_placeholder.empty()
st.caption("Responses API failed on this endpoint/model. Falling back to Chat Completions.")
Expand All @@ -273,6 +369,11 @@ def stream_response(
if not next_prefer_responses_api:
st.caption("Responses API auto-disabled for this session due to provider schema mismatch.")

chat_request_kwargs = build_chat_request_kwargs(
model,
messages,
reasoning_effort=reasoning_effort,
)
try:
answer, thought, usage = stream_via_chat_completions(
client,
Expand All @@ -281,10 +382,37 @@ def stream_response(
thought_placeholder,
answer_placeholder,
reasoning_effort=reasoning_effort,
request_kwargs=chat_request_kwargs,
)
if attempt_logger:
attempt_logger(
{
"transport": "chat_completions",
"request": chat_request_kwargs,
"response": {
"answer": answer,
"reasoning": thought,
"usage": usage,
"error": None,
},
}
)
st.caption("Transport: Chat Completions fallback")
return answer, thought, usage, next_prefer_responses_api
except Exception as fallback_exc:
if attempt_logger:
attempt_logger(
{
"transport": "chat_completions",
"request": chat_request_kwargs,
"response": {
"answer": "",
"reasoning": "",
"usage": None,
"error": format_exception(fallback_exc),
},
}
)
thought_placeholder.empty()
answer_placeholder.empty()
st.error("Both Responses API and Chat Completions fallback failed.")
Expand Down
Loading
Loading