Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
b1aa7d7
feat(hybrid): add Hybrid provider option to ProviderDropdown (UI scaf…
NeonRyan May 4, 2026
19b85fb
added per-user llm backend selection with uniform (all same provider)…
NeonCharlie-24 May 5, 2026
3cc75f7
Enhance ChatPage with hybrid LLM configuration support
NeonRyan May 5, 2026
ee2e562
added filter for available backends with health status check set to p…
NeonCharlie-24 May 6, 2026
221950c
added unit tests for the available backends filter and health check.
NeonCharlie-24 May 6, 2026
b5a1c6e
Pending changes to build backend the menus will be moved to the welco…
NeonRyan May 6, 2026
79c0911
fixed bootstrap.py import causing test_available_backends failure.
NeonCharlie-24 May 6, 2026
d3d8612
added conftest.py to simplify mock module imports and unit tests for …
NeonCharlie-24 May 6, 2026
3acdc87
restored needs_clarification_improved function lost during rebase.
NeonCharlie-24 May 12, 2026
104c471
Enhance SettingsModal with user profile and account management features
NeonRyan May 21, 2026
9f2b111
fix stubbing issue in conftest.py from rebase.
NeonCharlie-24 Jun 2, 2026
6895b06
fix backend config values to lock brainforge models on frontend and p…
NeonCharlie-24 Jun 2, 2026
5b34135
added admin-level enabled toggle to each provider and set default bac…
NeonCharlie-24 Jun 2, 2026
a2a5293
replaced frontend hardcoded gemini fallback with dynamic defaults.
NeonCharlie-24 Jun 2, 2026
377a738
fallback to default_backend when hybrid mode has no overrides.
NeonCharlie-24 Jun 2, 2026
d223e2a
add test case for gemini missing.
NeonCharlie-24 Jun 2, 2026
342988e
added configurable default_backend parameter to config.yaml.
NeonCharlie-24 Jun 3, 2026
b5b4769
Fixed the black on black text and added the default option
NeonRyan Jun 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 77 additions & 9 deletions multi_llm_chatbot_backend/app/api/routes/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from app.api.utils import get_or_create_session_for_request_async
from app.core.auth import get_current_active_user
from app.config import get_settings
from app.core.bootstrap import chat_orchestrator
from app.core.bootstrap import chat_orchestrator, get_llm_client
from app.core.database import get_database
from app.core.persona_filter import get_available_persona_ids
from app.core.session_manager import get_session_manager
Expand All @@ -24,6 +24,41 @@
router = APIRouter()
session_manager = get_session_manager()


def resolve_llm_clients(user: User) -> Dict[str, Any]:
    """Turn a user's saved LLM configuration into concrete client objects.

    The returned dict always carries the keys ``"orchestrator"`` and
    ``"personas"``:

    - No saved configuration: both values are ``None`` so that callers can
      fall back to the orchestrator/persona defaults.
    - ``mode == "uniform"``: one client, looked up from ``default_backend``,
      is used for the orchestrator and for every registered persona.
    - Any other mode is handled as hybrid: the orchestrator uses
      ``orchestrator_backend`` (or ``default_backend`` when that is unset)
      and each registered persona uses its entry in ``persona_backends``,
      falling back to ``default_backend`` when it has none.
    """
    llm_config = user.llm_config

    # Guard: nothing stored for this user.
    if llm_config is None:
        return {"orchestrator": None, "personas": None}

    # Uniform mode: a single client shared by every consumer.
    if llm_config.mode == "uniform":
        shared_client = get_llm_client(llm_config.default_backend)
        return {
            "orchestrator": shared_client,
            "personas": dict.fromkeys(chat_orchestrator.personas, shared_client),
        }

    # Hybrid mode: resolve the orchestrator first, then each persona.
    orchestrator_client = get_llm_client(
        llm_config.orchestrator_backend or llm_config.default_backend
    )

    overrides = llm_config.persona_backends or {}
    per_persona = {
        persona_id: get_llm_client(
            overrides.get(persona_id, llm_config.default_backend)
        )
        for persona_id in chat_orchestrator.personas
    }

    return {"orchestrator": orchestrator_client, "personas": per_persona}

# Enhanced data models
class UserInput(BaseModel):
user_input: str
Expand Down Expand Up @@ -81,6 +116,11 @@ async def chat_stream(

async def _event_generator():
try:
# Resolve per-user LLM clients from their stored config
llm_clients = resolve_llm_clients(current_user)
orchestrator_llm = llm_clients["orchestrator"]
persona_llms = llm_clients["personas"]

# Load or create the in-memory session
if message.chat_session_id:
sid = f"chat_{message.chat_session_id}"
Expand All @@ -107,7 +147,9 @@ async def _event_generator():
).to_ndjson()

if await chat_orchestrator.needs_clarification_improved(session, message.user_input):
clar = await chat_orchestrator.generate_contextual_clarification(message.user_input)
clar = await chat_orchestrator.generate_contextual_clarification(
message.user_input, llm_client=orchestrator_llm,
)
yield ChatStreamLine(
type="clarification",
data={
Expand All @@ -123,7 +165,9 @@ async def _event_generator():

# If an enabled tool can handle this query, return its response
# directly and skip persona generation.
tool_result = await chat_orchestrator.get_tool_response(message.user_input)
tool_result = await chat_orchestrator.get_tool_response(
message.user_input, llm_client=orchestrator_llm,
)
if tool_result.used_tool:
# Append user message to in-memory session and persist to MongoDB
session.append_message("orchestrator", tool_result.text)
Expand Down Expand Up @@ -164,6 +208,7 @@ async def _event_generator():
top_personas = await chat_orchestrator.get_top_personas(
session_id=sid,
allowed_ids=available,
llm_client=orchestrator_llm,
)

# Guard against race condition where all selected advisors
Expand Down Expand Up @@ -210,9 +255,11 @@ async def _run(pid: str) -> None:
"document_chunks_used": 0,
})
return
persona_llm = (persona_llms or {}).get(pid)
result = await chat_orchestrator.generate_single_persona_response(
session, persona,
message.response_length or "medium",
llm_client=persona_llm,
)
session.append_message(pid, result["response"])
await done_queue.put(result)
Expand Down Expand Up @@ -390,7 +437,10 @@ async def create_new_chat(
raise HTTPException(status_code=500, detail="Failed to create new chat")

@router.post("/chat/{persona_id}")
async def chat_with_specific_advisor(persona_id: str, input: UserInput, request: Request):
async def chat_with_specific_advisor(
persona_id: str, input: UserInput, request: Request,
current_user: User = Depends(get_current_active_user),
):
"""Chat with a specific advisor - UPDATED"""
try:
if persona_id not in chat_orchestrator.personas:
Expand All @@ -408,11 +458,15 @@ async def chat_with_specific_advisor(persona_id: str, input: UserInput, request:
isExpandRequest=True,
),
)

llm_clients = resolve_llm_clients(current_user)
persona_llm = (llm_clients["personas"] or {}).get(persona_id)

result = await chat_orchestrator.chat_with_persona(
user_input=input.user_input,
persona_id=persona_id,
session_id=session_id
session_id=session_id,
llm_client=persona_llm,
)

# Handle response structure
Expand Down Expand Up @@ -479,7 +533,10 @@ async def chat_with_specific_advisor(persona_id: str, input: UserInput, request:
}

@router.post("/reply-to-advisor")
async def reply_to_advisor(reply: ReplyToAdvisor, request: Request):
async def reply_to_advisor(
reply: ReplyToAdvisor, request: Request,
current_user: User = Depends(get_current_active_user),
):
"""Reply to a specific advisor with proper context - UPDATED"""
try:
if reply.advisor_id not in chat_orchestrator.personas:
Expand Down Expand Up @@ -520,10 +577,14 @@ async def reply_to_advisor(reply: ReplyToAdvisor, request: Request):
if original_message:
contextual_input = f"[Replying to your previous message: '{original_message[:100]}...'] {reply.user_input}"

llm_clients = resolve_llm_clients(current_user)
advisor_llm = (llm_clients["personas"] or {}).get(reply.advisor_id)

result = await chat_orchestrator.chat_with_persona(
user_input=contextual_input,
persona_id=reply.advisor_id,
session_id=session_id
session_id=session_id,
llm_client=advisor_llm,
)

# Handle response structure
Expand Down Expand Up @@ -600,15 +661,22 @@ async def reply_to_advisor(reply: ReplyToAdvisor, request: Request):
}

@router.post("/ask/")
async def ask_question(query: PersonaQuery, request: Request):
async def ask_question(
query: PersonaQuery, request: Request,
current_user: User = Depends(get_current_active_user),
):
"""Ask question - UPDATED"""
try:
session_id = await get_or_create_session_for_request_async(request)

llm_clients = resolve_llm_clients(current_user)
persona_llm = (llm_clients["personas"] or {}).get(query.persona)

result = await chat_orchestrator.chat_with_persona(
user_input=query.question,
persona_id=query.persona,
session_id=session_id
session_id=session_id,
llm_client=persona_llm,
)

if result["type"] == "single_persona_response":
Expand Down
150 changes: 57 additions & 93 deletions multi_llm_chatbot_backend/app/api/routes/provider.py
Original file line number Diff line number Diff line change
@@ -1,108 +1,72 @@
from fastapi import APIRouter, Body, HTTPException
from app.config import get_settings
from app.llm.improved_gemini_client import ImprovedGeminiClient
from app.llm.improved_ollama_client import ImprovedOllamaClient
from app.llm.improved_vllm_client import ImprovedVllmClient
from app.models.default_personas import get_default_personas
from app.core.bootstrap import chat_orchestrator, llm, current_provider, available_providers
from app.core.brainforge_sync import BRAINFORGE_PERSONA_PREFIX
from pydantic import BaseModel
import os
from fastapi import APIRouter, Depends, HTTPException, status
from app.core.auth import get_current_active_user
from app.core.bootstrap import (
chat_orchestrator, get_llm_client, AVAILABLE_BACKENDS, _is_backend_enabled,
)
from app.core.database import get_database
from app.models.user import User, UserLLMConfig
import logging

logger = logging.getLogger(__name__)

router = APIRouter()

def create_llm_client(provider: str = None):
    """Build the LLM client object for a provider name.

    Args:
        provider: ``"gemini"``, ``"ollama"`` or ``"vllm"``. When ``None``,
            the module-level ``current_provider`` is used instead.

    Returns:
        A Gemini, Ollama or vLLM client. If constructing the Gemini client
        raises ``ValueError``, a warning is logged and an Ollama client is
        returned in its place.

    Raises:
        ValueError: if the provider name is not recognised, or if ``"vllm"``
            is requested while ``llm.vllm.api_url`` is empty in the settings.
    """
    chosen = current_provider if provider is None else provider

    if chosen == "gemini":
        try:
            return ImprovedGeminiClient(model_name=os.getenv("GEMINI_MODEL"))
        except ValueError as err:
            # Best-effort fallback so a failed Gemini setup still yields a client.
            logger.warning(f"Gemini API key not found, falling back to Ollama: {err}")
            return ImprovedOllamaClient(model_name="llama3.2:1b")

    if chosen == "ollama":
        return ImprovedOllamaClient(model_name="llama3.2:1b")

    if chosen == "vllm":
        vllm_settings = get_settings().llm.vllm
        if not vllm_settings.api_url:
            raise ValueError("No vLLM endpoint configured. Set llm.vllm.api_url in your config.")
        return ImprovedVllmClient(
            api_url=vllm_settings.api_url,
            api_key=vllm_settings.api_key,
        )

    raise ValueError(f"Unknown provider: {chosen}")

# Initialize LLM and personas
# Builds a client for the provider named by `current_provider`, creates the
# default personas around that client, and registers each one on the shared
# `chat_orchestrator`.
# NOTE(review): this appears to run at module import time (indentation is not
# visible in this view) — confirm.
# NOTE(review): the assignment below rebinds `llm`, a name this module also
# imports from app.core.bootstrap — confirm which object later readers expect.
llm = create_llm_client(current_provider)
DEFAULT_PERSONAS = get_default_personas(llm)
for persona in DEFAULT_PERSONAS:
chat_orchestrator.register_persona(persona)

# Payload model taken by switch_provider; `provider` is the name of the
# provider to switch to.
class ProviderSwitch(BaseModel):
provider: str
Comment thread
NeonDaniel marked this conversation as resolved.

@router.get("/current-provider")
async def get_current_provider():
async def get_current_provider(
current_user: User = Depends(get_current_active_user),
):
"""Return the authenticated user's LLM configuration."""
config = current_user.llm_config or UserLLMConfig()
return {
"current_provider": current_provider,
"available_providers": available_providers,
"model_info": {
"name": llm.model_name if hasattr(llm, 'model_name') else "gemini-2.0-flash",
"provider": current_provider
}
"llm_config": config.model_dump(),
"available_backends": AVAILABLE_BACKENDS,
}

@router.post("/switch-provider")
async def switch_provider(provider_data: ProviderSwitch):
global current_provider, llm

if provider_data.provider not in available_providers:
raise HTTPException(status_code=400, detail=f"Unknown provider: {provider_data.provider}. Available: {available_providers}")

try:
current_provider = provider_data.provider
new_llm = create_llm_client(current_provider)
llm = new_llm

chat_orchestrator.llm_client = new_llm

new_personas = get_default_personas(new_llm)
# Clear only non-BrainForge personas; BF advisors have their own LLM clients
non_bf_ids = [pid for pid in chat_orchestrator.personas if not pid.startswith(f"{BRAINFORGE_PERSONA_PREFIX}_")]
for pid in non_bf_ids:
chat_orchestrator.unregister_persona(pid)
for persona in new_personas:
chat_orchestrator.register_persona(persona)

return {
"message": f"Successfully switched to {current_provider}",
"current_provider": current_provider,
"model_info": {
"name": new_llm.model_name if hasattr(new_llm, 'model_name') else "gemini-2.0-flash",
"provider": current_provider
}
}

except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to switch to {provider_data.provider}: {str(e)}")

@router.post("/switch-model")
async def switch_model(model_name: str = Body(...)):
if "gemini" in model_name.lower():
return await switch_provider(ProviderSwitch(provider="gemini"))
else:
return await switch_provider(ProviderSwitch(provider="ollama"))
@router.post("/switch-provider")
async def switch_provider(
llm_config: UserLLMConfig,
Comment thread
NeonDaniel marked this conversation as resolved.
current_user: User = Depends(get_current_active_user),
):
"""Persist the user's LLM configuration to their profile."""
if llm_config.mode == "hybrid" and llm_config.persona_backends:
registered = set(chat_orchestrator.personas.keys())
unknown = set(llm_config.persona_backends.keys()) - registered
if unknown:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Unknown persona IDs: {sorted(unknown)}. "
f"Valid IDs: {sorted(registered)}",
)

backends_to_check = {llm_config.default_backend}
if llm_config.orchestrator_backend:
backends_to_check.add(llm_config.orchestrator_backend)
if llm_config.persona_backends:
backends_to_check.update(llm_config.persona_backends.values())

for backend in backends_to_check:
if not _is_backend_enabled(backend):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Backend {backend!r} is disabled by the administrator.",
)
try:
get_llm_client(backend)
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Backend {backend!r} is not configured: {exc}",
)

db = get_database()
await db.users.update_one(
{"_id": current_user.id},
{"$set": {"llm_config": llm_config.model_dump()}},
)

@router.get("/current-model")
async def get_current_model():
model_name = llm.model_name if hasattr(llm, 'model_name') else "gemini-2.0-flash"
return {
"model": model_name,
"provider": current_provider
"message": "LLM configuration updated",
"llm_config": llm_config.model_dump(),
}
5 changes: 5 additions & 0 deletions multi_llm_chatbot_backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def _warn_connection_envvar(self):


class GeminiConfig(BaseModel):
enabled: bool = True
api_key: str = Field(default=os.getenv("GEMINI_API_KEY"))
model: str = "gemini-2.5-flash"

Expand All @@ -272,12 +273,14 @@ def _warn_gemini_envvar(self):


# Settings for the Ollama backend.
class OllamaConfig(BaseModel):
# Per-provider on/off flag; defaults to enabled.
enabled: bool = True
# Default Ollama model name.
model: str = "llama3.2:1b"
# TODO: Drop support for `OLLAMA_BASE_URL` envvar handling
# The default is computed once, when this class body is evaluated: the value
# of the OLLAMA_BASE_URL environment variable if it is set, otherwise
# http://localhost:11434.
base_url: str = Field(default=os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"))


class VllmConfig(BaseModel):
enabled: bool = True
api_url: str = ""
api_key: str = Field(default=os.getenv("VLLM_API_KEY", ""))

Expand All @@ -290,10 +293,12 @@ class BrainForgeConfig(BaseModel):


# Top-level LLM settings: a default backend name, one nested config per
# backend, and a health-check interval.
class LLMConfig(BaseModel):
# Name of the backend to use by default; an empty string unless configured.
# NOTE(review): how an empty value is resolved is not visible here — confirm
# against the code that reads this field.
default_backend: str = ""
# Per-backend settings, each defaulting to that backend's own defaults.
gemini: GeminiConfig = GeminiConfig()
ollama: OllamaConfig = OllamaConfig()
vllm: VllmConfig = VllmConfig()
brainforge: BrainForgeConfig = BrainForgeConfig()
# Defaults to 300 seconds.
# NOTE(review): presumably the period between backend health checks — confirm
# against the consumer of this setting.
health_check_interval_seconds: int = 300


class RAGConfig(BaseModel):
Expand Down
Loading
Loading