-
Notifications
You must be signed in to change notification settings - Fork 7
Feat/support hybrid model selection rebase #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
b1aa7d7
feat(hybrid): add Hybrid provider option to ProviderDropdown (UI scaf…
NeonRyan 19b85fb
added per-user llm backend selection with uniform (all same provider)…
NeonCharlie-24 3cc75f7
Enhance ChatPage with hybrid LLM configuration support
NeonRyan ee2e562
added filter for available backends with health status check set to p…
NeonCharlie-24 221950c
added unit tests for the available backends filter and health check.
NeonCharlie-24 b5a1c6e
Pending changes to build backend; the menus will be moved to the welco…
NeonRyan 79c0911
fixed bootstrap.py import causing test_available_backends failure.
NeonCharlie-24 d3d8612
added conftest.py to simplify mock module imports and unit tests for …
NeonCharlie-24 3acdc87
restored needs_clarification_improved function lost during rebase.
NeonCharlie-24 104c471
Enhance SettingsModal with user profile and account management features
NeonRyan 9f2b111
fix stubbing issue in conftest.py from rebase.
NeonCharlie-24 6895b06
fix backend config values to lock brainforge models on frontend and p…
NeonCharlie-24 5b34135
added admin-level enabled toggle to each provider and set default bac…
NeonCharlie-24 a2a5293
replaced frontend hardcoded gemini fallback with dynamic defaults.
NeonCharlie-24 377a738
fallback to default_backend when hybrid mode has no overrides.
NeonCharlie-24 d223e2a
add test case for gemini missing.
NeonCharlie-24 342988e
added configurable default_backend parameter to config.yaml.
NeonCharlie-24 b5b4769
Fixed the black-on-black text and added the default option
NeonRyan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,108 +1,72 @@ | ||
| from fastapi import APIRouter, Body, HTTPException | ||
| from app.config import get_settings | ||
| from app.llm.improved_gemini_client import ImprovedGeminiClient | ||
| from app.llm.improved_ollama_client import ImprovedOllamaClient | ||
| from app.llm.improved_vllm_client import ImprovedVllmClient | ||
| from app.models.default_personas import get_default_personas | ||
| from app.core.bootstrap import chat_orchestrator, llm, current_provider, available_providers | ||
| from app.core.brainforge_sync import BRAINFORGE_PERSONA_PREFIX | ||
| from pydantic import BaseModel | ||
| import os | ||
| from fastapi import APIRouter, Depends, HTTPException, status | ||
| from app.core.auth import get_current_active_user | ||
| from app.core.bootstrap import ( | ||
| chat_orchestrator, get_llm_client, AVAILABLE_BACKENDS, _is_backend_enabled, | ||
| ) | ||
| from app.core.database import get_database | ||
| from app.models.user import User, UserLLMConfig | ||
| import logging | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
| router = APIRouter() | ||
|
|
||
| def create_llm_client(provider: str = None): | ||
| global current_provider | ||
| if provider is None: | ||
| provider = current_provider | ||
|
|
||
| if provider == "gemini": | ||
| try: | ||
| return ImprovedGeminiClient(model_name=os.getenv("GEMINI_MODEL")) | ||
| except ValueError as e: | ||
| logger.warning(f"Gemini API key not found, falling back to Ollama: {e}") | ||
| return ImprovedOllamaClient(model_name="llama3.2:1b") | ||
| elif provider == "ollama": | ||
| return ImprovedOllamaClient(model_name="llama3.2:1b") | ||
| elif provider == "vllm": | ||
| settings = get_settings() | ||
| if not settings.llm.vllm.api_url: | ||
| raise ValueError("No vLLM endpoint configured. Set llm.vllm.api_url in your config.") | ||
| return ImprovedVllmClient( | ||
| api_url=settings.llm.vllm.api_url, | ||
| api_key=settings.llm.vllm.api_key, | ||
| ) | ||
| else: | ||
| raise ValueError(f"Unknown provider: {provider}") | ||
|
|
||
| # Initialize LLM and personas | ||
| llm = create_llm_client(current_provider) | ||
| DEFAULT_PERSONAS = get_default_personas(llm) | ||
| for persona in DEFAULT_PERSONAS: | ||
| chat_orchestrator.register_persona(persona) | ||
|
|
||
| class ProviderSwitch(BaseModel): | ||
| provider: str | ||
|
|
||
| @router.get("/current-provider") | ||
| async def get_current_provider(): | ||
| async def get_current_provider( | ||
| current_user: User = Depends(get_current_active_user), | ||
| ): | ||
| """Return the authenticated user's LLM configuration.""" | ||
| config = current_user.llm_config or UserLLMConfig() | ||
| return { | ||
| "current_provider": current_provider, | ||
| "available_providers": available_providers, | ||
| "model_info": { | ||
| "name": llm.model_name if hasattr(llm, 'model_name') else "gemini-2.0-flash", | ||
| "provider": current_provider | ||
| } | ||
| "llm_config": config.model_dump(), | ||
| "available_backends": AVAILABLE_BACKENDS, | ||
| } | ||
|
|
||
| @router.post("/switch-provider") | ||
| async def switch_provider(provider_data: ProviderSwitch): | ||
| global current_provider, llm | ||
|
|
||
| if provider_data.provider not in available_providers: | ||
| raise HTTPException(status_code=400, detail=f"Unknown provider: {provider_data.provider}. Available: {available_providers}") | ||
|
|
||
| try: | ||
| current_provider = provider_data.provider | ||
| new_llm = create_llm_client(current_provider) | ||
| llm = new_llm | ||
|
|
||
| chat_orchestrator.llm_client = new_llm | ||
|
|
||
| new_personas = get_default_personas(new_llm) | ||
| # Clear only non-BrainForge personas; BF advisors have their own LLM clients | ||
| non_bf_ids = [pid for pid in chat_orchestrator.personas if not pid.startswith(f"{BRAINFORGE_PERSONA_PREFIX}_")] | ||
| for pid in non_bf_ids: | ||
| chat_orchestrator.unregister_persona(pid) | ||
| for persona in new_personas: | ||
| chat_orchestrator.register_persona(persona) | ||
|
|
||
| return { | ||
| "message": f"Successfully switched to {current_provider}", | ||
| "current_provider": current_provider, | ||
| "model_info": { | ||
| "name": new_llm.model_name if hasattr(new_llm, 'model_name') else "gemini-2.0-flash", | ||
| "provider": current_provider | ||
| } | ||
| } | ||
|
|
||
| except Exception as e: | ||
| raise HTTPException(status_code=500, detail=f"Failed to switch to {provider_data.provider}: {str(e)}") | ||
|
|
||
| @router.post("/switch-model") | ||
| async def switch_model(model_name: str = Body(...)): | ||
| if "gemini" in model_name.lower(): | ||
| return await switch_provider(ProviderSwitch(provider="gemini")) | ||
| else: | ||
| return await switch_provider(ProviderSwitch(provider="ollama")) | ||
| @router.post("/switch-provider") | ||
| async def switch_provider( | ||
| llm_config: UserLLMConfig, | ||
|
NeonDaniel marked this conversation as resolved.
|
||
| current_user: User = Depends(get_current_active_user), | ||
| ): | ||
| """Persist the user's LLM configuration to their profile.""" | ||
| if llm_config.mode == "hybrid" and llm_config.persona_backends: | ||
| registered = set(chat_orchestrator.personas.keys()) | ||
| unknown = set(llm_config.persona_backends.keys()) - registered | ||
| if unknown: | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail=f"Unknown persona IDs: {sorted(unknown)}. " | ||
| f"Valid IDs: {sorted(registered)}", | ||
| ) | ||
|
|
||
| backends_to_check = {llm_config.default_backend} | ||
| if llm_config.orchestrator_backend: | ||
| backends_to_check.add(llm_config.orchestrator_backend) | ||
| if llm_config.persona_backends: | ||
| backends_to_check.update(llm_config.persona_backends.values()) | ||
|
|
||
| for backend in backends_to_check: | ||
| if not _is_backend_enabled(backend): | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail=f"Backend {backend!r} is disabled by the administrator.", | ||
| ) | ||
| try: | ||
| get_llm_client(backend) | ||
| except Exception as exc: | ||
| raise HTTPException( | ||
| status_code=status.HTTP_400_BAD_REQUEST, | ||
| detail=f"Backend {backend!r} is not configured: {exc}", | ||
| ) | ||
|
|
||
| db = get_database() | ||
| await db.users.update_one( | ||
| {"_id": current_user.id}, | ||
| {"$set": {"llm_config": llm_config.model_dump()}}, | ||
| ) | ||
|
|
||
| @router.get("/current-model") | ||
| async def get_current_model(): | ||
| model_name = llm.model_name if hasattr(llm, 'model_name') else "gemini-2.0-flash" | ||
| return { | ||
| "model": model_name, | ||
| "provider": current_provider | ||
| "message": "LLM configuration updated", | ||
| "llm_config": llm_config.model_dump(), | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.