
Commit eb29833

feat: update OpenRouter model and fallback options; enhance Streamlit UI with model selection and testing features
1 parent a0ba0b4 commit eb29833

5 files changed

Lines changed: 271 additions & 61 deletions


.env.example

Lines changed: 3 additions & 1 deletion
@@ -18,4 +18,6 @@ OPENAI_MODEL=gpt-4o-mini
 
 # ── OpenRouter (free alternative — https://openrouter.ai/keys) ──
 OPENROUTER_API_KEY=
-OPENROUTER_MODEL=meta-llama/llama-3.1-8b-instruct:free
+OPENROUTER_MODEL=stepfun/step-3.5-flash:free
+# Optional comma-separated fallback models (used when selected model has no endpoint)
+OPENROUTER_FALLBACK_MODELS=arcee-ai/trinity-large-preview:free,qwen/qwen3-next-80b-a3b-instruct:free,openai/gpt-oss-120b:free
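For reference, a tiny sketch of how these two variables are read on the Python side; it mirrors the parsing this commit adds in `ai_agent/llm_provider.py` (shown further down):

```python
# Sketch of how OPENROUTER_MODEL / OPENROUTER_FALLBACK_MODELS are consumed;
# mirrors the parsing added in ai_agent/llm_provider.py below.
import os

model = os.getenv("OPENROUTER_MODEL", "stepfun/step-3.5-flash:free")
raw = os.getenv("OPENROUTER_FALLBACK_MODELS", "")
fallbacks = [m.strip() for m in raw.split(",") if m.strip()]
print(model, fallbacks)
```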

.github/copilot-instructions.md

Lines changed: 2 additions & 2 deletions
@@ -368,7 +368,7 @@ pipeline:
 ### LLM Provider Configuration
 
 - `LLM_PROVIDER=openai` → uses OpenAI API (requires `OPENAI_API_KEY`)
-- `LLM_PROVIDER=openrouter` → uses OpenRouter API gateway (requires `OPENROUTER_API_KEY`). Supports 200+ models including free ones (e.g., `meta-llama/llama-3.1-8b-instruct:free`). OpenAI-compatible API via `openai` Python package with custom `base_url`. Get a free key at https://openrouter.ai/keys.
+- `LLM_PROVIDER=openrouter` → uses OpenRouter API gateway (requires `OPENROUTER_API_KEY`). Supports 200+ models including free ones (e.g., `stepfun/step-3.5-flash:free`). OpenAI-compatible API via `openai` Python package with custom `base_url`. Get a free key at https://openrouter.ai/keys.
 - `LLM_PROVIDER=local` → uses the local HuggingFace text-completion-llm-service
 - Factory: `create_llm_provider(provider=None)` reads env var if not specified
 
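Since the bullet above describes OpenRouter as an OpenAI-compatible gateway reached through the `openai` package with a custom `base_url`, here is a minimal sketch of that wiring (the API key and prompt are placeholders, not taken from this commit):

```python
# Minimal sketch: routing the openai client through OpenRouter's OpenAI-compatible API.
# The key and prompt are placeholders; base_url is OpenRouter's documented endpoint.
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)
response = client.chat.completions.create(
    model="stepfun/step-3.5-flash:free",
    messages=[{"role": "user", "content": "Summarize what an ETL pipeline does."}],
)
print(response.choices[0].message.content)
```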
@@ -464,7 +464,7 @@ Single bridge network `etl-network`. Services reference each other by container
 | `OPENAI_API_KEY` | — | OpenAI API key |
 | `OPENAI_MODEL` | `gpt-4o-mini` | OpenAI model name |
 | `OPENROUTER_API_KEY` | — | OpenRouter API key (free at https://openrouter.ai/keys) |
-| `OPENROUTER_MODEL` | `meta-llama/llama-3.1-8b-instruct:free` | OpenRouter model identifier |
+| `OPENROUTER_MODEL` | `stepfun/step-3.5-flash:free` | OpenRouter model identifier |
 
 ---

README.md

Lines changed: 9 additions & 3 deletions
@@ -68,6 +68,8 @@ After execution, switch to the **Datasets** tab to browse output files, preview
 
 ### New in Streamlit UX
 
+- **Dual prompt modes** in Pipeline Editor: `Guided` textarea and `Chat-style` conversational input
+- **OpenRouter model semaphore** in sidebar: one-click model reachability check before generation
 - **Platform Readiness** panel in Execution tab: live checks for Airflow, Streamlit, Prometheus, Grafana, including Airflow scheduler heartbeat status
 - **Quick Airflow Triggers** in Execution tab: trigger `hr_analytics_pipeline`, `ecommerce_pipeline`, or `weather_api_pipeline` without leaving Streamlit
 - **Execution insights**: successful steps, processed data volume, slowest step, and orchestration overhead (%)
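The "OpenRouter model semaphore" bullet above is a pre-generation reachability check. A hedged sketch of one way to perform it against OpenRouter's public model listing (`/api/v1/models`); the helper name is illustrative, not necessarily what the sidebar actually calls:

```python
# Illustrative reachability check against OpenRouter's public model listing.
# check_model_reachable() is a hypothetical helper, not code from this commit.
import requests

def check_model_reachable(model_id: str, timeout: float = 5.0) -> bool:
    resp = requests.get("https://openrouter.ai/api/v1/models", timeout=timeout)
    resp.raise_for_status()
    listed = {entry["id"] for entry in resp.json().get("data", [])}
    return model_id in listed

print(check_model_reachable("stepfun/step-3.5-flash:free"))
```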
@@ -91,7 +93,7 @@ Pipeline Compiler → executes steps in parallel via Preparator SDK
 Output: cleaned dataset saved in the requested format
 ```
 
-The AI agent supports both **OpenAI** (GPT-4o-mini) and **local HuggingFace** models. The YAML editor and validator work without any API key.
+The AI agent supports **OpenAI**, **OpenRouter**, and **local HuggingFace** models. The YAML editor and validator work without any API key.
 
 ---
 
@@ -297,9 +299,13 @@ Full walkthrough: [docs/extending.md](docs/extending.md)
 
 | Variable | Default | Description |
 |---|---|---|
-| `LLM_PROVIDER` | `openai` | AI agent provider (`openai` or `local`) |
+| `LLM_PROVIDER` | `openai` | AI agent provider (`openai`, `openrouter`, or `local`) |
 | `OPENAI_API_KEY` | — | Required if `LLM_PROVIDER=openai` |
 | `OPENAI_MODEL` | `gpt-4o-mini` | OpenAI model |
+| `OPENROUTER_API_KEY` | — | Required if `LLM_PROVIDER=openrouter` |
+| `OPENROUTER_MODEL` | `stepfun/step-3.5-flash:free` | Default OpenRouter model |
+| `OPENROUTER_FALLBACK_MODELS` | `arcee-ai/trinity-large-preview:free,...` | Comma-separated fallback models if selected model is unavailable |
+| `LOCAL_LLM_URL` | `http://localhost:5012` | Local text-completion service URL when running Streamlit on host |
 | `ETL_DATA_ROOT` | `/app/data` | Base directory for datasets and metadata |
 | `ALLOW_PRIVATE_API_URLS` | `false` | Allow private/local API targets in extract-api |
 
@@ -314,7 +320,7 @@ See [`.env.example`](.env.example) for all available variables including databas
 | Microservices | Python 3.9, Flask, Gunicorn |
 | Data Format | Apache Arrow IPC (streaming) |
 | Orchestration | Apache Airflow |
-| AI Agent | OpenAI / HuggingFace Transformers |
+| AI Agent | OpenAI / OpenRouter / HuggingFace Transformers |
 | UI | Streamlit |
 | Containers | Docker, Docker Compose (PostgreSQL 16, Airflow 2.10.4) |
 | Monitoring | Prometheus + Grafana |

ai_agent/llm_provider.py

Lines changed: 68 additions & 11 deletions
@@ -18,6 +18,25 @@
 
 logger = logging.getLogger("ai_agent.llm_provider")
 
+DEFAULT_OPENROUTER_MODEL = "stepfun/step-3.5-flash:free"
+DEFAULT_OPENROUTER_FALLBACK_MODELS = [
+    "arcee-ai/trinity-large-preview:free",
+    "qwen/qwen3-next-80b-a3b-instruct:free",
+    "openai/gpt-oss-120b:free",
+    "openai/gpt-4o-mini",
+]
+
+
+def _default_local_llm_url() -> str:
+    """Choose sensible local LLM URL based on runtime context.
+
+    In Docker, services reach each other via container DNS.
+    On host runs (e.g., Streamlit launched from a venv), localhost is expected.
+    """
+    if os.path.exists("/.dockerenv"):
+        return "http://text-completion-llm-service:5012"
+    return "http://localhost:5012"
+
 
 class LLMProvider(ABC):
     """Abstract base class for LLM providers."""
@@ -92,7 +111,7 @@ def __init__(self, model: str = None, api_key: str = None):
         except ImportError:
             raise ImportError("openai package not installed. Run: pip install openai")
 
-        self.model = model or os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct:free")
+        self.model = model or os.getenv("OPENROUTER_MODEL", DEFAULT_OPENROUTER_MODEL)
         api_key = api_key or os.getenv("OPENROUTER_API_KEY")
         if not api_key:
             raise ValueError(
@@ -108,21 +127,61 @@ def __init__(self, model: str = None, api_key: str = None):
                 "X-Title": "ArrowFlow ETL Platform",
             },
         )
+
+        raw_fallback = os.getenv("OPENROUTER_FALLBACK_MODELS", "")
+        configured_fallbacks = [m.strip() for m in raw_fallback.split(",") if m.strip()]
+        self.fallback_models = configured_fallbacks or DEFAULT_OPENROUTER_FALLBACK_MODELS
         logger.info(f"OpenRouter provider initialized with model: {self.model}")
 
+    @staticmethod
+    def _is_model_unavailable_error(exc: Exception) -> bool:
+        msg = str(exc)
+        return "No endpoints found for" in msg or "model not found" in msg.lower()
+
     def generate(self, prompt: str, system_prompt: str = "", temperature: float = 0.3, max_tokens: int = 2048) -> str:
         messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         messages.append({"role": "user", "content": prompt})
 
-        response = self.client.chat.completions.create(
-            model=self.model,
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-        )
-        return response.choices[0].message.content
+        def _call_chat(model_name: str) -> str:
+            response = self.client.chat.completions.create(
+                model=model_name,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+            )
+            return response.choices[0].message.content
+
+        try:
+            return _call_chat(self.model)
+        except Exception as exc:
+            if not self._is_model_unavailable_error(exc):
+                raise
+
+            tried = [self.model]
+            for candidate in self.fallback_models:
+                if candidate == self.model:
+                    continue
+                tried.append(candidate)
+                try:
+                    logger.warning(
+                        "OpenRouter model '%s' unavailable, retrying with fallback '%s'",
+                        self.model,
+                        candidate,
+                    )
+                    content = _call_chat(candidate)
+                    self.model = candidate
+                    return content
+                except Exception as fallback_exc:
+                    if not self._is_model_unavailable_error(fallback_exc):
+                        raise
+
+            raise ValueError(
+                "OpenRouter model unavailable. Tried: "
+                + ", ".join(tried)
+                + ". Select another model in Streamlit or set OPENROUTER_FALLBACK_MODELS."
+            ) from exc
 
     def name(self) -> str:
         return f"OpenRouter ({self.model})"
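The net effect of the new `generate` path: try the configured model first, and only walk the fallback list when the error looks like a missing endpoint. A usage sketch, assuming the class is importable as `OpenRouterProvider` (the diff shows its methods but not its name) and with placeholder key and prompt values:

```python
# Usage sketch of the fallback behavior above. OpenRouterProvider is an assumed class
# name; set OPENROUTER_API_KEY and (optionally) OPENROUTER_FALLBACK_MODELS beforehand,
# since the fallback list is read in __init__.
import os
from ai_agent.llm_provider import OpenRouterProvider  # assumed class name

os.environ.setdefault("OPENROUTER_FALLBACK_MODELS", "qwen/qwen3-next-80b-a3b-instruct:free")

provider = OpenRouterProvider(model="stepfun/step-3.5-flash:free")
yaml_text = provider.generate(
    prompt="Create a pipeline that loads sales.csv and drops null rows",
    system_prompt="You generate ETL pipeline YAML.",
)
print(provider.name())  # reports the fallback model if the default had no endpoint
```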
@@ -134,9 +193,7 @@ class LocalProvider(LLMProvider):
     def __init__(self, service_url: str = None):
         import requests
         self.session = requests.Session()
-        self.service_url = service_url or os.getenv(
-            "LOCAL_LLM_URL", "http://text-completion-llm-service:5012"
-        )
+        self.service_url = service_url or os.getenv("LOCAL_LLM_URL") or _default_local_llm_url()
         logger.info(f"Local LLM provider initialized with URL: {self.service_url}")
 
     def generate(self, prompt: str, system_prompt: str = "", temperature: float = 0.3, max_tokens: int = 2048) -> str:
