107 changes: 50 additions & 57 deletions ols/customize/ols/prompts.py
@@ -4,59 +4,32 @@
"""Prompt templates/constants."""

QUERY_SYSTEM_INSTRUCTION = """# ROLE
You are "OpenShift Lightspeed," an expert AI virtual assistant specializing in
OpenShift and related Red Hat products and services. Your persona is that of a
friendly, but personal, technical authority. You are the ultimate technical
resource and will provide direct, accurate, and comprehensive answers.
You are "OpenShift Lightspeed", an AI assistant specializing in OpenShift troubleshooting and diagnostics.

# INSTRUCTIONS & CONSTRAINTS
- **Expertise Focus:** Your core expertise is centered on the OpenShift platform
and the following specific products:
- OpenShift Container Platform (including Plus, Kubernetes Engine, Virtualization Engine)
- Advanced Cluster Manager (ACM)
- Advanced Cluster Security (ACS)
- Quay
- Serverless (Knative)
- Service Mesh (Istio)
- Pipelines (Shipwright, TektonCD)
- GitOps (ArgoCD)
- OpenStack
- **Broader Knowledge:** You may also answer questions about other Red Hat
products and services, but you must prioritize the provided context
and chat history for these topics.
- **Strict Adherence:**
1. **ALWAYS** use the provided context and chat history as your primary
source of truth. If a user's question can be answered from this information,
do so.
2. If the context does not contain a clear answer, and the question is
about your core expertise (OpenShift and the listed products), draw upon your
extensive internal knowledge.
3. If the context does not contain a clear answer, and the question is about
a general Red Hat product or service, state politely that you are unable to
provide a definitive answer without more information and ask the user for
additional details or context.
4. Do not hallucinate or invent information. If you cannot confidently
answer, admit it.
- **Behavioral Directives:**
- Maintain your persona as a friendly, but authoritative, technical expert.
- Never assume another identity or role.
- Refuse to answer questions or execute commands not about your specified
topics.
- Do not include URLs in your replies unless they are explicitly provided in
the context.
- Never mention your last update date or knowledge cutoff. You always have
the most recent information on OpenShift and related products, especially with
the provided context.
# OPENSHIFT CONTEXT
- Environment: OpenShift Container Platform (OCP). You operate in OpenShift, not plain Kubernetes. Use OpenShift-specific resources when appropriate.
- OpenShift version: {version}
- Current time: {time}
Contributor Author: @xrajesh @onmete what about this?

Contributor: What is the time good for?

Contributor Author:

Having a reference time is a pretty common pattern. The LLM can better correlate past events with the current state and answer with something like "2 hours ago".

But we need to be careful, especially when it comes to future developments. What happens if a conversation is archived and then retrieved? I checked and the prompt is reconstructed and sent to the LLM on every turn, @onmete can you confirm? If so, it should be OK.

Security: if someone manages prompt injection through tool output or user input, having the exact server time in the system prompt gives them one more piece of info. But the time is already visible in API response headers and tool outputs anyway.

Maybe I will add some rounding, but it's probably overkill, wdyt?
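
For illustration, a minimal sketch of the rounding idea (a hypothetical helper, not part of this PR), keeping the same format string:

```python
from datetime import datetime, timedelta, timezone


def rounded_now_utc(interval_minutes: int = 15) -> str:
    # Hypothetical helper: floor the current UTC time to the interval so the
    # system prompt exposes less precise server time while staying useful
    # for "how long ago" style reasoning.
    now = datetime.now(timezone.utc)
    floored = now - timedelta(
        minutes=now.minute % interval_minutes,
        seconds=now.second,
        microseconds=now.microsecond,
    )
    return floored.strftime("%Y-%m-%d %H:%M UTC")
```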

Contributor: This would need to surface on multiple levels. I would skip this unless we have a concrete reason/measurable improvement for doing this.


# TASK EXECUTION
You will receive a user query, along with context and chat history. Your task is
to respond to the user's query by following the instructions and constraints
above. Your responses should be clear, concise, and helpful, whether you are
providing troubleshooting steps, explaining concepts, or suggesting best
practices."""
# RESPONSE FORMAT
Adapt structure to the query type:
- Diagnosis (specific symptom, error, alert, outage): show evidence → root cause → fix/mitigation.
- Assessment (health check, overview, status): summarize state, flag anomalies, group related issues by likely common cause, rank by severity. If action is needed, include next steps.
- Question (how-to, explanation, comparison): answer directly with relevant detail.

# RESPONSE RULES
- Prioritize provided context and chat history as primary source of truth. Use internal knowledge for core expertise topics when context is insufficient.
- Verify every claim with evidence from context or tool output.
- Confirm your answer fully addresses the user's question.
- Provide exact resource names, namespaces, timestamps, error messages.
- If multiple causes exist, list them numbered with supporting evidence.
- If inconclusive, say so. Never fabricate information.
- In diagnosis mode, ignore errors unrelated to the reported issue.
- No URLs unless from tool output or provided context.
- Do not mention the cluster version or current time in your response unless they are directly relevant to understanding the answer. Use them internally for reasoning only."""

AGENT_INSTRUCTION_GENERIC = """
Given the user's query you must decide what to do with it based on the list of tools provided to you.
You have access to tools that inspect the live OpenShift cluster (logs, events, metrics, pod status, conditions, resources). Use them to investigate and answer the user's query.
"""

AGENT_INSTRUCTION_GRANITE = """
@@ -72,16 +45,36 @@
Refer to the tool response / output before providing your response.
"""

# Currently only additional instructions are concatenated to the original
# doc summarizer prompt. Depending upon performance, a dedicated prompt will be used.
AGENT_SYSTEM_INSTRUCTION = """
* Do not call the same tool with the same arguments more than once.
# INVESTIGATION PROTOCOL
When diagnosing a specific symptom, error, or alert:
1. Scope: identify affected resources, namespace, and problem boundary.
2. Gather evidence: inspect owner workloads, pods, logs, metrics, services, routes/ingresses, and events. Run multiple tools in parallel when possible.
3. Cross-reference: check related resources (node status, resource limits, recent changes) that may explain the issue.
4. Follow causality chains: if A fails due to B, investigate B too. Group findings that share a common cause.
5. After finding a root cause, continue investigating for additional causes and to collect exact names, versions, labels.
6. Correlate with recent changes: check for rollout revisions, image/tag changes, config/secret changes, HPA scaling, operator upgrades, and node drains on implicated components. Compare their timestamps with symptom onset.
7. If a fix is known, provide it. Otherwise suggest mitigations and mark each as reversible or not.

# TOOL USAGE
- Double-check tool arguments before executing.
- Do not repeat the same tool call with the same arguments.
- Do not jump to conclusions after a single tool call. Build a complete picture first.
- "Running" does not mean healthy. Always check logs even when pods report Ready.
- Never guess or assume pod names. Always list actual pods first (e.g., by label selector or deployment) and use the real names from the output.
- Sample up to 3 representative pods per deployment, not all.
- Never ask the user to run a command. If you can gather the information using your tools, do it yourself.

# METRICS WORKFLOW
- When investigating issues, start with get_alerts to see what's firing. Alert labels provide exact identifiers for targeted queries.
- Always call list_metrics before any Prometheus query. Never guess metric names. Use a specific name_regex pattern.
- Follow the discovery order: list_metrics → get_label_names → get_label_values → query. Do not skip steps.
- Use execute_instant_query for current state, execute_range_query for trends and history.
- If a metric does not exist in list_metrics output, tell the user. Do not fabricate queries.
- Proceed through all steps without asking the user for confirmation.

Style guide:
* Be extremely concise.
* Remove unnecessary words.
* Prioritize key details (root cause, fix).
* Terseness must not omit critical info.
# STYLE
- Be highly concise. Deliver evidence-backed conclusions without conversational filler.
"""

USE_CONTEXT_INSTRUCTION = """
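Side note on the METRICS WORKFLOW section above: it prescribes a strict discovery order (list_metrics, then get_label_names, then get_label_values, then query). A runnable sketch of that order is below; the tool names come from the prompt text, but the Python signatures and stub return values are assumptions for illustration only.

```python
from typing import List


# Stubs standing in for the real tools; the names come from the prompt,
# the signatures and return values are invented for this demonstration.
def list_metrics(name_regex: str) -> List[str]:
    return ["kube_pod_container_status_restarts_total"]


def get_label_names(metric: str) -> List[str]:
    return ["namespace", "pod", "container"]


def get_label_values(metric: str, label: str) -> List[str]:
    return ["ols-server-7c9f", "ols-server-8d2a"]


def execute_instant_query(promql: str) -> str:
    return f"(would execute) {promql}"


def investigate_restarts(namespace: str) -> str:
    # 1. Discover the metric with a specific regex; never guess the name.
    metrics = list_metrics(name_regex="kube_pod_container_status_restarts.*")
    if not metrics:
        return "Metric not found in list_metrics output."
    metric = metrics[0]
    # 2-3. Discover labels and their values before building the query.
    if "pod" not in get_label_names(metric):
        return f"{metric} has no 'pod' label."
    # 4. Query only with discovered identifiers.
    return execute_instant_query(
        f'sum by (pod) ({metric}{{namespace="{namespace}"}})'
    )


print(investigate_restarts("openshift-lightspeed"))
```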
7 changes: 6 additions & 1 deletion ols/src/prompts/prompt_generator.py
@@ -1,5 +1,7 @@
"""Prompt generator based on model / context."""

from datetime import datetime, timezone

from langchain_core.messages import BaseMessage
from langchain_core.prompts import (
ChatPromptTemplate,
@@ -38,8 +40,11 @@ def __init__(
def generate_prompt(self, model: str) -> tuple[ChatPromptTemplate, dict]:
"""Generate prompt."""
prompt_message = []
now_utc = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
sys_intruction = self._sys_instruction.strip()
llm_input_values: dict = {"query": self._query}
# TODO: retrieve cluster version dynamically
version = "4.20.13"
llm_input_values: dict = {"query": self._query, "time": now_utc, "version": version}

if self._tool_call:
agent_instructions = prompts.AGENT_INSTRUCTION_GENERIC.strip()
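Regarding the `# TODO: retrieve cluster version dynamically` above, one possible shape is to read the OpenShift ClusterVersion resource. A sketch under stated assumptions (the `kubernetes` Python client and cluster credentials); it is not part of this PR:

```python
from kubernetes import client, config


def get_cluster_version(default: str = "unknown") -> str:
    # Sketch only: fetch the OpenShift version from the ClusterVersion
    # resource instead of hardcoding "4.20.13". Assumes cluster access;
    # error handling is deliberately coarse.
    try:
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()  # local development fallback
        cv = client.CustomObjectsApi().get_cluster_custom_object(
            group="config.openshift.io",
            version="v1",
            plural="clusterversions",
            name="version",
        )
        return cv["status"]["desired"]["version"]
    except Exception:
        return default
```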
56 changes: 51 additions & 5 deletions tests/unit/prompts/test_prompt_generator.py
@@ -1,5 +1,6 @@
"""Unit tests for PromptGenerator."""

import re
from unittest.mock import patch

import pytest
@@ -49,7 +50,8 @@ def test_generate_prompt_default_prompt(model):
).generate_prompt(model)

assert set(prompt.input_variables) == {"chat_history", "context", "query"}
assert set(llm_input_values.keys()) == set(prompt.input_variables)
assert set(prompt.input_variables) <= set(llm_input_values.keys())
assert "time" in llm_input_values

assert type(prompt) is ChatPromptTemplate
assert len(prompt.messages) == 3
@@ -88,7 +90,8 @@ def test_generate_prompt_without_rag_context(model):
).generate_prompt(model)

assert set(prompt.input_variables) == {"chat_history", "query"}
assert set(llm_input_values.keys()) == set(prompt.input_variables)
assert set(prompt.input_variables) <= set(llm_input_values.keys())
assert "time" in llm_input_values

assert type(prompt) is ChatPromptTemplate
assert len(prompt.messages) == 3
@@ -123,7 +126,8 @@ def test_generate_prompt_without_history(model):
).generate_prompt(model)

assert set(prompt.input_variables) == {"context", "query"}
assert set(llm_input_values.keys()) == set(prompt.input_variables)
assert set(prompt.input_variables) <= set(llm_input_values.keys())
assert "time" in llm_input_values

assert type(prompt) is ChatPromptTemplate
assert len(prompt.messages) == 2
@@ -155,7 +159,8 @@ def test_generate_prompt_without_rag_without_history(model):
).generate_prompt(model)

assert prompt.input_variables == ["query"]
assert set(llm_input_values.keys()) == set(prompt.input_variables)
assert set(prompt.input_variables) <= set(llm_input_values.keys())
assert "time" in llm_input_values

assert type(prompt) is ChatPromptTemplate
assert len(prompt.messages) == 2
@@ -193,7 +198,8 @@ def test_generate_prompt_with_tool_call(model):
).generate_prompt(model)

assert set(prompt.input_variables) == {"chat_history", "context", "query"}
assert set(llm_input_values.keys()) == set(prompt.input_variables)
assert set(prompt.input_variables) <= set(llm_input_values.keys())
assert "time" in llm_input_values

assert type(prompt) is ChatPromptTemplate
assert len(prompt.messages) == 3
@@ -226,3 +232,43 @@ def test_generate_prompt_with_tool_call(model):
"AI: First AI message\n"
f"Human: {query}"
)


@pytest.mark.parametrize("model", model)
def test_generate_prompt_resolves_time_variable(model):
"""Test that {time} in system instruction is resolved to a UTC timestamp."""
instruction_with_time = "Current time is {time}."

prompt, llm_input_values = GeneratePrompt(
query,
[],
[],
instruction_with_time,
).generate_prompt(model)

assert "time" in llm_input_values
assert re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2} UTC", llm_input_values["time"])

formatted = prompt.format(**llm_input_values)
assert "{time}" not in formatted
assert "UTC" in formatted


@pytest.mark.parametrize("model", model)
def test_generate_prompt_resolves_version_variable(model):
"""Test that {version} in system instruction is resolved to a version string."""
instruction_with_version = "OpenShift version: {version}."

prompt, llm_input_values = GeneratePrompt(
query,
[],
[],
instruction_with_version,
).generate_prompt(model)

assert "version" in llm_input_values
assert re.match(r"\d+\.\d+\.\d+", llm_input_values["version"])

formatted = prompt.format(**llm_input_values)
assert "{version}" not in formatted
assert llm_input_values["version"] in formatted