From 9ae1a667c50e85d130cbe16e2059aea5bd3836ae Mon Sep 17 00:00:00 2001 From: Joao Fula Date: Fri, 23 Jan 2026 11:30:10 +0100 Subject: [PATCH 1/6] Adding adaptation to lcore tests deactivating more tests --- tests/e2e/conftest.py | 13 +- tests/e2e/pytest.ini | 1 + tests/e2e/test_api.py | 105 +++++++--- tests/e2e/test_attachments.py | 11 +- tests/e2e/test_query_endpoint.py | 36 ++-- tests/e2e/utils/adapt_ols_config.py | 221 ++++++---------------- tests/e2e/utils/cluster.py | 6 +- tests/e2e/utils/constants.py | 3 + tests/e2e/utils/data_collector_control.py | 8 +- tests/e2e/utils/metrics.py | 33 ++-- tests/e2e/utils/ols_installer.py | 210 +++++++++++++++----- tests/e2e/utils/wait_for_ols.py | 15 +- tests/scripts/test-e2e-cluster.sh | 2 +- 13 files changed, 387 insertions(+), 277 deletions(-) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 9cb96e4f7..591feeeef 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -94,7 +94,7 @@ def pytest_sessionstart(): # Wait for OLS to be ready print(f"Waiting for OLS to be ready at url: {ols_url} with provider: {provider}...") - OLS_READY = wait_for_ols(ols_url) + OLS_READY = wait_for_ols(ols_url, pytest.client) print(f"OLS is ready: {OLS_READY}") # Gather OLS artifacts in case OLS does not become ready if on_cluster and not OLS_READY: @@ -131,6 +131,17 @@ def pytest_runtest_makereport(item, call) -> TestReport: return TestReport.from_item_and_call(item, call) +def pytest_collection_modifyitems(config, items): + """Skip tests marked with @pytest.mark.skip_with_lcore when LCORE is enabled.""" + lcore_enabled = os.getenv("LCORE", "False").lower() in ("true", "1", "t") + + if lcore_enabled: + skip_lcore = pytest.mark.skip(reason="LCORE environment variable is enabled") + for item in items: + if "skip_with_lcore" in item.keywords: + item.add_marker(skip_lcore) + + def pytest_addoption(parser): """Argument parser for pytest.""" parser.addoption( diff --git a/tests/e2e/pytest.ini b/tests/e2e/pytest.ini index 
8bc377654..0e3bcc8d5 100644 --- a/tests/e2e/pytest.ini +++ b/tests/e2e/pytest.ini @@ -17,3 +17,4 @@ markers = byok2 quota_limits data_export + skip_with_lcore: marks tests to skip when LCORE environment variable is enabled diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index 2f4178753..e29bba33a 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -7,6 +7,7 @@ import json import re import time +import os import pytest import requests @@ -26,6 +27,7 @@ NON_LLM_REST_API_TIMEOUT, OLS_USER_DATA_COLLECTION_INTERVAL_SHORT, OLS_USER_DATA_PATH, + OLS_SERVICE_DEPLOYMENT ) from tests.e2e.utils.data_collector_control import prepare_for_data_collection_test from tests.e2e.utils.decorators import retry @@ -36,6 +38,7 @@ ) + @pytest.fixture(name="postgres_connection", scope="module") def fixture_postgres_connection(): """Fixture with Postgres connection.""" @@ -48,10 +51,13 @@ def test_readiness(): """Test handler for /readiness REST API endpoint.""" endpoint = "/readiness" with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, endpoint): - response = pytest.client.get(endpoint, timeout=LLM_REST_API_TIMEOUT) + response = pytest.client.get( + endpoint, + timeout=LLM_REST_API_TIMEOUT + ) assert response.status_code == requests.codes.ok response_utils.check_content_type(response, "application/json") - assert response.json() == {"ready": True, "reason": "service is ready"} + assert response.json() == {"ready": True, "reason": "All providers are healthy", "providers": []} @pytest.mark.smoketest @@ -60,7 +66,10 @@ def test_liveness(): """Test handler for /liveness REST API endpoint.""" endpoint = "/liveness" with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, endpoint): - response = pytest.client.get(endpoint, timeout=BASIC_ENDPOINTS_TIMEOUT) + response = pytest.client.get( + endpoint, + timeout=BASIC_ENDPOINTS_TIMEOUT + ) assert response.status_code == requests.codes.ok response_utils.check_content_type(response, 
"application/json") assert response.json() == {"alive": True} @@ -82,6 +91,16 @@ def test_metrics() -> None: "ols_llm_token_received_total", "ols_provider_model_configuration", ) + if os.getenv("LCORE", "False").lower() in ("true", "1", "t"): + expected_counters = ( + "ls_rest_api_calls_total", + "ls_llm_calls_total", + "ls_llm_calls_failures_total", + "ls_llm_validation_errors_total", + "ls_llm_token_sent_total", + "ls_llm_token_received_total", + "ls_provider_model_configuration", + ) # check if all counters are present for expected_counter in expected_counters: @@ -92,12 +111,12 @@ def test_metrics() -> None: assert 'response_duration_seconds_sum{path="/metrics"}' in response.text +@pytest.mark.skip_with_lcore def test_model_provider(): """Read configured model and provider from metrics.""" model, provider = metrics_utils.get_enabled_model_and_provider( pytest.metrics_client ) - # enabled model must be one of our expected combinations assert model, provider in { ("gpt-4o-mini", "openai"), @@ -106,6 +125,7 @@ def test_model_provider(): } +@pytest.mark.skip_with_lcore def test_one_default_model_provider(): """Check if one model and provider is selected as default.""" states = metrics_utils.get_enable_status_for_all_models(pytest.metrics_client) @@ -115,6 +135,7 @@ def test_one_default_model_provider(): ), "one model and provider should be selected as default" +@pytest.mark.skip_with_lcore @pytest.mark.cluster def test_improper_token(): """Test accessing /v1/query endpoint using improper auth. token.""" @@ -127,6 +148,7 @@ def test_improper_token(): assert response.status_code == requests.codes.forbidden +@pytest.mark.skip_with_lcore @pytest.mark.cluster def test_forbidden_user(): """Test scenarios where we expect an unauthorized response. 
@@ -175,7 +197,6 @@ def test_transcripts_storing_cluster(): timeout=LLM_REST_API_TIMEOUT, ) assert response.status_code == requests.codes.ok - transcript = cluster_utils.get_single_existing_transcript( pod_name, transcripts_path ) @@ -209,13 +230,13 @@ def test_transcripts_storing_cluster(): assert transcript["tool_calls"] == [] +@pytest.mark.skip_with_lcore @retry(max_attempts=3, wait_between_runs=10) def test_openapi_endpoint(): """Test handler for /opanapi REST API endpoint.""" response = pytest.client.get("/openapi.json", timeout=BASIC_ENDPOINTS_TIMEOUT) assert response.status_code == requests.codes.ok response_utils.check_content_type(response, "application/json") - payload = response.json() assert payload is not None, "Incorrect response" @@ -228,7 +249,7 @@ def test_openapi_endpoint(): # check application description info = payload["info"] assert "description" in info, "Service description not provided" - assert f"{metadata.SERVICE_NAME} service API specification" in info["description"] + assert "Lightspeed Core Service (LCS) service API specification" in info["description"] # elementary check that all mandatory endpoints are covered paths = payload["paths"] @@ -252,7 +273,6 @@ def test_cache_existence(postgres_connection): """Test the cache existence.""" if postgres_connection is None: pytest.skip("Postgres is not accessible.") - value = read_conversation_history_count(postgres_connection) # check if history exists at all assert value is not None @@ -330,7 +350,6 @@ def filter_logs(logs: str, last_log_line: str) -> str: def get_last_log_line(logs: str) -> str: return [line for line in logs.split("\n") if line][-1] - # Prepare: patch to manual mode, set short interval, configure stage ingress controller = prepare_for_data_collection_test( short_interval_seconds=OLS_USER_DATA_COLLECTION_INTERVAL_SHORT @@ -361,7 +380,6 @@ def get_last_log_line(logs: str) -> str: # Get log point for next check last_log_line = get_last_log_line(container_log) - # Create new data 
via feedback endpoint response = pytest.client.post( "/v1/feedback", @@ -397,6 +415,7 @@ def get_last_log_line(logs: str) -> str: assert user_data == [] +@pytest.mark.skip_with_lcore @pytest.mark.cluster def test_http_header_redaction(): """Test that sensitive HTTP headers are redacted from the logs.""" @@ -413,7 +432,7 @@ def test_http_header_redaction(): assert response.json() == {"alive": True} container_log = cluster_utils.get_container_log( - cluster_utils.get_pod_by_prefix()[0], "lightspeed-service-api" + cluster_utils.get_pod_by_prefix()[0], "lightspeed-stack" ) for header in HTTP_REQUEST_HEADERS_TO_REDACT: @@ -479,7 +498,7 @@ def test_ca_service_certs_rotation(): name="lightspeed-operator-controller-manager", namespace="openshift-lightspeed" ) cluster_utils.restart_deployment( - name="lightspeed-app-server", namespace="openshift-lightspeed" + name=OLS_SERVICE_DEPLOYMENT, namespace="openshift-lightspeed" ) cluster_utils.restart_deployment( name="lightspeed-console-plugin", namespace="openshift-lightspeed" @@ -503,17 +522,56 @@ def update_olsconfig(limiters: list[dict]): limiters: List of dictionaries containing limiter configurations to set in ols_config.quota_handlers.limiters """ - configmap_yaml = cluster_utils.run_oc(["get", "cm/olsconfig", "-o", "yaml"]).stdout - configmap = yaml.safe_load(configmap_yaml) - olsconfig = yaml.safe_load(configmap["data"][DEFAULT_CONFIGURATION_FILE]) - olsconfig["ols_config"]["quota_handlers"]["limiters"] = limiters - configmap["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(olsconfig) - updated_configmap = yaml.dump(configmap) + is_lcore = os.getenv("LCORE", "False").lower() in ("true", "1", "t") + if is_lcore: + # LCORE environment: update lightspeed-stack-config ConfigMap + configmap_name = "lightspeed-stack-config" + config_file_key = "lightspeed-stack.yaml" + configmap_yaml = cluster_utils.run_oc( + ["get", f"cm/{configmap_name}", "-o", "yaml"] + ).stdout + configmap = yaml.safe_load(configmap_yaml) + stack_config = 
yaml.safe_load(configmap["data"][config_file_key]) + stack_config["quota_handlers"]["limiters"] = limiters + configmap["data"][config_file_key] = yaml.dump(stack_config) + else: + # Standard environment: update olsconfig ConfigMap + configmap_name = "olsconfig" + configmap_yaml = cluster_utils.run_oc( + ["get", f"cm/{configmap_name}", "-o", "yaml"] + ).stdout + configmap = yaml.safe_load(configmap_yaml) + olsconfig = yaml.safe_load(configmap["data"][DEFAULT_CONFIGURATION_FILE]) + olsconfig["ols_config"]["quota_handlers"]["limiters"] = limiters + configmap["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(olsconfig) - cluster_utils.run_oc(["delete", "configmap", "olsconfig"]) + updated_configmap = yaml.dump(configmap) + cluster_utils.run_oc(["delete", "configmap", configmap_name]) cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) + + +@pytest.fixture +def turn_off_operator_pod(): + cluster_utils.run_oc( + [ + "scale", + "deployment/lightspeed-operator-controller-manager", + "--replicas", + "0", + ] + ) + yield + cluster_utils.run_oc( + [ + "scale", + "deployment/lightspeed-operator-controller-manager", + "--replicas", + "1", + ] + ) +@pytest.mark.usefixtures("turn_off_operator_pod") @pytest.mark.quota_limits def test_quota_limits(): """Verify OLS quota limits.""" @@ -522,7 +580,6 @@ def test_quota_limits(): json={"query": "what is kubernetes?"}, timeout=LLM_REST_API_TIMEOUT, ) - # assert that the available quota is # less than the initial one hardcoded in the olsconfig assert ( @@ -539,7 +596,7 @@ def test_quota_limits(): cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "0", ] @@ -558,7 +615,7 @@ def test_quota_limits(): cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "1", ] @@ -580,7 +637,7 @@ def test_quota_limits(): cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + 
f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "0", ] @@ -589,7 +646,7 @@ def test_quota_limits(): cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "1", ] diff --git a/tests/e2e/test_attachments.py b/tests/e2e/test_attachments.py index 0e92c0df0..ea6baadef 100644 --- a/tests/e2e/test_attachments.py +++ b/tests/e2e/test_attachments.py @@ -18,7 +18,6 @@ def test_valid_question_with_empty_attachment_list() -> None: """Check the REST API /v1/query with POST HTTP method using empty attachment list.""" endpoint = "/v1/query" - with metrics_utils.RestAPICallCounterChecker( pytest.metrics_client, endpoint, status_code=requests.codes.ok ): @@ -236,8 +235,9 @@ def test_valid_question_with_wrong_attachment_format_unknown_attachment_type() - json_response = response.json() expected_response = { "detail": { - "response": "Unable to process this request", - "cause": "Attachment with improper type unknown_type detected", + "response": "Invalid attribute value", + "cause": "Invalid attatchment type unknown_type: must be one of frozenset" \ + "({'alert', 'log', 'event', 'api object', 'error message', 'configuration', 'stack trace'})", } } assert json_response == expected_response @@ -275,8 +275,9 @@ def test_valid_question_with_wrong_attachment_format_unknown_content_type() -> N json_response = response.json() expected_response = { "detail": { - "response": "Unable to process this request", - "cause": "Attachment with improper content type unknown/type detected", + "response": "Invalid attribute value", + "cause": "Invalid attatchment content type unknown/type: must be one of frozenset" \ + "({'application/json', 'application/xml', 'application/yaml', 'text/plain'})", } } assert json_response == expected_response diff --git a/tests/e2e/test_query_endpoint.py b/tests/e2e/test_query_endpoint.py index d629c62ed..f290eaf2c 100644 --- a/tests/e2e/test_query_endpoint.py +++ 
b/tests/e2e/test_query_endpoint.py @@ -5,6 +5,7 @@ # pyright: reportAttributeAccessIssue=false import re +import os import pytest import requests @@ -18,10 +19,13 @@ from . import test_api QUERY_ENDPOINT = "/v1/query" +import ipdb - +@pytest.mark.skip_with_lcore def test_invalid_question(): """Check the REST API /v1/query with POST HTTP method for invalid question.""" + ipdb.set_trace() + with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, QUERY_ENDPOINT): cid = suid.get_suid() response = pytest.client.post( @@ -77,11 +81,11 @@ def test_invalid_question_without_conversation_id(): json_response["response"], re.IGNORECASE, ) - - # new conversation ID should be generated - assert suid.check_suid( - json_response["conversation_id"] - ), "Conversation ID is not in UUID format" + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + # new conversation ID should be generated + assert suid.check_suid( + json_response["conversation_id"] + ), "Conversation ID is not in UUID format" def test_query_call_without_payload(): @@ -125,6 +129,7 @@ def test_query_call_with_improper_payload(): assert "missing" in response.text +@pytest.mark.skip_with_lcore def test_valid_question_improper_conversation_id() -> None: """Check the REST API /v1/query with POST HTTP method for improper conversation ID.""" with metrics_utils.RestAPICallCounterChecker( @@ -150,6 +155,7 @@ def test_valid_question_improper_conversation_id() -> None: assert json_response == expected_response +@pytest.mark.skip_with_lcore @retry(max_attempts=3, wait_between_runs=10) def test_valid_question_missing_conversation_id() -> None: """Check the REST API /v1/query with POST HTTP method for missing conversation ID.""" @@ -175,6 +181,7 @@ def test_valid_question_missing_conversation_id() -> None: ), "Conversation ID is not in UUID format" +@pytest.mark.skip_with_lcore def test_too_long_question() -> None: """Check the REST API /v1/query with too long question.""" # let's make the query really 
large, larger that context window size @@ -200,6 +207,7 @@ def test_too_long_question() -> None: assert json_response["detail"]["response"] == "Prompt is too long" +@pytest.mark.skip_with_lcore @pytest.mark.smoketest @pytest.mark.rag def test_valid_question() -> None: @@ -256,6 +264,7 @@ def test_ocp_docs_version_same_as_cluster_version() -> None: assert f"{major}.{minor}" in json_response["referenced_documents"][0]["doc_url"] +@pytest.mark.skip_with_lcore def test_valid_question_tokens_counter() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -275,6 +284,7 @@ def test_valid_question_tokens_counter() -> None: response_utils.check_content_type(response, "application/json") +@pytest.mark.skip_with_lcore def test_invalid_question_tokens_counter() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -294,6 +304,7 @@ def test_invalid_question_tokens_counter() -> None: response_utils.check_content_type(response, "application/json") +@pytest.mark.skip_with_lcore def test_token_counters_for_query_call_without_payload() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -322,6 +333,7 @@ def test_token_counters_for_query_call_without_payload() -> None: response_utils.check_content_type(response, "application/json") +@pytest.mark.skip_with_lcore def test_token_counters_for_query_call_with_improper_payload() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -378,6 +390,7 @@ def test_rag_question() -> None: assert len(doc_urls_list) == len(set(doc_urls_list)) +@pytest.mark.skip_with_lcore @pytest.mark.cluster def test_query_filter() -> None: """Ensure responses does not include filtered words and redacted words are not logged.""" @@ -403,7 
+416,7 @@ def test_query_filter() -> None: assert "bar" not in response_words # Retrieve the pod name - ols_container_name = "lightspeed-service-api" + ols_container_name = "lightspeed-stack-deployment" pod_name = cluster_utils.get_pod_by_prefix()[0] # Check if filtered words are redacted in the logs @@ -417,7 +430,7 @@ def test_query_filter() -> None: continue # check that the pattern is indeed not found in logs for pattern in unwanted_patterns: - assert pattern not in line.lower() + assert pattern not in line.lower(), f"failed for {pattern}" # Ensure the intended redaction has occurred assert "what is deployment in openshift?" in container_log @@ -486,7 +499,7 @@ def test_query_with_provider_but_not_model() -> None: # error thrown on Pydantic level assert ( json_response["detail"][0]["msg"] - == "Value error, LLM model must be specified when the provider is specified." + == "Value error, Model must be specified if provider is specified" ) @@ -514,7 +527,7 @@ def test_query_with_model_but_not_provider() -> None: assert ( json_response["detail"][0]["msg"] - == "Value error, LLM provider must be specified when the model is specified." + == "Value error, Provider must be specified if model is specified" ) @@ -624,10 +637,11 @@ def test_tool_calling() -> None: # Special check for granite assert not json_response["response"].strip().startswith("") - +import ipdb @pytest.mark.byok1 def test_rag_question_byok1() -> None: """Ensure response include expected top rag reference.""" + ipdb.set_trace() with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, QUERY_ENDPOINT): response = pytest.client.post( QUERY_ENDPOINT, diff --git a/tests/e2e/utils/adapt_ols_config.py b/tests/e2e/utils/adapt_ols_config.py index c57914dfd..548899c47 100644 --- a/tests/e2e/utils/adapt_ols_config.py +++ b/tests/e2e/utils/adapt_ols_config.py @@ -3,6 +3,7 @@ Handles multi-provider test scenarios dynamically. 
""" +import json import os import time @@ -12,7 +13,16 @@ from tests.e2e.utils import cluster as cluster_utils from tests.e2e.utils.data_collector_control import configure_exporter_for_e2e_tests from tests.e2e.utils.retry import retry_until_timeout_or_success -from tests.e2e.utils.wait_for_ols import wait_for_ols +from tests.e2e.utils.ols_installer import ( + create_secrets, + get_service_account_tokens, + setup_rbac, + setup_route, + setup_service_accounts, + update_lcore_setting, + update_ols_config, +) +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT def apply_olsconfig(provider_list: list[str]) -> None: @@ -49,28 +59,21 @@ def update_ols_configmap() -> None: """Update OLS configmap with additional e2e test configurations. Configures logging levels and user data collector settings for testing. + This is a wrapper around update_ols_config that adds data_export specific settings. """ - try: - # Get the current configmap - configmap_yaml = cluster_utils.run_oc( - ["get", "cm/olsconfig", "-o", "yaml"] - ).stdout - configmap = yaml.safe_load(configmap_yaml) - olsconfig = yaml.safe_load(configmap["data"][DEFAULT_CONFIGURATION_FILE]) - - # Ensure proper logging config for e2e tests - if "ols_config" not in olsconfig: - olsconfig["ols_config"] = {} - if "logging_config" not in olsconfig["ols_config"]: - olsconfig["ols_config"]["logging_config"] = {} - - # Set INFO level to avoid redacted logs - olsconfig["ols_config"]["logging_config"]["lib_log_level"] = "INFO" - - # Configure user data collection only for data_export test suite - # Other test suites don't need it and the volume might not be mounted - ols_config_suffix = os.getenv("OLS_CONFIG_SUFFIX", "default") - if ols_config_suffix == "data_export": + # First apply the standard config updates + update_ols_config() + + # Then add data_export specific user data collection config if needed + ols_config_suffix = os.getenv("OLS_CONFIG_SUFFIX", "default") + if ols_config_suffix == "data_export": + try: + 
configmap_yaml = cluster_utils.run_oc( + ["get", "cm/olsconfig", "-o", "yaml"] + ).stdout + configmap = yaml.safe_load(configmap_yaml) + olsconfig = yaml.safe_load(configmap["data"][DEFAULT_CONFIGURATION_FILE]) + olsconfig["ols_config"]["user_data_collection"] = { "feedback_disabled": False, "feedback_storage": "/app-root/ols-user-data/feedback", @@ -78,82 +81,20 @@ def update_ols_configmap() -> None: "transcripts_storage": "/app-root/ols-user-data/transcripts", } - # Update the configmap - configmap["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(olsconfig) - updated_configmap = yaml.dump(configmap) - cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) - print("OLS configmap updated successfully") - - except Exception as e: - raise RuntimeError( - f"Failed to update OLS configmap with e2e settings: {e}" - ) from e - - -def setup_service_accounts(namespace: str) -> None: - """Set up service accounts and access roles. - - Args: - namespace: The Kubernetes namespace to create service accounts in. - """ - print("Ensuring 'test-user' service account exists...") - cluster_utils.run_oc( - ["create", "sa", "test-user", "-n", namespace], - ignore_existing_resource=True, - ) - - print("Ensuring 'metrics-test-user' service account exists...") - cluster_utils.run_oc( - ["create", "sa", "metrics-test-user", "-n", namespace], - ignore_existing_resource=True, - ) - - print("Granting access roles to service accounts...") - cluster_utils.grant_sa_user_access("test-user", "lightspeed-operator-query-access") - cluster_utils.grant_sa_user_access( - "metrics-test-user", "lightspeed-operator-ols-metrics-reader" - ) - - -def setup_rbac(namespace: str) -> None: - """Set up pod-reader role and binding. - - Args: - namespace: The Kubernetes namespace for RBAC configuration. 
- """ - print("Ensuring 'pod-reader' role and rolebinding exist...") - cluster_utils.run_oc( - [ - "create", - "role", - "pod-reader", - "--verb=get,list", - "--resource=pods", - "--namespace", - namespace, - ], - ignore_existing_resource=True, - ) - - cluster_utils.run_oc( - [ - "create", - "rolebinding", - "test-user-pod-reader", - "--role=pod-reader", - f"--serviceaccount={namespace}:test-user", - "--namespace", - namespace, - ], - ignore_existing_resource=True, - ) - print("RBAC setup verified.") + configmap["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(olsconfig) + updated_configmap = yaml.dump(configmap) + cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) + print("Data export configmap settings applied successfully") + except Exception as e: + raise RuntimeError( + f"Failed to update OLS configmap with data export settings: {e}" + ) from e def wait_for_deployment() -> None: """Wait for OLS deployment and pods to be ready. - Ensures the lightspeed-app-server deployment is available and pods are running. + Ensures the service deployment is available and pods are running. """ print("Waiting for OLS deployment to be available...") retry_until_timeout_or_success( @@ -163,13 +104,13 @@ def wait_for_deployment() -> None: [ "get", "deployment", - "lightspeed-app-server", + OLS_SERVICE_DEPLOYMENT, "--ignore-not-found", "-o", "name", ] ).stdout.strip() - == "deployment.apps/lightspeed-app-server", + == f"deployment.apps/{OLS_SERVICE_DEPLOYMENT}", "Waiting for lightspeed-app-server deployment to be detected", ) @@ -177,30 +118,6 @@ def wait_for_deployment() -> None: cluster_utils.wait_for_running_pod() -def setup_route() -> str: - """Set up route and return OLS URL. - - Returns: - The HTTPS URL for accessing the OLS service. - """ - try: - cluster_utils.run_oc(["delete", "route", "ols"], ignore_existing_resource=False) - except Exception: - print("No existing route to delete. 
Continuing...") - - print("Creating route for OLS access") - cluster_utils.run_oc( - ["create", "-f", "tests/config/operator_install/route.yaml"], - ignore_existing_resource=False, - ) - - url = cluster_utils.run_oc( - ["get", "route", "ols", "-o", "jsonpath='{.spec.host}'"] - ).stdout.strip("'") - - return f"https://{url}" - - def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 """Adapt OLS configuration for different providers dynamically. @@ -215,21 +132,13 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 provider_list = provider_env.split() or ["openai"] ols_image = os.getenv("OLS_IMAGE", "") namespace = "openshift-lightspeed" + creds = os.getenv("PROVIDER_KEY_PATH", "empty") + cluster_utils.run_oc( + ["project", "openshift-lightspeed"], ignore_existing_resource=True + ) - print("Checking for existing app server deployment...") - try: - cluster_utils.run_oc( - ["scale", "deployment/lightspeed-app-server", "--replicas", "0"] - ) - retry_until_timeout_or_success( - 30, - 3, - lambda: not cluster_utils.get_pod_by_prefix(fail_not_found=False), - "Waiting for old app server pod to terminate", - ) - print("Old app server scaled down") - except Exception as e: - print(f"No existing app server to scale down (this is OK): {e}") + # Update lcore setting if LCORE is enabled + update_lcore_setting() # Scaling operator to 1 replica to allow finalizer to run for olsconfig cluster_utils.run_oc( [ @@ -240,26 +149,14 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 ] ) # Wait for operator pod to be ready - retry_until_timeout_or_success( - 60, - 5, - lambda: ( - pods := cluster_utils.get_pod_by_prefix( - prefix="lightspeed-operator-controller-manager", fail_not_found=False - ) - ) - and all( - status == "true" - for status in cluster_utils.get_container_ready_status(pods[0]) - ), - "Waiting for operator to be ready", - ) + cluster_utils.wait_for_running_pod("lightspeed-operator-controller-manager") try: - 
cluster_utils.run_oc(["delete", "olsconfig", "cluster", "--ignore-not-found"]) - print(" Old OLSConfig CR removed") + cluster_utils.run_oc(["delete", "secret", "llmcreds", "--ignore-not-found"]) except Exception as e: - print(f"Could not delete old OLSConfig: {e}") - + print(f"Could not delete old secret: {e}") + creds_list = creds.split() + for i, prov in enumerate(provider_list): + create_secrets(prov, creds_list[i], len(provider_list)) try: apply_olsconfig(provider_list) print("New OLSConfig CR applied") @@ -278,7 +175,7 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 [ "get", "deployment", - "lightspeed-app-server", + OLS_SERVICE_DEPLOYMENT, "--ignore-not-found", "-o", "jsonpath={.status.replicas}", @@ -292,7 +189,7 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 "scale", "deployment/lightspeed-operator-controller-manager", "--replicas", - "0", + "1", ] ) @@ -309,7 +206,7 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 # Scale down app server to apply e2e configurations print("Scaling down app server to apply e2e configurations...") cluster_utils.run_oc( - ["scale", "deployment/lightspeed-app-server", "--replicas", "0"] + ["scale", f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "0"] ) retry_until_timeout_or_success( @@ -322,7 +219,8 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 # Update configmap with e2e-specific settings - FAIL FAST if this breaks print("Updating configmap with e2e test settings...") - update_ols_configmap() + if OLS_SERVICE_DEPLOYMENT == "lightspeed-app-server": + update_ols_configmap() print(" Configmap updated successfully") # Apply test image if ols_image: @@ -336,7 +234,7 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 cluster_utils.run_oc( [ "patch", - "deployment/lightspeed-app-server", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--type", "json", "-p", @@ -351,7 +249,7 @@ def adapt_ols_config() 
-> tuple[str, str, str]: # pylint: disable=R0915 # Scale back up print("Scaling up app server with new configuration...") cluster_utils.run_oc( - ["scale", "deployment/lightspeed-app-server", "--replicas", "1"] + ["scale", f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "1"] ) # Wait for deployment to be ready @@ -386,18 +284,11 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 print("Tests may experience interference from data collector") # Fetch tokens for service accounts - print("Fetching tokens for service accounts...") - token = cluster_utils.get_token_for("test-user") - metrics_token = cluster_utils.get_token_for("metrics-test-user") + token, metrics_token = get_service_account_tokens() # Set up route and get URL ols_url = setup_route() - # Wait for OLS to be ready - print(f"Waiting for OLS to be ready at {ols_url}...") - if not wait_for_ols(ols_url, timeout=180): - raise RuntimeError("OLS failed to become ready after configuration") - print("OLS configuration and access setup completed successfully.") return ols_url, token, metrics_token diff --git a/tests/e2e/utils/cluster.py b/tests/e2e/utils/cluster.py index 3252d632c..d55432b90 100644 --- a/tests/e2e/utils/cluster.py +++ b/tests/e2e/utils/cluster.py @@ -181,7 +181,7 @@ def get_running_pods(namespace: str = "openshift-lightspeed") -> list[str]: def get_pod_by_prefix( - prefix: str = "lightspeed-app-server-", + prefix: str = "lightspeed-stack-deployment-", namespace: str = "openshift-lightspeed", fail_not_found: bool = True, ) -> list[str]: @@ -333,7 +333,7 @@ def get_container_ready_status(pod: str, namespace: str = "openshift-lightspeed" def wait_for_running_pod( - name: str = "lightspeed-app-server-", namespace: str = "openshift-lightspeed" + name: str = "lightspeed-stack-deployment-", namespace: str = "openshift-lightspeed" ): """Wait for the selected pod to be in running state.""" r = retry_until_timeout_or_success( @@ -401,7 +401,7 @@ def pod_has_containers_ready(): 
def get_certificate_secret_name( - name: str = "lightspeed-app-server", namespace: str = "openshift-lightspeed" + name: str = "lightspeed-stack-deployment", namespace: str = "openshift-lightspeed" ) -> str: """Get the name of the certificates secret for the service.""" try: diff --git a/tests/e2e/utils/constants.py b/tests/e2e/utils/constants.py index ddabb709b..7af47e09b 100644 --- a/tests/e2e/utils/constants.py +++ b/tests/e2e/utils/constants.py @@ -1,4 +1,5 @@ """Constants for end-to-end tests.""" +import os # timeout settings BASIC_ENDPOINTS_TIMEOUT = 5 @@ -18,3 +19,5 @@ OLS_USER_DATA_COLLECTION_INTERVAL_SHORT = ( 5 # 5 seconds - used only in data collection test ) + +OLS_SERVICE_DEPLOYMENT = "lightspeed-stack-deployment" if os.getenv("LCORE", 'False').lower() in ('true', '1', 't') else "lightspeed-app-server" diff --git a/tests/e2e/utils/data_collector_control.py b/tests/e2e/utils/data_collector_control.py index 963330443..1c45b8650 100644 --- a/tests/e2e/utils/data_collector_control.py +++ b/tests/e2e/utils/data_collector_control.py @@ -168,7 +168,7 @@ def set_exporter_collection_interval(self, interval_seconds: int) -> None: cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "-n", EXPORTER_NAMESPACE, "--replicas=0", @@ -217,7 +217,7 @@ def restart_exporter_container( cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "-n", EXPORTER_NAMESPACE, "--replicas=1", @@ -379,7 +379,7 @@ def patch_exporter_mode_to_manual() -> None: cluster_utils.run_oc( [ "patch", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "-n", EXPORTER_NAMESPACE, "--type=json", @@ -429,7 +429,7 @@ def prepare_for_data_collection_test( cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "-n", EXPORTER_NAMESPACE, "--replicas=0", diff --git a/tests/e2e/utils/metrics.py 
b/tests/e2e/utils/metrics.py index 90c65dab9..5d3efd986 100644 --- a/tests/e2e/utils/metrics.py +++ b/tests/e2e/utils/metrics.py @@ -1,6 +1,7 @@ """Utilities for reading and checking metrics from REST API.""" import requests +import os from tests.e2e.utils.constants import BASIC_ENDPOINTS_TIMEOUT @@ -22,7 +23,7 @@ def get_rest_api_counter_value( ): """Retrieve counter value from metrics.""" response = read_metrics(client) - counter_name = "ols_rest_api_calls_total" + counter_name = "ls_rest_api_calls_total" # counters with labels have the following format: # rest_api_calls_total{path="/openapi.json",status_code="200"} 1.0 @@ -34,7 +35,7 @@ def get_response_duration_seconds_value(client, path, default=None): """Retrieve counter value from metrics.""" response = read_metrics(client) - counter_name = "ols_response_duration_seconds_sum" + counter_name = "ls_response_duration_seconds_sum" # counters with response durations have the following format: # response_duration_seconds_sum{path="/v1/query"} 0.123 @@ -91,13 +92,21 @@ def get_metric_labels(lines, info_node_name, value=None) -> dict: # info node was not found return {} - + def get_enabled_model_and_provider(client): """Read configured model and provider from metrics.""" - response = read_metrics(client) - lines = [line.strip() for line in response.split("\n")] - - labels = get_metric_labels(lines, "ols_provider_model_configuration", "1.0") + + + + if os.getenv("LCORE", 'False').lower() in ('true', '1', 't'): + response = client.get("/v1/models", timeout=BASIC_ENDPOINTS_TIMEOUT) + assert response.status_code == requests.codes.ok + assert response.text is not None + + else: + response = read_metrics(client) + lines = [line.strip() for line in response.split("\n")] + labels = get_metric_labels(lines, "ls_provider_model_configuration", "1.0") return labels["model"], labels["provider"] @@ -105,7 +114,7 @@ def 
get_enable_status_for_all_models(client): """Read states about all model and providers.""" response = read_metrics(client) - counters = get_all_metric_counters(response, "ols_provider_model_configuration") + counters = get_all_metric_counters(response, "ls_provider_model_configuration") return [counter == 1.0 for counter in counters] @@ -237,14 +246,14 @@ def __enter__(self): return self.old_counter_token_sent_total = get_model_provider_counter_value( self.client, - "ols_llm_token_sent_total", + "ls_llm_token_sent_total", self.model, self.provider, default=0, ) self.old_counter_token_received_total = get_model_provider_counter_value( self.client, - "ols_llm_token_received_total", + "ls_llm_token_received_total", self.model, self.provider, default=0, @@ -256,7 +265,7 @@ def __exit__(self, exc_type, exc_value, exc_tb): return # check if counter for sent tokens has been updated new_counter_token_sent_total = get_model_provider_counter_value( - self.client, "ols_llm_token_sent_total", self.model, self.provider + self.client, "ls_llm_token_sent_total", self.model, self.provider ) check_token_counter_increases( "sent", @@ -268,7 +277,7 @@ def __exit__(self, exc_type, exc_value, exc_tb): # check if counter for received tokens has been updated new_counter_token_received_total = get_model_provider_counter_value( self.client, - "ols_llm_token_received_total", + "ls_llm_token_received_total", self.model, self.provider, default=0, diff --git a/tests/e2e/utils/ols_installer.py b/tests/e2e/utils/ols_installer.py index de1638b4e..e29624fba 100644 --- a/tests/e2e/utils/ols_installer.py +++ b/tests/e2e/utils/ols_installer.py @@ -1,5 +1,6 @@ """Functions to install the service onto an OCP cluster using the OLS operator.""" +import json import os import subprocess @@ -16,32 +17,38 @@ disconnected = os.getenv("DISCONNECTED", "") +def setup_service_accounts(namespace: str) -> None: + """Set up service accounts and access roles. 
-def create_and_config_sas() -> tuple[str, str]: - """Create and provide access to service accounts for testing. - - Returns: - tuple containing token and metrics token. + Args: + namespace: The Kubernetes namespace to create service accounts in. """ + print("Ensuring 'test-user' service account exists...") cluster_utils.run_oc( - ["project", "openshift-lightspeed"], ignore_existing_resource=True + ["create", "sa", "test-user", "-n", namespace], + ignore_existing_resource=True, ) - cluster_utils.create_user("test-user", ignore_existing_resource=True) - cluster_utils.create_user("metrics-test-user", ignore_existing_resource=True) - token = cluster_utils.get_token_for("test-user") - metrics_token = cluster_utils.get_token_for("metrics-test-user") - print("created test service account users") - # grant the test service accounts permission to query ols and retrieve metrics + print("Ensuring 'metrics-test-user' service account exists...") + cluster_utils.run_oc( + ["create", "sa", "metrics-test-user", "-n", namespace], + ignore_existing_resource=True, + ) + + print("Granting access roles to service accounts...") cluster_utils.grant_sa_user_access("test-user", "lightspeed-operator-query-access") cluster_utils.grant_sa_user_access( "metrics-test-user", "lightspeed-operator-ols-metrics-reader" ) - print("test service account permissions granted") - # grant pod listing permission to test-user - to test the tools, - # more specifically the we need the test-user be able to see pods - # in the namespace + +def setup_rbac(namespace: str) -> None: + """Set up pod-reader role and binding. + + Args: + namespace: The Kubernetes namespace for RBAC configuration. 
+ """ + print("Ensuring 'pod-reader' role and rolebinding exist...") cluster_utils.run_oc( [ "create", @@ -49,7 +56,8 @@ def create_and_config_sas() -> tuple[str, str]: "pod-reader", "--verb=get,list", "--resource=pods", - "--namespace=openshift-lightspeed", + "--namespace", + namespace, ], ignore_existing_resource=True, ) @@ -60,16 +68,111 @@ def create_and_config_sas() -> tuple[str, str]: "rolebinding", "test-user-pod-reader", "--role=pod-reader", - "--serviceaccount=openshift-lightspeed:test-user", - "--namespace=openshift-lightspeed", + f"--serviceaccount={namespace}:test-user", + "--namespace", + namespace, ], ignore_existing_resource=True, ) + print("RBAC setup verified.") + - print("Granted test-user permission to list pods.") +def get_service_account_tokens() -> tuple[str, str]: + """Get tokens for test service accounts. + Returns: + tuple containing token and metrics token. + """ + print("Fetching tokens for service accounts...") + token = cluster_utils.get_token_for("test-user") + metrics_token = cluster_utils.get_token_for("metrics-test-user") return token, metrics_token +def update_lcore_setting() -> None: + """Update the --use-lcore argument in the CSV if LCORE is enabled. + + Checks if LCORE environment variable is enabled and ensures the + --use-lcore argument in the ClusterServiceVersion is set to true. 
+ """ + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + print("LCORE not enabled, skipping CSV update") + return + + print("LCORE enabled, checking CSV configuration...") + namespace = "openshift-lightspeed" + + # Get the CSV name + csv_name_result = cluster_utils.run_oc( + ["get", "csv", "-n", namespace, "-o", "name"] + ) + csv_full_name = csv_name_result.stdout.strip() + if not csv_full_name: + print("No CSV found in namespace, skipping LCORE update") + return + + csv_name = csv_full_name.replace("clusterserviceversion.operators.coreos.com/", "") + + # Get current args from the CSV + args_result = cluster_utils.run_oc( + [ + "get", + "csv", + csv_name, + "-n", + namespace, + "-o", + "json", + ] + ) + csv_data = json.loads(args_result.stdout) + args = csv_data["spec"]["install"]["spec"]["deployments"][0]["spec"]["template"][ + "spec" + ]["containers"][0]["args"] + + # Check if --use-lcore exists and its value + lcore_arg_index = None + lcore_value = None + for i, arg in enumerate(args): + if arg.startswith("--use-lcore="): + lcore_arg_index = i + lcore_value = arg.split("=", 1)[1] + break + + if lcore_arg_index is None: + print("--use-lcore argument not found in CSV") + return + + if lcore_value == "true": + print("--use-lcore already set to true, no update needed") + return + + print(f"--use-lcore is set to {lcore_value}, updating to true...") + + # Update the argument + patch = ( + f'[{{"op": "replace", "path": "/spec/install/spec/deployments/0/spec/' + f'template/spec/containers/0/args/{lcore_arg_index}", ' + f'"value": "--use-lcore=true"}}]' + ) + + cluster_utils.run_oc( + [ + "patch", + "csv", + csv_name, + "-n", + namespace, + "--type", + "json", + "-p", + patch, + ] + ) + cluster_utils.wait_for_running_pod( + name="lightspeed-operator-controller-manager", + namespace="openshift-lightspeed" + ) + print("--use-lcore updated to true successfully") def update_ols_config() -> None: """Create the ols config configmap with log and collector 
config for e2e tests. @@ -120,6 +223,30 @@ def update_ols_config() -> None: cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) +def setup_route() -> str: + """Set up route and return OLS URL. + + Returns: + The HTTPS URL for accessing the OLS service. + """ + try: + cluster_utils.run_oc(["delete", "route", "ols"], ignore_existing_resource=False) + except Exception: + print("No existing route to delete. Continuing...") + + print("Creating route for OLS access") + cluster_utils.run_oc( + ["create", "-f", "tests/config/operator_install/route.yaml"], + ignore_existing_resource=False, + ) + + url = cluster_utils.run_oc( + ["get", "route", "ols", "-o", "jsonpath='{.spec.host}'"] + ).stdout.strip("'") + + return f"https://{url}" + + def replace_ols_image(ols_image: str) -> None: """Replace the existing ols image with a new one. @@ -146,7 +273,7 @@ def replace_ols_image(ols_image: str) -> None: cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "--replicas", "0", ] @@ -163,7 +290,7 @@ def replace_ols_image(ols_image: str) -> None: # update the OLS deployment to use the new image from CI/OLS_IMAGE env var patch = f"""[{{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value":"{ols_image}"}}]""" # noqa: E501 cluster_utils.run_oc( - ["patch", "deployment/lightspeed-app-server", "--type", "json", "-p", patch] + ["patch", "deployment/lightspeed-stack-deployment", "--type", "json", "-p", patch] ) @@ -283,8 +410,10 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no cluster_utils.run_oc( ["project", "openshift-lightspeed"], ignore_existing_resource=True ) - token, metrics_token = create_and_config_sas() - + namespace = "openshift-lightspeed" + setup_service_accounts(namespace) + setup_rbac(namespace) + token, metrics_token = get_service_account_tokens() # wait for the operator to install # time.sleep(3) # not sure if it is needed but it fails 
sometimes r = retry_until_timeout_or_success( @@ -306,9 +435,10 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no print(msg) raise Exception(msg) print("Operator installed successfully") - + provider = os.getenv("PROVIDER", "openai") creds = os.getenv("PROVIDER_KEY_PATH", "empty") + update_lcore_setting() # create the llm api key secret ols will mount provider_list = provider.split() creds_list = creds.split() @@ -383,13 +513,13 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no [ "get", "deployment", - "lightspeed-app-server", + "lightspeed-stack-deployment", "--ignore-not-found", "-o", "name", ] ).stdout - == "deployment.apps/lightspeed-app-server\n", + == "deployment.apps/lightspeed-stack-deployment\n", "Waiting for OLS API server deployment to be created", ) if not r: @@ -428,7 +558,7 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "--replicas", "0", ] @@ -438,7 +568,7 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-app-server", + "deployment/lightspeed-stack-deployment", "--replicas", "1", ] @@ -465,7 +595,7 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no # expect it to be (must-gather will also collect this) print( cluster_utils.run_oc( - ["get", "deployment", "lightspeed-app-server", "-o", "yaml"] + ["get", "deployment", "lightspeed-stack-deployment", "-o", "yaml"] ).stdout ) print("-" * 50) @@ -484,22 +614,6 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no print(f"Warning: Could not configure exporter: {e}") print("Tests may experience interference from data collector") - try: - cluster_utils.run_oc( - [ - "delete", - "route", - "ols", - ], - ) - except subprocess.CalledProcessError: - 
print("No route exists, creating it.") - # create a route so tests can access OLS directly - cluster_utils.run_oc(["create", "-f", "tests/config/operator_install/route.yaml"]) - - url = cluster_utils.run_oc( - ["get", "route", "ols", "-o", "jsonpath='{.spec.host}'"] - ).stdout.strip("'") - ols_url = f"https://{url}" - wait_for_ols(ols_url) + # Set up route and get URL + ols_url = setup_route() return ols_url, token, metrics_token diff --git a/tests/e2e/utils/wait_for_ols.py b/tests/e2e/utils/wait_for_ols.py index d1547781c..90ad4a752 100644 --- a/tests/e2e/utils/wait_for_ols.py +++ b/tests/e2e/utils/wait_for_ols.py @@ -9,12 +9,15 @@ import requests from requests.exceptions import SSLError from urllib3.exceptions import InsecureRequestWarning +from tests.e2e.utils.constants import ( + BASIC_ENDPOINTS_TIMEOUT, +) warnings.filterwarnings("ignore", category=InsecureRequestWarning) # ruff: noqa: S501 -def wait_for_ols(url, timeout=300, interval=10): +def wait_for_ols(url, client, timeout=300, interval=10): """Wait for the OLS to become ready by checking its readiness endpoint. 
Args: @@ -30,14 +33,20 @@ def wait_for_ols(url, timeout=300, interval=10): for attempt in range(1, attempts + 1): print(f"Checking OLS readiness, attempt {attempt} of {attempts}") try: - response = requests.get(f"{url}/readiness", verify=True, timeout=5) + response = client.get( + "/readiness", + timeout=BASIC_ENDPOINTS_TIMEOUT + ) if response.status_code == requests.codes.ok: print("OLS is ready") return True except SSLError: print("SSL error detected, retrying without SSL verification") try: - response = requests.get(f"{url}/readiness", verify=False, timeout=5) + response = client.get( + "/readiness", + timeout=BASIC_ENDPOINTS_TIMEOUT + ) if response.status_code == requests.codes.ok: print("OLS is ready") return True diff --git a/tests/scripts/test-e2e-cluster.sh b/tests/scripts/test-e2e-cluster.sh index ca1d2474d..307a7b8c1 100755 --- a/tests/scripts/test-e2e-cluster.sh +++ b/tests/scripts/test-e2e-cluster.sh @@ -35,7 +35,7 @@ function run_suites() { # If changes are done in this file, please make sure they reflect in test-e2e-cluster-periodics.sh and test-evaluation.sh # runsuite arguments: - # suiteid test_tags provider provider_keypath model ols_image os_config_suffix + # suiteid test_tags provider provider_keypath model ols_image ols_config_suffix # empty test_tags means run all tests run_suite "azure_openai" "not certificates and not (tool_calling and not smoketest and not rag) and not byok1 and not byok2 and not quota_limits and not data_export" "azure_openai" "$AZUREOPENAI_PROVIDER_KEY_PATH" "gpt-4o-mini" "$OLS_IMAGE" "default" (( rc = rc || $? 
)) From 83fe27ae29187cde27c272672ae103ed9e8b96a4 Mon Sep 17 00:00:00 2001 From: Joao Fula Date: Tue, 27 Jan 2026 12:04:39 +0100 Subject: [PATCH 2/6] Adding more lcore skips and fixing some tests to pass with lcore --- tests/e2e/test_api.py | 6 ++- tests/e2e/test_attachments.py | 43 ++++++++++-------- tests/e2e/test_query_endpoint.py | 53 ++++++++++++++-------- tests/e2e/test_streaming_query_endpoint.py | 39 +++++++++++++--- tests/e2e/test_user_feedback.py | 6 ++- tests/e2e/utils/metrics.py | 4 +- 6 files changed, 103 insertions(+), 48 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index e29bba33a..296f4ee15 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -135,7 +135,6 @@ def test_one_default_model_provider(): ), "one model and provider should be selected as default" -@pytest.mark.skip_with_lcore @pytest.mark.cluster def test_improper_token(): """Test accessing /v1/query endpoint using improper auth. token.""" @@ -145,7 +144,10 @@ def test_improper_token(): timeout=NON_LLM_REST_API_TIMEOUT, headers={"Authorization": "Bearer wrong-token"}, ) - assert response.status_code == requests.codes.forbidden + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + assert response.status_code == requests.codes.forbidden + else: + assert response.status_code == requests.codes.unauthorized @pytest.mark.skip_with_lcore diff --git a/tests/e2e/test_attachments.py b/tests/e2e/test_attachments.py index ea6baadef..352220a3e 100644 --- a/tests/e2e/test_attachments.py +++ b/tests/e2e/test_attachments.py @@ -6,6 +6,7 @@ import pytest import requests +import os from tests.e2e.utils import metrics as metrics_utils from tests.e2e.utils import response as response_utils @@ -202,7 +203,7 @@ def test_valid_question_with_wrong_attachment_format_field_of_different_type() - assert details["msg"] == "Input should be a valid string" assert details["type"] == "string_type" - + @retry(max_attempts=3, wait_between_runs=10) def 
test_valid_question_with_wrong_attachment_format_unknown_attachment_type() -> None: """Check the REST API /v1/query with POST HTTP method using attachment with wrong type.""" @@ -231,16 +232,26 @@ def test_valid_question_with_wrong_attachment_format_unknown_attachment_type() - # the attachment should not be processed correctly assert response.status_code == requests.codes.unprocessable_entity - json_response = response.json() - expected_response = { - "detail": { - "response": "Invalid attribute value", - "cause": "Invalid attatchment type unknown_type: must be one of frozenset" \ - "({'alert', 'log', 'event', 'api object', 'error message', 'configuration', 'stack trace'})", + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + expected_response = { + "detail": { + "response": "Invalid attribute value", + "cause": "Invalid attatchment type unknown_type: must be one of frozenset" \ + "({'alert', 'log', 'event', 'api object', 'error message', 'configuration', 'stack trace'})", + } } - } - assert json_response == expected_response + assert json_response == expected_response + else: + assert "Invalid attribute value" in json_response["detail"]["response"] + assert "Invalid attatchment type unknown_type: must be one of frozenset" in json_response["detail"]["cause"] + assert "event" in json_response["detail"]["cause"] + assert "log" in json_response["detail"]["cause"] + assert "stack trace" in json_response["detail"]["cause"] + assert "alert" in json_response["detail"]["cause"] + assert "configuration" in json_response["detail"]["cause"] + assert "api object" in json_response["detail"]["cause"] + assert "error message" in json_response["detail"]["cause"] @retry(max_attempts=3, wait_between_runs=10) @@ -271,13 +282,9 @@ def test_valid_question_with_wrong_attachment_format_unknown_content_type() -> N # the attachment should not be processed correctly assert response.status_code == requests.codes.unprocessable_entity - json_response = response.json() - 
expected_response = { - "detail": { - "response": "Invalid attribute value", - "cause": "Invalid attatchment content type unknown/type: must be one of frozenset" \ - "({'application/json', 'application/xml', 'application/yaml', 'text/plain'})", - } - } - assert json_response == expected_response + assert "Invalid attribute value" in json_response["detail"]["response"] + assert "application/json" in json_response["detail"]["cause"] + assert "application/xml" in json_response["detail"]["cause"] + assert "application/yaml" in json_response["detail"]["cause"] + assert "text/plain" in json_response["detail"]["cause"] diff --git a/tests/e2e/test_query_endpoint.py b/tests/e2e/test_query_endpoint.py index f290eaf2c..d851de646 100644 --- a/tests/e2e/test_query_endpoint.py +++ b/tests/e2e/test_query_endpoint.py @@ -207,29 +207,42 @@ def test_too_long_question() -> None: assert json_response["detail"]["response"] == "Prompt is too long" -@pytest.mark.skip_with_lcore @pytest.mark.smoketest @pytest.mark.rag def test_valid_question() -> None: """Check the REST API /v1/query with POST HTTP method for valid question and no yaml.""" with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, QUERY_ENDPOINT): - cid = suid.get_suid() - response = pytest.client.post( - QUERY_ENDPOINT, - json={ - "conversation_id": cid, - "query": "what is kubernetes in the context of OpenShift?", - }, - timeout=test_api.LLM_REST_API_TIMEOUT, - ) - assert response.status_code == requests.codes.ok - - response_utils.check_content_type(response, "application/json") - print(vars(response)) - json_response = response.json() - - # checking a few major information from response - assert json_response["conversation_id"] == cid + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + cid = suid.get_suid() + response = pytest.client.post( + QUERY_ENDPOINT, + json={ + "conversation_id": cid, + "query": "what is kubernetes in the context of OpenShift?", + }, + 
timeout=test_api.LLM_REST_API_TIMEOUT, + ) + assert response.status_code == requests.codes.ok + + response_utils.check_content_type(response, "application/json") + print(vars(response)) + json_response = response.json() + + # checking a few major information from response + assert json_response["conversation_id"] == cid + else: + response = pytest.client.post( + QUERY_ENDPOINT, + json={ + "query": "what is kubernetes in the context of OpenShift?", + }, + timeout=test_api.LLM_REST_API_TIMEOUT, + ) + assert response.status_code == requests.codes.ok + + response_utils.check_content_type(response, "application/json") + print(vars(response)) + json_response = response.json() assert re.search( r"kubernetes|openshift", json_response["response"], @@ -474,6 +487,7 @@ def test_conversation_history() -> None: assert "ingress" in response_text, debug_msg +@pytest.mark.skip_with_lcore def test_query_with_provider_but_not_model() -> None: """Check the REST API /v1/query with POST HTTP method for provider specified, but no model.""" with metrics_utils.RestAPICallCounterChecker( @@ -503,6 +517,7 @@ def test_query_with_provider_but_not_model() -> None: ) +@pytest.mark.skip_with_lcore def test_query_with_model_but_not_provider() -> None: """Check the REST API /v1/query with POST HTTP method for model specified, but no provider.""" with metrics_utils.RestAPICallCounterChecker( @@ -531,6 +546,7 @@ def test_query_with_model_but_not_provider() -> None: ) +@pytest.mark.skip_with_lcore def test_query_with_unknown_provider() -> None: """Check the REST API /v1/query with POST HTTP method for unknown provider specified.""" # retrieve currently selected model @@ -568,6 +584,7 @@ def test_query_with_unknown_provider() -> None: ) +@pytest.mark.skip_with_lcore def test_query_with_unknown_model() -> None: """Check the REST API /v1/query with POST HTTP method for unknown model specified.""" # retrieve currently selected provider diff --git a/tests/e2e/test_streaming_query_endpoint.py 
b/tests/e2e/test_streaming_query_endpoint.py index c3a67790b..c8ff9c46c 100644 --- a/tests/e2e/test_streaming_query_endpoint.py +++ b/tests/e2e/test_streaming_query_endpoint.py @@ -6,6 +6,7 @@ import json import re +import os import pytest import requests @@ -53,6 +54,7 @@ def construct_response_from_streamed_events(events: dict) -> str: return response +@pytest.mark.skip_with_lcore def test_invalid_question(): """Check the endpoint POST method for invalid question.""" with metrics_utils.RestAPICallCounterChecker( @@ -98,7 +100,8 @@ def test_invalid_question_without_conversation_id(): # new conversation ID should be generated assert events[0]["event"] == "start" assert events[0]["data"] - assert suid.check_suid(events[0]["data"]["conversation_id"]) + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + assert suid.check_suid(events[0]["data"]["conversation_id"]) def test_query_call_without_payload(): @@ -139,6 +142,7 @@ def test_query_call_with_improper_payload(): assert "missing" in response.text +@pytest.mark.skip_with_lcore def test_valid_question_improper_conversation_id() -> None: """Check the endpoint with POST HTTP method for improper conversation ID.""" with metrics_utils.RestAPICallCounterChecker( @@ -163,6 +167,7 @@ def test_valid_question_improper_conversation_id() -> None: assert json_response == expected_response +@pytest.mark.skip_with_lcore def test_too_long_question() -> None: """Check the endpoint with too long question.""" # let's make the query really large, larger that context window size @@ -200,11 +205,24 @@ def test_valid_question() -> None: with metrics_utils.RestAPICallCounterChecker( pytest.metrics_client, STREAMING_QUERY_ENDPOINT ): - cid = suid.get_suid() - response = post_with_defaults( - STREAMING_QUERY_ENDPOINT, - json={"conversation_id": cid, "query": "what is kubernetes?"}, - ) + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + cid = suid.get_suid() + response = pytest.client.post( + 
STREAMING_QUERY_ENDPOINT, + json={ + "conversation_id": cid, + "query": "what is kubernetes in the context of OpenShift?", + }, + timeout=test_api.LLM_REST_API_TIMEOUT, + ) + else: + response = pytest.client.post( + STREAMING_QUERY_ENDPOINT, + json={ + "query": "what is kubernetes in the context of OpenShift?", + }, + timeout=test_api.LLM_REST_API_TIMEOUT, + ) assert response.status_code == requests.codes.ok response_utils.check_content_type(response, constants.MEDIA_TYPE_TEXT) @@ -244,6 +262,7 @@ def test_ocp_docs_version_same_as_cluster_version() -> None: ) +@pytest.mark.skip_with_lcore def test_valid_question_tokens_counter() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -264,6 +283,7 @@ def test_valid_question_tokens_counter() -> None: response_utils.check_content_type(response, constants.MEDIA_TYPE_TEXT) +@pytest.mark.skip_with_lcore def test_invalid_question_tokens_counter() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -284,6 +304,7 @@ def test_invalid_question_tokens_counter() -> None: response_utils.check_content_type(response, constants.MEDIA_TYPE_TEXT) +@pytest.mark.skip_with_lcore def test_token_counters_for_query_call_without_payload() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -311,6 +332,7 @@ def test_token_counters_for_query_call_without_payload() -> None: response_utils.check_content_type(response, constants.MEDIA_TYPE_JSON) +@pytest.mark.skip_with_lcore def test_token_counters_for_query_call_with_improper_payload() -> None: """Check how the tokens counter are updated accordingly.""" model, provider = metrics_utils.get_enabled_model_and_provider( @@ -372,6 +394,7 @@ def test_rag_question() -> None: assert len(set(docs_urls)) == len(docs_urls) +@pytest.mark.skip_with_lcore 
@pytest.mark.cluster def test_query_filter() -> None: """Ensure responses does not include filtered words and redacted words are not logged.""" @@ -463,6 +486,7 @@ def test_conversation_history() -> None: assert "ingress" in response_text, scenario_fail_msg +@pytest.mark.skip_with_lcore def test_query_with_provider_but_not_model() -> None: """Check the endpoint with POST HTTP method for provider specified, but no model.""" with metrics_utils.RestAPICallCounterChecker( @@ -491,6 +515,7 @@ def test_query_with_provider_but_not_model() -> None: ) +@pytest.mark.skip_with_lcore def test_query_with_model_but_not_provider() -> None: """Check the endpoint with POST HTTP method for model specified, but no provider.""" with metrics_utils.RestAPICallCounterChecker( @@ -518,6 +543,7 @@ def test_query_with_model_but_not_provider() -> None: ) +@pytest.mark.skip_with_lcore def test_query_with_unknown_provider() -> None: """Check the endpoint with POST HTTP method for unknown provider specified.""" # retrieve currently selected model @@ -554,6 +580,7 @@ def test_query_with_unknown_provider() -> None: ) +@pytest.mark.skip_with_lcore def test_query_with_unknown_model() -> None: """Check the endpoint with POST HTTP method for unknown model specified.""" # retrieve currently selected provider diff --git a/tests/e2e/test_user_feedback.py b/tests/e2e/test_user_feedback.py index ed3a4c9a8..6d07eea46 100644 --- a/tests/e2e/test_user_feedback.py +++ b/tests/e2e/test_user_feedback.py @@ -6,6 +6,7 @@ import pytest import requests +import os from tests.e2e.utils import cluster as cluster_utils from tests.e2e.utils import response as response_utils @@ -27,7 +28,10 @@ def test_feedback_can_post_with_wrong_token(): timeout=test_api.BASIC_ENDPOINTS_TIMEOUT, headers={"Authorization": "Bearer wrong-token"}, ) - assert response.status_code == requests.codes.forbidden + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + assert response.status_code == requests.codes.forbidden + else: + 
assert response.status_code == requests.codes.unauthorized @pytest.mark.data_export diff --git a/tests/e2e/utils/metrics.py b/tests/e2e/utils/metrics.py index 5d3efd986..0fdd97712 100644 --- a/tests/e2e/utils/metrics.py +++ b/tests/e2e/utils/metrics.py @@ -92,12 +92,10 @@ def get_metric_labels(lines, info_node_name, value=None) -> dict: # info node was not found return {} -import ipdb + def get_enabled_model_and_provider(client): """Read configured model and provider from metrics.""" - ipdb.set_trace() - if os.getenv("LCORE", 'False').lower() in ('true', '1', 't'): response = client.get("/v1/models", timeout=BASIC_ENDPOINTS_TIMEOUT) assert response.status_code == requests.codes.ok From 1aa514c85aa90d1ddfd223800f916479bb4d2a42 Mon Sep 17 00:00:00 2001 From: Joao Fula Date: Tue, 27 Jan 2026 13:06:29 +0100 Subject: [PATCH 3/6] fixing linting Removing bundle sync configuration from console tests. Just run the tests in main. Removing bundle sync configuration from console tests. Just run the tests in main. Removing bundle sync configuration from console tests. Just run the tests in main. Removing bundle sync configuration from console tests. Just run the tests in main. 
Adding obs owners for access to cluster pool logs Adding obs owners for access to cluster pool logs Adding obs owners for access to cluster pool logs --- tests/e2e/test_api.py | 45 ++++++++++++++-------- tests/e2e/test_attachments.py | 34 ++++++++++------ tests/e2e/test_query_endpoint.py | 15 +++----- tests/e2e/test_streaming_query_endpoint.py | 2 +- tests/e2e/test_user_feedback.py | 6 ++- tests/e2e/utils/adapt_ols_config.py | 23 ++++++----- tests/e2e/utils/cluster.py | 10 ++--- tests/e2e/utils/constants.py | 7 +++- tests/e2e/utils/data_collector_control.py | 25 +++++++----- tests/e2e/utils/metrics.py | 28 +++++--------- tests/e2e/utils/ols_installer.py | 37 ++++++++++-------- tests/e2e/utils/wait_for_ols.py | 13 ++----- 12 files changed, 138 insertions(+), 107 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index 296f4ee15..eda76b0da 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -5,9 +5,9 @@ # pyright: reportAttributeAccessIssue=false import json +import os import re import time -import os import pytest import requests @@ -25,9 +25,9 @@ CONVERSATION_ID, LLM_REST_API_TIMEOUT, NON_LLM_REST_API_TIMEOUT, + OLS_SERVICE_DEPLOYMENT, OLS_USER_DATA_COLLECTION_INTERVAL_SHORT, OLS_USER_DATA_PATH, - OLS_SERVICE_DEPLOYMENT ) from tests.e2e.utils.data_collector_control import prepare_for_data_collection_test from tests.e2e.utils.decorators import retry @@ -38,7 +38,6 @@ ) - @pytest.fixture(name="postgres_connection", scope="module") def fixture_postgres_connection(): """Fixture with Postgres connection.""" @@ -51,13 +50,20 @@ def test_readiness(): """Test handler for /readiness REST API endpoint.""" endpoint = "/readiness" with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, endpoint): - response = pytest.client.get( - endpoint, - timeout=LLM_REST_API_TIMEOUT - ) + response = pytest.client.get(endpoint, timeout=LLM_REST_API_TIMEOUT) assert response.status_code == requests.codes.ok 
response_utils.check_content_type(response, "application/json") - assert response.json() == {"ready": True, "reason": "All providers are healthy", "providers": []} + if os.getenv("LCORE", "False").lower() in ("true", "1", "t"): + assert response.json() == { + "ready": True, + "reason": "All providers are healthy", + "providers": [], + } + else: + assert response.json() == { + "ready": True, + "reason": "service is ready", + } @pytest.mark.smoketest @@ -66,10 +72,7 @@ def test_liveness(): """Test handler for /liveness REST API endpoint.""" endpoint = "/liveness" with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, endpoint): - response = pytest.client.get( - endpoint, - timeout=BASIC_ENDPOINTS_TIMEOUT - ) + response = pytest.client.get(endpoint, timeout=BASIC_ENDPOINTS_TIMEOUT) assert response.status_code == requests.codes.ok response_utils.check_content_type(response, "application/json") assert response.json() == {"alive": True} @@ -173,7 +176,7 @@ def test_transcripts_storing_cluster(): """Test if the transcripts are stored properly.""" transcripts_path = OLS_USER_DATA_PATH + "/transcripts" cluster_utils.wait_for_running_pod() - pod_name = cluster_utils.get_pod_by_prefix()[0] + pod_name = cluster_utils.get_pod_by_prefix(OLS_SERVICE_DEPLOYMENT)[0] # there are multiple tests running agains cluster, so transcripts # can be already present - we need to ensure the storage is empty @@ -239,6 +242,7 @@ def test_openapi_endpoint(): response = pytest.client.get("/openapi.json", timeout=BASIC_ENDPOINTS_TIMEOUT) assert response.status_code == requests.codes.ok response_utils.check_content_type(response, "application/json") + payload = response.json() assert payload is not None, "Incorrect response" @@ -251,7 +255,7 @@ def test_openapi_endpoint(): # check application description info = payload["info"] assert "description" in info, "Service description not provided" - assert "Lightspeed Core Service (LCS) service API specification" in info["description"] + 
assert f"{metadata.SERVICE_NAME} service API specification" in info["description"] # elementary check that all mandatory endpoints are covered paths = payload["paths"] @@ -352,9 +356,11 @@ def filter_logs(logs: str, last_log_line: str) -> str: def get_last_log_line(logs: str) -> str: return [line for line in logs.split("\n") if line][-1] + # Prepare: patch to manual mode, set short interval, configure stage ingress controller = prepare_for_data_collection_test( - short_interval_seconds=OLS_USER_DATA_COLLECTION_INTERVAL_SHORT + client=pytest.client, + short_interval_seconds=OLS_USER_DATA_COLLECTION_INTERVAL_SHORT, ) data_collection_container_name = "lightspeed-to-dataverse-exporter" @@ -434,7 +440,7 @@ def test_http_header_redaction(): assert response.json() == {"alive": True} container_log = cluster_utils.get_container_log( - cluster_utils.get_pod_by_prefix()[0], "lightspeed-stack" + cluster_utils.get_pod_by_prefix()[0], "lightspeed-service-api" ) for header in HTTP_REQUEST_HEADERS_TO_REDACT: @@ -550,10 +556,15 @@ def update_olsconfig(limiters: list[dict]): updated_configmap = yaml.dump(configmap) cluster_utils.run_oc(["delete", "configmap", configmap_name]) cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) - + @pytest.fixture def turn_off_operator_pod(): + """Turn off operator pod fixture. + + Turn off operator pod to modify lightspeed-stack + without waiting for lightspeed service pod to restart. 
+ """ cluster_utils.run_oc( [ "scale", diff --git a/tests/e2e/test_attachments.py b/tests/e2e/test_attachments.py index 352220a3e..842ddcd20 100644 --- a/tests/e2e/test_attachments.py +++ b/tests/e2e/test_attachments.py @@ -4,9 +4,10 @@ # properly by linters # pyright: reportAttributeAccessIssue=false +import os + import pytest import requests -import os from tests.e2e.utils import metrics as metrics_utils from tests.e2e.utils import response as response_utils @@ -203,7 +204,7 @@ def test_valid_question_with_wrong_attachment_format_field_of_different_type() - assert details["msg"] == "Input should be a valid string" assert details["type"] == "string_type" -import ipdb + @retry(max_attempts=3, wait_between_runs=10) def test_valid_question_with_wrong_attachment_format_unknown_attachment_type() -> None: """Check the REST API /v1/query with POST HTTP method using attachment with wrong type.""" @@ -236,15 +237,17 @@ def test_valid_question_with_wrong_attachment_format_unknown_attachment_type() - if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): expected_response = { "detail": { - "response": "Invalid attribute value", - "cause": "Invalid attatchment type unknown_type: must be one of frozenset" \ - "({'alert', 'log', 'event', 'api object', 'error message', 'configuration', 'stack trace'})", + "response": "Unable to process this request", + "cause": "Attachment with improper type unknown_type detected", } } assert json_response == expected_response else: assert "Invalid attribute value" in json_response["detail"]["response"] - assert "Invalid attatchment type unknown_type: must be one of frozenset" in json_response["detail"]["cause"] + assert ( + "Invalid attatchment type unknown_type: must be one of frozenset" + in json_response["detail"]["cause"] + ) assert "event" in json_response["detail"]["cause"] assert "log" in json_response["detail"]["cause"] assert "stack trace" in json_response["detail"]["cause"] @@ -283,8 +286,17 @@ def 
test_valid_question_with_wrong_attachment_format_unknown_content_type() -> N # the attachment should not be processed correctly assert response.status_code == requests.codes.unprocessable_entity json_response = response.json() - assert "Invalid attribute value" in json_response["detail"]["response"] - assert "application/json" in json_response["detail"]["cause"] - assert "application/xml" in json_response["detail"]["cause"] - assert "application/yaml" in json_response["detail"]["cause"] - assert "text/plain" in json_response["detail"]["cause"] + if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): + expected_response = { + "detail": { + "response": "Unable to process this request", + "cause": "Attachment with improper content type unknown/type detected", + } + } + assert json_response == expected_response + else: + assert "Invalid attribute value" in json_response["detail"]["response"] + assert "application/json" in json_response["detail"]["cause"] + assert "application/xml" in json_response["detail"]["cause"] + assert "application/yaml" in json_response["detail"]["cause"] + assert "text/plain" in json_response["detail"]["cause"] diff --git a/tests/e2e/test_query_endpoint.py b/tests/e2e/test_query_endpoint.py index d851de646..a7eb4ad10 100644 --- a/tests/e2e/test_query_endpoint.py +++ b/tests/e2e/test_query_endpoint.py @@ -4,8 +4,8 @@ # properly by linters # pyright: reportAttributeAccessIssue=false -import re import os +import re import pytest import requests @@ -19,13 +19,11 @@ from . 
import test_api QUERY_ENDPOINT = "/v1/query" -import ipdb + @pytest.mark.skip_with_lcore def test_invalid_question(): """Check the REST API /v1/query with POST HTTP method for invalid question.""" - ipdb.set_trace() - with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, QUERY_ENDPOINT): cid = suid.get_suid() response = pytest.client.post( @@ -429,7 +427,7 @@ def test_query_filter() -> None: assert "bar" not in response_words # Retrieve the pod name - ols_container_name = "lightspeed-stack-deployment" + ols_container_name = "lightspeed-service-api" pod_name = cluster_utils.get_pod_by_prefix()[0] # Check if filtered words are redacted in the logs @@ -513,7 +511,7 @@ def test_query_with_provider_but_not_model() -> None: # error thrown on Pydantic level assert ( json_response["detail"][0]["msg"] - == "Value error, Model must be specified if provider is specified" + == "Value error, LLM model must be specified when the provider is specified." ) @@ -542,7 +540,7 @@ def test_query_with_model_but_not_provider() -> None: assert ( json_response["detail"][0]["msg"] - == "Value error, Provider must be specified if model is specified" + == "Value error, LLM provider must be specified when the model is specified." 
) @@ -654,11 +652,10 @@ def test_tool_calling() -> None: # Special check for granite assert not json_response["response"].strip().startswith("") -import ipdb + @pytest.mark.byok1 def test_rag_question_byok1() -> None: """Ensure response include expected top rag reference.""" - ipdb.set_trace() with metrics_utils.RestAPICallCounterChecker(pytest.metrics_client, QUERY_ENDPOINT): response = pytest.client.post( QUERY_ENDPOINT, diff --git a/tests/e2e/test_streaming_query_endpoint.py b/tests/e2e/test_streaming_query_endpoint.py index c8ff9c46c..8718de044 100644 --- a/tests/e2e/test_streaming_query_endpoint.py +++ b/tests/e2e/test_streaming_query_endpoint.py @@ -5,8 +5,8 @@ # pyright: reportAttributeAccessIssue=false import json -import re import os +import re import pytest import requests diff --git a/tests/e2e/test_user_feedback.py b/tests/e2e/test_user_feedback.py index 6d07eea46..bbc584dff 100644 --- a/tests/e2e/test_user_feedback.py +++ b/tests/e2e/test_user_feedback.py @@ -4,12 +4,14 @@ # properly by linters # pyright: reportAttributeAccessIssue=false +import os + import pytest import requests -import os from tests.e2e.utils import cluster as cluster_utils from tests.e2e.utils import response as response_utils +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT from . 
import test_api @@ -38,7 +40,7 @@ def test_feedback_can_post_with_wrong_token(): def test_feedback_storing_cluster(): """Test if the feedbacks are stored properly.""" feedbacks_path = test_api.OLS_USER_DATA_PATH + "/feedback" - pod_name = cluster_utils.get_pod_by_prefix()[0] + pod_name = cluster_utils.get_pod_by_prefix(OLS_SERVICE_DEPLOYMENT)[0] # there are multiple tests running agains cluster, so transcripts # can be already present - we need to ensure the storage is empty diff --git a/tests/e2e/utils/adapt_ols_config.py b/tests/e2e/utils/adapt_ols_config.py index 548899c47..8a943030c 100644 --- a/tests/e2e/utils/adapt_ols_config.py +++ b/tests/e2e/utils/adapt_ols_config.py @@ -3,16 +3,16 @@ Handles multi-provider test scenarios dynamically. """ -import json import os import time import yaml from ols.constants import DEFAULT_CONFIGURATION_FILE +from tests.e2e.utils import client as client_utils from tests.e2e.utils import cluster as cluster_utils +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT from tests.e2e.utils.data_collector_control import configure_exporter_for_e2e_tests -from tests.e2e.utils.retry import retry_until_timeout_or_success from tests.e2e.utils.ols_installer import ( create_secrets, get_service_account_tokens, @@ -22,7 +22,7 @@ update_lcore_setting, update_ols_config, ) -from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT +from tests.e2e.utils.retry import retry_until_timeout_or_success def apply_olsconfig(provider_list: list[str]) -> None: @@ -115,7 +115,7 @@ def wait_for_deployment() -> None: ) print("Waiting for pods to be ready...") - cluster_utils.wait_for_running_pod() + cluster_utils.wait_for_running_pod(name=OLS_SERVICE_DEPLOYMENT) def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 @@ -269,10 +269,19 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 except Exception as e: print(f"Warning: Could not ensure pod-reader role/binding: {e}") + # Fetch tokens for service 
accounts + token, metrics_token = get_service_account_tokens() + + # Set up route and get URL + ols_url = setup_route() + # Configure exporter for e2e tests with proper settings try: print("Configuring exporter for e2e tests...") + # Create client for the exporter configuration + test_client = client_utils.get_http_client(ols_url, token) configure_exporter_for_e2e_tests( + client=test_client, interval_seconds=3600, # 1 hour to prevent interference ingress_env="stage", log_level="DEBUG", @@ -283,12 +292,6 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 print(f"Warning: Could not configure exporter: {e}") print("Tests may experience interference from data collector") - # Fetch tokens for service accounts - token, metrics_token = get_service_account_tokens() - - # Set up route and get URL - ols_url = setup_route() - print("OLS configuration and access setup completed successfully.") return ols_url, token, metrics_token diff --git a/tests/e2e/utils/cluster.py b/tests/e2e/utils/cluster.py index d55432b90..2785d18f4 100644 --- a/tests/e2e/utils/cluster.py +++ b/tests/e2e/utils/cluster.py @@ -181,7 +181,7 @@ def get_running_pods(namespace: str = "openshift-lightspeed") -> list[str]: def get_pod_by_prefix( - prefix: str = "lightspeed-stack-deployment-", + prefix: str = "lightspeed-app-server-", namespace: str = "openshift-lightspeed", fail_not_found: bool = True, ) -> list[str]: @@ -333,7 +333,7 @@ def get_container_ready_status(pod: str, namespace: str = "openshift-lightspeed" def wait_for_running_pod( - name: str = "lightspeed-stack-deployment-", namespace: str = "openshift-lightspeed" + name: str = "lightspeed-app-server-", namespace: str = "openshift-lightspeed" ): """Wait for the selected pod to be in running state.""" r = retry_until_timeout_or_success( @@ -362,10 +362,10 @@ def wait_for_running_pod( get_pod_by_prefix(prefix=name, namespace=namespace, fail_not_found=False) ) == 1, - "Waiting for service pod in running state", + f"Waiting 
for {name} pod in running state", ) if not r: - raise Exception("Timed out waiting for new OLS pod to be ready") + raise Exception(f"Timed out waiting for {name} pod to be ready") def pod_has_containers_ready(): pods = get_pod_by_prefix(prefix=name, namespace=namespace, fail_not_found=False) @@ -401,7 +401,7 @@ def get_certificate_secret_name( - name: str = "lightspeed-stack-deployment", namespace: str = "openshift-lightspeed" + name: str = "lightspeed-app-server", namespace: str = "openshift-lightspeed" ) -> str: """Get the name of the certificates secret for the service.""" try: diff --git a/tests/e2e/utils/constants.py b/tests/e2e/utils/constants.py index 7af47e09b..fe246777e 100644 --- a/tests/e2e/utils/constants.py +++ b/tests/e2e/utils/constants.py @@ -1,4 +1,5 @@ """Constants for end-to-end tests.""" + import os # timeout settings @@ -20,4 +21,8 @@ 5 # 5 seconds - used only in data collection test ) -OLS_SERVICE_DEPLOYMENT = "lightspeed-stack-deployment" if os.getenv("LCORE", 'False').lower() in ('true', '1', 't') else "lightspeed-app-server" +OLS_SERVICE_DEPLOYMENT = ( + "lightspeed-stack-deployment" + if os.getenv("LCORE", "False").lower() in ("true", "1", "t") + else "lightspeed-app-server" +) diff --git a/tests/e2e/utils/data_collector_control.py b/tests/e2e/utils/data_collector_control.py index 1c45b8650..dfcc6d808 100644 --- a/tests/e2e/utils/data_collector_control.py +++ b/tests/e2e/utils/data_collector_control.py @@ -11,7 +11,7 @@ import yaml from tests.e2e.utils import cluster as cluster_utils -from tests.e2e.utils.constants import OLS_USER_DATA_PATH +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT, OLS_USER_DATA_PATH from tests.e2e.utils.wait_for_ols import wait_for_ols # Exporter config map constants @@ -168,7 +168,7 @@ def set_exporter_collection_interval(self, interval_seconds: int) -> None: cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-stack-deployment", +
f"deployment/{OLS_SERVICE_DEPLOYMENT}", "-n", EXPORTER_NAMESPACE, "--replicas=0", @@ -203,13 +203,14 @@ def set_exporter_collection_interval(self, interval_seconds: int) -> None: time.sleep(5) def restart_exporter_container( - self, container_name: str = "lightspeed-to-dataverse-exporter" + self, client, container_name: str = "lightspeed-to-dataverse-exporter" ) -> None: """Restart the exporter by scaling deployment back up. The deployment controller will create a new pod with the updated config. Args: + client: httpx Client instance for making API calls. container_name: Name of the exporter container (for verification). """ try: @@ -217,7 +218,7 @@ def restart_exporter_container( cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-stack-deployment", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "-n", EXPORTER_NAMESPACE, "--replicas=1", @@ -249,7 +250,9 @@ def restart_exporter_container( # Wait for OLS API to be ready (not just pod running) print("Waiting for OLS API to be ready...") ols_url = cluster_utils.get_ols_url("ols") - if not wait_for_ols(ols_url, timeout=120, interval=5): + if not wait_for_ols( + ols_url, client=client, timeout=120, interval=5 + ): print("Warning: OLS readiness check timed out") else: print("OLS API is ready") @@ -310,6 +313,7 @@ def _verify_config_applied( def configure_exporter_for_e2e_tests( + client, interval_seconds: int = 3600, ingress_env: str = "stage", cp_offline_token: str | None = None, @@ -319,6 +323,7 @@ def configure_exporter_for_e2e_tests( """Configure exporter for e2e tests with proper settings. Args: + client: httpx Client instance for making API calls. interval_seconds: Collection interval (default: 3600 = 1 hour). ingress_env: Ingress environment - "stage" or "prod" (default: "stage"). cp_offline_token: Auth token for ingress server (required for stage). 
@@ -344,7 +349,7 @@ def configure_exporter_for_e2e_tests( ingress_server_auth_token=cp_offline_token or None, log_level=log_level, ) - controller.restart_exporter_container() + controller.restart_exporter_container(client) def patch_exporter_mode_to_manual() -> None: @@ -379,7 +384,7 @@ def patch_exporter_mode_to_manual() -> None: cluster_utils.run_oc( [ "patch", - "deployment/lightspeed-stack-deployment", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "-n", EXPORTER_NAMESPACE, "--type=json", @@ -391,6 +396,7 @@ def patch_exporter_mode_to_manual() -> None: def prepare_for_data_collection_test( + client, short_interval_seconds: int = 5, ) -> DataCollectorControl: """Prepare the environment for testing data collection. @@ -401,6 +407,7 @@ def prepare_for_data_collection_test( - No cleanup needed (operator will reconcile when it runs next) Args: + client: httpx Client instance for making API calls. short_interval_seconds: Collection interval for testing (default: 5s). Returns: @@ -429,7 +436,7 @@ def prepare_for_data_collection_test( cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-stack-deployment", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "-n", EXPORTER_NAMESPACE, "--replicas=0", @@ -468,7 +475,7 @@ def prepare_for_data_collection_test( patch_exporter_mode_to_manual() # Scale up and wait for pod - controller.restart_exporter_container() + controller.restart_exporter_container(client) # Wait for first collection cycle wait_time = short_interval_seconds + 3 diff --git a/tests/e2e/utils/metrics.py b/tests/e2e/utils/metrics.py index 0fdd97712..fb2968343 100644 --- a/tests/e2e/utils/metrics.py +++ b/tests/e2e/utils/metrics.py @@ -1,7 +1,6 @@ """Utilities for reading and checking metrics from REST API.""" import requests -import os from tests.e2e.utils.constants import BASIC_ENDPOINTS_TIMEOUT @@ -23,7 +22,7 @@ def get_rest_api_counter_value( ): """Retrieve counter value from metrics.""" response = read_metrics(client) - counter_name = "ls_rest_api_calls_total" + 
counter_name = "ols_rest_api_calls_total" # counters with labels have the following format: # rest_api_calls_total{path="/openapi.json",status_code="200"} 1.0 @@ -35,7 +34,7 @@ def get_rest_api_counter_value( def get_response_duration_seconds_value(client, path, default=None): """Retrieve counter value from metrics.""" response = read_metrics(client) - counter_name = "ls_response_duration_seconds_sum" + counter_name = "ols_response_duration_seconds_sum" # counters with response durations have the following format: # response_duration_seconds_sum{path="/v1/query"} 0.123 @@ -95,16 +94,9 @@ def get_metric_labels(lines, info_node_name, value=None) -> dict: def get_enabled_model_and_provider(client): """Read configured model and provider from metrics.""" - - if os.getenv("LCORE", 'False').lower() in ('true', '1', 't'): - response = client.get("/v1/models", timeout=BASIC_ENDPOINTS_TIMEOUT) - assert response.status_code == requests.codes.ok - assert response.text is not None - - else: - response = read_metrics(client) - lines = [line.strip() for line in response.split("\n")] - labels = get_metric_labels(lines, "ls_provider_model_configuration", "1.0") + response = read_metrics(client) + lines = [line.strip() for line in response.split("\n")] + labels = get_metric_labels(lines, "ols_provider_model_configuration", "1.0") return labels["model"], labels["provider"] @@ -112,7 +104,7 @@ def get_enabled_model_and_provider(client): def get_enable_status_for_all_models(client): """Read states about all model and providers.""" response = read_metrics(client) - counters = get_all_metric_counters(response, "ls_provider_model_configuration") + counters = get_all_metric_counters(response, "ols_provider_model_configuration") return [counter == 1.0 for counter in counters] @@ -244,14 +236,14 @@ def __enter__(self): return self.old_counter_token_sent_total = get_model_provider_counter_value( self.client, - "ls_llm_token_sent_total", + "ols_llm_token_sent_total", self.model, self.provider, 
default=0, ) self.old_counter_token_received_total = get_model_provider_counter_value( self.client, - "ls_llm_token_received_total", + "ols_llm_token_received_total", self.model, self.provider, default=0, @@ -263,7 +255,7 @@ def __exit__(self, exc_type, exc_value, exc_tb): return # check if counter for sent tokens has been updated new_counter_token_sent_total = get_model_provider_counter_value( - self.client, "ls_llm_token_sent_total", self.model, self.provider + self.client, "ols_llm_token_sent_total", self.model, self.provider ) check_token_counter_increases( "sent", @@ -275,7 +267,7 @@ def __exit__(self, exc_type, exc_value, exc_tb): # check if counter for received tokens has been updated new_counter_token_received_total = get_model_provider_counter_value( self.client, - "ls_llm_token_received_total", + "ols_llm_token_received_total", self.model, self.provider, default=0, diff --git a/tests/e2e/utils/ols_installer.py b/tests/e2e/utils/ols_installer.py index e29624fba..7896ad13d 100644 --- a/tests/e2e/utils/ols_installer.py +++ b/tests/e2e/utils/ols_installer.py @@ -7,16 +7,18 @@ import yaml from ols.constants import DEFAULT_CONFIGURATION_FILE +from tests.e2e.utils import client as client_utils from tests.e2e.utils import cluster as cluster_utils +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT from tests.e2e.utils.data_collector_control import configure_exporter_for_e2e_tests from tests.e2e.utils.retry import retry_until_timeout_or_success -from tests.e2e.utils.wait_for_ols import wait_for_ols OC_COMMAND_RETRY_COUNT = 120 OC_COMMAND_RETRY_DELAY = 5 disconnected = os.getenv("DISCONNECTED", "") + def setup_service_accounts(namespace: str) -> None: """Set up service accounts and access roles. 
@@ -88,6 +90,7 @@ def get_service_account_tokens() -> tuple[str, str]: metrics_token = cluster_utils.get_token_for("metrics-test-user") return token, metrics_token + def update_lcore_setting() -> None: """Update the --use-lcore argument in the CSV if LCORE is enabled. @@ -169,11 +172,11 @@ def update_lcore_setting() -> None: ] ) cluster_utils.wait_for_running_pod( - name="lightspeed-operator-controller-manager", - namespace="openshift-lightspeed" + name="lightspeed-operator-controller-manager", namespace="openshift-lightspeed" ) print("--use-lcore updated to true successfully") + def update_ols_config() -> None: """Create the ols config configmap with log and collector config for e2e tests. @@ -219,7 +222,6 @@ def update_ols_config() -> None: configmap["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(olsconfig) updated_configmap = yaml.dump(configmap) - cluster_utils.run_oc(["delete", "configmap", "olsconfig"]) cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) @@ -273,7 +275,7 @@ def replace_ols_image(ols_image: str) -> None: cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-stack-deployment", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "0", ] @@ -290,7 +292,7 @@ def replace_ols_image(ols_image: str) -> None: # update the OLS deployment to use the new image from CI/OLS_IMAGE env var patch = f"""[{{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value":"{ols_image}"}}]""" # noqa: E501 cluster_utils.run_oc( - ["patch", "deployment/lightspeed-stack-deployment", "--type", "json", "-p", patch] + ["patch", f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--type", "json", "-p", patch] ) @@ -435,7 +437,7 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no print(msg) raise Exception(msg) print("Operator installed successfully") - + provider = os.getenv("PROVIDER", "openai") creds = os.getenv("PROVIDER_KEY_PATH", "empty") update_lcore_setting() @@ -513,13 +515,13 @@ def install_ols() -> 
tuple[str, str, str]: # pylint: disable=R0915, R0912 # no [ "get", "deployment", - "lightspeed-stack-deployment", + f"{OLS_SERVICE_DEPLOYMENT}", "--ignore-not-found", "-o", "name", ] ).stdout - == "deployment.apps/lightspeed-stack-deployment\n", + == f"deployment.apps/{OLS_SERVICE_DEPLOYMENT}\n", "Waiting for OLS API server deployment to be created", ) if not r: @@ -558,7 +560,7 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-stack-deployment", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "0", ] @@ -568,14 +570,14 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no cluster_utils.run_oc( [ "scale", - "deployment/lightspeed-stack-deployment", + f"deployment/{OLS_SERVICE_DEPLOYMENT}", "--replicas", "1", ] ) print("Deployment updated, waiting for new pod to be ready") # Wait for the pod to start being created and then wait for it to start running. - cluster_utils.wait_for_running_pod() + cluster_utils.wait_for_running_pod(name=OLS_SERVICE_DEPLOYMENT) print("-" * 50) print("OLS pod seems to be ready") @@ -595,15 +597,22 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no # expect it to be (must-gather will also collect this) print( cluster_utils.run_oc( - ["get", "deployment", "lightspeed-stack-deployment", "-o", "yaml"] + ["get", "deployment", OLS_SERVICE_DEPLOYMENT, "-o", "yaml"] ).stdout ) print("-" * 50) + + # Set up route and get URL first + ols_url = setup_route() + if not disconnected: # Configure exporter for e2e tests with proper settings try: print("Configuring exporter for e2e tests...") + # Create client for the exporter configuration + test_client = client_utils.get_http_client(ols_url, token) configure_exporter_for_e2e_tests( + client=test_client, interval_seconds=3600, # 1 hour to prevent interference ingress_env="stage", log_level="debug", @@ -614,6 +623,4 @@ def install_ols() -> tuple[str, 
str, str]: # pylint: disable=R0915, R0912 # no print(f"Warning: Could not configure exporter: {e}") print("Tests may experience interference from data collector") - # Set up route and get URL - ols_url = setup_route() return ols_url, token, metrics_token diff --git a/tests/e2e/utils/wait_for_ols.py b/tests/e2e/utils/wait_for_ols.py index 90ad4a752..9518ea6c0 100644 --- a/tests/e2e/utils/wait_for_ols.py +++ b/tests/e2e/utils/wait_for_ols.py @@ -9,6 +9,7 @@ import requests from requests.exceptions import SSLError from urllib3.exceptions import InsecureRequestWarning + from tests.e2e.utils.constants import ( BASIC_ENDPOINTS_TIMEOUT, ) @@ -16,12 +17,12 @@ warnings.filterwarnings("ignore", category=InsecureRequestWarning) -# ruff: noqa: S501 def wait_for_ols(url, client, timeout=300, interval=10): """Wait for the OLS to become ready by checking its readiness endpoint. Args: url (str): The base URL of the OLS service. + client (Client): httpx client with configured headers timeout (int, optional): The maximum time to wait in seconds. Default is 600. interval (int, optional): The interval between readiness checks in seconds. Default is 10. 
@@ -33,20 +34,14 @@ def wait_for_ols(url, client, timeout=300, interval=10): for attempt in range(1, attempts + 1): print(f"Checking OLS readiness, attempt {attempt} of {attempts}") try: - response = client.get( - "/readiness", - timeout=BASIC_ENDPOINTS_TIMEOUT - ) + response = client.get("/readiness", timeout=BASIC_ENDPOINTS_TIMEOUT) if response.status_code == requests.codes.ok: print("OLS is ready") return True except SSLError: print("SSL error detected, retrying without SSL verification") try: - response = client.get( - "/readiness", - timeout=BASIC_ENDPOINTS_TIMEOUT - ) + response = client.get("/readiness", timeout=BASIC_ENDPOINTS_TIMEOUT) if response.status_code == requests.codes.ok: print("OLS is ready") return True From c68052aabaaee2de0cb6d5877c3770973c34d79c Mon Sep 17 00:00:00 2001 From: Joao Fula Date: Tue, 3 Feb 2026 08:36:45 +0100 Subject: [PATCH 4/6] Deactivating operator pod for data collection test Deactivating operator pod for data collection test --- tests/e2e/test_api.py | 55 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index eda76b0da..2c102d243 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -31,6 +31,7 @@ ) from tests.e2e.utils.data_collector_control import prepare_for_data_collection_test from tests.e2e.utils.decorators import retry +from tests.e2e.utils.ols_installer import update_ols_config from tests.e2e.utils.postgres import ( read_conversation_history, read_conversation_history_count, @@ -335,6 +336,33 @@ def test_conversation_in_postgres_cache(postgres_connection) -> None: assert "OpenShift" in deserialized[3].content +@pytest.fixture +def turn_off_operator_pod(): + """Turn off operator pod fixture. + + Turn off operator pod to modify lightspeed-stack + without waiting for lightspeed service pod to restart. 
+ """ + cluster_utils.run_oc( + [ + "scale", + "deployment/lightspeed-operator-controller-manager", + "--replicas", + "0", + ] + ) + yield + cluster_utils.run_oc( + [ + "scale", + "deployment/lightspeed-operator-controller-manager", + "--replicas", + "1", + ] + ) + + +@pytest.mark.usefixtures("turn_off_operator_pod") @pytest.mark.data_export def test_user_data_collection(): """Test user data collection and upload to ingress. @@ -342,6 +370,7 @@ def test_user_data_collection(): This test runs in isolation with the 'data_export' marker. It patches the exporter to use manual mode so it uses the ConfigMap token. """ + update_ols_config() def filter_logs(logs: str, last_log_line: str) -> str: filtered_logs = [] @@ -558,32 +587,6 @@ def update_olsconfig(limiters: list[dict]): cluster_utils.run_oc(["apply", "-f", "-"], command=updated_configmap) -@pytest.fixture -def turn_off_operator_pod(): - """Turn off operator pod fixture. - - Turn off operator pod to modify lightspeed-stack - without waiting for lightspeed service pod to restart. 
- """ - cluster_utils.run_oc( - [ - "scale", - "deployment/lightspeed-operator-controller-manager", - "--replicas", - "0", - ] - ) - yield - cluster_utils.run_oc( - [ - "scale", - "deployment/lightspeed-operator-controller-manager", - "--replicas", - "1", - ] - ) - - @pytest.mark.usefixtures("turn_off_operator_pod") @pytest.mark.quota_limits def test_quota_limits(): From bfe02727f81300e6f4737916e8c0c13113e3c9d2 Mon Sep 17 00:00:00 2001 From: Joao Fula Date: Tue, 24 Feb 2026 08:51:37 +0100 Subject: [PATCH 5/6] fixing bug where we waited for two containers for operator pod when tool calling enabled --- tests/e2e/utils/cluster.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/e2e/utils/cluster.py b/tests/e2e/utils/cluster.py index 2785d18f4..c1da23045 100644 --- a/tests/e2e/utils/cluster.py +++ b/tests/e2e/utils/cluster.py @@ -384,8 +384,9 @@ def pod_has_containers_ready(): ols_config_suffix = os.getenv("OLS_CONFIG_SUFFIX", "default") tool_calling_enabled = "tool_calling" in ols_config_suffix - if tool_calling_enabled: - return ready_containers >= 2 + if name == "lightspeed-app-server-": + if tool_calling_enabled: + return ready_containers >= 2 return ready_containers >= 1 # wait for the containers in the server pod to become ready From 27be98bd85efbc2597add55a7a08f3e428f79302 Mon Sep 17 00:00:00 2001 From: Joao Fula Date: Mon, 2 Mar 2026 16:53:29 +0100 Subject: [PATCH 6/6] Adding changes based on comments --- tests/e2e/utils/adapt_ols_config.py | 9 +++++---- tests/e2e/utils/cluster.py | 2 +- tests/e2e/utils/constants.py | 2 ++ tests/e2e/utils/ols_installer.py | 9 +++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/e2e/utils/adapt_ols_config.py b/tests/e2e/utils/adapt_ols_config.py index 8a943030c..39d7fbe95 100644 --- a/tests/e2e/utils/adapt_ols_config.py +++ b/tests/e2e/utils/adapt_ols_config.py @@ -11,7 +11,7 @@ from ols.constants import DEFAULT_CONFIGURATION_FILE from tests.e2e.utils import client as 
client_utils from tests.e2e.utils import cluster as cluster_utils -from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT, LCORE_ENABLED from tests.e2e.utils.data_collector_control import configure_exporter_for_e2e_tests from tests.e2e.utils.ols_installer import ( create_secrets, @@ -138,7 +138,8 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 ) # Update lcore setting if LCORE is enabled - update_lcore_setting() + if LCORE_ENABLED: + update_lcore_setting() # Scaling operator to 1 replica to allow finalizer to run for olsconfig cluster_utils.run_oc( [ @@ -189,7 +190,7 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 "scale", "deployment/lightspeed-operator-controller-manager", "--replicas", - "1", + "0", ] ) @@ -219,7 +220,7 @@ def adapt_ols_config() -> tuple[str, str, str]: # pylint: disable=R0915 # Update configmap with e2e-specific settings - FAIL FAST if this breaks print("Updating configmap with e2e test settings...") - if OLS_SERVICE_DEPLOYMENT == "lightspeed-app-server": + if not LCORE_ENABLED: update_ols_configmap() print(" Configmap updated successfully") # Apply test image diff --git a/tests/e2e/utils/cluster.py b/tests/e2e/utils/cluster.py index c1da23045..7c417ee78 100644 --- a/tests/e2e/utils/cluster.py +++ b/tests/e2e/utils/cluster.py @@ -365,7 +365,7 @@ def wait_for_running_pod( f"Waiting for {name} pod in running state", ) if not r: - raise Exception("Timed out waiting for {name} pod to be ready") + raise Exception(f"Timed out waiting for {name} pod to be ready") def pod_has_containers_ready(): pods = get_pod_by_prefix(prefix=name, namespace=namespace, fail_not_found=False) diff --git a/tests/e2e/utils/constants.py b/tests/e2e/utils/constants.py index fe246777e..6973d1bf6 100644 --- a/tests/e2e/utils/constants.py +++ b/tests/e2e/utils/constants.py @@ -21,6 +21,8 @@ 5 # 5 seconds - used only in data collection test ) +LCORE_ENABLED 
= True if os.getenv("LCORE", "False").lower() in ("true", "1", "t") else False + OLS_SERVICE_DEPLOYMENT = ( "lightspeed-stack-deployment" if os.getenv("LCORE", "False").lower() in ("true", "1", "t") diff --git a/tests/e2e/utils/ols_installer.py b/tests/e2e/utils/ols_installer.py index 7896ad13d..29fc0b6ec 100644 --- a/tests/e2e/utils/ols_installer.py +++ b/tests/e2e/utils/ols_installer.py @@ -9,7 +9,7 @@ from ols.constants import DEFAULT_CONFIGURATION_FILE from tests.e2e.utils import client as client_utils from tests.e2e.utils import cluster as cluster_utils -from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT +from tests.e2e.utils.constants import OLS_SERVICE_DEPLOYMENT, LCORE_ENABLED from tests.e2e.utils.data_collector_control import configure_exporter_for_e2e_tests from tests.e2e.utils.retry import retry_until_timeout_or_success @@ -97,10 +97,6 @@ def update_lcore_setting() -> None: Checks if LCORE environment variable is enabled and ensures the --use-lcore argument in the ClusterServiceVersion is set to true. """ - if os.getenv("LCORE", "False").lower() not in ("true", "1", "t"): - print("LCORE not enabled, skipping CSV update") - return - print("LCORE enabled, checking CSV configuration...") namespace = "openshift-lightspeed" @@ -440,7 +436,8 @@ def install_ols() -> tuple[str, str, str]: # pylint: disable=R0915, R0912 # no provider = os.getenv("PROVIDER", "openai") creds = os.getenv("PROVIDER_KEY_PATH", "empty") - update_lcore_setting() + if LCORE_ENABLED: + update_lcore_setting() # create the llm api key secret ols will mount provider_list = provider.split() creds_list = creds.split()