From 97ffeb385243a086d197d046d9a724e93e03bab0 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Wed, 14 Jan 2026 13:37:28 +0530 Subject: [PATCH 01/10] RWENGG-1350: initial writeup of the applog health codebundle --- .../generation-rules/k8s-applog-health.yaml | 21 ++ .../templates/k8s-applog-health-sli.yaml | 59 ++++ .../templates/k8s-applog-health-slx.yaml | 25 ++ .../templates/k8s-applog-health-taskset.yaml | 44 +++ codebundles/k8s-applog-health/README.md | 0 codebundles/k8s-applog-health/runbook.robot | 308 ++++++++++++++++++ .../k8s-applog-health/runbook_patterns.json | 153 +++++++++ codebundles/k8s-applog-health/sli.robot | 296 +++++++++++++++++ .../sli_critical_patterns.json | 74 +++++ 9 files changed, 980 insertions(+) create mode 100644 codebundles/k8s-applog-health/.runwhen/generation-rules/k8s-applog-health.yaml create mode 100755 codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml create mode 100644 codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml create mode 100644 codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml create mode 100644 codebundles/k8s-applog-health/README.md create mode 100755 codebundles/k8s-applog-health/runbook.robot create mode 100644 codebundles/k8s-applog-health/runbook_patterns.json create mode 100755 codebundles/k8s-applog-health/sli.robot create mode 100755 codebundles/k8s-applog-health/sli_critical_patterns.json diff --git a/codebundles/k8s-applog-health/.runwhen/generation-rules/k8s-applog-health.yaml b/codebundles/k8s-applog-health/.runwhen/generation-rules/k8s-applog-health.yaml new file mode 100644 index 000000000..250026612 --- /dev/null +++ b/codebundles/k8s-applog-health/.runwhen/generation-rules/k8s-applog-health.yaml @@ -0,0 +1,21 @@ +apiVersion: runwhen.com/v1 +kind: GenerationRules +spec: + generationRules: + - resourceTypes: + - deployment + matchRules: + - type: pattern + pattern: ".+" + properties: [name] + mode: substring + slxs: 
+ - baseName: applog-health + levelOfDetail: detailed + qualifiers: ["resource", "namespace", "cluster"] + baseTemplateName: k8s-applog-health + outputItems: + - type: slx + - type: sli + - type: runbook + templateName: k8s-applog-health-taskset.yaml diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml new file mode 100755 index 000000000..8576f2825 --- /dev/null +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml @@ -0,0 +1,59 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelIndicator +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} + runwhen.com/sli: "true" +spec: + displayUnitsLong: OK + displayUnitsShort: ok + locations: + - {{ default_location }} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/k8s-applog-health/sli.robot + intervalStrategy: intermezzo + intervalSeconds: 600 + description: Measures the health of the application logs for the {{match_resource.resource.metadata.name}} deployment. 
+ configProvided: + - name: NAMESPACE + value: {{match_resource.resource.metadata.namespace}} + - name: CONTEXT + value: {{context}} + - name: KUBERNETES_DISTRIBUTION_BINARY + value: {{custom.kubernetes_distribution_binary | default("kubectl")}} + - name: DEPLOYMENT_NAME + value: {{match_resource.resource.metadata.name}} + - name: CONTAINER_RESTART_AGE + value: "10m" + - name: CONTAINER_RESTART_THRESHOLD + value: "2" + - name: EVENT_AGE + value: "10m" + - name: EVENT_THRESHOLD + value: "2" + - name: CHECK_SERVICE_ENDPOINTS + value: "true" + secretsProvided: + {% if wb_version %} + {% include "kubernetes-auth.yaml" ignore missing %} + {% else %} + - name: kubeconfig + workspaceKey: {{custom.kubeconfig_secret_name}} + {% endif %} + alertConfig: + tasks: + persona: eager-edgar + sessionTTL: 10m \ No newline at end of file diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml new file mode 100644 index 000000000..7ad748284 --- /dev/null +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml @@ -0,0 +1,25 @@ +apiVersion: runwhen.com/v1 +kind: ServiceLevelX +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/kubernetes/resources/labeled/deploy.svg + alias: {{match_resource.resource.metadata.name}} Application Log Health + asMeasuredBy: The presence of application-level errors/issues/stacktraces in the application logs indicating runtime errors or exceptions in {{match_resource.resource.metadata.name}}. 
+ configProvided: + - name: OBJECT_NAME + value: {{match_resource.resource.metadata.name}} + owners: + - {{workspace.owner_email}} + statement: Application logs for {{match_resource.resource.metadata.name}} should be free of critical errors/issues/stacktraces indicating runtime errors or exceptions. + additionalContext: + {% include "kubernetes-hierarchy.yaml" ignore missing %} + qualified_name: "{{ match_resource.qualified_name }}" + tags: + {% include "kubernetes-tags.yaml" ignore missing %} + - name: access + value: read-only \ No newline at end of file diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml new file mode 100644 index 000000000..7f1d9b79e --- /dev/null +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml @@ -0,0 +1,44 @@ +apiVersion: runwhen.com/v1 +kind: Runbook +metadata: + name: {{slx_name}} + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + location: {{default_location}} + codeBundle: + {% if repo_url %} + repoUrl: {{repo_url}} + {% else %} + repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git + {% endif %} + {% if ref %} + ref: {{ref}} + {% else %} + ref: main + {% endif %} + pathToRobot: codebundles/k8s-applog-health/runbook.robot + configProvided: + - name: NAMESPACE + value: {{match_resource.resource.metadata.namespace}} + - name: CONTEXT + value: {{context}} + - name: KUBERNETES_DISTRIBUTION_BINARY + value: {{custom.kubernetes_distribution_binary}} + - name: DEPLOYMENT_NAME + value: {{match_resource.resource.metadata.name}} + - name: CONTAINER_RESTART_AGE + value: "30m" + - name: CONTAINER_RESTART_THRESHOLD + value: "4" + - name: LOG_AGE + value: "10m" + secretsProvided: + {% if wb_version %} + {% include "kubernetes-auth.yaml" ignore missing %} + {% else %} + - name: kubeconfig + workspaceKey: 
{{custom.kubeconfig_secret_name}} + {% endif %} \ No newline at end of file diff --git a/codebundles/k8s-applog-health/README.md b/codebundles/k8s-applog-health/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot new file mode 100755 index 000000000..88a57a05e --- /dev/null +++ b/codebundles/k8s-applog-health/runbook.robot @@ -0,0 +1,308 @@ +*** Settings *** +Documentation Triages issues related to a deployment and its replicas. +Metadata Author stewartshea +Metadata Display Name Kubernetes Deployment Triage +Metadata Supports Kubernetes,AKS,EKS,GKE,OpenShift + +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform +Library RW.NextSteps +Library RW.K8sHelper +Library RW.K8sLog + +Library OperatingSystem +Library String +Library Collections +Library DateTime + +Suite Setup Suite Initialization + + +*** Keywords *** +Suite Initialization + ${kubeconfig}= RW.Core.Import Secret + ... kubeconfig + ... type=string + ... description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s). + ... pattern=\w* + ... example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ + ${KUBERNETES_DISTRIBUTION_BINARY}= RW.Core.Import User Variable KUBERNETES_DISTRIBUTION_BINARY + ... type=string + ... description=Which binary to use for Kubernetes CLI commands. + ... pattern=\w* + ... enum=[kubectl,oc] + ... example=kubectl + ... default=kubectl + ${CONTEXT}= RW.Core.Import User Variable CONTEXT + ... type=string + ... description=Which Kubernetes context to operate within. + ... pattern=\w* + ... example=my-main-cluster + ${NAMESPACE}= RW.Core.Import User Variable NAMESPACE + ... type=string + ... description=The name of the Kubernetes namespace to scope actions and searching to. + ... pattern=\w* + ... 
example=otel-demo + ${DEPLOYMENT_NAME}= RW.Core.Import User Variable DEPLOYMENT_NAME + ... type=string + ... description=The name of the deployment to triage. + ... pattern=\w* + ... example=otel-demo-frontend + ${LOG_LINES}= RW.Core.Import User Variable LOG_LINES + ... type=string + ... description=The number of log lines to fetch from the pods when inspecting logs. + ... pattern=\d+ + ... example=100 + ... default=100 + ${LOG_AGE}= RW.Core.Import User Variable LOG_AGE + ... type=string + ... description=The age of logs to fetch from pods, used for log analysis tasks. + ... pattern=\w* + ... example=10m + ... default=10m + + ${LOG_ANALYSIS_DEPTH}= RW.Core.Import User Variable LOG_ANALYSIS_DEPTH + ... type=string + ... description=The depth of log analysis to perform - basic, standard, or comprehensive. + ... pattern=\w* + ... enum=[basic,standard,comprehensive] + ... example=standard + ... default=standard + ${LOG_SEVERITY_THRESHOLD}= RW.Core.Import User Variable LOG_SEVERITY_THRESHOLD + ... type=string + ... description=The minimum severity level for creating issues (1=critical, 2=high, 3=medium, 4=low, 5=info). + ... pattern=\d+ + ... example=3 + ... default=3 + ${LOG_PATTERN_CATEGORIES_STR}= RW.Core.Import User Variable LOG_PATTERN_CATEGORIES + ... type=string + ... description=Comma-separated list of log pattern categories to scan for. + ... pattern=.* + ... example=GenericError,AppFailure,Connection + ... default=GenericError,AppFailure,Connection,Timeout,Auth,Exceptions,Resource,HealthyRecovery + ${ANOMALY_THRESHOLD}= RW.Core.Import User Variable ANOMALY_THRESHOLD + ... type=string + ... description=The threshold for detecting event anomalies based on events per minute. + ... pattern=\d+ + ... example=5 + ... default=5 + ${LOGS_ERROR_PATTERN}= RW.Core.Import User Variable LOGS_ERROR_PATTERN + ... type=string + ... description=The error pattern to use when grep-ing logs. + ... pattern=\w* + ... example=(Error: 13|Error: 14) + ... 
default=error|ERROR + ${LOGS_EXCLUDE_PATTERN}= RW.Core.Import User Variable LOGS_EXCLUDE_PATTERN + ... type=string + ... description=Pattern used to exclude entries from log analysis when searching for errors. Use regex patterns to filter out false positives like JSON structures. + ... pattern=.* + ... example="errors":\s*\[\]|"warnings":\s*\[\] + ... default="errors":\\s*\\[\\]|\\bINFO\\b|\\bDEBUG\\b|\\bTRACE\\b|\\bSTART\\s*-\\s*|\\bSTART\\s*method\\b + ${LOG_SCAN_TIMEOUT}= RW.Core.Import User Variable LOG_SCAN_TIMEOUT + ... type=string + ... description=Timeout in seconds for log scanning operations. Increase this value if log scanning times out on large log files. + ... pattern=\d+ + ... example=300 + ... default=300 + ${EXCLUDED_CONTAINER_NAMES}= RW.Core.Import User Variable EXCLUDED_CONTAINER_NAMES + ... type=string + ... description=Comma-separated list of container names to exclude from log analysis (e.g., linkerd-proxy, istio-proxy, vault-agent). + ... pattern=.* + ... example=linkerd-proxy,istio-proxy,vault-agent + ... default=linkerd-proxy,istio-proxy,vault-agent + + ${CONTAINER_RESTART_AGE}= RW.Core.Import User Variable CONTAINER_RESTART_AGE + ... type=string + ... description=The time window (in (h) hours or (m) minutes) to search for container restarts. Only containers that restarted within this time period will be reported. + ... pattern=\w* + ... example=10m + ... default=10m + ${CONTAINER_RESTART_THRESHOLD}= RW.Core.Import User Variable CONTAINER_RESTART_THRESHOLD + ... type=string + ... description=The minimum number of restarts required to trigger an issue. Containers with restart counts below this threshold will be ignored. + ... pattern=\d+ + ... example=1 + ... 
default=1 + # Convert comma-separated strings to lists + @{LOG_PATTERN_CATEGORIES}= Split String ${LOG_PATTERN_CATEGORIES_STR} , + @{EXCLUDED_CONTAINERS_RAW}= Run Keyword If "${EXCLUDED_CONTAINER_NAMES}" != "" Split String ${EXCLUDED_CONTAINER_NAMES} , ELSE Create List + @{EXCLUDED_CONTAINERS}= Create List + FOR ${container} IN @{EXCLUDED_CONTAINERS_RAW} + ${trimmed_container}= Strip String ${container} + Append To List ${EXCLUDED_CONTAINERS} ${trimmed_container} + END + + Set Suite Variable ${kubeconfig} ${kubeconfig} + Set Suite Variable ${KUBERNETES_DISTRIBUTION_BINARY} + Set Suite Variable ${CONTEXT} + Set Suite Variable ${NAMESPACE} + Set Suite Variable ${DEPLOYMENT_NAME} + Set Suite Variable ${LOG_LINES} + Set Suite Variable ${LOG_AGE} + + Set Suite Variable ${LOG_ANALYSIS_DEPTH} + Set Suite Variable ${LOG_SEVERITY_THRESHOLD} + Set Suite Variable ${LOG_PATTERN_CATEGORIES_STR} + Set Suite Variable @{LOG_PATTERN_CATEGORIES} + Set Suite Variable ${ANOMALY_THRESHOLD} + Set Suite Variable ${LOGS_ERROR_PATTERN} + Set Suite Variable ${LOGS_EXCLUDE_PATTERN} + Set Suite Variable ${LOG_SCAN_TIMEOUT} + Set Suite Variable ${EXCLUDED_CONTAINER_NAMES} + Set Suite Variable @{EXCLUDED_CONTAINERS} + + Set Suite Variable ${CONTAINER_RESTART_AGE} + Set Suite Variable ${CONTAINER_RESTART_THRESHOLD} + # Construct environment dictionary safely to handle special characters in regex patterns + &{env_dict}= Create Dictionary + ... KUBECONFIG=${kubeconfig.key} + ... KUBERNETES_DISTRIBUTION_BINARY=${KUBERNETES_DISTRIBUTION_BINARY} + ... CONTEXT=${CONTEXT} + ... NAMESPACE=${NAMESPACE} + ... LOGS_ERROR_PATTERN=${LOGS_ERROR_PATTERN} + ... LOGS_EXCLUDE_PATTERN=${LOGS_EXCLUDE_PATTERN} + ... ANOMALY_THRESHOLD=${ANOMALY_THRESHOLD} + ... DEPLOYMENT_NAME=${DEPLOYMENT_NAME} + ... CONTAINER_RESTART_AGE=${CONTAINER_RESTART_AGE} + ... CONTAINER_RESTART_THRESHOLD=${CONTAINER_RESTART_THRESHOLD} + ... 
LOG_SCAN_TIMEOUT=${LOG_SCAN_TIMEOUT} + Set Suite Variable ${env} ${env_dict} + + # Check if deployment is scaled to 0 and handle appropriately + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=30 + + TRY + ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json + ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) + + IF ${spec_replicas} == 0 + ${issue_timestamp}= DateTime.Get Current Date + RW.Core.Add Issue + ... severity=4 + ... expected=Deployment `${DEPLOYMENT_NAME}` operational status documented + ... actual=Deployment `${DEPLOYMENT_NAME}` is intentionally scaled to zero replicas + ... title=Deployment `${DEPLOYMENT_NAME}` is Scaled Down (Informational) + ... reproduce_hint=kubectl get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o yaml + ... details=Deployment `${DEPLOYMENT_NAME}` is currently scaled to 0 replicas (spec.replicas=0). This is an intentional configuration and not an error. All pod-related healthchecks have been skipped for efficiency. If the deployment should be running, scale it up using:\nkubectl scale deployment/${DEPLOYMENT_NAME} --replicas= --context ${CONTEXT} -n ${NAMESPACE} + ... next_steps=This is informational only. If the deployment should be running, scale it up. + ... 
observed_at=${issue_timestamp} + + RW.Core.Add Pre To Report **ā„¹ļø Deployment `${DEPLOYMENT_NAME}` is scaled to 0 replicas - Skipping pod-related checks**\n**Available Condition:** ${scale_status.get('available_condition', 'Unknown')} + + Set Suite Variable ${SKIP_POD_CHECKS} ${True} + ELSE + Set Suite Variable ${SKIP_POD_CHECKS} ${False} + END + + EXCEPT + Log Warning: Failed to check deployment scale, continuing with normal checks + Set Suite Variable ${SKIP_POD_CHECKS} ${False} + END + + +*** Tasks *** + +Analyze Application Log Patterns for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Fetches and analyzes logs from the deployment pods for errors, connection issues, and other patterns that indicate application health problems. Note: Warning messages about missing log files for excluded containers (like linkerd-proxy, istio-proxy) are expected and harmless. + [Tags] + ... logs + ... application + ... errors + ... stacktrace + ... patterns + ... health + ... deployment + ... access:read-only + # Skip pod-related checks if deployment is scaled to 0 + IF not ${SKIP_POD_CHECKS} + # Temporarily suppress log warnings for excluded containers (they're expected) + TRY + ${log_dir}= RW.K8sLog.Fetch Workload Logs + ... workload_type=deployment + ... workload_name=${DEPLOYMENT_NAME} + ... namespace=${NAMESPACE} + ... context=${CONTEXT} + ... kubeconfig=${kubeconfig} + ... log_age=${LOG_AGE} + ... excluded_containers=${EXCLUDED_CONTAINERS} + EXCEPT AS ${log_error} + # If log fetching fails completely, log the error but continue + Log Warning: Log fetching encountered an error: ${log_error} + # Set empty log directory to continue with other checks + ${log_dir}= Set Variable ${EMPTY} + END + + # Only scan logs if we have a valid log directory + IF '''${log_dir}''' != '''${EMPTY}''' + ${scan_results}= RW.K8sLog.Scan Logs For Issues + ... log_dir=${log_dir} + ... workload_type=deployment + ... workload_name=${DEPLOYMENT_NAME} + ... 
namespace=${NAMESPACE} + ... categories=@{LOG_PATTERN_CATEGORIES} + ... custom_patterns_file=runbook_patterns.json + ... excluded_containers=${EXCLUDED_CONTAINERS} + ELSE + # Create empty scan results if no logs were fetched + ${scan_results}= Evaluate {"issues": [], "summary": ["No logs available for analysis"]} + END + + # Post-process results to filter out patterns matching LOGS_EXCLUDE_PATTERN + TRY + IF $LOGS_EXCLUDE_PATTERN != "" + ${filtered_issues}= Evaluate [issue for issue in $scan_results.get('issues', []) if not __import__('re').search('${LOGS_EXCLUDE_PATTERN}', issue.get('details', ''), __import__('re').IGNORECASE)] modules=re + ${filtered_results}= Evaluate {**$scan_results, 'issues': $filtered_issues} + Set Test Variable ${scan_results} ${filtered_results} + END + EXCEPT + Log Warning: Failed to apply LOGS_EXCLUDE_PATTERN filter, using unfiltered results + END + + ${log_health_score}= RW.K8sLog.Calculate Log Health Score scan_results=${scan_results} + + # Process each issue found in the logs + ${issues}= Evaluate $scan_results.get('issues', []) + ${issues_count}= Get Length ${issues} + + # print the contents from log_dir into the report + RW.Core.Add Pre To Report **Log Contents:**\n${log_dir} + + IF ${issues_count} == 0 + # create a dummy issue with a keyword argument set to a value depicting no issues found + RW.Core.Add Pre To Report **No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}`** + + # create a dummy issue with a keyword argument set to a value depicting no issues found + RW.Core.Add Issue + ... severity=4 + ... expected=Application logs should be free of critical errors for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... actual=No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... title=No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... 
reproduce_hint=Check application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... details=No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... next_steps=No processing required + ... observed_at=${issue_timestamp} + ... next_action=noIssuesFound + ELSE + # set issue_timestamp to the observed_at value from the first issue + ${issue_timestamp}= Evaluate $issues[0].get('observed_at', '') + + # create a dummy issue with a keyword argument set to a value depicting issues found + RW.Core.Add Issue + ... severity=4 + ... expected=Application logs should be free of critical errors for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... actual=Issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... title=Issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... reproduce_hint=Check application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... details=Issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` + ... next_steps=Process the issues found in the application logs + ... observed_at=${issue_timestamp} + ... 
next_action=processApplogIssues + END + # RW.K8sLog.Cleanup Temp Files + END \ No newline at end of file diff --git a/codebundles/k8s-applog-health/runbook_patterns.json b/codebundles/k8s-applog-health/runbook_patterns.json new file mode 100644 index 000000000..12c9ac4d4 --- /dev/null +++ b/codebundles/k8s-applog-health/runbook_patterns.json @@ -0,0 +1,153 @@ +{ + "critical_patterns": { + "GenericError": { + "description": "Generic error patterns that indicate application failures", + "patterns": [ + "exception|Exception|EXCEPTION", + "fatal|Fatal|FATAL", + "panic|Panic|PANIC", + "crash|Crash|CRASH", + "failed|Failed|FAILED", + "failure|Failure|FAILURE" + ], + "severity": 1 + }, + "AppFailure": { + "description": "Application-specific failure patterns", + "patterns": [ + "application.*failed", + "service.*unavailable", + "connection.*refused", + "timeout.*error", + "out.*of.*memory", + "disk.*full", + "permission.*denied", + "authentication.*failed", + "authorization.*failed" + ], + "severity": 1 + }, + "StackTrace": { + "description": "Stack trace patterns indicating application crashes", + "patterns": [ + "stack.*trace", + "at\\s+\\w+\\.\\w+", + "Exception.*in thread", + "java\\.lang\\.", + "python.*traceback", + "goroutine.*panic", + "panic:", + "fatal.*error" + ], + "severity": 1 + }, + "Connection": { + "description": "Connection and network related issues", + "patterns": [ + "connection.*reset", + "connection.*timeout", + "network.*unreachable", + "host.*unreachable", + "connection.*dropped", + "socket.*error", + "dns.*resolution.*failed" + ], + "severity": 2 + }, + "Timeout": { + "description": "Timeout related issues", + "patterns": [ + "request.*timeout", + "operation.*timeout", + "deadline.*exceeded", + "context.*timeout", + "read.*timeout", + "write.*timeout" + ], + "severity": 2 + }, + "Auth": { + "description": "Authentication and authorization issues", + "patterns": [ + "unauthorized", + "authentication.*error", + "invalid.*credentials", + 
"access.*denied", + "forbidden", + "token.*expired", + "certificate.*error" + ], + "severity": 2 + }, + "Exceptions": { + "description": "Various exception patterns", + "patterns": [ + "NullPointerException", + "IndexOutOfBoundsException", + "IllegalArgumentException", + "RuntimeException", + "SQLException", + "IOException" + ], + "severity": 2 + }, + "Resource": { + "description": "Resource constraint issues", + "patterns": [ + "resource.*exhausted", + "memory.*leak", + "cpu.*throttled", + "disk.*space.*low", + "quota.*exceeded", + "rate.*limit.*exceeded" + ], + "severity": 2 + }, + "HealthyRecovery": { + "description": "Recovery and healthy state patterns (lower severity)", + "patterns": [ + "recovered.*from.*error", + "connection.*restored", + "service.*back.*online", + "retry.*successful", + "health.*check.*passed" + ], + "severity": 4 + } + }, + "exclude_patterns": [ + "\\bINFO\\b", + "\\bDEBUG\\b", + "\\bTRACE\\b", + "health.*check", + "heartbeat", + "metrics", + "monitoring", + "\\],INFO\\s*,", + "INFO\\s*,c\\.", + "START\\s*-\\s*.*Impl\\.", + "BusinessService.*\\(\\)", + "RestrictionsApiDelegateImpl", + "ReadBlockedResourceListBusinessServiceImpl", + "LocationReplicaStrategy", + "\\bSTART\\s*method\\b", + "Calling\\s*BusinessService", + "BusinessServiceImpl", + "ApiDelegateImpl", + "successful.*startup", + "application.*started", + "server.*started", + "listening.*on.*port", + "configuration.*loaded", + "database.*connection.*established", + "linkerd.*INFO.*Connection closed.*error=read header from client timeout", + "linkerd_app_core::serve.*Connection closed.*error=read header from client timeout", + "TelemetryPipeline.*In the last \\d+ minutes.*operation has failed.*Sending telemetry to the ingestion service", + "adjustmentReason=\\w*EXCEPTION\\w*" + ], + "config": { + "max_matches_per_pattern": 10, + "case_sensitive": false, + "timeout_seconds": 30 + } +} \ No newline at end of file diff --git a/codebundles/k8s-applog-health/sli.robot 
b/codebundles/k8s-applog-health/sli.robot new file mode 100755 index 000000000..496903cd8 --- /dev/null +++ b/codebundles/k8s-applog-health/sli.robot @@ -0,0 +1,296 @@ +*** Settings *** +Metadata Author stewartshea +Documentation This SLI uses kubectl to score deployment health. Produces a value between 0 (completely failing the test) and 1 (fully passing the test). Looks for container restarts, critical log errors, pods not ready, deployment status, and recent events. +Metadata Display Name Kubernetes Deployment Healthcheck +Metadata Supports Kubernetes,AKS,EKS,GKE,OpenShift +Suite Setup Suite Initialization +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform +Library RW.K8sLog + +Library OperatingSystem +Library String +Library Collections + +*** Keywords *** +Suite Initialization + ${kubeconfig}= RW.Core.Import Secret kubeconfig + ... type=string + ... description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s). + ... pattern=\w* + ... example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/ + ${NAMESPACE}= RW.Core.Import User Variable NAMESPACE + ... type=string + ... description=The name of the Kubernetes namespace to scope actions and searching to. + ... pattern=\w* + ... example=my-namespace + ${CONTEXT}= RW.Core.Import User Variable CONTEXT + ... type=string + ... description=Which Kubernetes context to operate within. + ... pattern=\w* + ... example=my-main-cluster + ${DEPLOYMENT_NAME}= RW.Core.Import User Variable DEPLOYMENT_NAME + ... type=string + ... description=The name of the Kubernetes deployment to check. + ... pattern=\w* + ... example=my-deployment + ${CONTAINER_RESTART_AGE}= RW.Core.Import User Variable CONTAINER_RESTART_AGE + ... type=string + ... description=The time window in minutes to search for container restarts. + ... pattern=((\d+?)m)? + ... example=10m + ... 
default=10m + ${CONTAINER_RESTART_THRESHOLD}= RW.Core.Import User Variable CONTAINER_RESTART_THRESHOLD + ... type=string + ... description=The maximum total container restarts to be still considered healthy. + ... pattern=^\d+$ + ... example=1 + ... default=1 + ${RW_LOOKBACK_WINDOW}= RW.Core.Import Platform Variable RW_LOOKBACK_WINDOW + ${RW_LOOKBACK_WINDOW}= RW.Core.Normalize Lookback Window ${RW_LOOKBACK_WINDOW} 2 + ${MAX_LOG_LINES}= RW.Core.Import User Variable MAX_LOG_LINES + ... type=string + ... description=Maximum number of log lines to fetch per container to prevent API overload. + ... pattern=^\d+$ + ... example=100 + ... default=100 + ${MAX_LOG_BYTES}= RW.Core.Import User Variable MAX_LOG_BYTES + ... type=string + ... description=Maximum log size in bytes to fetch per container to prevent API overload. + ... pattern=^\d+$ + ... example=256000 + ... default=256000 + ${EVENT_AGE}= RW.Core.Import User Variable EVENT_AGE + ... type=string + ... description=The time window to check for recent warning events. + ... pattern=((\d+?)m)? + ... example=10m + ... default=10m + ${EVENT_THRESHOLD}= RW.Core.Import User Variable EVENT_THRESHOLD + ... type=string + ... description=The maximum number of critical warning events allowed before scoring is reduced. + ... pattern=^\d+$ + ... example=2 + ... default=2 + ${CHECK_SERVICE_ENDPOINTS}= RW.Core.Import User Variable CHECK_SERVICE_ENDPOINTS + ... type=string + ... description=Whether to check service endpoint health. Set to 'false' if deployment doesn't have associated services. + ... enum=[true,false] + ... example=true + ... default=true + ${LOGS_EXCLUDE_PATTERN}= RW.Core.Import User Variable LOGS_EXCLUDE_PATTERN + ... type=string + ... description=Pattern used to exclude entries from log analysis when searching for errors. Use regex patterns to filter out false positives like JSON structures. + ... pattern=.* + ... example="errors":\s*\[\]|"warnings":\s*\[\] + ... 
default="errors":\s*\[\]|\\bINFO\\b|\\bDEBUG\\b|\\bTRACE\\b|\\bSTART\\s*-\\s*|\\bSTART\\s*method\\b + ${EXCLUDED_CONTAINER_NAMES}= RW.Core.Import User Variable EXCLUDED_CONTAINER_NAMES + ... type=string + ... description=Comma-separated list of container names to exclude from log analysis (e.g., linkerd-proxy, istio-proxy, vault-agent). + ... pattern=.* + ... example=linkerd-proxy,istio-proxy,vault-agent + ... default=linkerd-proxy,istio-proxy,vault-agent + + ${KUBERNETES_DISTRIBUTION_BINARY}= RW.Core.Import User Variable KUBERNETES_DISTRIBUTION_BINARY + ... type=string + ... description=Which binary to use for Kubernetes CLI commands. + ... enum=[kubectl,oc] + ... example=kubectl + ... default=kubectl + Set Suite Variable ${kubeconfig} ${kubeconfig} + Set Suite Variable ${KUBERNETES_DISTRIBUTION_BINARY} ${KUBERNETES_DISTRIBUTION_BINARY} + Set Suite Variable ${CONTAINER_RESTART_AGE} ${CONTAINER_RESTART_AGE} + Set Suite Variable ${CONTAINER_RESTART_THRESHOLD} ${CONTAINER_RESTART_THRESHOLD} + Set Suite Variable ${RW_LOOKBACK_WINDOW} ${RW_LOOKBACK_WINDOW} + Set Suite Variable ${MAX_LOG_LINES} ${MAX_LOG_LINES} + Set Suite Variable ${MAX_LOG_BYTES} ${MAX_LOG_BYTES} + Set Suite Variable ${EVENT_AGE} ${EVENT_AGE} + Set Suite Variable ${EVENT_THRESHOLD} ${EVENT_THRESHOLD} + Set Suite Variable ${CHECK_SERVICE_ENDPOINTS} ${CHECK_SERVICE_ENDPOINTS} + Set Suite Variable ${LOGS_EXCLUDE_PATTERN} ${LOGS_EXCLUDE_PATTERN} + Set Suite Variable ${EXCLUDED_CONTAINER_NAMES} ${EXCLUDED_CONTAINER_NAMES} + + # Convert comma-separated string to list + @{EXCLUDED_CONTAINERS_RAW}= Run Keyword If "${EXCLUDED_CONTAINER_NAMES}" != "" Split String ${EXCLUDED_CONTAINER_NAMES} , ELSE Create List + @{EXCLUDED_CONTAINERS}= Create List + FOR ${container} IN @{EXCLUDED_CONTAINERS_RAW} + ${trimmed_container}= Strip String ${container} + Append To List ${EXCLUDED_CONTAINERS} ${trimmed_container} + END + Set Suite Variable @{EXCLUDED_CONTAINERS} + + Set Suite Variable ${CONTEXT} ${CONTEXT} + Set Suite 
Variable ${NAMESPACE} ${NAMESPACE} + Set Suite Variable ${DEPLOYMENT_NAME} ${DEPLOYMENT_NAME} + Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} + + # Initialize score variables + Set Suite Variable ${container_restart_score} 0 + Set Suite Variable ${log_health_score} 0 + Set Suite Variable ${pods_notready_score} 0 + Set Suite Variable ${replica_score} 0 + Set Suite Variable ${events_score} 0 + + + # Check if deployment is scaled to 0 and handle appropriately + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown"), last_scale_time: (.metadata.annotations."deployment.kubernetes.io/last-applied-configuration" // "N/A")}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=30 + + TRY + ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json + ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) + + # Try to determine when deployment was scaled down by checking recent events and replica set history + ${scale_down_info}= Get Deployment Scale Down Timestamp ${spec_replicas} + + IF ${spec_replicas} == 0 + Log Deployment ${DEPLOYMENT_NAME} is scaled to 0 replicas - returning special health score + Log Scale down detected at: ${scale_down_info} + + # For scaled-down deployments, return a score of 0.5 to indicate "intentionally down" vs "broken" + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${True} + Set Suite Variable ${SCALED_DOWN_INFO} ${scale_down_info} + ELSE + Log Deployment ${DEPLOYMENT_NAME} has ${spec_replicas} desired replicas - proceeding with health checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + END + + EXCEPT + Log Warning: Failed to check deployment 
scale, continuing with normal health checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + END + +Get Deployment Scale Down Timestamp + [Arguments] ${spec_replicas} + [Documentation] Attempts to determine when a deployment was scaled down by examining recent events + ${scale_down_info}= Set Variable Unknown + + IF ${spec_replicas} == 0 + TRY + # Check recent scaling events to find when it was scaled to 0 + ${scaling_events}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} --sort-by='.lastTimestamp' -o json | jq -r '.items[] | select(.reason == "ScalingReplicaSet" and (.message | contains("${DEPLOYMENT_NAME}")) and (.message | contains("to 0"))) | {timestamp: .lastTimestamp, message: .message}' | jq -s 'sort_by(.timestamp) | reverse | .[0] // empty' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=15 + + IF '''${scaling_events.stdout}''' != '' + ${event_data}= Evaluate json.loads(r'''${scaling_events.stdout}''') if r'''${scaling_events.stdout}'''.strip() else {} json + ${timestamp}= Evaluate $event_data.get('timestamp', 'Unknown') + ${message}= Evaluate $event_data.get('message', 'Unknown') + ${scale_down_info}= Set Variable ${timestamp} (${message}) + Log Found scale-down event: ${scale_down_info} + ELSE + # Try checking replicaset history as fallback + ${rs_history}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get replicasets --context ${CONTEXT} -n ${NAMESPACE} -l app=${DEPLOYMENT_NAME} -o json | jq -r '.items[] | select(.spec.replicas == 0) | {creation_time: .metadata.creationTimestamp, name: .metadata.name}' | jq -s 'sort_by(.creation_time) | reverse | .[0] // empty' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... 
timeout_seconds=15 + + IF '''${rs_history.stdout}''' != '' + ${rs_data}= Evaluate json.loads(r'''${rs_history.stdout}''') if r'''${rs_history.stdout}'''.strip() else {} json + ${rs_time}= Evaluate $rs_data.get('creation_time', 'Unknown') + ${scale_down_info}= Set Variable Likely around ${rs_time} (based on ReplicaSet history) + Log Estimated scale-down time from ReplicaSet: ${scale_down_info} + ELSE + ${scale_down_info}= Set Variable Unable to determine - no recent scaling events found + Log Could not determine when deployment was scaled down + END + END + EXCEPT + Log Warning: Failed to determine scale-down timestamp + ${scale_down_info}= Set Variable Failed to determine scale-down time + END + END + + RETURN ${scale_down_info} + +*** Tasks *** +Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` + [Documentation] Fetches logs and checks for critical error patterns that indicate application failures. + [Tags] logs errors critical patterns + + # Skip if deployment is scaled down + IF ${SKIP_HEALTH_CHECKS} + Log Skipping log analysis - deployment is scaled to 0 replicas + ${log_health_score}= Set Variable 1 # Perfect score for scaled deployment + Set Suite Variable ${log_health_score} + RW.Core.Push Metric ${log_health_score} sub_name=log_errors + ELSE + ${log_dir}= RW.K8sLog.Fetch Workload Logs + ... workload_type=deployment + ... workload_name=${DEPLOYMENT_NAME} + ... namespace=${NAMESPACE} + ... context=${CONTEXT} + ... kubeconfig=${kubeconfig} + ... log_age=${RW_LOOKBACK_WINDOW} + ... max_log_lines=${MAX_LOG_LINES} + ... max_log_bytes=${MAX_LOG_BYTES} + ... excluded_containers=${EXCLUDED_CONTAINERS} + + # Use only critical error patterns for fast SLI checks + @{critical_categories}= Create List GenericError AppFailure + + ${scan_results}= RW.K8sLog.Scan Logs For Issues + ... log_dir=${log_dir} + ... workload_type=deployment + ... workload_name=${DEPLOYMENT_NAME} + ... namespace=${NAMESPACE} + ... categories=${critical_categories} + ... 
custom_patterns_file=sli_critical_patterns.json + ... excluded_containers=${EXCLUDED_CONTAINERS} + + # Post-process results to filter out patterns matching LOGS_EXCLUDE_PATTERN + TRY + IF $LOGS_EXCLUDE_PATTERN != "" + ${filtered_issues}= Evaluate [issue for issue in $scan_results.get('issues', []) if not __import__('re').search('${LOGS_EXCLUDE_PATTERN}', issue.get('details', ''), __import__('re').IGNORECASE)] modules=re + ${filtered_results}= Evaluate {**$scan_results, 'issues': $filtered_issues} + Set Test Variable ${scan_results} ${filtered_results} + END + EXCEPT + Log Warning: Failed to apply LOGS_EXCLUDE_PATTERN filter, using unfiltered results + END + + ${log_health_score}= RW.K8sLog.Calculate Log Health Score scan_results=${scan_results} + + # Store details for final score calculation logging + TRY + ${issues}= Evaluate $scan_results.get('issues', []) + ${issue_count}= Get Length ${issues} + Set Suite Variable ${log_health_details} ${issue_count} issues found + EXCEPT + Set Suite Variable ${log_health_details} analysis completed + END + + Set Suite Variable ${log_health_score} + RW.K8sLog.Cleanup Temp Files + RW.Core.Push Metric ${log_health_score} sub_name=log_errors + END + +Generate Deployment Health Score for `${DEPLOYMENT_NAME}` + [Documentation] Generates the final applog health score and report details + [Tags] score health applog + + IF ${SKIP_HEALTH_CHECKS} + # For scaled-down deployments, return perfect score to indicate "intentionally down" vs "broken" + # We distinguish scaled-down vs broken deployments through the log message and report details + ${health_score}= Set Variable 1.0 + Log Deployment ${DEPLOYMENT_NAME} is intentionally scaled to 0 replicas (${SCALED_DOWN_INFO}) - Score: ${health_score} + RW.Core.Add to Report Applog Health Score: ${health_score} - Deployment intentionally scaled to 0 replicas + ELSE + # Use the log health score as the final health score + ${health_score}= Set Variable ${log_health_score} + + IF ${health_score} == 
1.0 + RW.Core.Add to Report Applog Health Score: ${health_score} - No applog issues detected in workload logs + ELSE + RW.Core.Add to Report Applog Health Score: ${health_score} - Applog issue(s) detected in workload logs: ${log_health_details} + END + END + RW.Core.Push Metric ${health_score} \ No newline at end of file diff --git a/codebundles/k8s-applog-health/sli_critical_patterns.json b/codebundles/k8s-applog-health/sli_critical_patterns.json new file mode 100755 index 000000000..19848bb80 --- /dev/null +++ b/codebundles/k8s-applog-health/sli_critical_patterns.json @@ -0,0 +1,74 @@ +{ + "critical_patterns": { + "GenericError": { + "description": "Generic error patterns that indicate application failures", + "patterns": [ + "exception|Exception|EXCEPTION", + "fatal|Fatal|FATAL", + "panic|Panic|PANIC", + "crash|Crash|CRASH", + "failed|Failed|FAILED", + "failure|Failure|FAILURE" + ], + "severity": 1 + }, + "AppFailure": { + "description": "Application-specific failure patterns", + "patterns": [ + "application.*failed", + "service.*unavailable", + "connection.*refused", + "timeout.*error", + "out.*of.*memory", + "disk.*full", + "permission.*denied", + "authentication.*failed", + "authorization.*failed" + ], + "severity": 1 + }, + "StackTrace": { + "description": "Stack trace patterns indicating application crashes", + "patterns": [ + "stack.*trace", + "at\\s+\\w+\\.\\w+", + "Exception.*in thread", + "java\\.lang\\.", + "python.*traceback", + "goroutine.*panic", + "panic:", + "fatal.*error" + ], + "severity": 1 + } + }, + "exclude_patterns": [ + "\\bINFO\\b", + "\\bDEBUG\\b", + "\\bTRACE\\b", + "health.*check", + "heartbeat", + "metrics", + "monitoring", + "\\],INFO\\s*,", + "INFO\\s*,c\\.", + "START\\s*-\\s*.*Impl\\.", + "BusinessService.*\\(\\)", + "RestrictionsApiDelegateImpl", + "ReadBlockedResourceListBusinessServiceImpl", + "LocationReplicaStrategy", + "\\bSTART\\s*method\\b", + "Calling\\s*BusinessService", + "BusinessServiceImpl", + "ApiDelegateImpl", + 
"linkerd.*INFO.*Connection closed.*error=read header from client timeout", + "linkerd_app_core::serve.*Connection closed.*error=read header from client timeout", + "TelemetryPipeline.*In the last \\d+ minutes.*operation has failed.*Sending telemetry to the ingestion service", + "adjustmentReason=\\w*EXCEPTION\\w*" + ], + "config": { + "max_matches_per_pattern": 10, + "case_sensitive": false, + "timeout_seconds": 30 + } +} \ No newline at end of file From abf88d177b5830be54c1c8a5d712512c7d3a11d6 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Wed, 14 Jan 2026 16:24:33 +0530 Subject: [PATCH 02/10] Improve log analysis task naming and report formatting - Rename task to "Scan Application Logs for Errors and Stacktraces" for clarity - Add timestamp tracking for log extraction to support accurate issue reporting - Enhance log contents display with structured format showing last N lines per file - Add LOG_START/LOG_END markers for better log parsing - Use recorded timestamp when no issues are found - Re-enable log cleanup after analysis --- codebundles/k8s-applog-health/runbook.robot | 32 ++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index 88a57a05e..a3aa2301b 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -208,8 +208,8 @@ Suite Initialization *** Tasks *** -Analyze Application Log Patterns for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Fetches and analyzes logs from the deployment pods for errors, connection issues, and other patterns that indicate application health problems. Note: Warning messages about missing log files for excluded containers (like linkerd-proxy, istio-proxy) are expected and harmless. 
+Scan Application Logs for Errors and Stacktraces for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Fetches and analyzes logs from the deployment pods for stacktraces, errors, connection issues, and other patterns that indicate application health problems. Note: Warning messages about missing log files for excluded containers (like linkerd-proxy, istio-proxy) are expected and harmless. [Tags] ... logs ... application @@ -221,6 +221,9 @@ Analyze Application Log Patterns for Deployment `${DEPLOYMENT_NAME}` in Namespac ... access:read-only # Skip pod-related checks if deployment is scaled to 0 IF not ${SKIP_POD_CHECKS} + # record current time, and use if no issues found + ${log_extraction_timestamp}= DateTime.Get Current Date + # Temporarily suppress log warnings for excluded containers (they're expected) TRY ${log_dir}= RW.K8sLog.Fetch Workload Logs @@ -271,9 +274,30 @@ Analyze Application Log Patterns for Deployment `${DEPLOYMENT_NAME}` in Namespac ${issues_count}= Get Length ${issues} # print the contents from log_dir into the report - RW.Core.Add Pre To Report **Log Contents:**\n${log_dir} + ${logs_subdir}= Set Variable ${log_dir}${/}deployment_${DEPLOYMENT_NAME}_logs + ${has_logs_dir}= Run Keyword And Return Status Directory Should Exist ${logs_subdir} + + IF ${has_logs_dir} + @{log_files}= List Files In Directory ${logs_subdir} pattern=*_logs.txt absolute=True + Sort List ${log_files} + + RW.Core.Add Pre To Report **Log Contents (showing last ${LOG_LINES} lines per file)** + + FOR ${log_file} IN @{log_files} + ${base}= Evaluate __import__('os').path.basename(r'''${log_file}''') + + # Efficient-ish tail in Python: keeps only last N lines + ${tail}= Evaluate ''.join(__import__('collections').deque(open(r'''${log_file}''', 'r', encoding='utf-8', errors='replace'), maxlen=int('${LOG_LINES}'))) + + RW.Core.Add Pre To Report [LOG_START: ${base}]\n${tail}\n[LOG_END: ${base}]\n + END + ELSE + RW.Core.Add Pre To Report **Log 
Contents:**\nNo log files directory found at: ${logs_subdir} + END IF ${issues_count} == 0 + ${issue_timestamp}= Set Variable ${log_extraction_timestamp} + # create a dummy issue with a keyword argument set to a value depicting no issues found RW.Core.Add Pre To Report **No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}`** @@ -304,5 +328,5 @@ Analyze Application Log Patterns for Deployment `${DEPLOYMENT_NAME}` in Namespac ... observed_at=${issue_timestamp} ... next_action=processApplogIssues END - # RW.K8sLog.Cleanup Temp Files + RW.K8sLog.Cleanup Temp Files END \ No newline at end of file From 8f8d391f747db97588b2cb0ba2de8300ebdcf920 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Thu, 15 Jan 2026 14:02:00 +0000 Subject: [PATCH 03/10] Update display name from Kubernetes Deployment Triage to Kubernetes AppLog Analysis --- codebundles/k8s-applog-health/runbook.robot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index a3aa2301b..d98b0523d 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -1,7 +1,7 @@ *** Settings *** Documentation Triages issues related to a deployment and its replicas. 
Metadata Author stewartshea -Metadata Display Name Kubernetes Deployment Triage +Metadata Display Name Kubernetes AppLog Analysis Metadata Supports Kubernetes,AKS,EKS,GKE,OpenShift Library BuiltIn From 532ff8e1ebcb7f30f03746a1cd749d48c54cef40 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Fri, 16 Jan 2026 19:12:03 +0530 Subject: [PATCH 04/10] Add log size limits and clean up unused SLI variables - Add LOG_SIZE/MAX_LOG_SIZE variable to control max log bytes fetched (default 2MB) - Increase default LOG_LINES from 100 to 1000 for better log coverage - Pass max_log_lines and max_log_bytes to log fetching in runbook - Remove unused EVENT_AGE, EVENT_THRESHOLD, CHECK_SERVICE_ENDPOINTS from SLI - Remove unnecessary "no issues found" dummy issue creation - Rename final SLI task to "Generate Application Health Score" --- .../templates/k8s-applog-health-sli.yaml | 4 +++ .../templates/k8s-applog-health-taskset.yaml | 4 +++ codebundles/k8s-applog-health/runbook.robot | 23 +++++++-------- codebundles/k8s-applog-health/sli.robot | 29 +++---------------- 4 files changed, 22 insertions(+), 38 deletions(-) diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml index 8576f2825..c4e813650 100755 --- a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml @@ -46,6 +46,10 @@ spec: value: "2" - name: CHECK_SERVICE_ENDPOINTS value: "true" + - name: MAX_LOG_LINES + value: "1000" + - name: MAX_LOG_SIZE + value: "2097152" secretsProvided: {% if wb_version %} {% include "kubernetes-auth.yaml" ignore missing %} diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml index 7f1d9b79e..73229c8e6 100644 --- 
a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml @@ -35,6 +35,10 @@ spec: value: "4" - name: LOG_AGE value: "10m" + - name: LOG_SIZE + value: "2097152" + - name: LOG_LINES + value: "1000" secretsProvided: {% if wb_version %} {% include "kubernetes-auth.yaml" ignore missing %} diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index d98b0523d..91f6c3fdd 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -55,13 +55,19 @@ Suite Initialization ... description=The number of log lines to fetch from the pods when inspecting logs. ... pattern=\d+ ... example=100 - ... default=100 + ... default=1000 ${LOG_AGE}= RW.Core.Import User Variable LOG_AGE ... type=string ... description=The age of logs to fetch from pods, used for log analysis tasks. ... pattern=\w* ... example=10m ... default=10m + ${LOG_SIZE}= RW.Core.Import User Variable LOG_SIZE + ... type=string + ... description=The maximum size of logs in bytes to fetch from pods, used for log analysis tasks. Defaults to 2MB. + ... pattern=\d* + ... example=1024 + ... default=2097152 ${LOG_ANALYSIS_DEPTH}= RW.Core.Import User Variable LOG_ANALYSIS_DEPTH ... type=string @@ -141,6 +147,7 @@ Suite Initialization Set Suite Variable ${DEPLOYMENT_NAME} Set Suite Variable ${LOG_LINES} Set Suite Variable ${LOG_AGE} + Set Suite Variable ${LOG_SIZE} Set Suite Variable ${LOG_ANALYSIS_DEPTH} Set Suite Variable ${LOG_SEVERITY_THRESHOLD} @@ -233,6 +240,8 @@ Scan Application Logs for Errors and Stacktraces for Deployment `${DEPLOYMENT_NA ... context=${CONTEXT} ... kubeconfig=${kubeconfig} ... log_age=${LOG_AGE} + ... max_log_lines=${LOG_LINES} + ... max_log_bytes=${LOG_SIZE} ... 
excluded_containers=${EXCLUDED_CONTAINERS} EXCEPT AS ${log_error} # If log fetching fails completely, log the error but continue @@ -300,18 +309,6 @@ Scan Application Logs for Errors and Stacktraces for Deployment `${DEPLOYMENT_NA # create a dummy issue with a keyword argument set to a value depicting no issues found RW.Core.Add Pre To Report **No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}`** - - # create a dummy issue with a keyword argument set to a value depicting no issues found - RW.Core.Add Issue - ... severity=4 - ... expected=Application logs should be free of critical errors for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... actual=No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... title=No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... reproduce_hint=Check application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... details=No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... next_steps=No processing required - ... observed_at=${issue_timestamp} - ... next_action=noIssuesFound ELSE # set issue_timestamp to the observed_at value from the first issue ${issue_timestamp}= Evaluate $issues[0].get('observed_at', '') diff --git a/codebundles/k8s-applog-health/sli.robot b/codebundles/k8s-applog-health/sli.robot index 496903cd8..3c105a100 100755 --- a/codebundles/k8s-applog-health/sli.robot +++ b/codebundles/k8s-applog-health/sli.robot @@ -1,7 +1,7 @@ *** Settings *** Metadata Author stewartshea -Documentation This SLI uses kubectl to score deployment health. Produces a value between 0 (completely failing the test) and 1 (fully passing the test). Looks for container restarts, critical log errors, pods not ready, deployment status, and recent events. 
-Metadata Display Name Kubernetes Deployment Healthcheck +Documentation This SLI uses kubectl to score application log health. Produces a value between 0 (completely failing the test) and 1 (fully passing the test). Looks for container restarts, critical log errors, pods not ready, deployment status, stacktraces and other recent events. +Metadata Display Name Kubernetes Application Log Healthcheck Metadata Supports Kubernetes,AKS,EKS,GKE,OpenShift Suite Setup Suite Initialization Library BuiltIn @@ -55,31 +55,13 @@ Suite Initialization ... description=Maximum number of log lines to fetch per container to prevent API overload. ... pattern=^\d+$ ... example=100 - ... default=100 + ... default=1000 ${MAX_LOG_BYTES}= RW.Core.Import User Variable MAX_LOG_BYTES ... type=string ... description=Maximum log size in bytes to fetch per container to prevent API overload. ... pattern=^\d+$ ... example=256000 ... default=256000 - ${EVENT_AGE}= RW.Core.Import User Variable EVENT_AGE - ... type=string - ... description=The time window to check for recent warning events. - ... pattern=((\d+?)m)? - ... example=10m - ... default=10m - ${EVENT_THRESHOLD}= RW.Core.Import User Variable EVENT_THRESHOLD - ... type=string - ... description=The maximum number of critical warning events allowed before scoring is reduced. - ... pattern=^\d+$ - ... example=2 - ... default=2 - ${CHECK_SERVICE_ENDPOINTS}= RW.Core.Import User Variable CHECK_SERVICE_ENDPOINTS - ... type=string - ... description=Whether to check service endpoint health. Set to 'false' if deployment doesn't have associated services. - ... enum=[true,false] - ... example=true - ... default=true ${LOGS_EXCLUDE_PATTERN}= RW.Core.Import User Variable LOGS_EXCLUDE_PATTERN ... type=string ... description=Pattern used to exclude entries from log analysis when searching for errors. Use regex patterns to filter out false positives like JSON structures. 
@@ -106,9 +88,6 @@ Suite Initialization Set Suite Variable ${RW_LOOKBACK_WINDOW} ${RW_LOOKBACK_WINDOW} Set Suite Variable ${MAX_LOG_LINES} ${MAX_LOG_LINES} Set Suite Variable ${MAX_LOG_BYTES} ${MAX_LOG_BYTES} - Set Suite Variable ${EVENT_AGE} ${EVENT_AGE} - Set Suite Variable ${EVENT_THRESHOLD} ${EVENT_THRESHOLD} - Set Suite Variable ${CHECK_SERVICE_ENDPOINTS} ${CHECK_SERVICE_ENDPOINTS} Set Suite Variable ${LOGS_EXCLUDE_PATTERN} ${LOGS_EXCLUDE_PATTERN} Set Suite Variable ${EXCLUDED_CONTAINER_NAMES} ${EXCLUDED_CONTAINER_NAMES} @@ -273,7 +252,7 @@ Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` RW.Core.Push Metric ${log_health_score} sub_name=log_errors END -Generate Deployment Health Score for `${DEPLOYMENT_NAME}` +Generate Application Health Score for `${DEPLOYMENT_NAME}` [Documentation] Generates the final applog health score and report details [Tags] score health applog From 2591933601c5419b75164e4dcc63d2991e74b396 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Wed, 11 Feb 2026 09:56:12 +0000 Subject: [PATCH 05/10] shifted "Analyze applog " task from healthcheck to applog-health CB; shifted "Analyze workload stacktraces" task from stacktrace-CB to applog-health-CB --- codebundles/k8s-applog-health/runbook.robot | 215 ++++++++++++------ codebundles/k8s-applog-health/sli.robot | 109 +++++++-- .../k8s-daemonset-healthcheck/runbook.robot | 58 ----- .../k8s-deployment-healthcheck/runbook.robot | 99 -------- .../k8s-statefulset-healthcheck/runbook.robot | 59 ----- 5 files changed, 229 insertions(+), 311 deletions(-) diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index 91f6c3fdd..299757e56 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -1,6 +1,6 @@ *** Settings *** Documentation Triages issues related to a deployment and its replicas. 
-Metadata Author stewartshea +Metadata Author akshayrw25 Metadata Display Name Kubernetes AppLog Analysis Metadata Supports Kubernetes,AKS,EKS,GKE,OpenShift @@ -11,6 +11,7 @@ Library RW.platform Library RW.NextSteps Library RW.K8sHelper Library RW.K8sLog +Library RW.LogAnalysis.ExtractTraceback Library OperatingSystem Library String @@ -45,11 +46,18 @@ Suite Initialization ... description=The name of the Kubernetes namespace to scope actions and searching to. ... pattern=\w* ... example=otel-demo - ${DEPLOYMENT_NAME}= RW.Core.Import User Variable DEPLOYMENT_NAME + ${WORKLOAD_NAME}= RW.Core.Import User Variable WORKLOAD_NAME ... type=string - ... description=The name of the deployment to triage. + ... description=The name of the workload (deployment, statefulset, or daemonset) to analyze for stacktraces. ... pattern=\w* ... example=otel-demo-frontend + ${WORKLOAD_TYPE}= RW.Core.Import User Variable WORKLOAD_TYPE + ... type=string + ... description=The type of Kubernetes workload to analyze. + ... pattern=\w* + ... enum=[deployment,statefulset,daemonset] + ... example=deployment + ... default=deployment ${LOG_LINES}= RW.Core.Import User Variable LOG_LINES ... type=string ... description=The number of log lines to fetch from the pods when inspecting logs. @@ -104,8 +112,8 @@ Suite Initialization ... type=string ... description=Pattern used to exclude entries from log analysis when searching for errors. Use regex patterns to filter out false positives like JSON structures. ... pattern=.* - ... example="errors":\s*\[\]|"warnings":\s*\[\] - ... default="errors":\\s*\\[\\]|\\bINFO\\b|\\bDEBUG\\b|\\bTRACE\\b|\\bSTART\\s*-\\s*|\\bSTART\\s*method\\b + ... example="errors":\\s*\\[\\]|"warnings":\\s*\\[\\] + ... default="errors":\\\\s*\\\\[\\\\]|\\\\bINFO\\\\b|\\\\bDEBUG\\\\b|\\\\bTRACE\\\\b|\\\\bSTART\\\\s*-\\\\s*|\\\\bSTART\\\\s*method\\\\b ${LOG_SCAN_TIMEOUT}= RW.Core.Import User Variable LOG_SCAN_TIMEOUT ... type=string ... 
description=Timeout in seconds for log scanning operations. Increase this value if log scanning times out on large log files. @@ -144,7 +152,8 @@ Suite Initialization Set Suite Variable ${KUBERNETES_DISTRIBUTION_BINARY} Set Suite Variable ${CONTEXT} Set Suite Variable ${NAMESPACE} - Set Suite Variable ${DEPLOYMENT_NAME} + Set Suite Variable ${WORKLOAD_NAME} + Set Suite Variable ${WORKLOAD_TYPE} Set Suite Variable ${LOG_LINES} Set Suite Variable ${LOG_AGE} Set Suite Variable ${LOG_SIZE} @@ -171,15 +180,17 @@ Suite Initialization ... LOGS_ERROR_PATTERN=${LOGS_ERROR_PATTERN} ... LOGS_EXCLUDE_PATTERN=${LOGS_EXCLUDE_PATTERN} ... ANOMALY_THRESHOLD=${ANOMALY_THRESHOLD} - ... DEPLOYMENT_NAME=${DEPLOYMENT_NAME} + # ... DEPLOYMENT_NAME=${DEPLOYMENT_NAME} + ... WORKLOAD_NAME=${WORKLOAD_NAME} + ... WORKLOAD_TYPE=${WORKLOAD_TYPE} ... CONTAINER_RESTART_AGE=${CONTAINER_RESTART_AGE} ... CONTAINER_RESTART_THRESHOLD=${CONTAINER_RESTART_THRESHOLD} ... LOG_SCAN_TIMEOUT=${LOG_SCAN_TIMEOUT} Set Suite Variable ${env} ${env_dict} - # Check if deployment is scaled to 0 and handle appropriately + # Check if the workload is scaled to 0 and handle appropriately ${scale_check}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' ... env=${env} ... secret_file__kubeconfig=${kubeconfig} ... 
timeout_seconds=30 @@ -188,19 +199,21 @@ Suite Initialization ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) - IF ${spec_replicas} == 0 - ${issue_timestamp}= DateTime.Get Current Date + # DaemonSets don't scale to 0 in the traditional sense, so skip scale-down logic for them + IF '${WORKLOAD_TYPE}' == 'daemonset' + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with log analysis + Set Suite Variable ${SKIP_POD_CHECKS} ${False} + ELSE IF ${spec_replicas} == 0 RW.Core.Add Issue ... severity=4 - ... expected=Deployment `${DEPLOYMENT_NAME}` operational status documented - ... actual=Deployment `${DEPLOYMENT_NAME}` is intentionally scaled to zero replicas - ... title=Deployment `${DEPLOYMENT_NAME}` is Scaled Down (Informational) - ... reproduce_hint=kubectl get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o yaml - ... details=Deployment `${DEPLOYMENT_NAME}` is currently scaled to 0 replicas (spec.replicas=0). This is an intentional configuration and not an error. All pod-related healthchecks have been skipped for efficiency. If the deployment should be running, scale it up using:\nkubectl scale deployment/${DEPLOYMENT_NAME} --replicas= --context ${CONTEXT} -n ${NAMESPACE} - ... next_steps=This is informational only. If the deployment should be running, scale it up. - ... observed_at=${issue_timestamp} + ... expected=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` operational status documented + ... actual=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is intentionally scaled to zero replicas + ... title=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is Scaled Down (Informational) + ... reproduce_hint=kubectl get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o yaml + ... details=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is currently scaled to 0 replicas (spec.replicas=0). 
This is an intentional configuration and not an error. All pod-related healthchecks have been skipped for efficiency. If the workload should be running, scale it up using:\nkubectl scale ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --replicas= --context ${CONTEXT} -n ${NAMESPACE} + ... next_steps=This is informational only. If the workload should be running, scale it up. - RW.Core.Add Pre To Report **ā„¹ļø Deployment `${DEPLOYMENT_NAME}` is scaled to 0 replicas - Skipping pod-related checks**\n**Available Condition:** ${scale_status.get('available_condition', 'Unknown')} + RW.Core.Add Pre To Report **ā„¹ļø ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is scaled to 0 replicas - Skipping log analysis**\n**Available Condition:** ${scale_status.get('available_condition', 'Unknown')} Set Suite Variable ${SKIP_POD_CHECKS} ${True} ELSE @@ -208,54 +221,54 @@ Suite Initialization END EXCEPT - Log Warning: Failed to check deployment scale, continuing with normal checks + Log Warning: Failed to check workload scale, continuing with normal checks Set Suite Variable ${SKIP_POD_CHECKS} ${False} END *** Tasks *** -Scan Application Logs for Errors and Stacktraces for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Fetches and analyzes logs from the deployment pods for stacktraces, errors, connection issues, and other patterns that indicate application health problems. Note: Warning messages about missing log files for excluded containers (like linkerd-proxy, istio-proxy) are expected and harmless. + +Analyze Application Log Patterns for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Fetches and analyzes logs from the deployment pods for errors, connection issues, and other patterns that indicate application health problems. Note: Warning messages about missing log files for excluded containers (like linkerd-proxy, istio-proxy) are expected and harmless. [Tags] ... logs ... application ... errors - ... stacktrace ... patterns ... 
health - ... deployment + ... ${WORKLOAD_TYPE} ... access:read-only # Skip pod-related checks if deployment is scaled to 0 IF not ${SKIP_POD_CHECKS} - # record current time, and use if no issues found - ${log_extraction_timestamp}= DateTime.Get Current Date - # Temporarily suppress log warnings for excluded containers (they're expected) TRY ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} ... namespace=${NAMESPACE} ... context=${CONTEXT} ... kubeconfig=${kubeconfig} ... log_age=${LOG_AGE} - ... max_log_lines=${LOG_LINES} - ... max_log_bytes=${LOG_SIZE} ... excluded_containers=${EXCLUDED_CONTAINERS} EXCEPT AS ${log_error} # If log fetching fails completely, log the error but continue Log Warning: Log fetching encountered an error: ${log_error} + + # TODO: remove this after testing + RW.Core.Add Pre To Report **Log Fetching Error:** ${log_error} # Set empty log directory to continue with other checks ${log_dir}= Set Variable ${EMPTY} END + + RW.Core.Add Pre To Report **Log Directory:** ${log_dir} # Only scan logs if we have a valid log directory IF '''${log_dir}''' != '''${EMPTY}''' ${scan_results}= RW.K8sLog.Scan Logs For Issues ... log_dir=${log_dir} - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} ... namespace=${NAMESPACE} ... categories=@{LOG_PATTERN_CATEGORIES} ... 
custom_patterns_file=runbook_patterns.json @@ -280,50 +293,108 @@ Scan Application Logs for Errors and Stacktraces for Deployment `${DEPLOYMENT_NA # Process each issue found in the logs ${issues}= Evaluate $scan_results.get('issues', []) - ${issues_count}= Get Length ${issues} - - # print the contents from log_dir into the report - ${logs_subdir}= Set Variable ${log_dir}${/}deployment_${DEPLOYMENT_NAME}_logs - ${has_logs_dir}= Run Keyword And Return Status Directory Should Exist ${logs_subdir} + FOR ${issue} IN @{issues} + ${severity}= Evaluate $issue.get('severity', ${LOG_SEVERITY_THRESHOLD}) + IF ${severity} <= ${LOG_SEVERITY_THRESHOLD} + # Convert issue details to string to avoid serialization issues + ${issue_details_raw}= Evaluate $issue.get("details", "") + ${issue_details_str}= Convert To String ${issue_details_raw} + ${summarized_details}= RW.K8sLog.Summarize Log Issues issue_details=${issue_details_str} + + # Safely extract title and next_steps as strings + ${issue_title_raw}= Evaluate $issue.get('title', 'Log pattern issue detected') + ${issue_title}= Convert To String ${issue_title_raw} + ${next_steps_raw}= Evaluate $issue.get('next_steps', 'Review application logs and resolve underlying issues') + ${next_steps}= Convert To String ${next_steps_raw} + + # Use timestamp from log scan results if available, otherwise extract from details + ${issue_timestamp}= Evaluate $issue.get('observed_at', '') - IF ${has_logs_dir} - @{log_files}= List Files In Directory ${logs_subdir} pattern=*_logs.txt absolute=True - Sort List ${log_files} - - RW.Core.Add Pre To Report **Log Contents (showing last ${LOG_LINES} lines per file)** - - FOR ${log_file} IN @{log_files} - ${base}= Evaluate __import__('os').path.basename(r'''${log_file}''') - - # Efficient-ish tail in Python: keeps only last N lines - ${tail}= Evaluate ''.join(__import__('collections').deque(open(r'''${log_file}''', 'r', encoding='utf-8', errors='replace'), maxlen=int('${LOG_LINES}'))) - - RW.Core.Add Pre To 
Report [LOG_START: ${base}]\n${tail}\n[LOG_END: ${base}]\n + RW.Core.Add Issue + ... severity=${severity} + ... expected=Application logs should be free of critical errors for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in namespace `${NAMESPACE}` + ... actual=${issue_title} in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in namespace `${NAMESPACE}` + ... title=${issue_title} in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` + ... reproduce_hint=Check application logs for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in namespace `${NAMESPACE}` + ... details=${summarized_details} + ... next_steps=${next_steps} + ... observed_at=${issue_timestamp} END - ELSE - RW.Core.Add Pre To Report **Log Contents:**\nNo log files directory found at: ${logs_subdir} END - IF ${issues_count} == 0 - ${issue_timestamp}= Set Variable ${log_extraction_timestamp} + ${issues_count}= Get Length ${issues} + + # Convert scan_results to string to avoid serialization issues, then format for display + ${scan_results_str}= Evaluate json.dumps($scan_results, indent=2) json + ${formatted_results}= RW.K8sLog.Format Scan Results For Display scan_results=${scan_results_str} + + RW.Core.Add Pre To Report **Log Analysis Summary for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}`**\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} + + RW.K8sLog.Cleanup Temp Files + END + +Analyze Workload Stacktraces for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Collects and analyzes stacktraces/tracebacks from all pods in the workload for troubleshooting application issues. + [Tags] + ... logs + ... stacktraces + ... tracebacks + ... workload + ... troubleshooting + ... errors + ... 
access:read-only + # Skip pod-related checks if workload is scaled to 0 + IF not ${SKIP_STACKTRACE_CHECKS} + # Convert comma-separated string to list for excluded containers + @{EXCLUDED_CONTAINERS}= Run Keyword If "${EXCLUDED_CONTAINER_NAMES}" != "" Split String ${EXCLUDED_CONTAINER_NAMES} , ELSE Create List + + # Fetch logs using RW.K8sLog library (same pattern as deployment healthcheck) + ${log_dir}= RW.K8sLog.Fetch Workload Logs + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} + ... namespace=${NAMESPACE} + ... context=${CONTEXT} + ... kubeconfig=${kubeconfig} + ... log_age=${LOG_AGE} + ... max_log_lines=${LOG_LINES} + ... max_log_bytes=${LOG_SIZE} + ... excluded_containers=${EXCLUDED_CONTAINERS} + + # Extract stacktraces from the log directory using the traceback library + ${tracebacks}= RW.LogAnalysis.ExtractTraceback.Extract Tracebacks + ... logs_dir=${log_dir} + + # Check total number of tracebacks extracted + ${total_tracebacks}= Get Length ${tracebacks} + + IF ${total_tracebacks} == 0 + # No tracebacks found + RW.Core.Add Pre To Report **šŸ“‹ No Stacktraces Found for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`**\n**Log Analysis Period:** ${LOG_AGE}\n**Max Log Lines:** ${LOG_LINES}\n**Max Log Size:** ${LOG_SIZE} bytes\n**Excluded Containers:** ${EXCLUDED_CONTAINER_NAMES}\n\nLog analysis completed successfully with no stacktraces detected. 
+ ELSE + # Stacktraces found - create issues for each one + ${delimiter}= Evaluate '-' * 80 - # create a dummy issue with a keyword argument set to a value depicting no issues found - RW.Core.Add Pre To Report **No issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}`** - ELSE - # set issue_timestamp to the observed_at value from the first issue - ${issue_timestamp}= Evaluate $issues[0].get('observed_at', '') + FOR ${traceback} IN @{tracebacks} + ${stacktrace}= Set Variable ${traceback["stacktrace"]} + ${timestamp}= Set Variable ${traceback["timestamp"]} + RW.Core.Add Issue + ... severity=2 + ... expected=No stacktraces should be present in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` logs in namespace `${NAMESPACE}` + ... actual=Stacktrace detected in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` logs in namespace `${NAMESPACE}` + ... title=Stacktrace Detected in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` + ... reproduce_hint=Check application logs for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in namespace `${NAMESPACE}` + ... details=${delimiter}\n${stacktrace}\n${delimiter} + ... next_steps=Review application logs for the root cause of the stacktrace\nCheck application configuration and resource limits\nInvestigate the specific error conditions that led to this stacktrace\nConsider scaling or restarting the ${WORKLOAD_TYPE} if issues persist\nMonitor application health and performance metrics + ... next_action=analyseStacktrace + ... observed_at=${timestamp} + END - # create a dummy issue with a keyword argument set to a value depicting issues found - RW.Core.Add Issue - ... severity=4 - ... expected=Application logs should be free of critical errors for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... actual=Issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... title=Issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... 
reproduce_hint=Check application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... details=Issues found in application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... next_steps=Process the issues found in the application logs - ... observed_at=${issue_timestamp} - ... next_action=processApplogIssues - END + # Create consolidated report showing all stacktraces + ${stacktrace_strings}= Evaluate [tb["stacktrace"] for tb in ${tracebacks}] + ${agg_tracebacks}= Evaluate "\\n" + "\\n${delimiter}\\n".join(${stacktrace_strings}) + RW.Core.Add Pre To Report **šŸ” Stacktraces Found for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`**\n**Total Stacktraces:** ${total_tracebacks}\n**Log Analysis Period:** ${LOG_AGE}\n**Max Log Lines:** ${LOG_LINES}\n**Max Log Size:** ${LOG_SIZE} bytes\n**Excluded Containers:** ${EXCLUDED_CONTAINER_NAMES}\n\n${agg_tracebacks} + END + + # Clean up temporary log files RW.K8sLog.Cleanup Temp Files END \ No newline at end of file diff --git a/codebundles/k8s-applog-health/sli.robot b/codebundles/k8s-applog-health/sli.robot index 3c105a100..d9a4d1f37 100755 --- a/codebundles/k8s-applog-health/sli.robot +++ b/codebundles/k8s-applog-health/sli.robot @@ -1,5 +1,5 @@ *** Settings *** -Metadata Author stewartshea +Metadata Author akshayrw25 Documentation This SLI uses kubectl to score application log health. Produces a value between 0 (completely failing the test) and 1 (fully passing the test). Looks for container restarts, critical log errors, pods not ready, deployment status, stacktraces and other recent events. Metadata Display Name Kubernetes Application Log Healthcheck Metadata Supports Kubernetes,AKS,EKS,GKE,OpenShift @@ -8,6 +8,7 @@ Library BuiltIn Library RW.Core Library RW.CLI Library RW.platform +Library RW.LogAnalysis.ExtractTraceback Library RW.K8sLog Library OperatingSystem @@ -31,11 +32,18 @@ Suite Initialization ... 
description=Which Kubernetes context to operate within. ... pattern=\w* ... example=my-main-cluster - ${DEPLOYMENT_NAME}= RW.Core.Import User Variable DEPLOYMENT_NAME + ${WORKLOAD_TYPE}= RW.Core.Import User Variable WORKLOAD_TYPE ... type=string - ... description=The name of the Kubernetes deployment to check. + ... description=The type of Kubernetes workload to analyze. ... pattern=\w* - ... example=my-deployment + ... enum=[deployment,statefulset,daemonset] + ... example=deployment + ... default=deployment + ${WORKLOAD_NAME}= RW.Core.Import User Variable WORKLOAD_NAME + ... type=string + ... description=The name of the Kubernetes workload to check. + ... pattern=\w* + ... example=my-workload ${CONTAINER_RESTART_AGE}= RW.Core.Import User Variable CONTAINER_RESTART_AGE ... type=string ... description=The time window in minutes to search for container restarts. @@ -102,7 +110,8 @@ Suite Initialization Set Suite Variable ${CONTEXT} ${CONTEXT} Set Suite Variable ${NAMESPACE} ${NAMESPACE} - Set Suite Variable ${DEPLOYMENT_NAME} ${DEPLOYMENT_NAME} + Set Suite Variable ${WORKLOAD_NAME} ${WORKLOAD_NAME} + Set Suite Variable ${WORKLOAD_TYPE} ${WORKLOAD_TYPE} Set Suite Variable ${env} {"KUBECONFIG":"./${kubeconfig.key}"} # Initialize score variables @@ -113,34 +122,43 @@ Suite Initialization Set Suite Variable ${events_score} 0 - # Check if deployment is scaled to 0 and handle appropriately - ${scale_check}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown"), last_scale_time: (.metadata.annotations."deployment.kubernetes.io/last-applied-configuration" // "N/A")}' - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... 
timeout_seconds=30 + # Check if workload is scaled to 0 and handle appropriately + # Different workload types have different field structures + IF '${WORKLOAD_TYPE}' == 'daemonset' + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .status.desiredNumberScheduled, ready_replicas: (.status.numberReady // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=30 + ELSE + # For deployments and statefulsets + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... 
timeout_seconds=30 + END TRY ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) - # Try to determine when deployment was scaled down by checking recent events and replica set history - ${scale_down_info}= Get Deployment Scale Down Timestamp ${spec_replicas} - - IF ${spec_replicas} == 0 - Log Deployment ${DEPLOYMENT_NAME} is scaled to 0 replicas - returning special health score - Log Scale down detected at: ${scale_down_info} + # DaemonSets don't scale to 0 in the traditional sense, so skip scale-down logic for them + IF '${WORKLOAD_TYPE}' == 'daemonset' + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with stacktrace checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + ELSE IF ${spec_replicas} == 0 + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is scaled to 0 replicas - returning perfect health score - # For scaled-down deployments, return a score of 0.5 to indicate "intentionally down" vs "broken" + # For scaled-down workloads, return a score of 1.0 to indicate "intentionally down" vs "broken" Set Suite Variable ${SKIP_HEALTH_CHECKS} ${True} - Set Suite Variable ${SCALED_DOWN_INFO} ${scale_down_info} ELSE - Log Deployment ${DEPLOYMENT_NAME} has ${spec_replicas} desired replicas - proceeding with health checks + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} has ${spec_replicas} desired replicas - proceeding with stacktrace checks Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} END EXCEPT - Log Warning: Failed to check deployment scale, continuing with normal health checks + Log Warning: Failed to check workload scale, continuing with normal stacktrace checks Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} END @@ -252,6 +270,51 @@ Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` RW.Core.Push Metric ${log_health_score} sub_name=log_errors END +Get Stacktrace Health Score for ${WORKLOAD_TYPE} 
`${WORKLOAD_NAME}` + [Documentation] Checks for recent stacktraces/tracebacks related to the workload within a short time window, with filtering to reduce noise. + [Tags] stacktraces tracebacks errors recent fast + IF ${SKIP_HEALTH_CHECKS} + # For scaled-down deployments, return perfect score to indicate "intentionally down" vs "broken" + ${stacktrace_score}= Set Variable 1.0 + Set Suite Variable ${stacktrace_details} ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is intentionally scaled to 0 replicas - Score: ${stacktrace_score} + ELSE + # Fetch logs using RW.K8sLog library (same pattern as deployment healthcheck) + ${log_dir}= RW.K8sLog.Fetch Workload Logs + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} + ... namespace=${NAMESPACE} + ... context=${CONTEXT} + ... kubeconfig=${kubeconfig} + ... log_age=${RW_LOOKBACK_WINDOW} + ... max_log_lines=${MAX_LOG_LINES} + ... max_log_bytes=${MAX_LOG_BYTES} + ... excluded_containers=${EXCLUDED_CONTAINERS} + + # Extract stacktraces from the log directory + ${recentmost_stacktrace}= RW.LogAnalysis.ExtractTraceback.Extract Tracebacks + ... logs_dir=${log_dir} + ... fast_exit=${True} + + ${stacktrace_length}= Get Length ${recentmost_stacktrace} + + IF ${stacktrace_length} != 0 + # Stacktrace found - set score to 0 + ${stacktrace_score}= Set Variable 0 + ${delimiter}= Evaluate '-' * 150 + Set Suite Variable ${stacktrace_details} **Stacktrace(s) identified**:\n${delimiter}\n${recentmost_stacktrace}\n${delimiter} + ELSE + # No stacktraces found - set score to 1 + ${stacktrace_score}= Set Variable 1.0 + Set Suite Variable ${stacktrace_details} **No Stacktraces identified.**\n\nLog analysis completed successfully. 
+ END + + # Clean up temporary log files + RW.K8sLog.Cleanup Temp Files + END + + Set Suite Variable ${stacktrace_score} + RW.Core.Push Metric ${stacktrace_score} sub_name=stacktrace_score + Generate Application Health Score for `${DEPLOYMENT_NAME}` [Documentation] Generates the final applog health score and report details [Tags] score health applog @@ -263,8 +326,8 @@ Generate Application Health Score for `${DEPLOYMENT_NAME}` Log Deployment ${DEPLOYMENT_NAME} is intentionally scaled to 0 replicas (${SCALED_DOWN_INFO}) - Score: ${health_score} RW.Core.Add to Report Applog Health Score: ${health_score} - Deployment intentionally scaled to 0 replicas ELSE - # Use the log health score as the final health score - ${health_score}= Set Variable ${log_health_score} + # Use the higher of log health score and stacktrace score as the final health score + ${health_score}= Evaluate max(${log_health_score}, ${stacktrace_score}) IF ${health_score} == 1.0 RW.Core.Add to Report Applog Health Score: ${health_score} - No applog issues detected in workload logs diff --git a/codebundles/k8s-daemonset-healthcheck/runbook.robot b/codebundles/k8s-daemonset-healthcheck/runbook.robot index 8233cb0a9..c6f0d3d29 100644 --- a/codebundles/k8s-daemonset-healthcheck/runbook.robot +++ b/codebundles/k8s-daemonset-healthcheck/runbook.robot @@ -20,64 +20,6 @@ Suite Setup Suite Initialization *** Tasks *** -Analyze Application Log Patterns for DaemonSet `${DAEMONSET_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Fetches and analyzes logs from the DaemonSet pods for errors, stack traces, connection issues, and other patterns that indicate application health problems. - [Tags] - ... logs - ... application - ... errors - ... patterns - ... health - ... daemonset - ... stacktrace - ... access:read-only - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=daemonset - ... workload_name=${DAEMONSET_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... 
kubeconfig=${kubeconfig} - ... log_age=${LOG_AGE} - - ${scan_results}= RW.K8sLog.Scan Logs For Issues - ... log_dir=${log_dir} - ... workload_type=daemonset - ... workload_name=${DAEMONSET_NAME} - ... namespace=${NAMESPACE} - ... categories=@{LOG_PATTERN_CATEGORIES} - - ${log_health_score}= RW.K8sLog.Calculate Log Health Score scan_results=${scan_results} - - # Process each issue found in the logs - ${issues}= Evaluate $scan_results.get('issues', []) - FOR ${issue} IN @{issues} - ${severity}= Evaluate $issue.get('severity', ${LOG_SEVERITY_THRESHOLD}) - IF ${severity} <= ${LOG_SEVERITY_THRESHOLD} - # Use the full issue details directly without summarization to preserve all log content - ${issue_details_raw}= Evaluate $issue.get("details", "") - ${issue_details_str}= Convert To String ${issue_details_raw} - ${issue_timestamp}= Evaluate $issue.get('observed_at', '') - - RW.Core.Add Issue - ... severity=${severity} - ... expected=Application logs should be free of critical errors for daemonset `${DAEMONSET_NAME}` in namespace `${NAMESPACE}` - ... actual=${issue.get('title', 'Log pattern issue detected')} in daemonset `${DAEMONSET_NAME}` in namespace `${NAMESPACE}` - ... title=${issue.get('title', 'Log Pattern Issue')} in DaemonSet `${DAEMONSET_NAME}` - ... reproduce_hint=Check application logs for daemonset `${DAEMONSET_NAME}` in namespace `${NAMESPACE}` - ... details=${issue_details_str} - ... next_steps=${issue.get('next_steps', 'Review application logs and resolve underlying issues')} - ... 
observed_at=${issue_timestamp} - END - END - - ${issues_count}= Get Length ${issues} - - # Format scan results for better display - ${formatted_results}= RW.K8sLog.Format Scan Results For Display scan_results=${scan_results} - - RW.Core.Add Pre To Report **Log Analysis Summary for DaemonSet `${DAEMONSET_NAME}`**\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} - - RW.K8sLog.Cleanup Temp Files Detect Log Anomalies for DaemonSet `${DAEMONSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Analyzes logs for repeating patterns, anomalous behavior, and unusual log volume that may indicate underlying issues. diff --git a/codebundles/k8s-deployment-healthcheck/runbook.robot b/codebundles/k8s-deployment-healthcheck/runbook.robot index cde66123b..cb47daaa7 100755 --- a/codebundles/k8s-deployment-healthcheck/runbook.robot +++ b/codebundles/k8s-deployment-healthcheck/runbook.robot @@ -215,105 +215,6 @@ Suite Initialization *** Tasks *** -Analyze Application Log Patterns for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Fetches and analyzes logs from the deployment pods for errors, connection issues, and other patterns that indicate application health problems. Note: Warning messages about missing log files for excluded containers (like linkerd-proxy, istio-proxy) are expected and harmless. - [Tags] - ... logs - ... application - ... errors - ... patterns - ... health - ... deployment - ... access:read-only - # Skip pod-related checks if deployment is scaled to 0 - IF not ${SKIP_POD_CHECKS} - # Temporarily suppress log warnings for excluded containers (they're expected) - TRY - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... 
log_age=${LOG_AGE} - ... excluded_containers=${EXCLUDED_CONTAINERS} - EXCEPT AS ${log_error} - # If log fetching fails completely, log the error but continue - Log Warning: Log fetching encountered an error: ${log_error} - # Set empty log directory to continue with other checks - ${log_dir}= Set Variable ${EMPTY} - END - - # Only scan logs if we have a valid log directory - IF '''${log_dir}''' != '''${EMPTY}''' - ${scan_results}= RW.K8sLog.Scan Logs For Issues - ... log_dir=${log_dir} - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} - ... namespace=${NAMESPACE} - ... categories=@{LOG_PATTERN_CATEGORIES} - ... custom_patterns_file=runbook_patterns.json - ... excluded_containers=${EXCLUDED_CONTAINERS} - ELSE - # Create empty scan results if no logs were fetched - ${scan_results}= Evaluate {"issues": [], "summary": ["No logs available for analysis"]} - END - - # Post-process results to filter out patterns matching LOGS_EXCLUDE_PATTERN - TRY - IF $LOGS_EXCLUDE_PATTERN != "" - ${filtered_issues}= Evaluate [issue for issue in $scan_results.get('issues', []) if not __import__('re').search('${LOGS_EXCLUDE_PATTERN}', issue.get('details', ''), __import__('re').IGNORECASE)] modules=re - ${filtered_results}= Evaluate {**$scan_results, 'issues': $filtered_issues} - Set Test Variable ${scan_results} ${filtered_results} - END - EXCEPT - Log Warning: Failed to apply LOGS_EXCLUDE_PATTERN filter, using unfiltered results - END - - ${log_health_score}= RW.K8sLog.Calculate Log Health Score scan_results=${scan_results} - - # Process each issue found in the logs - ${issues}= Evaluate $scan_results.get('issues', []) - FOR ${issue} IN @{issues} - ${severity}= Evaluate $issue.get('severity', ${LOG_SEVERITY_THRESHOLD}) - IF ${severity} <= ${LOG_SEVERITY_THRESHOLD} - # Convert issue details to string to avoid serialization issues - ${issue_details_raw}= Evaluate $issue.get("details", "") - ${issue_details_str}= Convert To String ${issue_details_raw} - 
${summarized_details}= RW.K8sLog.Summarize Log Issues issue_details=${issue_details_str} - - # Safely extract title and next_steps as strings - ${issue_title_raw}= Evaluate $issue.get('title', 'Log pattern issue detected') - ${issue_title}= Convert To String ${issue_title_raw} - ${next_steps_raw}= Evaluate $issue.get('next_steps', 'Review application logs and resolve underlying issues') - ${next_steps}= Convert To String ${next_steps_raw} - - # Use timestamp from log scan results if available, otherwise extract from details - ${issue_timestamp}= Evaluate $issue.get('observed_at', '') - - RW.Core.Add Issue - ... severity=${severity} - ... expected=Application logs should be free of critical errors for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... actual=${issue_title} in deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... title=${issue_title} in Deployment `${DEPLOYMENT_NAME}` - ... reproduce_hint=Check application logs for deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` - ... details=${summarized_details} - ... next_steps=${next_steps} - ... 
observed_at=${issue_timestamp} - END - END - - ${issues_count}= Get Length ${issues} - - # Convert scan_results to string to avoid serialization issues, then format for display - ${scan_results_str}= Evaluate json.dumps($scan_results, indent=2) json - ${formatted_results}= RW.K8sLog.Format Scan Results For Display scan_results=${scan_results_str} - - RW.Core.Add Pre To Report **Log Analysis Summary for Deployment `${DEPLOYMENT_NAME}`**\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} - - RW.K8sLog.Cleanup Temp Files - END - Detect Event Anomalies for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` [Documentation] Analyzes Kubernetes event patterns to identify anomalies such as sudden spikes in event rates, unusual patterns, or recurring issues that might indicate underlying problems with controllers, resources, or deployments. [Tags] diff --git a/codebundles/k8s-statefulset-healthcheck/runbook.robot b/codebundles/k8s-statefulset-healthcheck/runbook.robot index b5394e3fe..84738e855 100644 --- a/codebundles/k8s-statefulset-healthcheck/runbook.robot +++ b/codebundles/k8s-statefulset-healthcheck/runbook.robot @@ -20,65 +20,6 @@ Suite Setup Suite Initialization *** Tasks *** -Analyze Application Log Patterns for StatefulSet `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Fetches and analyzes logs from the StatefulSet pods for errors, stack traces, connection issues, and other patterns that indicate application health problems. - [Tags] - ... logs - ... application - ... errors - ... patterns - ... health - ... statefulset - ... stacktrace - ... access:read-only - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=statefulset - ... workload_name=${STATEFULSET_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... 
log_age=${LOG_AGE} - - ${scan_results}= RW.K8sLog.Scan Logs For Issues - ... log_dir=${log_dir} - ... workload_type=statefulset - ... workload_name=${STATEFULSET_NAME} - ... namespace=${NAMESPACE} - ... categories=@{LOG_PATTERN_CATEGORIES} - - ${log_health_score}= RW.K8sLog.Calculate Log Health Score scan_results=${scan_results} - - # Process each issue found in the logs - ${issues}= Evaluate $scan_results.get('issues', []) - FOR ${issue} IN @{issues} - ${severity}= Evaluate $issue.get('severity', ${LOG_SEVERITY_THRESHOLD}) - IF ${severity} <= ${LOG_SEVERITY_THRESHOLD} - # Use the full issue details directly without summarization to preserve all log content - ${issue_details_raw}= Evaluate $issue.get("details", "") - ${issue_details_str}= Convert To String ${issue_details_raw} - # Use timestamp from log scan results if available, otherwise extract from details - ${issue_timestamp}= Evaluate $issue.get('observed_at', '') - - RW.Core.Add Issue - ... severity=${severity} - ... expected=Application logs should be free of critical errors for statefulset `${STATEFULSET_NAME}` in namespace `${NAMESPACE}` - ... actual=${issue.get('title', 'Log pattern issue detected')} in statefulset `${STATEFULSET_NAME}` in namespace `${NAMESPACE}` - ... title=${issue.get('title', 'Log Pattern Issue')} in StatefulSet `${STATEFULSET_NAME}` - ... reproduce_hint=Check application logs for statefulset `${STATEFULSET_NAME}` in namespace `${NAMESPACE}` - ... details=${issue_details_str} - ... next_steps=${issue.get('next_steps', 'Review application logs and resolve underlying issues')} - ... 
observed_at=${issue_timestamp} - END - END - - ${issues_count}= Get Length ${issues} - - # Format scan results for better display - ${formatted_results}= RW.K8sLog.Format Scan Results For Display scan_results=${scan_results} - - RW.Core.Add Pre To Report **Log Analysis Summary for StatefulSet `${STATEFULSET_NAME}`**\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} - - RW.K8sLog.Cleanup Temp Files Detect Log Anomalies for StatefulSet `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Analyzes logs for repeating patterns, anomalous behavior, and unusual log volume that may indicate underlying issues. From 24f4d74167fdf6c38fe83dd4359feb476b32eaf2 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Wed, 11 Feb 2026 18:15:13 +0530 Subject: [PATCH 06/10] \k8s-applog-health: generalize to workload type, drop stacktrace SLI, shorten interval- Templates: use WORKLOAD_NAME + WORKLOAD_TYPE (from match_resource.kind) instead of DEPLOYMENT_NAME; - runbook.robot: remove 'Analyze Workload Stacktraces' task; rely on log pattern analysis only; cleanup temp files inside conditional- sli.robot: replace DEPLOYMENT_NAME with WORKLOAD_NAME/WORKLOAD_TYPE; remove 'Get Stacktrace Health Score' task; final health score from log_health_score only; scale-down timestamp logic only for deployment kind --- .../templates/k8s-applog-health-sli.yaml | 6 +- .../templates/k8s-applog-health-slx.yaml | 4 +- .../templates/k8s-applog-health-taskset.yaml | 4 +- codebundles/k8s-applog-health/runbook.robot | 71 +-------------- codebundles/k8s-applog-health/sli.robot | 88 +++++-------------- 5 files changed, 31 insertions(+), 142 deletions(-) diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml index c4e813650..aa3253be2 100755 --- 
a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml @@ -26,7 +26,7 @@ spec: pathToRobot: codebundles/k8s-applog-health/sli.robot intervalStrategy: intermezzo intervalSeconds: 600 - description: Measures the health of the application logs for the {{match_resource.resource.metadata.name}} deployment. + description: Measures the health of the application logs for the {{match_resource.resource.metadata.name}} {{match_resource.kind | lower}}. configProvided: - name: NAMESPACE value: {{match_resource.resource.metadata.namespace}} @@ -34,8 +34,10 @@ spec: value: {{context}} - name: KUBERNETES_DISTRIBUTION_BINARY value: {{custom.kubernetes_distribution_binary | default("kubectl")}} - - name: DEPLOYMENT_NAME + - name: WORKLOAD_NAME value: {{match_resource.resource.metadata.name}} + - name: WORKLOAD_TYPE + value: {{match_resource.kind | lower}} - name: CONTAINER_RESTART_AGE value: "10m" - name: CONTAINER_RESTART_THRESHOLD diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml index 7ad748284..a9ca8968c 100644 --- a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-slx.yaml @@ -8,14 +8,14 @@ metadata: {% include "common-annotations.yaml" %} spec: imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/kubernetes/resources/labeled/deploy.svg - alias: {{match_resource.resource.metadata.name}} Application Log Health + alias: {{match_resource.resource.metadata.name}} {{match_resource.kind}} Application Log Health asMeasuredBy: The presence of application-level errors/issues/stacktraces in the application logs indicating runtime errors or exceptions in {{match_resource.resource.metadata.name}}. 
configProvided: - name: OBJECT_NAME value: {{match_resource.resource.metadata.name}} owners: - {{workspace.owner_email}} - statement: Application logs for {{match_resource.resource.metadata.name}} should be free of critical errors/issues/stacktraces indicating runtime errors or exceptions. + statement: Application logs for {{match_resource.resource.metadata.name}} {{match_resource.kind | lower}} should be free of critical errors/issues/stacktraces indicating runtime errors or exceptions. additionalContext: {% include "kubernetes-hierarchy.yaml" ignore missing %} qualified_name: "{{ match_resource.qualified_name }}" diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml index 73229c8e6..e67d520e4 100644 --- a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-taskset.yaml @@ -27,8 +27,10 @@ spec: value: {{context}} - name: KUBERNETES_DISTRIBUTION_BINARY value: {{custom.kubernetes_distribution_binary}} - - name: DEPLOYMENT_NAME + - name: WORKLOAD_NAME value: {{match_resource.resource.metadata.name}} + - name: WORKLOAD_TYPE + value: {{match_resource.kind | lower}} - name: CONTAINER_RESTART_AGE value: "30m" - name: CONTAINER_RESTART_THRESHOLD diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index 299757e56..aa11cb6c3 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -48,7 +48,7 @@ Suite Initialization ... example=otel-demo ${WORKLOAD_NAME}= RW.Core.Import User Variable WORKLOAD_NAME ... type=string - ... description=The name of the workload (deployment, statefulset, or daemonset) to analyze for stacktraces. + ... description=The name of the workload (deployment, statefulset, or daemonset) to analyze for application logs. ... 
pattern=\w* ... example=otel-demo-frontend ${WORKLOAD_TYPE}= RW.Core.Import User Variable WORKLOAD_TYPE @@ -180,7 +180,6 @@ Suite Initialization ... LOGS_ERROR_PATTERN=${LOGS_ERROR_PATTERN} ... LOGS_EXCLUDE_PATTERN=${LOGS_EXCLUDE_PATTERN} ... ANOMALY_THRESHOLD=${ANOMALY_THRESHOLD} - # ... DEPLOYMENT_NAME=${DEPLOYMENT_NAME} ... WORKLOAD_NAME=${WORKLOAD_NAME} ... WORKLOAD_TYPE=${WORKLOAD_TYPE} ... CONTAINER_RESTART_AGE=${CONTAINER_RESTART_AGE} @@ -260,8 +259,6 @@ Analyze Application Log Patterns for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Name # Set empty log directory to continue with other checks ${log_dir}= Set Variable ${EMPTY} END - - RW.Core.Add Pre To Report **Log Directory:** ${log_dir} # Only scan logs if we have a valid log directory IF '''${log_dir}''' != '''${EMPTY}''' @@ -330,71 +327,5 @@ Analyze Application Log Patterns for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Name RW.Core.Add Pre To Report **Log Analysis Summary for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}`**\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} - RW.K8sLog.Cleanup Temp Files - END - -Analyze Workload Stacktraces for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Collects and analyzes stacktraces/tracebacks from all pods in the workload for troubleshooting application issues. - [Tags] - ... logs - ... stacktraces - ... tracebacks - ... workload - ... troubleshooting - ... errors - ... 
access:read-only - # Skip pod-related checks if workload is scaled to 0 - IF not ${SKIP_STACKTRACE_CHECKS} - # Convert comma-separated string to list for excluded containers - @{EXCLUDED_CONTAINERS}= Run Keyword If "${EXCLUDED_CONTAINER_NAMES}" != "" Split String ${EXCLUDED_CONTAINER_NAMES} , ELSE Create List - - # Fetch logs using RW.K8sLog library (same pattern as deployment healthcheck) - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=${WORKLOAD_TYPE} - ... workload_name=${WORKLOAD_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... log_age=${LOG_AGE} - ... max_log_lines=${LOG_LINES} - ... max_log_bytes=${LOG_SIZE} - ... excluded_containers=${EXCLUDED_CONTAINERS} - - # Extract stacktraces from the log directory using the traceback library - ${tracebacks}= RW.LogAnalysis.ExtractTraceback.Extract Tracebacks - ... logs_dir=${log_dir} - - # Check total number of tracebacks extracted - ${total_tracebacks}= Get Length ${tracebacks} - - IF ${total_tracebacks} == 0 - # No tracebacks found - RW.Core.Add Pre To Report **šŸ“‹ No Stacktraces Found for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`**\n**Log Analysis Period:** ${LOG_AGE}\n**Max Log Lines:** ${LOG_LINES}\n**Max Log Size:** ${LOG_SIZE} bytes\n**Excluded Containers:** ${EXCLUDED_CONTAINER_NAMES}\n\nLog analysis completed successfully with no stacktraces detected. - ELSE - # Stacktraces found - create issues for each one - ${delimiter}= Evaluate '-' * 80 - - FOR ${traceback} IN @{tracebacks} - ${stacktrace}= Set Variable ${traceback["stacktrace"]} - ${timestamp}= Set Variable ${traceback["timestamp"]} - RW.Core.Add Issue - ... severity=2 - ... expected=No stacktraces should be present in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` logs in namespace `${NAMESPACE}` - ... actual=Stacktrace detected in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` logs in namespace `${NAMESPACE}` - ... title=Stacktrace Detected in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` - ... 
reproduce_hint=Check application logs for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in namespace `${NAMESPACE}` - ... details=${delimiter}\n${stacktrace}\n${delimiter} - ... next_steps=Review application logs for the root cause of the stacktrace\nCheck application configuration and resource limits\nInvestigate the specific error conditions that led to this stacktrace\nConsider scaling or restarting the ${WORKLOAD_TYPE} if issues persist\nMonitor application health and performance metrics - ... next_action=analyseStacktrace - ... observed_at=${timestamp} - END - - # Create consolidated report showing all stacktraces - ${stacktrace_strings}= Evaluate [tb["stacktrace"] for tb in ${tracebacks}] - ${agg_tracebacks}= Evaluate "\\n" + "\\n${delimiter}\\n".join(${stacktrace_strings}) - RW.Core.Add Pre To Report **šŸ” Stacktraces Found for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`**\n**Total Stacktraces:** ${total_tracebacks}\n**Log Analysis Period:** ${LOG_AGE}\n**Max Log Lines:** ${LOG_LINES}\n**Max Log Size:** ${LOG_SIZE} bytes\n**Excluded Containers:** ${EXCLUDED_CONTAINER_NAMES}\n\n${agg_tracebacks} - END - - # Clean up temporary log files RW.K8sLog.Cleanup Temp Files END \ No newline at end of file diff --git a/codebundles/k8s-applog-health/sli.robot b/codebundles/k8s-applog-health/sli.robot index d9a4d1f37..ab920e7f0 100755 --- a/codebundles/k8s-applog-health/sli.robot +++ b/codebundles/k8s-applog-health/sli.robot @@ -8,7 +8,6 @@ Library BuiltIn Library RW.Core Library RW.CLI Library RW.platform -Library RW.LogAnalysis.ExtractTraceback Library RW.K8sLog Library OperatingSystem @@ -74,8 +73,8 @@ Suite Initialization ... type=string ... description=Pattern used to exclude entries from log analysis when searching for errors. Use regex patterns to filter out false positives like JSON structures. ... pattern=.* - ... example="errors":\s*\[\]|"warnings":\s*\[\] - ... 
default="errors":\s*\[\]|\\bINFO\\b|\\bDEBUG\\b|\\bTRACE\\b|\\bSTART\\s*-\\s*|\\bSTART\\s*method\\b + ... example="errors":\\s*\\[\\]|"warnings":\\s*\\[\\] + ... default="errors":\\\\s*\\\\[\\\\]|\\\\bINFO\\\\b|\\\\bDEBUG\\\\b|\\\\bTRACE\\\\b|\\\\bSTART\\\\s*-\\\\s*|\\\\bSTART\\\\s*method\\\\b ${EXCLUDED_CONTAINER_NAMES}= RW.Core.Import User Variable EXCLUDED_CONTAINER_NAMES ... type=string ... description=Comma-separated list of container names to exclude from log analysis (e.g., linkerd-proxy, istio-proxy, vault-agent). @@ -145,7 +144,7 @@ Suite Initialization # DaemonSets don't scale to 0 in the traditional sense, so skip scale-down logic for them IF '${WORKLOAD_TYPE}' == 'daemonset' - Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with stacktrace checks + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with log checks Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} ELSE IF ${spec_replicas} == 0 Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is scaled to 0 replicas - returning perfect health score @@ -153,12 +152,12 @@ Suite Initialization # For scaled-down workloads, return a score of 1.0 to indicate "intentionally down" vs "broken" Set Suite Variable ${SKIP_HEALTH_CHECKS} ${True} ELSE - Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} has ${spec_replicas} desired replicas - proceeding with stacktrace checks + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} has ${spec_replicas} desired replicas - proceeding with log checks Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} END EXCEPT - Log Warning: Failed to check workload scale, continuing with normal stacktrace checks + Log Warning: Failed to check workload scale, continuing with normal log checks Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} END @@ -167,11 +166,11 @@ Get Deployment Scale Down Timestamp [Documentation] Attempts to determine when a deployment was scaled down by examining recent events ${scale_down_info}= Set Variable Unknown - IF ${spec_replicas} == 0 + IF ${spec_replicas} == 0 and 
'${WORKLOAD_TYPE}' == 'deployment' TRY # Check recent scaling events to find when it was scaled to 0 ${scaling_events}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} --sort-by='.lastTimestamp' -o json | jq -r '.items[] | select(.reason == "ScalingReplicaSet" and (.message | contains("${DEPLOYMENT_NAME}")) and (.message | contains("to 0"))) | {timestamp: .lastTimestamp, message: .message}' | jq -s 'sort_by(.timestamp) | reverse | .[0] // empty' + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} --sort-by='.lastTimestamp' -o json | jq -r '.items[] | select(.reason == "ScalingReplicaSet" and (.message | contains("${WORKLOAD_NAME}")) and (.message | contains("to 0"))) | {timestamp: .lastTimestamp, message: .message}' | jq -s 'sort_by(.timestamp) | reverse | .[0] // empty' ... env=${env} ... secret_file__kubeconfig=${kubeconfig} ... timeout_seconds=15 @@ -185,7 +184,7 @@ Get Deployment Scale Down Timestamp ELSE # Try checking replicaset history as fallback ${rs_history}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get replicasets --context ${CONTEXT} -n ${NAMESPACE} -l app=${DEPLOYMENT_NAME} -o json | jq -r '.items[] | select(.spec.replicas == 0) | {creation_time: .metadata.creationTimestamp, name: .metadata.name}' | jq -s 'sort_by(.creation_time) | reverse | .[0] // empty' + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get replicasets --context ${CONTEXT} -n ${NAMESPACE} -l app=${WORKLOAD_NAME} -o json | jq -r '.items[] | select(.spec.replicas == 0) | {creation_time: .metadata.creationTimestamp, name: .metadata.name}' | jq -s 'sort_by(.creation_time) | reverse | .[0] // empty' ... env=${env} ... secret_file__kubeconfig=${kubeconfig} ... 
timeout_seconds=15 @@ -197,7 +196,7 @@ Get Deployment Scale Down Timestamp Log Estimated scale-down time from ReplicaSet: ${scale_down_info} ELSE ${scale_down_info}= Set Variable Unable to determine - no recent scaling events found - Log Could not determine when deployment was scaled down + Log Could not determine when ${WORKLOAD_TYPE} ${WORKLOAD_NAME} was scaled down END END EXCEPT @@ -209,20 +208,20 @@ Get Deployment Scale Down Timestamp RETURN ${scale_down_info} *** Tasks *** -Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` +Get Critical Log Errors and Score for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` [Documentation] Fetches logs and checks for critical error patterns that indicate application failures. [Tags] logs errors critical patterns # Skip if deployment is scaled down IF ${SKIP_HEALTH_CHECKS} - Log Skipping log analysis - deployment is scaled to 0 replicas + Log Skipping log analysis - ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is scaled to 0 replicas ${log_health_score}= Set Variable 1 # Perfect score for scaled deployment Set Suite Variable ${log_health_score} RW.Core.Push Metric ${log_health_score} sub_name=log_errors ELSE ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} ... namespace=${NAMESPACE} ... context=${CONTEXT} ... kubeconfig=${kubeconfig} @@ -230,14 +229,14 @@ Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` ... max_log_lines=${MAX_LOG_LINES} ... max_log_bytes=${MAX_LOG_BYTES} ... excluded_containers=${EXCLUDED_CONTAINERS} - + # Use only critical error patterns for fast SLI checks @{critical_categories}= Create List GenericError AppFailure ${scan_results}= RW.K8sLog.Scan Logs For Issues ... log_dir=${log_dir} - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} ... 
namespace=${NAMESPACE} ... categories=${critical_categories} ... custom_patterns_file=sli_critical_patterns.json @@ -270,52 +269,7 @@ Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` RW.Core.Push Metric ${log_health_score} sub_name=log_errors END -Get Stacktrace Health Score for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` - [Documentation] Checks for recent stacktraces/tracebacks related to the workload within a short time window, with filtering to reduce noise. - [Tags] stacktraces tracebacks errors recent fast - IF ${SKIP_HEALTH_CHECKS} - # For scaled-down deployments, return perfect score to indicate "intentionally down" vs "broken" - ${stacktrace_score}= Set Variable 1.0 - Set Suite Variable ${stacktrace_details} ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is intentionally scaled to 0 replicas - Score: ${stacktrace_score} - ELSE - # Fetch logs using RW.K8sLog library (same pattern as deployment healthcheck) - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=${WORKLOAD_TYPE} - ... workload_name=${WORKLOAD_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... log_age=${RW_LOOKBACK_WINDOW} - ... max_log_lines=${MAX_LOG_LINES} - ... max_log_bytes=${MAX_LOG_BYTES} - ... excluded_containers=${EXCLUDED_CONTAINERS} - - # Extract stacktraces from the log directory - ${recentmost_stacktrace}= RW.LogAnalysis.ExtractTraceback.Extract Tracebacks - ... logs_dir=${log_dir} - ... 
fast_exit=${True} - - ${stacktrace_length}= Get Length ${recentmost_stacktrace} - - IF ${stacktrace_length} != 0 - # Stacktrace found - set score to 0 - ${stacktrace_score}= Set Variable 0 - ${delimiter}= Evaluate '-' * 150 - Set Suite Variable ${stacktrace_details} **Stacktrace(s) identified**:\n${delimiter}\n${recentmost_stacktrace}\n${delimiter} - ELSE - # No stacktraces found - set score to 1 - ${stacktrace_score}= Set Variable 1.0 - Set Suite Variable ${stacktrace_details} **No Stacktraces identified.**\n\nLog analysis completed successfully. - END - - # Clean up temporary log files - RW.K8sLog.Cleanup Temp Files - END - - Set Suite Variable ${stacktrace_score} - RW.Core.Push Metric ${stacktrace_score} sub_name=stacktrace_score - -Generate Application Health Score for `${DEPLOYMENT_NAME}` +Generate Application Health Score for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}` [Documentation] Generates the final applog health score and report details [Tags] score health applog @@ -323,11 +277,11 @@ Generate Application Health Score for `${DEPLOYMENT_NAME}` # For scaled-down deployments, return perfect score to indicate "intentionally down" vs "broken" # We distinguish scaled-down vs broken deployments through the log message and report details ${health_score}= Set Variable 1.0 - Log Deployment ${DEPLOYMENT_NAME} is intentionally scaled to 0 replicas (${SCALED_DOWN_INFO}) - Score: ${health_score} - RW.Core.Add to Report Applog Health Score: ${health_score} - Deployment intentionally scaled to 0 replicas + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is intentionally scaled to 0 replicas (${SCALED_DOWN_INFO}) - Score: ${health_score} + RW.Core.Add to Report Applog Health Score: ${health_score} - ${WORKLOAD_TYPE} ${WORKLOAD_NAME} intentionally scaled to 0 replicas ELSE - # Use the higher of log health score and stacktrace score as the final health score - ${health_score}= Evaluate max(${log_health_score}, ${stacktrace_score}) + # Use the log health score as the final health score. 
+ ${health_score}= Set Variable ${log_health_score} IF ${health_score} == 1.0 RW.Core.Add to Report Applog Health Score: ${health_score} - No applog issues detected in workload logs From 4573ff4f4df9e2fb980202601217c9cf5344ea86 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Wed, 11 Feb 2026 14:12:36 +0000 Subject: [PATCH 07/10] - shift the "Fetch Deployment Logs" task to applog codebundle - delete the "Detect Log Anomalies" task from daemonset-healthcheck and statefulset-healthcheck" --- codebundles/k8s-applog-health/runbook.robot | 67 ++++++++++++++++++- .../k8s-daemonset-healthcheck/runbook.robot | 50 -------------- .../k8s-deployment-healthcheck/runbook.robot | 65 ------------------ 3 files changed, 66 insertions(+), 116 deletions(-) diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index aa11cb6c3..ed46577b7 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -325,7 +325,72 @@ Analyze Application Log Patterns for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Name ${scan_results_str}= Evaluate json.dumps($scan_results, indent=2) json ${formatted_results}= RW.K8sLog.Format Scan Results For Display scan_results=${scan_results_str} - RW.Core.Add Pre To Report **Log Analysis Summary for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}`**\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} + RW.Core.Add Pre To Report **Log Analysis Summary for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` (Last ${LOG_LINES} lines, ${LOG_AGE} age) **\n**Health Score:** ${log_health_score}\n**Analysis Depth:** ${LOG_ANALYSIS_DEPTH}\n**Categories Analyzed:** ${LOG_PATTERN_CATEGORIES_STR}\n**Issues Found:** ${issues_count}\n\n${formatted_results} RW.K8sLog.Cleanup Temp Files + END + +Fetch Workload Logs for `${WORKLOAD_TYPE}` 
`${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Fetches and displays workload logs in the report for manual review. Note: Issues are not created by this task - see "Analyze Application Log Patterns" for automated issue detection. + [Tags] + ... logs + ... collection + ... ${WORKLOAD_TYPE} + ... troubleshooting + ... access:read-only + # Skip pod-related checks if deployment is scaled to 0 + IF not ${SKIP_POD_CHECKS} + # Fetch raw logs + ${workload_logs}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} --tail=${LOG_LINES} --since=${LOG_AGE} + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... show_in_rwl_cheatsheet=true + ... render_in_commandlist=true + + IF ${workload_logs.returncode} == 0 + # Filter logs to remove repetitive health check messages and focus on meaningful content + ${filtered_logs}= RW.CLI.Run Cli + ... cmd=echo "${workload_logs.stdout}" | grep -v -E "(Checking.*Health|Health.*Check|healthcheck|/health|GET /|POST /health|probe|liveness|readiness)" | grep -E "(error|ERROR|warn|WARN|exception|Exception|fail|FAIL|fatal|FATAL|panic|stack|trace|timeout|connection.*refused|unable.*connect|authentication.*failed|denied|forbidden|unauthorized|500|502|503|504)" | tail -50 || echo "No significant errors or warnings found in recent logs" + ... env=${env} + ... include_in_history=false + + # Also get a sample of non-health-check logs for context + ${context_logs}= RW.CLI.Run Cli + ... cmd=echo "${workload_logs.stdout}" | grep -v -E "(Checking.*Health|Health.*Check|healthcheck|/health|GET /|POST /health|probe|liveness|readiness)" | head -20 | tail -10 + ... env=${env} + ... include_in_history=false + + ${history}= RW.CLI.Pop Shell History + + # Determine if logs are mostly health checks + ${total_lines}= RW.CLI.Run Cli + ... cmd=echo "${workload_logs.stdout}" | wc -l + ... env=${env} + ... 
include_in_history=false + + ${health_check_lines}= RW.CLI.Run Cli + ... cmd=echo "${workload_logs.stdout}" | grep -E "(Checking.*Health|Health.*Check|healthcheck|/health)" | wc -l + ... env=${env} + ... include_in_history=false + + # Handle empty output from wc -l by providing default values + ${total_lines_clean}= Set Variable If "${total_lines.stdout.strip()}" == "" 0 ${total_lines.stdout.strip()} + ${health_check_lines_clean}= Set Variable If "${health_check_lines.stdout.strip()}" == "" 0 ${health_check_lines.stdout.strip()} + + ${total_count}= Convert To Integer ${total_lines_clean} + ${health_count}= Convert To Integer ${health_check_lines_clean} + + # Create consolidated logs report + IF ${health_count} > ${total_count} * 0.8 + ${log_content}= Set Variable If "${context_logs.stdout.strip()}" != "" **šŸ” Filtered Error/Warning Logs:**\n${filtered_logs.stdout}\n\n**šŸ“ Sample Application Logs (Non-Health Check):**\n${context_logs.stdout} **šŸ” Filtered Error/Warning Logs:**\n${filtered_logs.stdout} + RW.Core.Add Pre To Report **šŸ“‹ Raw Workload Logs for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}`** (Last ${LOG_LINES} lines, ${LOG_AGE} age)\n**Total Log Lines:** ${total_count} | **Health Check Lines:** ${health_count}\n**ā„¹ļø Logs are mostly health check messages (${health_count}/${total_count} lines)**\n\n${log_content}\n\n**Commands Used:** ${history}\n\n**Note:** Automated issue detection is performed by the "Analyze Application Log Patterns" task. + ELSE + RW.Core.Add Pre To Report **šŸ“‹ Raw Workload Logs for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}`** (Last ${LOG_LINES} lines, ${LOG_AGE} age)\n**Total Log Lines:** ${total_count} | **Health Check Lines:** ${health_count}\n\n**šŸ“ Recent Application Logs:**\n${workload_logs.stdout}\n\n**Commands Used:** ${history}\n\n**Note:** Automated issue detection is performed by the "Analyze Application Log Patterns" task. 
+ END + ELSE + # Only add to report if fetch failed, don't create issue + ${history}= RW.CLI.Pop Shell History + RW.Core.Add Pre To Report **šŸ“‹ Raw Logs for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}`**\n\nāš ļø Unable to fetch workload logs (exit code ${workload_logs.returncode}).\n\n**STDERR:** ${workload_logs.stderr}\n\n**Commands Used:** ${history} + END END \ No newline at end of file diff --git a/codebundles/k8s-daemonset-healthcheck/runbook.robot b/codebundles/k8s-daemonset-healthcheck/runbook.robot index c6f0d3d29..e2ea9a654 100644 --- a/codebundles/k8s-daemonset-healthcheck/runbook.robot +++ b/codebundles/k8s-daemonset-healthcheck/runbook.robot @@ -21,56 +21,6 @@ Suite Setup Suite Initialization *** Tasks *** -Detect Log Anomalies for DaemonSet `${DAEMONSET_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Analyzes logs for repeating patterns, anomalous behavior, and unusual log volume that may indicate underlying issues. - [Tags] - ... logs - ... anomalies - ... patterns - ... volume - ... daemonset - ... ${DAEMONSET_NAME} - ... access:read-only - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=daemonset - ... workload_name=${DAEMONSET_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... log_age=${LOG_AGE} - - ${anomaly_results}= RW.K8sLog.Analyze Log Anomalies - ... log_dir=${log_dir} - ... workload_type=daemonset - ... workload_name=${DAEMONSET_NAME} - ... namespace=${NAMESPACE} - - # Process anomaly issues - ${anomaly_issues}= Evaluate $anomaly_results.get('issues', []) - IF len($anomaly_issues) > 0 - FOR ${issue} IN @{anomaly_issues} - ${summarized_details}= RW.K8sLog.Summarize Log Issues issue_details=${issue["details"]} - ${next_steps_text}= Catenate SEPARATOR=\n @{issue["next_steps"]} - ${issue_timestamp}= Evaluate $issue.get('observed_at', '') - - RW.Core.Add Issue - ... severity=${issue["severity"]} - ... 
expected=No log anomalies should be present in DaemonSet `${DAEMONSET_NAME}` in namespace `${NAMESPACE}` - ... actual=Log anomalies detected in DaemonSet `${DAEMONSET_NAME}` in namespace `${NAMESPACE}` - ... title=${issue["title"]} - ... reproduce_hint=Use RW.K8sLog.Analyze Log Anomalies keyword to reproduce this analysis - ... details=${summarized_details} - ... next_steps=${next_steps_text} - ... observed_at=${issue_timestamp} - END - END - - # Add summary to report - ${anomaly_summary}= Catenate SEPARATOR=\n @{anomaly_results["summary"]} - RW.Core.Add Pre To Report Log Anomaly Analysis for DaemonSet ${DAEMONSET_NAME}:\n${anomaly_summary} - - RW.K8sLog.Cleanup Temp Files - Identify Recent Configuration Changes for DaemonSet `${DAEMONSET_NAME}` in Namespace `${NAMESPACE}` [Documentation] Identifies recent configuration changes from ControllerRevision analysis that might be related to current issues. [Tags] diff --git a/codebundles/k8s-deployment-healthcheck/runbook.robot b/codebundles/k8s-deployment-healthcheck/runbook.robot index cb47daaa7..f825ea8fb 100755 --- a/codebundles/k8s-deployment-healthcheck/runbook.robot +++ b/codebundles/k8s-deployment-healthcheck/runbook.robot @@ -289,71 +289,6 @@ Detect Event Anomalies for Deployment `${DEPLOYMENT_NAME}` in Namespace `${NAMES END END -Fetch Deployment Logs for `${DEPLOYMENT_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Fetches and displays deployment logs in the report for manual review. Note: Issues are not created by this task - see "Analyze Application Log Patterns" for automated issue detection. - [Tags] - ... logs - ... collection - ... deployment - ... troubleshooting - ... access:read-only - # Skip pod-related checks if deployment is scaled to 0 - IF not ${SKIP_POD_CHECKS} - # Fetch raw logs - ${deployment_logs}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs deployment/${DEPLOYMENT_NAME} --context ${CONTEXT} -n ${NAMESPACE} --tail=${LOG_LINES} --since=${LOG_AGE} - ... env=${env} - ... 
secret_file__kubeconfig=${kubeconfig} - ... show_in_rwl_cheatsheet=true - ... render_in_commandlist=true - - IF ${deployment_logs.returncode} == 0 - # Filter logs to remove repetitive health check messages and focus on meaningful content - ${filtered_logs}= RW.CLI.Run Cli - ... cmd=echo "${deployment_logs.stdout}" | grep -v -E "(Checking.*Health|Health.*Check|healthcheck|/health|GET /|POST /health|probe|liveness|readiness)" | grep -E "(error|ERROR|warn|WARN|exception|Exception|fail|FAIL|fatal|FATAL|panic|stack|trace|timeout|connection.*refused|unable.*connect|authentication.*failed|denied|forbidden|unauthorized|500|502|503|504)" | tail -50 || echo "No significant errors or warnings found in recent logs" - ... env=${env} - ... include_in_history=false - - # Also get a sample of non-health-check logs for context - ${context_logs}= RW.CLI.Run Cli - ... cmd=echo "${deployment_logs.stdout}" | grep -v -E "(Checking.*Health|Health.*Check|healthcheck|/health|GET /|POST /health|probe|liveness|readiness)" | head -20 | tail -10 - ... env=${env} - ... include_in_history=false - - ${history}= RW.CLI.Pop Shell History - - # Determine if logs are mostly health checks - ${total_lines}= RW.CLI.Run Cli - ... cmd=echo "${deployment_logs.stdout}" | wc -l - ... env=${env} - ... include_in_history=false - - ${health_check_lines}= RW.CLI.Run Cli - ... cmd=echo "${deployment_logs.stdout}" | grep -E "(Checking.*Health|Health.*Check|healthcheck|/health)" | wc -l - ... env=${env} - ... 
include_in_history=false - - # Handle empty output from wc -l by providing default values - ${total_lines_clean}= Set Variable If "${total_lines.stdout.strip()}" == "" 0 ${total_lines.stdout.strip()} - ${health_check_lines_clean}= Set Variable If "${health_check_lines.stdout.strip()}" == "" 0 ${health_check_lines.stdout.strip()} - - ${total_count}= Convert To Integer ${total_lines_clean} - ${health_count}= Convert To Integer ${health_check_lines_clean} - - # Create consolidated logs report - IF ${health_count} > ${total_count} * 0.8 - ${log_content}= Set Variable If "${context_logs.stdout.strip()}" != "" **šŸ” Filtered Error/Warning Logs:**\n${filtered_logs.stdout}\n\n**šŸ“ Sample Application Logs (Non-Health Check):**\n${context_logs.stdout} **šŸ” Filtered Error/Warning Logs:**\n${filtered_logs.stdout} - RW.Core.Add Pre To Report **šŸ“‹ Raw Deployment Logs for `${DEPLOYMENT_NAME}`** (Last ${LOG_LINES} lines, ${LOG_AGE} age)\n**Total Log Lines:** ${total_count} | **Health Check Lines:** ${health_count}\n**ā„¹ļø Logs are mostly health check messages (${health_count}/${total_count} lines)**\n\n${log_content}\n\n**Commands Used:** ${history}\n\n**Note:** Automated issue detection is performed by the "Analyze Application Log Patterns" task. - ELSE - RW.Core.Add Pre To Report **šŸ“‹ Raw Deployment Logs for `${DEPLOYMENT_NAME}`** (Last ${LOG_LINES} lines, ${LOG_AGE} age)\n**Total Log Lines:** ${total_count} | **Health Check Lines:** ${health_count}\n\n**šŸ“ Recent Application Logs:**\n${deployment_logs.stdout}\n\n**Commands Used:** ${history}\n\n**Note:** Automated issue detection is performed by the "Analyze Application Log Patterns" task. 
- END - ELSE - # Only add to report if fetch failed, don't create issue - ${history}= RW.CLI.Pop Shell History - RW.Core.Add Pre To Report **šŸ“‹ Raw Deployment Logs for `${DEPLOYMENT_NAME}`**\n\nāš ļø Unable to fetch deployment logs (exit code ${deployment_logs.returncode}).\n\n**STDERR:** ${deployment_logs.stderr}\n\n**Commands Used:** ${history} - END - END - Check Liveness Probe Configuration for Deployment `${DEPLOYMENT_NAME}` [Documentation] Validates if a Liveness probe has possible misconfigurations [Tags] From cc7700853fee942e527fc229cccdbd33ecc4e76b Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Wed, 11 Feb 2026 19:53:32 +0530 Subject: [PATCH 08/10] added README for k8s-applog-health(the new application log codebundle);removed the redundant "detect log anomalies" task from statefulset-healthcheck --- codebundles/k8s-applog-health/README.md | 56 +++++++++++++++++++ .../k8s-statefulset-healthcheck/runbook.robot | 49 ---------------- 2 files changed, 56 insertions(+), 49 deletions(-) diff --git a/codebundles/k8s-applog-health/README.md b/codebundles/k8s-applog-health/README.md index e69de29bb..5bbe68a13 100644 --- a/codebundles/k8s-applog-health/README.md +++ b/codebundles/k8s-applog-health/README.md @@ -0,0 +1,56 @@ +# Kubernetes Application Log Health + +This codebundle provides tasks for triaging application log health of Kubernetes workloads (deployments, statefulsets, or daemonsets). It fetches pod logs, scans for error patterns, and reports issues with severity and next steps. + +## Tasks + +**Runbook** +- `Analyze Application Log Patterns for ${WORKLOAD_TYPE} ${WORKLOAD_NAME} in Namespace ${NAMESPACE}` — Fetches workload logs, scans for configurable error/exception patterns, creates issues for matches above the severity threshold, and reports a log health score and summary. 
+- `Fetch Workload Logs for ${WORKLOAD_TYPE} ${WORKLOAD_NAME} in Namespace ${NAMESPACE}` — Fetches and attaches workload logs to the report for manual review (no issue creation). + +**SLI** +- `Get Critical Log Errors and Score for ${WORKLOAD_TYPE} ${WORKLOAD_NAME}` — Fetches logs and scores health based on critical error patterns (e.g. GenericError, AppFailure) and container restarts; pushes a metric for SLI scoring. +- `Generate Application Health Score for ${WORKLOAD_TYPE} ${WORKLOAD_NAME}` — Computes the final applog health score and report details (e.g. scaled-to-zero vs healthy vs issues). + +### Log pattern categories + +Analysis uses pattern categories (configurable via `runbook_patterns.json` or `sli_critical_patterns.json`). Examples: + +- **GenericError** — exception, fatal, panic, crash, failed, failure (severity 1) +- **AppFailure** — application failed, service unavailable, connection refused, timeout, OOM, disk full, auth failures (severity 1) +- **StackTrace** — stack trace, exception in thread, java.lang., traceback, panic (severity 1) +- **Connection** — connection reset/timeout, network unreachable, socket error, DNS resolution failed (severity 2) +- **Timeout** — request/operation timeout, deadline exceeded, read/write timeout (severity 2) +- **Auth** — unauthorized, authentication error, invalid credentials, forbidden, token expired (severity 2) +- **Exceptions** — NullPointerException, IllegalArgumentException, SQLException, IOException, etc. (severity 2) +- **Resource** — resource exhausted, memory leak, CPU throttled, quota/rate limit exceeded (severity 2) +- **HealthyRecovery** — recovered from error, connection restored, retry successful (severity 4, informational) + +Exclude patterns (e.g. INFO/DEBUG/TRACE, health checks, heartbeats) reduce false positives. + +## Configuration + +The TaskSet/SLI requires initialization with secrets and user variables. Key variables: + +- `kubeconfig` — Secret containing cluster access (kubeconfig YAML). 
+- `KUBERNETES_DISTRIBUTION_BINARY` — CLI binary for Kubernetes (`kubectl` or `oc`). Default: `kubectl`. +- `CONTEXT` — Kubernetes context to use. +- `NAMESPACE` — Namespace of the workload. Leave blank to search all namespaces. +- `WORKLOAD_NAME` — Name of the deployment, statefulset, or daemonset to analyze. +- `WORKLOAD_TYPE` — Type of workload: `deployment`, `statefulset`, or `daemonset`. Default: `deployment`. +- `LOG_AGE` — Age of logs to fetch (e.g. `10m`). Default: `10m`. +- `LOG_LINES` / `LOG_SIZE` — Max lines or bytes per container for runbook log fetch. Defaults: 1000 lines, 2MB. +- `LOG_SEVERITY_THRESHOLD` — Minimum severity to create issues (1=critical … 5=info). Default: 3. +- `LOG_PATTERN_CATEGORIES` — Comma-separated categories to scan (e.g. `GenericError,AppFailure,Connection`). Default includes GenericError, AppFailure, Connection, Timeout, Auth, Exceptions, Resource, HealthyRecovery. +- `LOGS_EXCLUDE_PATTERN` — Regex to exclude lines from analysis (e.g. INFO/DEBUG, health checks). +- `EXCLUDED_CONTAINER_NAMES` — Comma-separated container names to skip (e.g. `linkerd-proxy,istio-proxy`). Default: `linkerd-proxy,istio-proxy,vault-agent`. +- `CONTAINER_RESTART_AGE` / `CONTAINER_RESTART_THRESHOLD` — Time window and threshold for container restarts (SLI). Defaults: e.g. `10m`, `1`. +- `LOG_SCAN_TIMEOUT` — Timeout in seconds for log scanning. Default: 300. + +## Requirements + +- A kubeconfig with RBAC permissions to list pods and read logs for the target workload and namespace. + +## TODO + +- [ ] Add additional documentation. 
diff --git a/codebundles/k8s-statefulset-healthcheck/runbook.robot b/codebundles/k8s-statefulset-healthcheck/runbook.robot index 84738e855..3a8fa9cde 100644 --- a/codebundles/k8s-statefulset-healthcheck/runbook.robot +++ b/codebundles/k8s-statefulset-healthcheck/runbook.robot @@ -21,55 +21,6 @@ Suite Setup Suite Initialization *** Tasks *** -Detect Log Anomalies for StatefulSet `${STATEFULSET_NAME}` in Namespace `${NAMESPACE}` - [Documentation] Analyzes logs for repeating patterns, anomalous behavior, and unusual log volume that may indicate underlying issues. - [Tags] - ... logs - ... anomalies - ... patterns - ... volume - ... statefulset - ... ${STATEFULSET_NAME} - ... access:read-only - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=statefulset - ... workload_name=${STATEFULSET_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... log_age=${LOG_AGE} - - ${anomaly_results}= RW.K8sLog.Analyze Log Anomalies - ... log_dir=${log_dir} - ... workload_type=statefulset - ... workload_name=${STATEFULSET_NAME} - ... namespace=${NAMESPACE} - - # Process anomaly issues - ${anomaly_issues}= Evaluate $anomaly_results.get('issues', []) - IF len($anomaly_issues) > 0 - FOR ${issue} IN @{anomaly_issues} - ${summarized_details}= RW.K8sLog.Summarize Log Issues issue_details=${issue["details"]} - ${next_steps_text}= Catenate SEPARATOR=\n @{issue["next_steps"]} - - RW.Core.Add Issue - ... severity=${issue["severity"]} - ... expected=No log anomalies should be present in StatefulSet `${STATEFULSET_NAME}` in namespace `${NAMESPACE}` - ... actual=Log anomalies detected in StatefulSet `${STATEFULSET_NAME}` in namespace `${NAMESPACE}` - ... title=${issue["title"]} - ... reproduce_hint=Use RW.K8sLog.Analyze Log Anomalies keyword to reproduce this analysis - ... details=${summarized_details} - ... next_steps=${next_steps_text} - ... 
observed_at=${issue["observed_at"]} - END - END - - # Add summary to report - ${anomaly_summary}= Catenate SEPARATOR=\n @{anomaly_results["summary"]} - RW.Core.Add Pre To Report Log Anomaly Analysis for StatefulSet ${STATEFULSET_NAME}:\n${anomaly_summary} - - RW.K8sLog.Cleanup Temp Files - Check Liveness Probe Configuration for StatefulSet `${STATEFULSET_NAME}` [Documentation] Validates if a Liveness probe has possible misconfigurations [Tags] From 284c11ee4517da58319aff8366544027a0da530f Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Thu, 12 Feb 2026 10:52:57 +0530 Subject: [PATCH 09/10] add next_action kwarg to distinguish applog issues in platform --- codebundles/k8s-applog-health/runbook.robot | 1 + 1 file changed, 1 insertion(+) diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index ed46577b7..5c7f9ca2e 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -316,6 +316,7 @@ Analyze Application Log Patterns for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Name ... details=${summarized_details} ... next_steps=${next_steps} ... observed_at=${issue_timestamp} + ... 
next_action=analyzeApplog END END From 34619c1a686c1831af2ae075b659651e54fb66c3 Mon Sep 17 00:00:00 2001 From: Akshay Prabhakant Date: Thu, 12 Feb 2026 19:47:37 +0000 Subject: [PATCH 10/10] - added the stacktrace task and issue creation to applog-health - rectified the k8s-applog-health runbook and sli SKIP_HEALTH_CHECKS evaluation - removed the "Critical Log Errors" sub metric from deployment-healthcheck SLI(looks at logs, this codebundle shouldn't ideally be looking at logs) --- .../templates/k8s-applog-health-sli.yaml | 2 +- codebundles/k8s-applog-health/runbook.robot | 145 ++++++++++--- codebundles/k8s-applog-health/sli.robot | 205 ++++++++++++------ .../k8s-deployment-healthcheck/sli.robot | 67 +----- 4 files changed, 257 insertions(+), 162 deletions(-) diff --git a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml index aa3253be2..34fa0d3f5 100755 --- a/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml +++ b/codebundles/k8s-applog-health/.runwhen/templates/k8s-applog-health-sli.yaml @@ -50,7 +50,7 @@ spec: value: "true" - name: MAX_LOG_LINES value: "1000" - - name: MAX_LOG_SIZE + - name: MAX_LOG_BYTES value: "2097152" secretsProvided: {% if wb_version %} diff --git a/codebundles/k8s-applog-health/runbook.robot b/codebundles/k8s-applog-health/runbook.robot index 5c7f9ca2e..dfd340c09 100755 --- a/codebundles/k8s-applog-health/runbook.robot +++ b/codebundles/k8s-applog-health/runbook.robot @@ -187,41 +187,53 @@ Suite Initialization ... LOG_SCAN_TIMEOUT=${LOG_SCAN_TIMEOUT} Set Suite Variable ${env} ${env_dict} - # Check if the workload is scaled to 0 and handle appropriately - ${scale_check}= RW.CLI.Run Cli - ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... timeout_seconds=30 + # Check if workload is scaled to 0 and handle appropriately + # Different workload types have different field structures - TRY - ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json - ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) - + IF '${WORKLOAD_TYPE}' == 'daemonset' # DaemonSets don't scale to 0 in the traditional sense, so skip scale-down logic for them - IF '${WORKLOAD_TYPE}' == 'daemonset' - Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with log analysis - Set Suite Variable ${SKIP_POD_CHECKS} ${False} - ELSE IF ${spec_replicas} == 0 - RW.Core.Add Issue - ... severity=4 - ... expected=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` operational status documented - ... actual=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is intentionally scaled to zero replicas - ... title=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is Scaled Down (Informational) - ... reproduce_hint=kubectl get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o yaml - ... details=${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is currently scaled to 0 replicas (spec.replicas=0). This is an intentional configuration and not an error. All pod-related healthchecks have been skipped for efficiency. If the workload should be running, scale it up using:\nkubectl scale ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --replicas= --context ${CONTEXT} -n ${NAMESPACE} - ... next_steps=This is informational only. If the workload should be running, scale it up. 
- - RW.Core.Add Pre To Report **ā„¹ļø ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is scaled to 0 replicas - Skipping log analysis**\n**Available Condition:** ${scale_status.get('available_condition', 'Unknown')} - - Set Suite Variable ${SKIP_POD_CHECKS} ${True} + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with log checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + ELSE + IF '${WORKLOAD_TYPE}' == 'statefulset' + # StatefulSet: use current/updated replicas in addition to spec/ready + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), current_replicas: (.status.currentReplicas // 0), updated_replicas: (.status.updatedReplicas // 0)}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=30 ELSE - Set Suite Variable ${SKIP_POD_CHECKS} ${False} + # For deployments + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... 
timeout_seconds=30 END - EXCEPT - Log Warning: Failed to check workload scale, continuing with normal checks - Set Suite Variable ${SKIP_POD_CHECKS} ${False} + TRY + ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json + ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) + + # Try to determine when deployment was scaled down by checking recent events and replica set history + ${scale_down_info}= Get Deployment Scale Down Timestamp ${spec_replicas} + + IF ${spec_replicas} == 0 + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is scaled to 0 replicas - returning special health score + Log Scale down detected at: ${scale_down_info} + + # For scaled-down workloads, return a score of 1.0 to indicate "intentionally down" vs "broken" + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${True} + Set Suite Variable ${SCALED_DOWN_INFO} ${scale_down_info} + ELSE + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} has ${spec_replicas} desired replicas - proceeding with log checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + END + + EXCEPT + Log Warning: Failed to check workload scale, continuing with normal log checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + END END @@ -239,7 +251,7 @@ Analyze Application Log Patterns for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Name ... ${WORKLOAD_TYPE} ... access:read-only # Skip pod-related checks if deployment is scaled to 0 - IF not ${SKIP_POD_CHECKS} + IF not ${SKIP_HEALTH_CHECKS} # Temporarily suppress log warnings for excluded containers (they're expected) TRY ${log_dir}= RW.K8sLog.Fetch Workload Logs @@ -340,7 +352,7 @@ Fetch Workload Logs for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}` in Namespace `${NA ... troubleshooting ... access:read-only # Skip pod-related checks if deployment is scaled to 0 - IF not ${SKIP_POD_CHECKS} + IF not ${SKIP_HEALTH_CHECKS} # Fetch raw logs ${workload_logs}= RW.CLI.Run Cli ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} logs ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} --tail=${LOG_LINES} --since=${LOG_AGE} @@ -394,4 +406,71 @@ Fetch Workload Logs for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}` in Namespace `${NA ${history}= RW.CLI.Pop Shell History RW.Core.Add Pre To Report **šŸ“‹ Raw Logs for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}`**\n\nāš ļø Unable to fetch workload logs (exit code ${workload_logs.returncode}).\n\n**STDERR:** ${workload_logs.stderr}\n\n**Commands Used:** ${history} END - END \ No newline at end of file + END + + +Analyze Workload Stacktraces for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}` + [Documentation] Collects and analyzes stacktraces/tracebacks from all pods in the workload for troubleshooting application issues. + [Tags] + ... logs + ... stacktraces + ... tracebacks + ... ${WORKLOAD_TYPE} + ... troubleshooting + ... errors + ... access:read-only + # Skip pod-related checks if workload is scaled to 0 + IF not ${SKIP_HEALTH_CHECKS} + # Convert comma-separated string to list for excluded containers + @{EXCLUDED_CONTAINERS}= Run Keyword If "${EXCLUDED_CONTAINER_NAMES}" != "" Split String ${EXCLUDED_CONTAINER_NAMES} , ELSE Create List + + # Fetch logs using RW.K8sLog library (same pattern as deployment healthcheck) + ${log_dir}= RW.K8sLog.Fetch Workload Logs + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} + ... namespace=${NAMESPACE} + ... context=${CONTEXT} + ... kubeconfig=${kubeconfig} + ... log_age=${LOG_AGE} + ... max_log_lines=${LOG_LINES} + ... max_log_bytes=${LOG_SIZE} + ... excluded_containers=${EXCLUDED_CONTAINERS} + + # Extract stacktraces from the log directory using the traceback library + ${tracebacks}= RW.LogAnalysis.ExtractTraceback.Extract Tracebacks + ... 
logs_dir=${log_dir} + + # Check total number of tracebacks extracted + ${total_tracebacks}= Get Length ${tracebacks} + + IF ${total_tracebacks} == 0 + # No tracebacks found + RW.Core.Add Pre To Report **šŸ“‹ No Stacktraces Found for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`**\n**Log Analysis Period:** ${LOG_AGE}\n**Max Log Lines:** ${LOG_LINES}\n**Max Log Size:** ${LOG_SIZE} bytes\n**Excluded Containers:** ${EXCLUDED_CONTAINER_NAMES}\n\nLog analysis completed successfully with no stacktraces detected. + ELSE + # Stacktraces found - create issues for each one + ${delimiter}= Evaluate '-' * 80 + + FOR ${traceback} IN @{tracebacks} + ${stacktrace}= Set Variable ${traceback["stacktrace"]} + ${timestamp}= Set Variable ${traceback["timestamp"]} + RW.Core.Add Issue + ... severity=2 + ... expected=No stacktraces should be present in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` logs in namespace `${NAMESPACE}` + ... actual=Stacktrace detected in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` logs in namespace `${NAMESPACE}` + ... title=Stacktrace Detected in ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` + ... reproduce_hint=Check application logs for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in namespace `${NAMESPACE}` + ... details=${delimiter}\n${stacktrace}\n${delimiter} + ... next_steps=Review application logs for the root cause of the stacktrace\nCheck application configuration and resource limits\nInvestigate the specific error conditions that led to this stacktrace\nConsider scaling or restarting the ${WORKLOAD_TYPE} if issues persist\nMonitor application health and performance metrics + ... next_action=analyseStacktrace + ... 
observed_at=${timestamp}
+        END
+
+        # Create consolidated report showing all stacktraces
+        ${stacktrace_strings}=    Evaluate    [tb["stacktrace"] for tb in $tracebacks]
+        ${agg_tracebacks}=    Evaluate    "\\n" + "\\n${delimiter}\\n".join($stacktrace_strings)
+        RW.Core.Add Pre To Report    **šŸ” Stacktraces Found for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` in Namespace `${NAMESPACE}`**\n**Total Stacktraces:** ${total_tracebacks}\n**Log Analysis Period:** ${LOG_AGE}\n**Max Log Lines:** ${LOG_LINES}\n**Max Log Size:** ${LOG_SIZE} bytes\n**Excluded Containers:** ${EXCLUDED_CONTAINER_NAMES}\n\n${agg_tracebacks}
+    END
+
+    # Clean up temporary log files
+    RW.K8sLog.Cleanup Temp Files
+    END
diff --git a/codebundles/k8s-applog-health/sli.robot b/codebundles/k8s-applog-health/sli.robot
index ab920e7f0..ce65ddeac 100755
--- a/codebundles/k8s-applog-health/sli.robot
+++ b/codebundles/k8s-applog-health/sli.robot
@@ -9,6 +9,7 @@ Library             RW.Core
 Library             RW.CLI
 Library             RW.platform
 Library             RW.K8sLog
+Library             RW.LogAnalysis.ExtractTraceback
 Library             OperatingSystem
 Library             String

@@ -123,42 +124,51 @@ Suite Initialization

     # Check if workload is scaled to 0 and handle appropriately
     # Different workload types have different field structures
+    IF    '${WORKLOAD_TYPE}' == 'daemonset'
-        ${scale_check}=    RW.CLI.Run Cli
-        ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .status.desiredNumberScheduled, ready_replicas: (.status.numberReady // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}'
-        ...    env=${env}
-        ...    secret_file__kubeconfig=${kubeconfig}
-        ...    
timeout_seconds=30 + # DaemonSets don't scale to 0 in the traditional sense, so skip scale-down logic for them + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with log checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} ELSE - # For deployments and statefulsets - ${scale_check}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... timeout_seconds=30 - END - - TRY - ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json - ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) + IF '${WORKLOAD_TYPE}' == 'statefulset' + # StatefulSet: use current/updated replicas in addition to spec/ready + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), current_replicas: (.status.currentReplicas // 0), updated_replicas: (.status.updatedReplicas // 0)}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=30 + ELSE + # For deployments + ${scale_check}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ${WORKLOAD_TYPE}/${WORKLOAD_NAME} --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '{spec_replicas: .spec.replicas, ready_replicas: (.status.readyReplicas // 0), available_condition: (.status.conditions[] | select(.type == "Available") | .status // "Unknown")}' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... 
timeout_seconds=30 + END - # DaemonSets don't scale to 0 in the traditional sense, so skip scale-down logic for them - IF '${WORKLOAD_TYPE}' == 'daemonset' - Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is a DaemonSet - proceeding with log checks - Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} - ELSE IF ${spec_replicas} == 0 - Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is scaled to 0 replicas - returning perfect health score + TRY + ${scale_status}= Evaluate json.loads(r'''${scale_check.stdout}''') if r'''${scale_check.stdout}'''.strip() else {} json + ${spec_replicas}= Evaluate $scale_status.get('spec_replicas', 1) + + # Try to determine when deployment was scaled down by checking recent events and replica set history + ${scale_down_info}= Get Deployment Scale Down Timestamp ${spec_replicas} - # For scaled-down workloads, return a score of 1.0 to indicate "intentionally down" vs "broken" - Set Suite Variable ${SKIP_HEALTH_CHECKS} ${True} - ELSE - Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} has ${spec_replicas} desired replicas - proceeding with log checks + IF ${spec_replicas} == 0 + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is scaled to 0 replicas - returning special health score + Log Scale down detected at: ${scale_down_info} + + # For scaled-down workloads, return a score of 1.0 to indicate "intentionally down" vs "broken" + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${True} + Set Suite Variable ${SCALED_DOWN_INFO} ${scale_down_info} + ELSE + Log ${WORKLOAD_TYPE} ${WORKLOAD_NAME} has ${spec_replicas} desired replicas - proceeding with log checks + Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} + END + + EXCEPT + Log Warning: Failed to check workload scale, continuing with normal log checks Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} END - - EXCEPT - Log Warning: Failed to check workload scale, continuing with normal log checks - Set Suite Variable ${SKIP_HEALTH_CHECKS} ${False} END Get Deployment Scale Down Timestamp @@ -166,45 +176,70 @@ Get Deployment Scale Down 
Timestamp [Documentation] Attempts to determine when a deployment was scaled down by examining recent events ${scale_down_info}= Set Variable Unknown - IF ${spec_replicas} == 0 and '${WORKLOAD_TYPE}' == 'deployment' - TRY - # Check recent scaling events to find when it was scaled to 0 - ${scaling_events}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} --sort-by='.lastTimestamp' -o json | jq -r '.items[] | select(.reason == "ScalingReplicaSet" and (.message | contains("${WORKLOAD_NAME}")) and (.message | contains("to 0"))) | {timestamp: .lastTimestamp, message: .message}' | jq -s 'sort_by(.timestamp) | reverse | .[0] // empty' - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} - ... timeout_seconds=15 - - IF '''${scaling_events.stdout}''' != '' - ${event_data}= Evaluate json.loads(r'''${scaling_events.stdout}''') if r'''${scaling_events.stdout}'''.strip() else {} json - ${timestamp}= Evaluate $event_data.get('timestamp', 'Unknown') - ${message}= Evaluate $event_data.get('message', 'Unknown') - ${scale_down_info}= Set Variable ${timestamp} (${message}) - Log Found scale-down event: ${scale_down_info} - ELSE - # Try checking replicaset history as fallback - ${rs_history}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get replicasets --context ${CONTEXT} -n ${NAMESPACE} -l app=${WORKLOAD_NAME} -o json | jq -r '.items[] | select(.spec.replicas == 0) | {creation_time: .metadata.creationTimestamp, name: .metadata.name}' | jq -s 'sort_by(.creation_time) | reverse | .[0] // empty' + IF ${spec_replicas} == 0 + IF '${WORKLOAD_TYPE}' == 'deployment' + TRY + # Check recent scaling events to find when it was scaled to 0 + ${scaling_events}= RW.CLI.Run Cli + ... 
cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} --sort-by='.lastTimestamp' -o json | jq -r '.items[] | select(.reason == "ScalingReplicaSet" and (.message | contains("${WORKLOAD_NAME}")) and (.message | contains("to 0"))) | {timestamp: .lastTimestamp, message: .message}' | jq -s 'sort_by(.timestamp) | reverse | .[0] // empty' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... timeout_seconds=15 + + IF '''${scaling_events.stdout}''' != '' + ${event_data}= Evaluate json.loads(r'''${scaling_events.stdout}''') if r'''${scaling_events.stdout}'''.strip() else {} json + ${timestamp}= Evaluate $event_data.get('timestamp', 'Unknown') + ${message}= Evaluate $event_data.get('message', 'Unknown') + ${scale_down_info}= Set Variable ${timestamp} (${message}) + Log Found scale-down event: ${scale_down_info} + ELSE + # Try checking replicaset history as fallback + ${rs_history}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get replicasets --context ${CONTEXT} -n ${NAMESPACE} -l app=${WORKLOAD_NAME} -o json | jq -r '.items[] | select(.spec.replicas == 0) | {creation_time: .metadata.creationTimestamp, name: .metadata.name}' | jq -s 'sort_by(.creation_time) | reverse | .[0] // empty' + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + ... 
timeout_seconds=15 + + IF '''${rs_history.stdout}''' != '' + ${rs_data}= Evaluate json.loads(r'''${rs_history.stdout}''') if r'''${rs_history.stdout}'''.strip() else {} json + ${rs_time}= Evaluate $rs_data.get('creation_time', 'Unknown') + ${scale_down_info}= Set Variable Likely around ${rs_time} (based on ReplicaSet history) + Log Estimated scale-down time from ReplicaSet: ${scale_down_info} + ELSE + ${scale_down_info}= Set Variable Unable to determine - no recent scaling events found + Log Could not determine when ${WORKLOAD_TYPE} ${WORKLOAD_NAME} was scaled down + END + END + EXCEPT + Log Warning: Failed to determine scale-down timestamp + ${scale_down_info}= Set Variable Failed to determine scale-down time + END + ELSE IF '${WORKLOAD_TYPE}' == 'statefulset' + TRY + # StatefulSet: find scale-to-0 event via involvedObject + ${scaling_events}= RW.CLI.Run Cli + ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} --sort-by='.lastTimestamp' -o json | jq -r '.items[] | select(.involvedObject.kind == "StatefulSet" and .involvedObject.name == "${WORKLOAD_NAME}" and (.message | contains("to 0") or (contains("delete Pod") and contains("successful")))) | {timestamp: .lastTimestamp, message: .message}' | jq -s 'sort_by(.timestamp) | reverse | .[0] // empty' ... env=${env} ... secret_file__kubeconfig=${kubeconfig} ... 
timeout_seconds=15 - IF '''${rs_history.stdout}''' != '' - ${rs_data}= Evaluate json.loads(r'''${rs_history.stdout}''') if r'''${rs_history.stdout}'''.strip() else {} json - ${rs_time}= Evaluate $rs_data.get('creation_time', 'Unknown') - ${scale_down_info}= Set Variable Likely around ${rs_time} (based on ReplicaSet history) - Log Estimated scale-down time from ReplicaSet: ${scale_down_info} + IF '''${scaling_events.stdout}''' != '' + ${event_data}= Evaluate json.loads(r'''${scaling_events.stdout}''') if r'''${scaling_events.stdout}'''.strip() else {} json + ${timestamp}= Evaluate $event_data.get('timestamp', 'Unknown') + ${message}= Evaluate $event_data.get('message', 'Unknown') + ${scale_down_info}= Set Variable ${timestamp} (${message}) + Log Found scale-down event: ${scale_down_info} ELSE - ${scale_down_info}= Set Variable Unable to determine - no recent scaling events found + ${scale_down_info}= Set Variable Unable to determine - no recent scaling events found for StatefulSet Log Could not determine when ${WORKLOAD_TYPE} ${WORKLOAD_NAME} was scaled down END + EXCEPT + Log Warning: Failed to determine scale-down timestamp for StatefulSet + ${scale_down_info}= Set Variable Failed to determine scale-down time END - EXCEPT - Log Warning: Failed to determine scale-down timestamp - ${scale_down_info}= Set Variable Failed to determine scale-down time END END - + RETURN ${scale_down_info} *** Tasks *** @@ -269,6 +304,51 @@ Get Critical Log Errors and Score for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` RW.Core.Push Metric ${log_health_score} sub_name=log_errors END +Get Stacktrace Health Score for ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` + [Documentation] Checks for recent stacktraces/tracebacks related to the workload within a short time window, with filtering to reduce noise. 
+ [Tags] stacktraces tracebacks errors recent fast + IF ${SKIP_HEALTH_CHECKS} + # For scaled-down deployments, return perfect score to indicate "intentionally down" vs "broken" + ${stacktrace_score}= Set Variable 1.0 + Set Suite Variable ${stacktrace_details} ${WORKLOAD_TYPE} `${WORKLOAD_NAME}` is intentionally scaled to 0 replicas - Score: ${stacktrace_score} + ELSE + # Fetch logs using RW.K8sLog library (same pattern as deployment healthcheck) + ${log_dir}= RW.K8sLog.Fetch Workload Logs + ... workload_type=${WORKLOAD_TYPE} + ... workload_name=${WORKLOAD_NAME} + ... namespace=${NAMESPACE} + ... context=${CONTEXT} + ... kubeconfig=${kubeconfig} + ... log_age=${RW_LOOKBACK_WINDOW} + ... max_log_lines=${MAX_LOG_LINES} + ... max_log_bytes=${MAX_LOG_BYTES} + ... excluded_containers=${EXCLUDED_CONTAINERS} + + # Extract stacktraces from the log directory + ${recentmost_stacktrace}= RW.LogAnalysis.ExtractTraceback.Extract Tracebacks + ... logs_dir=${log_dir} + ... fast_exit=${True} + + ${stacktrace_length}= Get Length ${recentmost_stacktrace} + + IF ${stacktrace_length} != 0 + # Stacktrace found - set score to 0 + ${stacktrace_score}= Set Variable 0 + ${delimiter}= Evaluate '-' * 150 + Set Suite Variable ${stacktrace_details} **Stacktrace(s) identified**:\n${delimiter}\n${recentmost_stacktrace}\n${delimiter} + ELSE + # No stacktraces found - set score to 1 + ${stacktrace_score}= Set Variable 1.0 + Set Suite Variable ${stacktrace_details} **No Stacktraces identified.**\n\nLog analysis completed successfully. 
+ END
+
+    # Clean up temporary log files
+    RW.K8sLog.Cleanup Temp Files
+    END
+
+    Set Suite Variable    ${stacktrace_score}
+    RW.Core.Push Metric    ${stacktrace_score}    sub_name=stacktrace_score
+
 Generate Application Health Score for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}`
     [Documentation]    Generates the final applog health score and report details
     [Tags]    score    health    applog
@@ -278,15 +358,14 @@ Generate Application Health Score for `${WORKLOAD_TYPE}` `${WORKLOAD_NAME}`
         # We distinguish scaled-down vs broken deployments through the log message and report details
         ${health_score}=    Set Variable    1.0
         Log    ${WORKLOAD_TYPE} ${WORKLOAD_NAME} is intentionally scaled to 0 replicas (${SCALED_DOWN_INFO}) - Score: ${health_score}
-        RW.Core.Add to Report    Applog Health Score: ${health_score} - ${WORKLOAD_TYPE} ${WORKLOAD_NAME} intentionally scaled to 0 replicas
     ELSE
         # Use the log health score as the final health score.
-        ${health_score}=    Set Variable    ${log_health_score}
+        ${health_score}=    Evaluate    min(${log_health_score}, ${stacktrace_score})
         IF    ${health_score} == 1.0
-            RW.Core.Add to Report    Applog Health Score: ${health_score} - No applog issues detected in workload logs
+            RW.Core.Add to Report    Applog Health Score: ${health_score} - No applog issues or stacktraces detected in workload logs
         ELSE
-            RW.Core.Add to Report    Applog Health Score: ${health_score} - Applog issue(s) detected in workload logs: ${log_health_details}
+            RW.Core.Add to Report    Applog Health Score: ${health_score} - Applog issue(s) or stacktrace(s) detected in workload logs: ${log_health_details}
         END
     END
     RW.Core.Push Metric    ${health_score}
\ No newline at end of file
diff --git a/codebundles/k8s-deployment-healthcheck/sli.robot b/codebundles/k8s-deployment-healthcheck/sli.robot
index 7571c55e0..ef4f08444 100755
--- a/codebundles/k8s-deployment-healthcheck/sli.robot
+++ b/codebundles/k8s-deployment-healthcheck/sli.robot
@@ -129,7 +129,6 @@ Suite Initialization

     # Initialize score variables
     Set Suite Variable 
${container_restart_score} 0 - Set Suite Variable ${log_health_score} 0 Set Suite Variable ${pods_notready_score} 0 Set Suite Variable ${replica_score} 0 Set Suite Variable ${events_score} 0 @@ -246,67 +245,6 @@ Get Container Restarts and Score for Deployment `${DEPLOYMENT_NAME}` RW.Core.Push Metric ${container_restart_score} sub_name=container_restarts END -Get Critical Log Errors and Score for Deployment `${DEPLOYMENT_NAME}` - [Documentation] Fetches logs and checks for critical error patterns that indicate application failures. - [Tags] logs errors critical patterns - - # Skip if deployment is scaled down - IF ${SKIP_HEALTH_CHECKS} - Log Skipping log analysis - deployment is scaled to 0 replicas - ${log_health_score}= Set Variable 1 # Perfect score for scaled deployment - Set Suite Variable ${log_health_score} - RW.Core.Push Metric ${log_health_score} sub_name=log_errors - ELSE - ${log_dir}= RW.K8sLog.Fetch Workload Logs - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} - ... namespace=${NAMESPACE} - ... context=${CONTEXT} - ... kubeconfig=${kubeconfig} - ... log_age=${RW_LOOKBACK_WINDOW} - ... max_log_lines=${MAX_LOG_LINES} - ... max_log_bytes=${MAX_LOG_BYTES} - ... excluded_containers=${EXCLUDED_CONTAINERS} - - # Use only critical error patterns for fast SLI checks - @{critical_categories}= Create List GenericError AppFailure - - ${scan_results}= RW.K8sLog.Scan Logs For Issues - ... log_dir=${log_dir} - ... workload_type=deployment - ... workload_name=${DEPLOYMENT_NAME} - ... namespace=${NAMESPACE} - ... categories=${critical_categories} - ... custom_patterns_file=sli_critical_patterns.json - ... 
excluded_containers=${EXCLUDED_CONTAINERS} - - # Post-process results to filter out patterns matching LOGS_EXCLUDE_PATTERN - TRY - IF $LOGS_EXCLUDE_PATTERN != "" - ${filtered_issues}= Evaluate [issue for issue in $scan_results.get('issues', []) if not __import__('re').search('${LOGS_EXCLUDE_PATTERN}', issue.get('details', ''), __import__('re').IGNORECASE)] modules=re - ${filtered_results}= Evaluate {**$scan_results, 'issues': $filtered_issues} - Set Test Variable ${scan_results} ${filtered_results} - END - EXCEPT - Log Warning: Failed to apply LOGS_EXCLUDE_PATTERN filter, using unfiltered results - END - - ${log_health_score}= RW.K8sLog.Calculate Log Health Score scan_results=${scan_results} - - # Store details for final score calculation logging - TRY - ${issues}= Evaluate $scan_results.get('issues', []) - ${issue_count}= Get Length ${issues} - Set Suite Variable ${log_health_details} ${issue_count} issues found - EXCEPT - Set Suite Variable ${log_health_details} analysis completed - END - - Set Suite Variable ${log_health_score} - RW.K8sLog.Cleanup Temp Files - RW.Core.Push Metric ${log_health_score} sub_name=log_errors - END - Get NotReady Pods Score for Deployment `${DEPLOYMENT_NAME}` [Documentation] Fetches a count of unready pods for the specific deployment. 
[Tags] access:read-only Pods Status Phase Ready Unready Running @@ -411,13 +349,12 @@ Generate Deployment Health Score for `${DEPLOYMENT_NAME}` Log Deployment ${DEPLOYMENT_NAME} is intentionally scaled to 0 replicas (${SCALED_DOWN_INFO}) - Score: ${health_score} ELSE # Calculate the normal health score - ${active_checks}= Set Variable 5 - ${deployment_health_score}= Evaluate (${container_restart_score} + ${log_health_score} + ${pods_notready_score} + ${replica_score} + ${events_score}) / ${active_checks} + ${active_checks}= Set Variable 4 + ${deployment_health_score}= Evaluate (${container_restart_score} + ${pods_notready_score} + ${replica_score} + ${events_score}) / ${active_checks} ${health_score}= Convert to Number ${deployment_health_score} 2 # Create a single line showing unhealthy components IF ${container_restart_score} < 1 Append To List ${unhealthy_components} Container Restarts (${container_restart_details}) - IF ${log_health_score} < 0.8 Append To List ${unhealthy_components} Log Health (${log_health_details}) IF ${pods_notready_score} < 1 Append To List ${unhealthy_components} Pod Readiness (${pod_readiness_details}) IF ${replica_score} < 1 Append To List ${unhealthy_components} Replica Status (${replica_details}) IF ${events_score} < 1 Append To List ${unhealthy_components} Warning Events (${events_details})