diff --git a/.gitignore b/.gitignore index 9b81cc2a6..3fa287b2d 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,7 @@ build **gcp_credentials_json **bashfile-** **db.sqlite3** + + +# Ignore cursor files +cursor-docs/ \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/README.md b/codebundles/azure-appservice-webapp-health/README.md index daa6352a4..31a6f8326 100644 --- a/codebundles/azure-appservice-webapp-health/README.md +++ b/codebundles/azure-appservice-webapp-health/README.md @@ -1,18 +1,93 @@ # Azure App Service Triage + Checks key App Service metrics and the service plan, fetches logs, config and activities for the service and generates a report of present issues for any found. ## Configuration The TaskSet requires initialization to import necessary secrets, services, and user variables. The following variables should be set: -export APPSERVICE +export APP_SERVICE_NAME export AZ_RESOURCE_GROUP +## Log Collection Configuration (Enhanced & Optimized) + +The log collection has been enhanced with multiple log sources while maintaining size optimization. You can control both verbosity and which log sources to include: + +### Log Levels + +- `ERROR`: Only errors and critical issues (minimal output) +- `WARN`: Warnings and errors +- `INFO`: Informational messages (default, filters for errors/warnings) +- `DEBUG`: Detailed debugging information +- `VERBOSE`: All logs including system events (use with caution) + +### Enhanced Features + +- **Docker Container Logs**: Container startup, runtime, and error diagnostics +- **Deployment History**: Recent deployment success/failure status and build logs +- **Performance Traces**: Slow requests and failed API calls (DEBUG+ only) + +### Configuration Variables + +#### Core Settings + +- `LOG_LEVEL`: Set log verbosity (default: INFO) +- `MAX_LOG_LINES`: Maximum lines per log file (default: 100) +- `MAX_TOTAL_SIZE`: Maximum total output size in bytes (default: 500000) + +#### Enhanced Features (New) + +- `INCLUDE_DOCKER_LOGS`: Include Docker container logs (default: true) +- `INCLUDE_DEPLOYMENT_LOGS`: Include deployment history (default: true) +- `INCLUDE_PERFORMANCE_TRACES`: Include performance traces (default: false) + +### Configuration Examples + +```bash +# Production troubleshooting (minimal output) +export LOG_LEVEL=ERROR +export INCLUDE_DOCKER_LOGS=false +export INCLUDE_DEPLOYMENT_LOGS=false + +# Standard configuration (recommended default) +export LOG_LEVEL=INFO +export INCLUDE_DOCKER_LOGS=true +export INCLUDE_DEPLOYMENT_LOGS=true + +# Docker container troubleshooting +export LOG_LEVEL=INFO +export INCLUDE_DOCKER_LOGS=true +export INCLUDE_DEPLOYMENT_LOGS=false + +# Deployment troubleshooting +export LOG_LEVEL=INFO +export INCLUDE_DOCKER_LOGS=false +export INCLUDE_DEPLOYMENT_LOGS=true + +# Full diagnostic mode (advanced) +export LOG_LEVEL=DEBUG +export INCLUDE_DOCKER_LOGS=true +export INCLUDE_DEPLOYMENT_LOGS=true +export INCLUDE_PERFORMANCE_TRACES=true +``` + +## Size Optimization + +The logs task now automatically: + +- Filters out verbose HTTP access logs +- Focuses on application-level logs and errors +- Limits output to 500KB by default +- Provides truncation warnings when limits are reached +- Directs users to Azure Portal for complete logs when needed + +This prevents report.jsonl files from exceeding UI rendering limits while maintaining diagnostic capability. + ## Notes This codebundle assumes the service principal authentication flow. ## TODO + - [ ] look for notable activities in list - [ ] config best practices check -- [ ] Add documentation \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/appservice_logs.sh b/codebundles/azure-appservice-webapp-health/appservice_logs.sh index 8dea435c2..6307b35d2 100755 --- a/codebundles/azure-appservice-webapp-health/appservice_logs.sh +++ b/codebundles/azure-appservice-webapp-health/appservice_logs.sh @@ -1,118 +1,126 @@ #!/bin/bash # ENV: +# AZ_USERNAME +# AZ_SECRET_VALUE +# AZ_SUBSCRIPTION +# AZ_TENANT # APP_SERVICE_NAME # AZ_RESOURCE_GROUP -# AZURE_RESOURCE_SUBSCRIPTION_ID (Optional, defaults to current subscription) +# LOG_LEVEL (Optional, default is INFO) +# MAX_LOG_LINES (Optional, default is 100) -# Configuration for log display limits -MAX_LOG_LINES="${MAX_LOG_LINES:-50}" # Maximum lines to display per file -MAX_LOG_SIZE_MB="${MAX_LOG_SIZE_MB:-2}" # Maximum log file size to process (MB) -LOG_DISPLAY_RECENT_HOURS="${LOG_DISPLAY_RECENT_HOURS:-1}" # Show logs from last 1 hour +# Set defaults +LOG_LEVEL="${LOG_LEVEL:-INFO}" +MAX_LOG_LINES="${MAX_LOG_LINES:-100}" +MAX_TOTAL_SIZE="${MAX_TOTAL_SIZE:-500000}" # 500KB limit -LOG_PATH="app_service_logs.zip" +LOG_PATH="_rw_logs_$APP_SERVICE_NAME.zip" +subscription_id=$(az account show --query "id" -o tsv) -echo "App Service Log Display Configuration:" -echo "- Maximum lines to display: ${MAX_LOG_LINES}" -echo "- Maximum log file size: ${MAX_LOG_SIZE_MB} MB" -echo "- Show logs from last: ${LOG_DISPLAY_RECENT_HOURS} hours" +# Set the subscription +az account set --subscription $subscription_id -# Get or set subscription ID -if [[ -z "${AZURE_RESOURCE_SUBSCRIPTION_ID:-}" ]]; then - subscription_id=$(timeout 10s az account show --query "id" -o tsv) - if [[ -z "$subscription_id" ]]; then - echo "Failed to get current subscription ID within 10 seconds." - exit 1 - fi - echo "AZURE_RESOURCE_SUBSCRIPTION_ID is not set. Using current subscription ID: $subscription_id" -else - subscription_id="$AZURE_RESOURCE_SUBSCRIPTION_ID" - echo "Using specified subscription ID: $subscription_id" -fi +# Download and extract logs +az webapp log download --name $APP_SERVICE_NAME --resource-group $AZ_RESOURCE_GROUP --subscription $subscription_id --log-file $LOG_PATH -# Set the subscription to the determined ID -echo "Switching to subscription ID: $subscription_id" -if ! timeout 10s az account set --subscription "$subscription_id"; then - echo "Failed to set subscription within 10 seconds." - exit 1 -fi +TEMP_DIR="/tmp/_temp_logs_$$" +mkdir -p "$TEMP_DIR" +unzip -o $LOG_PATH -d "$TEMP_DIR" >/dev/null 2>&1 +# Fix permissions on extracted files +chmod -R 755 "$TEMP_DIR" 2>/dev/null || true -echo "Downloading recent logs for App Service '$APP_SERVICE_NAME'..." +output_size=0 +max_exceeded=false -# Download logs with timeout to prevent hanging -if ! timeout 60s az webapp log download --name "$APP_SERVICE_NAME" --resource-group "$AZ_RESOURCE_GROUP" --subscription "$subscription_id" --log-file "$LOG_PATH" 2>/dev/null; then - echo "Error: Failed to download logs for App Service '$APP_SERVICE_NAME' within 60 seconds." - echo "This may be due to:" - echo "- App Service not found" - echo "- Insufficient permissions" - echo "- Logging not enabled" - echo "- Download timeout (large log files)" - exit 1 -fi +echo "Azure App Service $APP_SERVICE_NAME logs (Level: $LOG_LEVEL, Max Lines: $MAX_LOG_LINES):" +echo "" -# Check downloaded file size -if [[ -f "$LOG_PATH" ]]; then - file_size_mb=$(( $(stat -f%z "$LOG_PATH" 2>/dev/null || stat -c%s "$LOG_PATH" 2>/dev/null || echo 0) / 1024 / 1024 )) - echo "Downloaded log file size: ${file_size_mb} MB" +# Function to add content with size check +add_content() { + local content="$1" + local content_size=${#content} - if [[ $file_size_mb -gt $MAX_LOG_SIZE_MB ]]; then - echo "Warning: Log file is ${file_size_mb} MB (exceeds ${MAX_LOG_SIZE_MB} MB limit)." - echo "Only showing recent entries to prevent overwhelming the report." + if (( output_size + content_size > MAX_TOTAL_SIZE )); then + if [ "$max_exceeded" = false ]; then + echo "" + echo "⚠️ Output truncated - size limit reached (${MAX_TOTAL_SIZE} bytes)" + echo "💡 To see more logs, reduce LOG_LEVEL to ERROR or WARN, or download logs directly from Azure Portal" + max_exceeded=true + fi + return 1 fi + + echo "$content" + output_size=$((output_size + content_size)) + return 0 +} + +# Define log level priorities for filtering (compatible with older bash) +case "$LOG_LEVEL" in + "ERROR") CURRENT_PRIORITY=1 ;; + "WARN") CURRENT_PRIORITY=2 ;; + "INFO") CURRENT_PRIORITY=3 ;; + "DEBUG") CURRENT_PRIORITY=4 ;; + "VERBOSE") CURRENT_PRIORITY=5 ;; + *) CURRENT_PRIORITY=3 ;; # Default to INFO +esac + +# Display Application logs (errors, warnings, app output) +if [ -d "$TEMP_DIR/LogFiles/Application" ]; then + add_content "=== Application Logs ===" || exit 0 + + for log_file in "$TEMP_DIR/LogFiles/Application"/*; do + if [ -f "$log_file" ]; then + add_content "--- $(basename "$log_file") ---" || exit 0 + + # Filter by log level - only show errors/warnings for INFO and above + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # For INFO level and higher, filter for important entries + filtered_content=$(grep -iE 'error|warn|exception|fail|critical' "$log_file" | tail -n "$MAX_LOG_LINES" 2>/dev/null || echo "No errors/warnings found in recent logs") + else + # For DEBUG/VERBOSE, show more content but still limited + filtered_content=$(tail -n "$MAX_LOG_LINES" "$log_file") + fi + + add_content "$filtered_content" || exit 0 + add_content "" || exit 0 + fi + done else - echo "Error: Log file was not created." - exit 1 + add_content "No Application logs directory found" || exit 0 fi -# Extract and filter logs -echo "Extracting and filtering logs (last ${LOG_DISPLAY_RECENT_HOURS} hours, max ${MAX_LOG_LINES} lines)..." - -# Create temporary directory for extraction in current working directory -temp_dir="./log_extraction_$$" -mkdir -p "$temp_dir" -if ! unzip -qq "$LOG_PATH" -d "$temp_dir" 2>/dev/null; then - echo "Error: Failed to extract log file." - rm -rf "$temp_dir" - rm -f "$LOG_PATH" - exit 1 +# Display Detailed Error logs (4xx/5xx errors) - always include if present +if [ -d "$TEMP_DIR/LogFiles/DetailedErrors" ]; then + add_content "=== Detailed Error Logs ===" || exit 0 + + for error_file in "$TEMP_DIR/LogFiles/DetailedErrors"/*; do + if [ -f "$error_file" ]; then + add_content "--- $(basename "$error_file") ---" || exit 0 + error_content=$(cat "$error_file") + add_content "$error_content" || exit 0 + add_content "" || exit 0 + fi + done fi -echo "Azure App Service $APP_SERVICE_NAME recent logs:" -echo "============================================================" - -# Find and process log files - simplified approach -log_files_found=false -total_lines_shown=0 - -# Simple log display - just show recent content from each log file -echo "Showing last ${MAX_LOG_LINES} lines from each log file:" - -# Process log files - simple tail of recent content -find "$temp_dir" -name "*.log" -type f -not -name "*.xml" -exec stat -c '%s %n' {} \; | sort -nr | head -3 | while read size filepath; do - if [[ $size -gt 0 ]]; then - log_files_found=true - echo "" - echo "=== $(basename "$filepath") (last ${MAX_LOG_LINES} lines) ===" - cat "$filepath" | tail -${MAX_LOG_LINES} +# Display System Event Log (summary only) - avoid verbose XML dumps +if [ -f "$TEMP_DIR/LogFiles/eventlog.xml" ] && [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== System Events (Last 20 Events) ===" || exit 0 + if command -v xmllint &>/dev/null; then + event_summary=$(xmllint --xpath '//Event[position()<=20]/concat("Time=", System/TimeCreated/@SystemTime, " | Level=", System/Level/text(), " | Message=", substring(RenderingInfo/Message/text(), 1, 100), "\n")' "$TEMP_DIR/LogFiles/eventlog.xml" 2>/dev/null || echo "No recent system events") + add_content "$event_summary" || exit 0 + else + add_content "xmllint not available, skipping system events" || exit 0 fi -done - -if [[ "$log_files_found" == false ]]; then - echo "No log files with content found in the downloaded archive." - echo "" - echo "Available files:" - find "$temp_dir" -type f | head -10 | while read -r file; do - size=$(stat -c%s "$file" 2>/dev/null || echo 0) - echo " $(basename "$file") (${size} bytes)" - done fi -echo "" -echo "============================================================" -echo "Log display completed:" -echo "- Lines shown: ${total_lines_shown}" -echo "- File size processed: ${file_size_mb} MB" +# # Cleanup +# rm -rf "$TEMP_DIR" "$LOG_PATH" -# Clean up -rm -rf "$temp_dir" -rm -f "$LOG_PATH" \ No newline at end of file +echo "" +echo "📊 Output size: ${output_size} bytes (Limit: ${MAX_TOTAL_SIZE} bytes)" +if [ "$max_exceeded" = true ]; then + echo "🔍 For complete logs, visit: https://portal.azure.com and navigate to your App Service > Logs" +fi \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh b/codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh new file mode 100755 index 000000000..24232bca7 --- /dev/null +++ b/codebundles/azure-appservice-webapp-health/appservice_logs_enhanced.sh @@ -0,0 +1,381 @@ +#!/bin/bash + +# Enhanced Azure App Service Logs Collection Script +# Implements all optimization phases while maintaining size limits +# +# ENV: +# AZ_USERNAME +# AZ_SECRET_VALUE +# AZ_SUBSCRIPTION +# AZ_TENANT +# APP_SERVICE_NAME +# AZ_RESOURCE_GROUP +# LOG_LEVEL (Optional, default is INFO) +# MAX_LOG_LINES (Optional, default is 100) +# INCLUDE_DOCKER_LOGS (Optional, default is true) +# INCLUDE_DEPLOYMENT_LOGS (Optional, default is true) +# INCLUDE_PERFORMANCE_TRACES (Optional, default is false) + +# Set defaults +LOG_LEVEL="${LOG_LEVEL:-INFO}" +MAX_LOG_LINES="${MAX_LOG_LINES:-100}" +MAX_TOTAL_SIZE="${MAX_TOTAL_SIZE:-500000}" # 500KB limit +INCLUDE_DOCKER_LOGS="${INCLUDE_DOCKER_LOGS:-true}" +INCLUDE_DEPLOYMENT_LOGS="${INCLUDE_DEPLOYMENT_LOGS:-true}" +INCLUDE_PERFORMANCE_TRACES="${INCLUDE_PERFORMANCE_TRACES:-false}" + +LOG_PATH="_rw_logs_$APP_SERVICE_NAME.zip" +subscription_id=$(az account show --query "id" -o tsv) + +# Set the subscription +az account set --subscription $subscription_id + +# Download and extract logs +az webapp log download --name $APP_SERVICE_NAME --resource-group $AZ_RESOURCE_GROUP --subscription $subscription_id --log-file $LOG_PATH + +TEMP_DIR="/tmp/_temp_logs_$$" +mkdir -p "$TEMP_DIR" +unzip -o $LOG_PATH -d "$TEMP_DIR" >/dev/null 2>&1 +# Fix permissions on extracted files +chmod -R 755 "$TEMP_DIR" 2>/dev/null || true + +output_size=0 +max_exceeded=false + +echo "Azure App Service $APP_SERVICE_NAME Enhanced Logs (Level: $LOG_LEVEL, Max Lines: $MAX_LOG_LINES):" +echo "Features: Docker[${INCLUDE_DOCKER_LOGS}] | Deployments[${INCLUDE_DEPLOYMENT_LOGS}] | Performance[${INCLUDE_PERFORMANCE_TRACES}]" +echo "" + +# Function to add content with size check +add_content() { + local content="$1" + local content_size=${#content} + + if (( output_size + content_size > MAX_TOTAL_SIZE )); then + if [ "$max_exceeded" = false ]; then + echo "" + echo "⚠️ Output truncated - size limit reached (${MAX_TOTAL_SIZE} bytes)" + echo "💡 To see more logs, reduce LOG_LEVEL to ERROR or WARN, or download logs directly from Azure Portal" + max_exceeded=true + fi + return 1 + fi + + echo "$content" + output_size=$((output_size + content_size)) + return 0 +} + +# Define log level priorities for filtering (compatible with older bash) +case "$LOG_LEVEL" in + "ERROR") CURRENT_PRIORITY=1 ;; + "WARN") CURRENT_PRIORITY=2 ;; + "INFO") CURRENT_PRIORITY=3 ;; + "DEBUG") CURRENT_PRIORITY=4 ;; + "VERBOSE") CURRENT_PRIORITY=5 ;; + *) CURRENT_PRIORITY=3 ;; # Default to INFO +esac + +# ============================================================================= +# PHASE 1: APPLICATION LOGS (CORE - ALWAYS INCLUDED) +# ============================================================================= +# Debug: Check what's in the temp directory +if [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== Debug: Temp Directory Contents ===" || exit 0 + add_content "Temp dir: $TEMP_DIR" || exit 0 + add_content "Contents: $(ls -la "$TEMP_DIR" 2>/dev/null || echo 'No temp dir found')" || exit 0 + if [ -d "$TEMP_DIR/LogFiles" ]; then + add_content "LogFiles contents: $(ls -la "$TEMP_DIR/LogFiles" 2>/dev/null || echo 'No LogFiles dir')" || exit 0 + fi + add_content "" || exit 0 +fi + +if [ -d "$TEMP_DIR/LogFiles/Application" ]; then + add_content "=== Application Logs ===" || exit 0 + + for log_file in "$TEMP_DIR/LogFiles/Application"/*; do + if [ -f "$log_file" ]; then + add_content "--- $(basename "$log_file") ---" || exit 0 + + # Filter by log level - only show errors/warnings for INFO and above + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # For INFO level and higher, filter for important entries + filtered_content=$(grep -iE 'error|warn|exception|fail|critical' "$log_file" | tail -n "$MAX_LOG_LINES" 2>/dev/null || echo "No errors/warnings found in recent logs") + else + # For DEBUG/VERBOSE, show more content but still limited + filtered_content=$(tail -n "$MAX_LOG_LINES" "$log_file") + fi + + add_content "$filtered_content" || exit 0 + add_content "" || exit 0 + fi + done +else + add_content "No Application logs directory found" || exit 0 +fi + +# ============================================================================= +# DOCKER CONTAINER LOGS +# ============================================================================= +if [ "$INCLUDE_DOCKER_LOGS" = "true" ] && [ -d "$TEMP_DIR/LogFiles" ]; then + docker_logs_found=false + + for docker_log in "$TEMP_DIR/LogFiles"/*_default_docker.log "$TEMP_DIR/LogFiles"/*_docker.log; do + if [ -f "$docker_log" ]; then + if [ "$docker_logs_found" = false ]; then + add_content "=== Docker Container Logs ===" || exit 0 + docker_logs_found=true + fi + + add_content "--- $(basename "$docker_log") ---" || exit 0 + + # Smart filtering based on log level - FOCUS ON ACTUAL PROBLEMS + if [ "$CURRENT_PRIORITY" -le 2 ]; then + # ERROR/WARN: Only critical failures (deduplicated) + filtered_content=$(grep -iE 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" | sort -u | head -8 2>/dev/null || echo "No critical issues found") + elif [ "$CURRENT_PRIORITY" -eq 3 ]; then + # INFO: Show unique errors + key startup info + temp_content=$( + # Show unique critical errors + grep -iE 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" | sort -u | head -5 + # Show application startup attempts (unique) + grep -iE 'npm start|next start|blog.*start' "$docker_log" | sort -u | head -3 + # Show ONE port configuration line, not all 40 + grep -E 'export PORT=' "$docker_log" | head -1 + ) + # Add restart/failure summary separately + restart_count=$(grep -c 'export PORT=' "$docker_log" 2>/dev/null || echo "0") + error_count=$(grep -c 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" 2>/dev/null || echo "0") + # Ensure we have valid integers + restart_count=${restart_count//[^0-9]/} + error_count=${error_count//[^0-9]/} + restart_count=${restart_count:-0} + error_count=${error_count:-0} + if [ "$restart_count" -gt 1 ]; then + temp_content="$temp_content"$'\n'"INFO: Container restarted $restart_count times with $error_count error logs" + fi + filtered_content=$(echo "$temp_content" | grep -v '^$' | head -10 2>/dev/null || echo "No significant events found") + else + # DEBUG/VERBOSE: More detailed but still avoid excessive repetition + temp_content=$( + # Unique errors and failures + grep -iE 'not found|error|fail|fatal|exception|exit.*[1-9]|denied|unable|cannot' "$docker_log" | sort -u | head -8 + # Key startup events (unique) + grep -iE 'app.*service.*on.*linux|npm.*start|build.*operation|manifest' "$docker_log" | sort -u | head -5 + # Port and environment info (limited) + grep -E 'export PORT=|NODE_PATH=' "$docker_log" | head -2 + ) + # Container restart summary separately + restart_count=$(grep -c 'A P P S E R V I C E O N L I N U X' "$docker_log" 2>/dev/null || echo "0") + # Ensure we have a valid integer + restart_count=${restart_count//[^0-9]/} + restart_count=${restart_count:-0} + if [ "$restart_count" -gt 1 ]; then + temp_content="$temp_content"$'\n'"DEBUG: Container restarted $restart_count times during log period" + fi + filtered_content=$(echo "$temp_content" | grep -v '^$' | head -15 2>/dev/null || tail -n 15 "$docker_log") + fi + + add_content "$filtered_content" || exit 0 + add_content "" || exit 0 + fi + done + + if [ "$docker_logs_found" = false ] && [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== Docker Container Logs ===" || exit 0 + add_content "No Docker container logs found" || exit 0 + add_content "" || exit 0 + fi +fi + +# ============================================================================= +# RECENT DEPLOYMENT HISTORY +# ============================================================================= +if [ "$INCLUDE_DEPLOYMENT_LOGS" = "true" ] && [ -d "$TEMP_DIR/deployments" ] && [ "$CURRENT_PRIORITY" -ge 3 ]; then + add_content "=== Recent Deployments (Last 3) ===" || exit 0 + + deployment_count=0 + # Sort by modification time, newest first + for deployment_dir in $(find "$TEMP_DIR/deployments" -name "log.log" -exec ls -t {} \; 2>/dev/null | head -3); do + if [ -f "$deployment_dir" ]; then + deployment_id=$(basename "$(dirname "$deployment_dir")") + add_content "--- Deployment: ${deployment_id:0:8}... ---" || exit 0 + + # Show key deployment events and outcomes + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # INFO: Focus on outcomes and errors + deployment_content=$(grep -iE 'successful|failed|error|warning|deployment.*complete|build.*complete|exception' "$deployment_dir" | head -8 2>/dev/null || echo "No deployment status found") + else + # DEBUG/VERBOSE: More detailed deployment steps + deployment_content=$(grep -iE 'successful|failed|error|warning|deployment|build|predeployment|package|npm|dotnet|restore' "$deployment_dir" | head -12 2>/dev/null || head -8 "$deployment_dir") + fi + + add_content "$deployment_content" || exit 0 + add_content "" || exit 0 + + deployment_count=$((deployment_count + 1)) + [ $deployment_count -ge 3 ] && break + fi + done + + if [ $deployment_count -eq 0 ]; then + add_content "No recent deployment logs found" || exit 0 + add_content "" || exit 0 + fi +fi + +# ============================================================================= +# DETAILED ERROR LOGS +# ============================================================================= +if [ -d "$TEMP_DIR/LogFiles/DetailedErrors" ]; then + add_content "=== Detailed Error Logs ===" || exit 0 + + error_count=0 + for error_file in "$TEMP_DIR/LogFiles/DetailedErrors"/*; do + if [ -f "$error_file" ] && [ $error_count -lt 5 ]; then + add_content "--- $(basename "$error_file") ---" || exit 0 + + # Always show detailed errors, but limit size + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # Truncate very large error files for INFO and above + error_content=$(head -c 2000 "$error_file") + if [ $(wc -c < "$error_file") -gt 2000 ]; then + error_content="$error_content... [truncated - full error in Azure Portal]" + fi + else + # Show more for DEBUG/VERBOSE + error_content=$(head -c 4000 "$error_file") + if [ $(wc -c < "$error_file") -gt 4000 ]; then + error_content="$error_content... [truncated]" + fi + fi + + add_content "$error_content" || exit 0 + add_content "" || exit 0 + + error_count=$((error_count + 1)) + fi + done +fi + +# ============================================================================= +# PERFORMANCE & API TRACES +# ============================================================================= +if [ "$INCLUDE_PERFORMANCE_TRACES" = "true" ] && [ -d "$TEMP_DIR/LogFiles/kudu/trace" ] && [ "$CURRENT_PRIORITY" -ge 3 ]; then + add_content "=== Performance Issues ===" || exit 0 + + # Find slow requests (>5s) and failed requests + performance_issues_found=false + + # Look for slow requests (files with timing indicators) + # For INFO level: show requests >30s, for DEBUG/VERBOSE: show requests >10s + if [ "$CURRENT_PRIORITY" -le 3 ]; then + # INFO level: show slower requests (30s+) + trace_files=$(find "$TEMP_DIR/LogFiles/kudu/trace" -name "*_[3-9][0-9]s.xml" -o -name "*_[0-9][0-9][0-9]s.xml" 2>/dev/null | head -5) + else + # DEBUG/VERBOSE: show more requests (10s+) + trace_files=$(find "$TEMP_DIR/LogFiles/kudu/trace" -name "*_[1-9][0-9]s.xml" -o -name "*_[0-9][0-9][0-9]s.xml" 2>/dev/null | head -5) + fi + + for trace_file in $trace_files; do + if [ -f "$trace_file" ]; then + performance_issues_found=true + filename=$(basename "$trace_file") + # Extract timing info from filename (simple and reliable) + timing=$(echo "$filename" | grep -o '_[0-9]*s\.xml' | tr -d '_s.xml' || echo "unknown") + + # Add severity indicator based on timing + if [ "$timing" != "unknown" ] && [ "$timing" -gt 120 ]; then + severity="🔴 CRITICAL" + elif [ "$timing" != "unknown" ] && [ "$timing" -gt 60 ]; then + severity="🟠 HIGH" + else + severity="🟡 MEDIUM" + fi + + # Simple, reliable format that works for all filename patterns + add_content "⚠️ $severity Slow Request (${timing}s): $(echo "$filename" | cut -c1-90)" || exit 0 + fi + done + + # Look for failed requests (HTTP error codes) + for trace_file in $(find "$TEMP_DIR/LogFiles/kudu/trace" -name "*_500_*.xml" -o -name "*_404_*.xml" -o -name "*_pending.xml" 2>/dev/null | head -5); do + if [ -f "$trace_file" ]; then + performance_issues_found=true + filename=$(basename "$trace_file") + + # Extract status code from filename (simple and reliable) + status=$(echo "$filename" | grep -o '_[45][0-9][0-9]_' | tr -d '_' || echo "unknown") + + # Add error severity indicator + if [ "$status" = "500" ]; then + error_type="🔴 SERVER ERROR" + elif [ "$status" = "404" ]; then + error_type="🟡 NOT FOUND" + elif [[ "$filename" =~ pending ]]; then + error_type="⏳ PENDING" + else + error_type="🟠 CLIENT ERROR" + fi + + # Simple, reliable format that works for all filename patterns + if [[ "$filename" =~ pending ]]; then + add_content "⏳ Pending Request: $(echo "$filename" | cut -c1-90)" || exit 0 + else + add_content "❌ $error_type (HTTP $status): $(echo "$filename" | cut -c1-90)" || exit 0 + fi + fi + done + + if [ "$performance_issues_found" = false ]; then + add_content "No significant performance issues detected" || exit 0 + fi + + add_content "" || exit 0 +fi + +# ============================================================================= +# SYSTEM EVENT LOG (SUMMARY ONLY - AVOID VERBOSE XML) +# ============================================================================= +if [ -f "$TEMP_DIR/LogFiles/eventlog.xml" ] && [ "$CURRENT_PRIORITY" -ge 4 ]; then + add_content "=== System Events (Last 10 Events) ===" || exit 0 + if command -v xmllint &>/dev/null; then + event_summary=$(xmllint --xpath '//Event[position()<=10]/concat("Time=", System/TimeCreated/@SystemTime, " | Level=", System/Level/text(), " | Message=", substring(RenderingInfo/Message/text(), 1, 80), "\n")' "$TEMP_DIR/LogFiles/eventlog.xml" 2>/dev/null || echo "No recent system events") + add_content "$event_summary" || exit 0 + else + # Fallback: simple grep for basic event info + event_summary=$(grep -o ' Logs" + +# Cleanup +rm -rf "$TEMP_DIR" "$LOG_PATH" 2>/dev/null || true \ No newline at end of file diff --git a/codebundles/azure-appservice-webapp-health/runbook.robot b/codebundles/azure-appservice-webapp-health/runbook.robot index 4017c08c0..6e9244b35 100644 --- a/codebundles/azure-appservice-webapp-health/runbook.robot +++ b/codebundles/azure-appservice-webapp-health/runbook.robot @@ -159,8 +159,8 @@ Fetch App Service `${APP_SERVICE_NAME}` Utilization Metrics In Resource Group `$ RW.Core.Add Pre To Report 🔗 View Metrics in Azure Portal: ${metrics_url} Get App Service `${APP_SERVICE_NAME}` Logs In Resource Group `${AZ_RESOURCE_GROUP}` - [Documentation] Download and display recent raw log files from App Service (last 50 lines from each log file) - [Tags] appservice logs display raw access:read-only + [Documentation] Fetch enhanced logs of appservice workload (application-level, Docker, deployment, performance traces - optimized for report size) + [Tags] appservice logs enhanced filtered access:read-only ${logs}= RW.CLI.Run Bash File ... bash_file=appservice_logs.sh ... env=${env} @@ -402,6 +402,31 @@ Suite Initialization ... description=The threshold of average response time (ms) in which to generate an issue. Higher than this value indicates slow response time. ... pattern=\w* ... default=300 + ${LOG_LEVEL}= RW.Core.Import User Variable LOG_LEVEL + ... type=string + ... description=Log verbosity level: ERROR, WARN, INFO, DEBUG, VERBOSE + ... pattern=\w* + ... default=INFO + ${MAX_LOG_LINES}= RW.Core.Import User Variable MAX_LOG_LINES + ... type=string + ... description=Maximum lines per log file to display + ... pattern=\w* + ... default=100 + ${INCLUDE_DOCKER_LOGS}= RW.Core.Import User Variable INCLUDE_DOCKER_LOGS + ... type=string + ... description=Include Docker container logs in output (true/false) + ... pattern=\w* + ... default=true + ${INCLUDE_DEPLOYMENT_LOGS}= RW.Core.Import User Variable INCLUDE_DEPLOYMENT_LOGS + ... type=string + ... description=Include deployment history logs in output (true/false) + ... pattern=\w* + ... default=true + ${INCLUDE_PERFORMANCE_TRACES}= RW.Core.Import User Variable INCLUDE_PERFORMANCE_TRACES + ... type=string + ... description=Include performance traces in output (true/false) + ... pattern=\w* + ... default=false Set Suite Variable ${APP_SERVICE_NAME} ${APP_SERVICE_NAME} Set Suite Variable ${AZ_RESOURCE_GROUP} ${AZ_RESOURCE_GROUP} Set Suite Variable ${AZURE_RESOURCE_SUBSCRIPTION_ID} ${AZURE_RESOURCE_SUBSCRIPTION_ID} @@ -414,8 +439,12 @@ Suite Initialization Set Suite Variable ${HTTP4XX_THRESHOLD} ${HTTP4XX_THRESHOLD} Set Suite Variable ${DISK_USAGE_THRESHOLD} ${DISK_USAGE_THRESHOLD} Set Suite Variable ${AVG_RSP_TIME} ${AVG_RSP_TIME} + Set Suite Variable ${LOG_LEVEL} ${LOG_LEVEL} + Set Suite Variable ${MAX_LOG_LINES} ${MAX_LOG_LINES} + Set Suite Variable ${INCLUDE_DOCKER_LOGS} ${INCLUDE_DOCKER_LOGS} + Set Suite Variable ${INCLUDE_DEPLOYMENT_LOGS} ${INCLUDE_DEPLOYMENT_LOGS} + Set Suite Variable ${INCLUDE_PERFORMANCE_TRACES} ${INCLUDE_PERFORMANCE_TRACES} Set Suite Variable ... ${env} - ... {"APP_SERVICE_NAME":"${APP_SERVICE_NAME}", "AZ_RESOURCE_GROUP":"${AZ_RESOURCE_GROUP}", "AZURE_RESOURCE_SUBSCRIPTION_ID":"${AZURE_RESOURCE_SUBSCRIPTION_ID}", "TIME_PERIOD_MINUTES":"${TIME_PERIOD_MINUTES}","CPU_THRESHOLD":"${CPU_THRESHOLD}", "REQUESTS_THRESHOLD":"${REQUESTS_THRESHOLD}", "BYTES_RECEIVED_THRESHOLD":"${BYTES_RECEIVED_THRESHOLD}", "HTTP5XX_THRESHOLD":"${HTTP5XX_THRESHOLD}","HTTP2XX_THRESHOLD":"${HTTP2XX_THRESHOLD}", "HTTP4XX_THRESHOLD":"${HTTP4XX_THRESHOLD}", "DISK_USAGE_THRESHOLD":"${DISK_USAGE_THRESHOLD}", "AVG_RSP_TIME":"${AVG_RSP_TIME}"} - + ... {"APP_SERVICE_NAME":"${APP_SERVICE_NAME}", "AZ_RESOURCE_GROUP":"${AZ_RESOURCE_GROUP}", "AZURE_RESOURCE_SUBSCRIPTION_ID":"${AZURE_RESOURCE_SUBSCRIPTION_ID}", "TIME_PERIOD_MINUTES":"${TIME_PERIOD_MINUTES}","CPU_THRESHOLD":"${CPU_THRESHOLD}", "REQUESTS_THRESHOLD":"${REQUESTS_THRESHOLD}", "BYTES_RECEIVED_THRESHOLD":"${BYTES_RECEIVED_THRESHOLD}", "HTTP5XX_THRESHOLD":"${HTTP5XX_THRESHOLD}","HTTP2XX_THRESHOLD":"${HTTP2XX_THRESHOLD}", "HTTP4XX_THRESHOLD":"${HTTP4XX_THRESHOLD}", "DISK_USAGE_THRESHOLD":"${DISK_USAGE_THRESHOLD}", "AVG_RSP_TIME":"${AVG_RSP_TIME}", "LOG_LEVEL":"${LOG_LEVEL}", "MAX_LOG_LINES":"${MAX_LOG_LINES}", "INCLUDE_DOCKER_LOGS":"${INCLUDE_DOCKER_LOGS}", "INCLUDE_DEPLOYMENT_LOGS":"${INCLUDE_DEPLOYMENT_LOGS}", "INCLUDE_PERFORMANCE_TRACES":"${INCLUDE_PERFORMANCE_TRACES}"}