Skip to content

Commit f0b128c

Browse files
authored
Merge branch 'stable' into monthlyupdate
2 parents 5fc1f32 + 32a2fa7 commit f0b128c

1 file changed

Lines changed: 52 additions & 5 deletions

File tree

src/mas/devops/tekton.py

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from kubeconfig import kubectl
2222
from openshift.dynamic import DynamicClient
23-
from openshift.dynamic.exceptions import NotFoundError, UnprocessibleEntityError
23+
from openshift.dynamic.exceptions import NotFoundError, UnprocessibleEntityError, ApiException
2424

2525
from jinja2 import Environment, FileSystemLoader
2626

@@ -1152,7 +1152,54 @@ def prepareInstallRBAC(dynClient: DynamicClient, namespace: str, instanceId: str
11521152

11531153
logger.debug(f"Applying RBAC resource {kind}/{name} in namespace {namespace} for instance {instanceId}")
11541154
resourceAPI = dynClient.resources.get(api_version=apiVersion, kind=kind)
1155-
if namespace:
1156-
resourceAPI.apply(body=resourceBody, namespace=namespace)
1157-
else:
1158-
resourceAPI.apply(body=resourceBody)
1155+
1156+
# Optimized retry logic for transient API server errors
1157+
max_retries = 10 # Reduced from 30 to 10 retries
1158+
base_delay = 1 # Reduced initial delay from 2s to 1s
1159+
max_delay = 15 # Reduced max delay from 30s to 15s
1160+
1161+
for attempt in range(max_retries):
1162+
try:
1163+
if namespace:
1164+
resourceAPI.apply(body=resourceBody, namespace=namespace)
1165+
else:
1166+
resourceAPI.apply(body=resourceBody)
1167+
1168+
# Log success only if there were previous failures
1169+
if attempt > 0:
1170+
logger.info(f"Successfully applied {kind}/{name} after {attempt + 1} attempts")
1171+
break # Success, exit retry loop
1172+
1173+
except ApiException as e:
1174+
# Check if it's a retryable error: HTTP 429, 503 or 504, or an error message indicating API server shutdown, connection refused, or too many requests
1175+
is_retryable = (e.status in [429, 503, 504] or "apiserver is shutting down" in str(e).lower() or "connection refused" in str(e).lower() or "too many requests" in str(e).lower())
1176+
1177+
if is_retryable and attempt < max_retries - 1:
1178+
# Exponential backoff with jitter to avoid thundering herd
1179+
import random
1180+
wait_time = min(base_delay * (2 ** attempt), max_delay)
1181+
jitter = random.uniform(0, 0.1 * wait_time) # Add up to 10% jitter
1182+
total_wait = wait_time + jitter
1183+
1184+
logger.warning(
1185+
f"API server temporarily unavailable for {kind}/{name} "
1186+
f"(attempt {attempt + 1}/{max_retries}, status: {e.status}). "
1187+
f"Retrying in {total_wait:.1f}s..."
1188+
)
1189+
sleep(total_wait)
1190+
elif is_retryable:
1191+
# Exhausted all retries
1192+
logger.error(
1193+
f"Failed to apply RBAC resource {kind}/{name} after {max_retries} attempts. "
1194+
f"API server may be unavailable. Last error: {e.status} - {str(e)[:200]}"
1195+
)
1196+
raise
1197+
else:
1198+
# Non-retryable error (permissions, invalid resource, etc.)
1199+
logger.error(f"Failed to apply RBAC resource {kind}/{name}: {e.status} - {str(e)[:200]}")
1200+
raise
1201+
1202+
except Exception as e:
1203+
# Catch any other unexpected errors
1204+
logger.error(f"Unexpected error applying RBAC resource {kind}/{name}: {type(e).__name__} - {str(e)[:200]}")
1205+
raise

0 commit comments

Comments
 (0)