Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions build_stream/orchestrator/common/result_poller.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,11 @@ def _on_result_received(self, result: PlaybookResult) -> None:
# S12: On restart failure, still persist node_results.json
if result.stage_name == "restart":
self._on_restart_completed(result)
self._on_restart_failure(result)

# On deploy failure, mark ImageGroup FAILED
if result.stage_name == "deploy":
self._on_deploy_failure(result)

# On validate failure, mark ImageGroup FAILED
if result.stage_name == "validate":
Expand Down Expand Up @@ -968,3 +973,49 @@ def _on_deploy_failure(self, result: PlaybookResult) -> None:
job_id=str(result.job_id),
exc_info=True,
)

def _on_restart_failure(self, result: PlaybookResult) -> None:
    """Mark the job's ImageGroup as FAILED after a failed restart stage.

    The ImageGroup is looked up by the result's job id and its status is set
    to ``ImageGroupStatus.FAILED``. When the repository exposes a ``session``
    attribute, the change is committed through it. Exceptions raised during
    the lookup, update or commit are caught and logged, not propagated.

    Args:
        result: Playbook result of the restart stage; only ``job_id`` is read.
    """
    repo = self._image_group_repo

    # Without a repository there is nothing to update; record that and bail out.
    if repo is None:
        skipped_msg = (
            "ImageGroup repo not available; skipping restart failure "
            f"update for job={result.job_id}"
        )
        log_secure_info("warning", skipped_msg, job_id=str(result.job_id))
        return

    try:
        group = repo.find_by_job_id(JobId(str(result.job_id)))

        if group is None:
            missing_msg = (
                f"Restart failure callback: No ImageGroup found for job={result.job_id}."
            )
            log_secure_info("error", missing_msg, job_id=str(result.job_id))
            return

        repo.update_status(
            image_group_id=group.id,
            new_status=ImageGroupStatus.FAILED,
        )

        # Not every repository implementation exposes a session to commit.
        if hasattr(repo, 'session'):
            repo.session.commit()

        failed_msg = (
            f"Restart FAILED for job={result.job_id}. "
            f"ImageGroup '{group.id}' -> FAILED."
        )
        log_secure_info("warning", failed_msg, job_id=str(result.job_id))
    except Exception as exc:  # pylint: disable=broad-except
        # Best-effort status update: log the failure instead of raising.
        error_msg = (
            "Failed to update ImageGroup status on restart "
            f"failure for job={result.job_id}: {exc}"
        )
        log_secure_info(
            "error",
            error_msg,
            job_id=str(result.job_id),
            exc_info=True,
        )
11 changes: 11 additions & 0 deletions common/library/module_utils/local_repo/software_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
CSV_COLUMNS,
SOFTWARE_CONFIG_SUBDIR,
DEFAULT_STATUS_FILENAME,
STATUS_CSV_HEADER,
RPM_LABEL_TEMPLATE,
RHEL_OS_URL,
SOFTWARES_KEY,
Expand Down Expand Up @@ -853,6 +854,16 @@ def check_csv_existence(path):

def read_status_csv(csv_path):
    """Read the status CSV at ``csv_path`` and return its rows as dictionaries.

    Before parsing, a non-empty file whose first line does not match
    ``STATUS_CSV_HEADER`` (compared with surrounding whitespace ignored) is
    rewritten in place with the header prepended, so rows written earlier
    without a header are kept and become parseable.

    Args:
        csv_path: Path to the status CSV file.

    Returns:
        list: One dict per data row, keyed by the column names in the header
        line. An empty file yields an empty list.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
    """
    # Only files that already hold data can have a missing or invalid header.
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        with open(csv_path, 'r', encoding='utf-8') as file:
            first_line = file.readline()
            needs_header = (
                bool(first_line)
                and first_line.strip() != STATUS_CSV_HEADER.strip()
            )
            # Load the rest of the file only when a repair is actually needed.
            remainder = file.read() if needs_header else ""

        if needs_header:
            # Guarantee the header sits on its own line; without a terminator
            # it would fuse with the first data row and corrupt both.
            header = STATUS_CSV_HEADER
            if not header.endswith("\n"):
                header += "\n"
            with open(csv_path, 'w', encoding='utf-8') as wfile:
                wfile.write(header)
                wfile.write(first_line)
                wfile.write(remainder)

    with open(csv_path, mode='r', newline='', encoding='utf-8') as file:
        return list(csv.DictReader(file))
Expand Down
10 changes: 10 additions & 0 deletions common/library/modules/parallel_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,19 @@ def determine_function(

# Construct the status file path using DEFAULT_STATUS_FILENAME.
status_file = os.path.join(csv_file_path, DEFAULT_STATUS_FILENAME)

# Ensure file exists with valid header
if not os.path.exists(status_file) or os.stat(status_file).st_size == 0:
with open(status_file, 'w', encoding="utf-8") as file:
file.write(STATUS_CSV_HEADER)
else:
with open(status_file, 'r', encoding="utf-8") as file:
lines = file.readlines()
if lines and lines[0].strip() != STATUS_CSV_HEADER.strip():
# Header missing or invalid - prepend header to existing data
with open(status_file, 'w', encoding="utf-8") as wfile:
wfile.write(STATUS_CSV_HEADER)
wfile.writelines(lines)


task_type = task.get("type")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,35 @@
########################
# version: version of this values file
# Note: Do not change this value
version: "v2.16.0"
version: "v2.17.0"

images:
# "driver" defines the container image, used for the driver container.
driver:
image: quay.io/dell/container-storage-modules/csi-isilon:v2.16.0
image: quay.io/dell/container-storage-modules/csi-isilon:v2.17.0
# CSI sidecars
attacher:
image: registry.k8s.io/sig-storage/csi-attacher:v4.10.0
image: registry.k8s.io/sig-storage/csi-attacher:v4.11.0
provisioner:
image: registry.k8s.io/sig-storage/csi-provisioner:v6.1.0
image: registry.k8s.io/sig-storage/csi-provisioner:v6.2.0
snapshotter:
image: registry.k8s.io/sig-storage/csi-snapshotter:v8.4.0
image: registry.k8s.io/sig-storage/csi-snapshotter:v8.5.0
resizer:
image: registry.k8s.io/sig-storage/csi-resizer:v2.0.0
image: registry.k8s.io/sig-storage/csi-resizer:v2.1.0
registrar:
image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.15.0
image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.16.0
healthmonitor:
image: registry.k8s.io/sig-storage/csi-external-health-monitor-controller:v0.16.0
image: registry.k8s.io/sig-storage/csi-external-health-monitor-controller:v0.17.0

# CSM sidecars
replication:
image: quay.io/dell/container-storage-modules/dell-csi-replicator:v1.14.0
image: quay.io/dell/container-storage-modules/dell-csi-replicator:v1.15.0
podmon:
image: quay.io/dell/container-storage-modules/podmon:v1.15.0
image: quay.io/dell/container-storage-modules/podmon:v1.16.0
authorization:
image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.4.0
image: quay.io/dell/container-storage-modules/csm-authorization-sidecar:v2.5.0
metadataretriever:
image: quay.io/dell/container-storage-modules/csi-metadata-retriever:v1.13.0
image: quay.io/dell/container-storage-modules/csi-metadata-retriever:v1.14.0

# CSI driver log level
# Allowed values: "error", "warn"/"warning", "info", "debug"
Expand Down Expand Up @@ -119,7 +119,7 @@ controller:
# the Kubernetes release.
# Allowed values: n, where n > 0
# Default value: None
controllerCount: 2
controllerCount: 1

# volumeNamePrefix: Prefix of PersistentVolume names created
# Allowed values: string
Expand Down Expand Up @@ -184,15 +184,15 @@ controller:
# true: enable volume expansion feature(install resizer sidecar)
# false: disable volume expansion feature(do not install resizer sidecar)
# Default value: None
enabled: true
enabled: false

healthMonitor:
# enabled: Enable/Disable health monitor of CSI volumes- volume status, volume condition
# Allowed values:
# true: enable checking of health condition of CSI volumes
# false: disable checking of health condition of CSI volumes
# Default value: None
enabled: false
enabled: true

# interval: Interval of monitoring volume health condition
# Allowed values: Number followed by unit of time (s,m,h)
Expand Down Expand Up @@ -301,7 +301,7 @@ node:
# true: enable checking of health condition of CSI volumes
# false: disable checking of health condition of CSI volumes
# Default value: None
enabled: false
enabled: true

## PLATFORM ATTRIBUTES
######################
Expand Down
Loading
Loading