From 4585c9221ff79018234320a350df4d572ec45343 Mon Sep 17 00:00:00 2001 From: pullan1 Date: Wed, 22 Oct 2025 01:36:23 -0400 Subject: [PATCH 01/20] checkmarx fix Signed-off-by: pullan1 --- common/library/module_utils/local_repo/download_common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/common/library/module_utils/local_repo/download_common.py b/common/library/module_utils/local_repo/download_common.py index 4cb305f346..b81d02f086 100644 --- a/common/library/module_utils/local_repo/download_common.py +++ b/common/library/module_utils/local_repo/download_common.py @@ -38,7 +38,8 @@ ISO_POLL_VAL, TAR_POLL_VAL, FILE_POLL_VAL, - FILE_URI + FILE_URI, + PULP_SSL_CA_CERT ) file_lock = Lock() @@ -208,9 +209,10 @@ def handle_file_upload(repository_name, relative_path, file_url, poll_interval, # Load config for authentication and base_url config = load_pulp_config(CLI_FILE_PATH) base_url = config["base_url"] + passcode = base64.b64decode(config["password"].encode()).decode() # Initialize RestClient - client = RestClient(base_url, config["username"], config["password"]) + client = RestClient(base_url, config["username"], passcode) data = { "file_url": file_url, @@ -230,7 +232,7 @@ def handle_file_upload(repository_name, relative_path, file_url, poll_interval, return "Failed" # Wait for task completion - task_result = wait_for_task(task_href, base_url, config["username"], config["password"], + task_result = wait_for_task(task_href, base_url, config["username"], passcode, logger, timeout=POST_TIMEOUT, interval=poll_interval) if task_result: return "Success" From ea7ef24c94dd744fe48f82a224b9f91e8107dfb7 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Mon, 27 Oct 2025 11:01:26 +0530 Subject: [PATCH 02/20] defect fix for credentials rule Signed-off-by: sakshi-singla-1735 --- .../input_validation/schema/credential_rules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/library/module_utils/input_validation/schema/credential_rules.json b/common/library/module_utils/input_validation/schema/credential_rules.json index 20700becb4..ab21ea8796 100644 --- a/common/library/module_utils/input_validation/schema/credential_rules.json +++ b/common/library/module_utils/input_validation/schema/credential_rules.json @@ -103,9 +103,9 @@ "pattern": "^[^;\\[\\]`]+$" }, "csi_password": { - "description": "Password for Powerscale UI. Must not contain hyphens (-), single quotes ('), double quotes (\"), at symbols (@), or backslashes (\\).", + "description": "Password for Powerscale UI. Can contain any characters. Length must be between 5 and 32.", "minLength": 5, "maxLength": 32, - "pattern": "^[^\\-\\'\\\"@\\\\]*$" + "pattern": "^.{5,32}$" } } From 0d5e6540bd3738d7e607667d188dc5c8f26584b2 Mon Sep 17 00:00:00 2001 From: Manasa H Date: Wed, 29 Oct 2025 16:42:35 +0530 Subject: [PATCH 03/20] fixed discovery playbook failing --- prepare_oim/roles/deploy_containers/auth/templates/auth.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 b/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 index 65dc9f982c..4088d1856d 100644 --- a/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 +++ b/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 @@ -13,7 +13,7 @@ PublishPort=0.0.0.0:{{ port }}:{{ port }} # Mount configuration and bootstrap files (read-only, with SELinux relabel) Volume={{ slapd_conf_dest }}:/etc/openldap/slapd.conf:ro,Z Volume={{ bootstrap_ldif_dest }}:/container-init/bootstrap.ldif:ro,Z -Volume={{ openldap_tls_certs_directory }}:/etc/openldap/certs:ro,Z +Volume={{ openldap_tls_certs_directory }}:/etc/openldap/certs:ro,z [Service] Restart=always From 197c282d1930f03879cb2423a2b3476d53e43747 Mon Sep 17 00:00:00 2001 From: Manasa H Date: Wed, 29 Oct 2025 17:11:59 +0530 Subject: [PATCH 04/20] updated the selinux to "z" in auth.j2 --- prepare_oim/roles/deploy_containers/auth/templates/auth.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 b/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 index 4088d1856d..b851516edb 100644 --- a/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 +++ b/prepare_oim/roles/deploy_containers/auth/templates/auth.j2 @@ -11,8 +11,8 @@ PublishPort=0.0.0.0:{{ port }}:{{ port }} {% endfor %} # Mount configuration and bootstrap files (read-only, with SELinux relabel) -Volume={{ slapd_conf_dest }}:/etc/openldap/slapd.conf:ro,Z -Volume={{ bootstrap_ldif_dest }}:/container-init/bootstrap.ldif:ro,Z +Volume={{ slapd_conf_dest }}:/etc/openldap/slapd.conf:ro,z +Volume={{ bootstrap_ldif_dest }}:/container-init/bootstrap.ldif:ro,z Volume={{ openldap_tls_certs_directory }}:/etc/openldap/certs:ro,z [Service] From 311c7062bbe9ab2c7d9353c9d11a6e5bc7d58590 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Fri, 31 Oct 2025 13:10:18 +0530 Subject: [PATCH 05/20] Update pod_status.yml Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- scheduler/roles/common_plugins/tasks/pod_status.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scheduler/roles/common_plugins/tasks/pod_status.yml b/scheduler/roles/common_plugins/tasks/pod_status.yml index f8318030f6..1ac37b8ec8 100644 --- a/scheduler/roles/common_plugins/tasks/pod_status.yml +++ b/scheduler/roles/common_plugins/tasks/pod_status.yml @@ -66,6 +66,8 @@ ansible.builtin.set_fact: k8s_cluster_issue_msg: >- Cluster issues detected: Please login to kube control plane and check for more details. + After resolving the issue, make sure to rerun the playbook. + For more information, see the Troubleshooting section in the Omnia documentation. {% if k8s_status.not_ready_nodes | length > 0 %} - NotReady nodes: {{ k8s_status.not_ready_nodes }} {% endif %} From 896a3696c40581932084c4f46973fbbb43f75f76 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Fri, 31 Oct 2025 15:44:43 +0530 Subject: [PATCH 06/20] pxe boot using idrac_boot module --- .../roles/idrac/tasks/check_prerequisites.yml | 40 ----- .../roles/idrac/tasks/configure_pxe_boot.yml | 148 ------------------ utils/roles/idrac/tasks/main.yml | 20 --- .../roles/idrac/tasks/validate_inventory.yml | 26 --- utils/roles/idrac_pxe_boot/tasks/main.yml | 58 +++++++ .../{idrac => idrac_pxe_boot}/vars/main.yml | 25 +-- utils/set_pxe_boot.yml | 26 +-- 7 files changed, 85 insertions(+), 258 deletions(-) delete mode 100644 utils/roles/idrac/tasks/check_prerequisites.yml delete mode 100644 utils/roles/idrac/tasks/configure_pxe_boot.yml delete mode 100644 utils/roles/idrac/tasks/main.yml delete mode 100644 utils/roles/idrac/tasks/validate_inventory.yml create mode 100644 utils/roles/idrac_pxe_boot/tasks/main.yml rename utils/roles/{idrac => idrac_pxe_boot}/vars/main.yml (59%) diff --git a/utils/roles/idrac/tasks/check_prerequisites.yml b/utils/roles/idrac/tasks/check_prerequisites.yml deleted file mode 100644 index e5e20f881a..0000000000 --- a/utils/roles/idrac/tasks/check_prerequisites.yml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Initialize variables - ansible.builtin.set_fact: - deploy_os_status: false - -- name: Show status of the Lifecycle Controller - dellemc.openmanage.idrac_lifecycle_controller_status_info: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - register: lc_check_status - -- name: LC not available - ansible.builtin.fail: - msg: "{{ lc_check_fail_msg }}" - when: not lc_check_status.lc_status_info.LCReady - register: lc_fail - -- name: Get system inventory - dellemc.openmanage.idrac_system_info: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - register: idrac_info diff --git a/utils/roles/idrac/tasks/configure_pxe_boot.yml b/utils/roles/idrac/tasks/configure_pxe_boot.yml deleted file mode 100644 index 4cb1b7b6e1..0000000000 --- a/utils/roles/idrac/tasks/configure_pxe_boot.yml +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Initialize active_nic - ansible.builtin.set_fact: - active_nic: [] - -- name: Set active_nic from NIC details - ansible.builtin.set_fact: - active_nic: "{{ active_nic + [idrac_info.system_info.NIC[my_idx].FQDD] }}" - with_items: "{{ idrac_info.system_info.NIC }}" - when: - - idrac_info.system_info.NIC[my_idx].LinkStatus is defined - - '"up" in idrac_info.system_info.NIC[my_idx].LinkStatus | lower' - loop_control: - index_var: my_idx - -- name: Set unique active_nic and active_nic_count - ansible.builtin.set_fact: - active_nic: "{{ active_nic | unique }}" - active_nic_count: "{{ active_nic | unique | length }}" - -- name: No active_nic present for the server - ansible.builtin.fail: - msg: "{{ active_nic_fail_msg }}" - when: active_nic_count == "0" - -- name: Configure PXE boot - block: - - name: Configure boot order for PXE booting of 1 active_nic - dellemc.openmanage.idrac_bios: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - attributes: - SetBootOrderEn: NIC.PxeDevice.1-1,NIC.PxeDevice.2-1,NIC.PxeDevice.3-1,NIC.PxeDevice.4-1 - PxeDev1Interface: "{{ active_nic[0] }}" - register: deploy_os_pxe1 - until: not deploy_os_pxe1.failed - retries: "{{ retries_count }}" - when: active_nic_count == "1" - - - name: Configure boot order for PXE booting of 2 active_nic - dellemc.openmanage.idrac_bios: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - attributes: - SetBootOrderEn: NIC.PxeDevice.1-1,NIC.PxeDevice.2-1,NIC.PxeDevice.3-1,NIC.PxeDevice.4-1 - PxeDev1Interface: "{{ active_nic[0] }}" - PxeDev2Interface: "{{ active_nic[1] }}" - register: deploy_os_pxe2 - until: not deploy_os_pxe2.failed - retries: "{{ retries_count }}" - when: active_nic_count == "2" - - - name: Configure boot order for PXE booting of 3 active_nic - dellemc.openmanage.idrac_bios: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - attributes: - SetBootOrderEn: NIC.PxeDevice.1-1,NIC.PxeDevice.2-1,NIC.PxeDevice.3-1,NIC.PxeDevice.4-1 - PxeDev1Interface: "{{ active_nic[0] }}" - PxeDev2Interface: "{{ active_nic[1] }}" - PxeDev3Interface: "{{ active_nic[2] }}" - register: deploy_os_pxe3 - until: not deploy_os_pxe3.failed - retries: "{{ retries_count }}" - when: active_nic_count == "3" - - - name: Configure boot order for PXE booting of 4 active_nic - dellemc.openmanage.idrac_bios: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - attributes: - SetBootOrderEn: NIC.PxeDevice.1-1,NIC.PxeDevice.2-1,NIC.PxeDevice.3-1,NIC.PxeDevice.4-1 - PxeDev1Interface: "{{ active_nic[0] }}" - PxeDev2Interface: "{{ active_nic[1] }}" - PxeDev3Interface: "{{ active_nic[2] }}" - PxeDev4Interface: "{{ active_nic[3] }}" - register: deploy_os_pxe4 - until: not deploy_os_pxe4.failed - retries: "{{ retries_count }}" - when: active_nic_count >= "4" - rescue: - - name: Retry configuring boot order - block: - - name: Retry configuring boot order for PXE booting - dellemc.openmanage.idrac_bios: - idrac_ip: "{{ inventory_hostname }}" - idrac_user: "{{ hostvars['localhost']['bmc_username'] }}" - idrac_password: "{{ hostvars['localhost']['bmc_password'] }}" - validate_certs: false - attributes: - SetBootOrderEn: NIC.PxeDevice.1-1,NIC.PxeDevice.2-1,NIC.PxeDevice.3-1,NIC.PxeDevice.4-1 - register: deploy_os_pxe_retry - rescue: - - name: OS provisioning failed using PXE - ansible.builtin.fail: - msg: "{{ pxe_provisioning_fail_msg }}" - always: - - name: Set deploy_os_status when provision_method == PXE for 1 active_nic - ansible.builtin.set_fact: - deploy_os_status: "{{ not deploy_os_pxe1.failed }}" - when: active_nic_count == "1" - - - name: Set deploy_os_status when provision_method == PXE for 2 active_nic - ansible.builtin.set_fact: - deploy_os_status: "{{ not deploy_os_pxe2.failed }}" - when: active_nic_count == "2" - - - name: Set deploy_os_status when provision_method == PXE for 3 active_nic - ansible.builtin.set_fact: - deploy_os_status: "{{ not deploy_os_pxe3.failed }}" - when: active_nic_count == "3" - - - name: Set deploy_os_status when provision_method == PXE for 4 active_nic - ansible.builtin.set_fact: - deploy_os_status: "{{ not deploy_os_pxe4.failed }}" - when: active_nic_count >= "4" - - - name: Set deploy_os_status when provision_method == PXE for retry - ansible.builtin.set_fact: - deploy_os_status: "{{ not deploy_os_pxe_retry.failed }}" - when: deploy_os_pxe_retry.failed is defined - -- name: Provision OS status - ansible.builtin.debug: - msg: "{{ provision_os_msg }}" - when: deploy_os_status diff --git a/utils/roles/idrac/tasks/main.yml b/utils/roles/idrac/tasks/main.yml deleted file mode 100644 index 172254514e..0000000000 --- a/utils/roles/idrac/tasks/main.yml +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Check prequisites - ansible.builtin.include_tasks: check_prerequisites.yml - -- name: Configure PXE boot - ansible.builtin.include_tasks: configure_pxe_boot.yml diff --git a/utils/roles/idrac/tasks/validate_inventory.yml b/utils/roles/idrac/tasks/validate_inventory.yml deleted file mode 100644 index 6658b8a75b..0000000000 --- a/utils/roles/idrac/tasks/validate_inventory.yml +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Inventory not provided - ansible.builtin.fail: - msg: "{{ bmc_empty_inventory_fail_msg.splitlines() | join(' ') }}" - when: - - groups['all'] is defined - - (groups['all'] | length == 0) - -- name: Validate bmc group - ansible.builtin.assert: - that: "groups['bmc'] | length | int >= 1" - success_msg: "{{ bmc_validation_fail_msg }}" diff --git a/utils/roles/idrac_pxe_boot/tasks/main.yml b/utils/roles/idrac_pxe_boot/tasks/main.yml new file mode 100644 index 0000000000..e4566cbff7 --- /dev/null +++ b/utils/roles/idrac_pxe_boot/tasks/main.yml @@ -0,0 +1,58 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +- name: Show status of the Lifecycle Controller + dellemc.openmanage.idrac_lifecycle_controller_status_info: + idrac_ip: "{{ inventory_hostname }}" + idrac_user: "{{ bmc_username | default(hostvars['localhost']['bmc_username']) }}" + idrac_password: "{{ bmc_password | default(hostvars['localhost']['bmc_password']) }}" + validate_certs: false + register: lc_check_status + +- name: Check LC availibility + ansible.builtin.fail: + msg: "{{ lc_check_fail_msg }}" + when: not lc_check_status.lc_status_info.LCReady + +- name: Set reboot type + ansible.builtin.set_fact: + reboot_type: "{{ 'none' if not restart_host else ('force_restart' if force_restart else 'graceful_restart') }}" + +- name: Boot idrac pxe + dellemc.openmanage.idrac_boot: + idrac_ip: "{{ inventory_hostname }}" + idrac_user: "{{ bmc_username | default(hostvars['localhost']['bmc_username']) }}" + idrac_password: "{{ bmc_password | default(hostvars['localhost']['bmc_password']) }}" + validate_certs: false + boot_source_override_mode: uefi + boot_source_override_target: "{{ boot_source_override_target }}" + boot_source_override_enabled: "{{ boot_source_override_enabled }}" + reset_type: "{{ reboot_type }}" + register: pxe_provisioning + ignore_errors: true + ignore_unreachable: true + +- name: OS provisioning failed using PXE + ansible.builtin.fail: + msg: "{{ pxe_provisioning_fail_msg }}" + when: pxe_provisioning is failed + +- name: IDRAC might be unreachable during OS provisioning + ansible.builtin.debug: + msg: "{{ unreachable_idrac_msg }}" + when: pxe_provisioning is unreachable + +- name: Provision OS status + ansible.builtin.debug: + msg: "{{ provision_os_msg }}" diff --git a/utils/roles/idrac/vars/main.yml b/utils/roles/idrac_pxe_boot/vars/main.yml similarity index 59% rename from utils/roles/idrac/vars/main.yml rename to utils/roles/idrac_pxe_boot/vars/main.yml index 413026c19a..35d2431a45 100644 --- a/utils/roles/idrac/vars/main.yml +++ b/utils/roles/idrac_pxe_boot/vars/main.yml @@ -12,18 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. --- +# Change to false for not restarting host. only settign pxe_boot will happen +restart_host: true -# Usage: check_prerequisites.yml -lc_check_fail_msg: "Failed. LC is not ready. Retry again after LC is ready" +# Change to true for forceful reboot. by default graceful will happen +force_restart: false -# Usage: configure_pxe_boot.yml -active_nic_fail_msg: "No host active nic present for the device. Please check the host connection for the server and retry again." -provision_os_msg: "OS provisioning is initiated. Wait for installation to complete for all servers." -pxe_provisioning_fail_msg: "OS provisioning using PXE failed. This could be due to outdated NIC firmware. Re-run provision.yml after fixing the issue" -retries_count: 3 +# Set boot source override mode. Valid values are once, continuous, or disabled +boot_source_override_enabled: once + +# Set boot source override target. Valid values are pxe,uefi_http,sd_card,uefi_target,utilities,bios_setup,hdd,cd,floppy,none +boot_source_override_target: pxe -# Usage: validate_inventory.yml -bmc_empty_inventory_fail_msg: | - Failed. Inventory not provided. - Please re-run the playbook with an inventory that includes the groups 'bmc' by using the -i inventory option. +# Usage: main.yml +lc_check_fail_msg: "Failed. LC is not ready. Retry again after LC is ready" +provision_os_msg: "OS provisioning is initiated. Wait for installation to complete for all servers." +pxe_provisioning_fail_msg: "OS provisioning using PXE failed. This could be due to outdated NIC firmware. Re-run discovery.yml after fixing the issue" bmc_validation_fail_msg: "Failed. bmc group in inventory must have atleast one bmc ip." +unreachable_idrac_msg: "iDRAC is unreachable. pxe boot might be set. Please check the host reboot status manually" diff --git a/utils/set_pxe_boot.yml b/utils/set_pxe_boot.yml index 52aaced6c5..0e46b6dd56 100644 --- a/utils/set_pxe_boot.yml +++ b/utils/set_pxe_boot.yml @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. --- - - name: Set_fact for fetch omnia config credentials hosts: localhost connection: local @@ -27,19 +26,20 @@ - name: Invoke get_config_credentials.yml ansible.builtin.import_playbook: credential_utility/get_config_credentials.yml -- name: Validate inventory - hosts: localhost - connection: local - gather_facts: false - tasks: - - name: Validate inventory - ansible.builtin.include_role: - name: idrac - tasks_from: validate_inventory.yml - -- name: Deploy OS via idrac +# This will set the boot mode to pxe +# WARNING: This play will RESTART the host +# Note: Restart will not happen if the server is powered off, only pxe mode will be set. +- name: Reboot Host via PXE hosts: bmc connection: local gather_facts: false + pre_tasks: + - name: Validate bmc group + ansible.builtin.assert: + that: groups['bmc'] | length | int >= 1 + fail_msg: "Failed. bmc group in inventory must have atleast one bmc ip." roles: - - idrac + - role: idrac_pxe_boot + # vars: + # restart_host: false # By default restart will be true, set to false not to restart + # force_restart: true # By default graceful_restart will happen, set to true to force restart From 0aef1a8b1ddc317dede8cafc9aac8e8b43bc7d7a Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Mon, 3 Nov 2025 15:16:26 +0530 Subject: [PATCH 07/20] Update validate_idrac_inventory.yml Signed-off-by: Kratika Patidar --- .../tasks/validate_idrac_inventory.yml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/telemetry/roles/telemetry_validation/tasks/validate_idrac_inventory.yml b/telemetry/roles/telemetry_validation/tasks/validate_idrac_inventory.yml index afb798650d..8fa7c198a4 100644 --- a/telemetry/roles/telemetry_validation/tasks/validate_idrac_inventory.yml +++ b/telemetry/roles/telemetry_validation/tasks/validate_idrac_inventory.yml @@ -63,6 +63,30 @@ loop: "{{ groups['kube_control_plane'] }}" when: "hostvars[item]['bmc_ip'] not in bmc_group_data" +- name: Get list of BMC IPs of kube_control_plane from inventory + ansible.builtin.set_fact: + inventory_bmc_ips: "{{ groups['kube_control_plane'] | map('extract', hostvars, 'bmc_ip') | list }}" + +- name: Read BMC group data file + ansible.builtin.slurp: + src: "{{ bmc_group_data_filename }}" + register: bmc_file_content + +- name: Parse BMC group data lines + ansible.builtin.set_fact: + bmc_file_ips: "{{ (bmc_file_content['content'] | b64decode).splitlines() | map('regex_replace', ',.*', '') | list }}" + when: bmc_file_content is defined + +- name: Find missing BMC IPs + ansible.builtin.set_fact: + missing_bmc_ips: "{{ inventory_bmc_ips | difference(bmc_file_ips) }}" + +- name: Show warning for missing BMC IPs (if any) + ansible.builtin.pause: + seconds: "{{ pause_time_15 }}" + prompt: "{{ warning_bmc_group_data_file_not_updated_msg }}" + when: missing_bmc_ips | length > 0 + - name: Validate BMC group data file validate_bmc_group_data: nodes_bmc_ips: "{{ nodes | map(attribute='bmc_ip') | list }}" From 9cdb83f65407216fdb571988ad8ec19506b0c562 Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Mon, 3 Nov 2025 15:18:44 +0530 Subject: [PATCH 08/20] Update main.yml Signed-off-by: Kratika Patidar --- telemetry/roles/telemetry_validation/vars/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index 4f1c106c52..af38f62040 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -22,6 +22,10 @@ bmc_group_data_filename: "/opt/omnia/telemetry/bmc_group_data.csv" warning_telemetry_support_false: | "[WARNING] idrac_telemetry_support are false in telemetry_config.yml. Omnia does not deploy telemetry feature if none of the support category is true." +warning_bmc_group_data_file_not_updated_msg: | + "[WARNING] The following BMC IPs are missing from {{ bmc_group_data_filename }}: + {{ missing_bmc_ips | join('\n') }} + Please re-run the playbook to make sure that bmc_ips are added in {{ bmc_group_data_filename }}." telemetry_config_syntax_fail_msg: "Failed. Syntax errors present in telemetry_config.yml. Fix errors and re-run playbook again." warning_idrac_telemetry_support_false: | "[WARNING] idrac_telemetry_support is set to false in telemetry_config.yml. This means iDRAC telemetry will not be activated. From bf6c21d64573c0c383d73cb6b73fde8418f606cb Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Mon, 3 Nov 2025 18:42:16 +0530 Subject: [PATCH 09/20] Changed boot mode from once to continuous --- utils/roles/idrac_pxe_boot/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/roles/idrac_pxe_boot/vars/main.yml b/utils/roles/idrac_pxe_boot/vars/main.yml index 35d2431a45..07693f1640 100644 --- a/utils/roles/idrac_pxe_boot/vars/main.yml +++ b/utils/roles/idrac_pxe_boot/vars/main.yml @@ -19,7 +19,7 @@ restart_host: true force_restart: false # Set boot source override mode. Valid values are once, continuous, or disabled -boot_source_override_enabled: once +boot_source_override_enabled: continuous # Set boot source override target. Valid values are pxe,uefi_http,sd_card,uefi_target,utilities,bios_setup,hdd,cd,floppy,none boot_source_override_target: pxe From d771cbb05fb3ff53cb94d28f8ad8fa530f338521 Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Tue, 4 Nov 2025 10:33:41 +0530 Subject: [PATCH 10/20] Update main.yml Signed-off-by: Kratika Patidar --- telemetry/roles/telemetry_validation/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index af38f62040..211f4d7375 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -25,7 +25,7 @@ warning_telemetry_support_false: | warning_bmc_group_data_file_not_updated_msg: | "[WARNING] The following BMC IPs are missing from {{ bmc_group_data_filename }}: {{ missing_bmc_ips | join('\n') }} - Please re-run the playbook to make sure that bmc_ips are added in {{ bmc_group_data_filename }}." + If telemetry collection required for missing IPs then re-run the playbook." telemetry_config_syntax_fail_msg: "Failed. Syntax errors present in telemetry_config.yml. Fix errors and re-run playbook again." warning_idrac_telemetry_support_false: | "[WARNING] idrac_telemetry_support is set to false in telemetry_config.yml. This means iDRAC telemetry will not be activated. From 70be7ab294f55b560a424bcd62304267917f40f3 Mon Sep 17 00:00:00 2001 From: Nagachandan-P Date: Tue, 4 Nov 2025 06:56:31 +0000 Subject: [PATCH 11/20] precise permission for epilog file --- .../templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 | 2 +- .../templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 index 88f4212994..b006446e4a 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 @@ -100,7 +100,7 @@ - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm - chmod {{ file_mode_400 }} /etc/munge/munge.key - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ - - chmod +x /etc/slurm/epilog.d/logout_user.sh + - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh - mkdir -p /var/spool/slurmd - chmod {{ file_mode_755 }} /var/spool/slurmd - chown -R {{ user }}:{{ slurm_group_name }} /var/spool/slurmd diff --git a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 index 88f4212994..b006446e4a 100644 --- a/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 +++ b/discovery/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 @@ -100,7 +100,7 @@ - chmod {{ file_mode_755 }} /var/log/slurm /var/run/slurm /var/spool /var/lib/slurm - chmod {{ file_mode_400 }} /etc/munge/munge.key - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/ - - chmod +x /etc/slurm/epilog.d/logout_user.sh + - chmod {{ file_mode_755 }} /etc/slurm/epilog.d/logout_user.sh - mkdir -p /var/spool/slurmd - chmod {{ file_mode_755 }} /var/spool/slurmd - chown -R {{ user }}:{{ slurm_group_name }} /var/spool/slurmd From a1fbac896f6902c87526280b00bef0a15f47ca8a Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Tue, 4 Nov 2025 12:42:02 +0530 Subject: [PATCH 12/20] Added required prereq comments --- utils/roles/idrac_pxe_boot/tasks/main.yml | 2 +- utils/roles/idrac_pxe_boot/vars/main.yml | 6 +++--- utils/set_pxe_boot.yml | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/utils/roles/idrac_pxe_boot/tasks/main.yml b/utils/roles/idrac_pxe_boot/tasks/main.yml index e4566cbff7..0a9078f667 100644 --- a/utils/roles/idrac_pxe_boot/tasks/main.yml +++ b/utils/roles/idrac_pxe_boot/tasks/main.yml @@ -29,7 +29,7 @@ ansible.builtin.set_fact: reboot_type: "{{ 'none' if not restart_host else ('force_restart' if force_restart else 'graceful_restart') }}" -- name: Boot idrac pxe +- name: Set boot from pxe dellemc.openmanage.idrac_boot: idrac_ip: "{{ inventory_hostname }}" idrac_user: "{{ bmc_username | default(hostvars['localhost']['bmc_username']) }}" diff --git a/utils/roles/idrac_pxe_boot/vars/main.yml b/utils/roles/idrac_pxe_boot/vars/main.yml index 07693f1640..bebd2b4a42 100644 --- a/utils/roles/idrac_pxe_boot/vars/main.yml +++ b/utils/roles/idrac_pxe_boot/vars/main.yml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -# Change to false for not restarting host. only settign pxe_boot will happen +# Change to false for not restarting host. only setting pxe_boot will happen restart_host: true # Change to true for forceful reboot. by default graceful will happen @@ -25,8 +25,8 @@ boot_source_override_enabled: continuous boot_source_override_target: pxe # Usage: main.yml -lc_check_fail_msg: "Failed. LC is not ready. Retry again after LC is ready" +lc_check_fail_msg: "Failed. iDRAC is not ready. Retry again after iDRAC is ready" provision_os_msg: "OS provisioning is initiated. Wait for installation to complete for all servers." -pxe_provisioning_fail_msg: "OS provisioning using PXE failed. This could be due to outdated NIC firmware. Re-run discovery.yml after fixing the issue" +pxe_provisioning_fail_msg: "OS booting using PXE failed. This could be due to outdated NIC firmware. Re-run set_pxe_boot.yml after fixing the issue" bmc_validation_fail_msg: "Failed. bmc group in inventory must have atleast one bmc ip." unreachable_idrac_msg: "iDRAC is unreachable. pxe boot might be set. Please check the host reboot status manually" diff --git a/utils/set_pxe_boot.yml b/utils/set_pxe_boot.yml index 0e46b6dd56..cabbbe38f2 100644 --- a/utils/set_pxe_boot.yml +++ b/utils/set_pxe_boot.yml @@ -12,6 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. --- +# ------------------------------------------------------------------------- +# PXE PREREQUISITES +# ------------------------------------------------------------------------- +# 1. Dell iDRAC BMCs must be reachable from the Ansible controller +# 2. PXE (Pre‑boot eXecution Environment) support – the NIC’s +# firmware must implement the PXE option and must be enabled. +# 2. The `dellemc.openmanage` Ansible collection must be installed: +# ansible-galaxy collection install dellemc.openmanage +# 3. iDRAC firmware version must support the “Boot Source Override” +# API (most modern iDRAC9/10 firmware do). +# 4. The TFTP/NFS/HTTP server that provides the PXE +# boot image must be reachable by the target nodes once the iDRAC +# is set to PXE mode. +# ------------------------------------------------------------------------- - name: Set_fact for fetch omnia config credentials hosts: localhost connection: local @@ -26,8 +40,8 @@ - name: Invoke get_config_credentials.yml ansible.builtin.import_playbook: credential_utility/get_config_credentials.yml +# This configures Dell iDRAC BMCs to boot a host from PXE (network) and optionally reboots the server. # This will set the boot mode to pxe -# WARNING: This play will RESTART the host # Note: Restart will not happen if the server is powered off, only pxe mode will be set. - name: Reboot Host via PXE hosts: bmc From 388a3a00acdbc56f05c84edf82913132df65898d Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Tue, 4 Nov 2025 12:43:16 +0530 Subject: [PATCH 13/20] Typo fix --- utils/set_pxe_boot.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/set_pxe_boot.yml b/utils/set_pxe_boot.yml index cabbbe38f2..86df0199fb 100644 --- a/utils/set_pxe_boot.yml +++ b/utils/set_pxe_boot.yml @@ -18,11 +18,11 @@ # 1. Dell iDRAC BMCs must be reachable from the Ansible controller # 2. PXE (Pre‑boot eXecution Environment) support – the NIC’s # firmware must implement the PXE option and must be enabled. -# 2. The `dellemc.openmanage` Ansible collection must be installed: +# 3. The `dellemc.openmanage` Ansible collection must be installed: # ansible-galaxy collection install dellemc.openmanage -# 3. iDRAC firmware version must support the “Boot Source Override” +# 4. iDRAC firmware version must support the “Boot Source Override” # API (most modern iDRAC9/10 firmware do). -# 4. The TFTP/NFS/HTTP server that provides the PXE +# 5. The TFTP/NFS/HTTP server that provides the PXE # boot image must be reachable by the target nodes once the iDRAC # is set to PXE mode. # ------------------------------------------------------------------------- From 2f3ec83a07e6fd5c9d7a40dd6a3e209bb0903e84 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Tue, 4 Nov 2025 17:52:13 +0530 Subject: [PATCH 14/20] vip address validation for k8s --- .../schema/high_availability_config.json | 7 +++- .../high_availability_validation.py | 37 +++++++++++-------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/common/library/module_utils/input_validation/schema/high_availability_config.json b/common/library/module_utils/input_validation/schema/high_availability_config.json index 63f7e5e717..9ca6eb3c0f 100644 --- a/common/library/module_utils/input_validation/schema/high_availability_config.json +++ b/common/library/module_utils/input_validation/schema/high_availability_config.json @@ -58,8 +58,11 @@ ], "properties": { "virtual_ip_address": { - "type": "string", - "pattern": "^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$" + "type": "string", + "allOf": [ + { "pattern": "^[0-9.]+$" }, + { "format": "ipv4" } + ] }, "active_node_service_tags": { "type": ["array", "null"], diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index b158294bf5..a555acc19d 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -41,7 +41,8 @@ def get_roles_config_json(input_file_path, logger, module, omnia_base_dir, proje Returns: dict: The roles configuration as json. """ - roles_config_file_path = create_file_path(input_file_path, file_names["functional_groups_config"]) + roles_config_file_path = create_file_path(input_file_path, + file_names["functional_groups_config"]) roles_config_json = validation_utils.load_yaml_as_json( roles_config_file_path, omnia_base_dir, project_name, logger, module ) @@ -307,7 +308,6 @@ def validate_vip_address( - None: The function does not return any value, it only appends error messages to the errors list. """ - # validate if the same virtual_ip_address is already use if vip_address in service_node_vip: errors.append( @@ -318,15 +318,12 @@ def validate_vip_address( ) ) else: - # virtual_ip_address is mutually exclusive with admin static and dynamic ranges - vip_within_static_range = validation_utils.is_ip_within_range( - admin_network["static_range"], vip_address - ) + # virtual_ip_address is mutually exclusive with admin dynamic ranges vip_within_dynamic_range = validation_utils.is_ip_within_range( admin_network["dynamic_range"], vip_address ) - if vip_within_static_range or vip_within_dynamic_range: + if vip_within_dynamic_range: errors.append( create_error_msg( f"{config_type} virtual_ip_address", @@ -350,7 +347,6 @@ def validate_k8s_head_node_ha( config_type, ha_data, network_spec_data, - roles_config_json, all_service_tags, ha_node_vip_list ): @@ -375,10 +371,9 @@ def validate_k8s_head_node_ha( None: Errors are collected in the provided `errors` list. """ admin_network = network_spec_data["admin_network"] - admin_static_range = admin_network.get("static_range", "N/A") admin_dynamic_range = admin_network.get("dynamic_range", "N/A") + admin_netmaskbits = network_spec_data.get("admin_netmaskbits") oim_admin_ip = network_spec_data["oim_admin_ip"] - if not isinstance(ha_data, list): ha_data = [ha_data] for hdata in ha_data: @@ -388,7 +383,7 @@ def validate_k8s_head_node_ha( # validate active_node_service_tag and passive_node_service_tag all_service_tags_set = set(all_service_tags) active_node_service_tags_set = set(active_node_service_tags) - + vip_address = hdata.get("virtual_ip_address") # Find the intersection common_tags = all_service_tags_set & active_node_service_tags_set @@ -402,8 +397,19 @@ def validate_k8s_head_node_ha( ) ) + if vip_address: + validate_vip_address( + errors, + config_type, + vip_address, + ha_node_vip_list, + admin_network, + admin_netmaskbits, + oim_admin_ip + ) + if external_loadbalancer_ip: - ip_ranges = [admin_static_range, admin_dynamic_range, external_loadbalancer_ip] + ip_ranges = [admin_dynamic_range, external_loadbalancer_ip] does_overlap, _ = validation_utils.check_overlap(ip_ranges) if does_overlap: @@ -523,7 +529,6 @@ def validate_ha_config(ha_data, mandatory_fields, errors, config_type=None): config_type, ha_data, network_spec_info, - roles_config_json, all_service_tags, ha_node_vip_list, ) @@ -550,14 +555,14 @@ def validate_ha_config(ha_data, mandatory_fields, errors, config_type=None): errors.append(f"Missing key in HA data: {e}") ha_configs = [ - ("service_k8s_cluster_ha", ["virtual_ip_address", "active_node_service_tags"]) + ("service_k8s_cluster_ha", ["virtual_ip_address", "active_node_service_tags"], + "enable_k8s_ha") ] - for config_name, mandatory_fields in ha_configs: + for config_name, mandatory_fields, enable_key in ha_configs: ha_data = data.get(config_name) if ha_data: ha_data = ha_data[0] if isinstance(ha_data, list) else ha_data - enable_key = f'enable_{config_name.split("_", maxsplit=1)[0]}_ha' if ha_data.get(enable_key): if config_name == "oim_ha": ha_role = "oim_ha_node" # expected role to be defined in roles_config From da206198b872916670c9ddbcfca89dcbf4a48f3c Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Wed, 5 Nov 2025 17:21:41 +0530 Subject: [PATCH 15/20] Added validation for pod external also cleaned up unused --- .../high_availability_validation.py | 257 ++++++++---------- 1 file changed, 117 insertions(+), 140 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index a555acc19d..79f563842f 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -15,7 +15,9 @@ """ This module contains functions for validating high availability configuration. """ - +import csv +import os +import yaml from ansible.module_utils.input_validation.common_utils import validation_utils from ansible.module_utils.input_validation.common_utils import config from ansible.module_utils.input_validation.common_utils import en_us_validation_msg @@ -286,8 +288,8 @@ def validate_vip_address( errors, config_type, vip_address, - service_node_vip, admin_network, + pod_external_ip_list, admin_netmaskbits, oim_admin_ip ): @@ -309,43 +311,51 @@ def validate_vip_address( error messages to the errors list. """ # validate if the same virtual_ip_address is already use - if vip_address in service_node_vip: + + # virtual_ip_address is mutually exclusive with admin dynamic ranges + vip_within_dynamic_range = validation_utils.is_ip_within_range( + admin_network["dynamic_range"], vip_address + ) + + if vip_within_dynamic_range: errors.append( create_error_msg( - f"{config_type} virtual_ip_address:", + f"{config_type} virtual_ip_address", vip_address, - en_us_validation_msg.DUPLICATE_VIRTUAL_IP, + en_us_validation_msg.VIRTUAL_IP_NOT_VALID, ) ) - else: - # virtual_ip_address is mutually exclusive with admin dynamic ranges - vip_within_dynamic_range = validation_utils.is_ip_within_range( - admin_network["dynamic_range"], vip_address + # pod external + for pod_ext in pod_external_ip_list: + vip_within_pod_external = validation_utils.is_ip_within_range( + pod_ext, vip_address ) - if vip_within_dynamic_range: + if vip_within_pod_external: errors.append( create_error_msg( - f"{config_type} virtual_ip_address", + f"{config_type} vip in pod external", vip_address, en_us_validation_msg.VIRTUAL_IP_NOT_VALID, ) ) - # validate virtual_ip_address is in the admin subnet - if not validation_utils.is_ip_in_subnet(oim_admin_ip, admin_netmaskbits, vip_address): - errors.append( - create_error_msg( - f"{config_type} virtual_ip_address", - vip_address, - en_us_validation_msg.VIRTUAL_IP_NOT_IN_ADMIN_SUBNET, - ) + # pxe_map IPs + # validate virtual_ip_address is in the admin subnet + if not validation_utils.is_ip_in_subnet(oim_admin_ip, admin_netmaskbits, vip_address): + errors.append( + create_error_msg( + f"{config_type} virtual_ip_address", + vip_address, + en_us_validation_msg.VIRTUAL_IP_NOT_IN_ADMIN_SUBNET, ) + ) -def validate_k8s_head_node_ha( +def validate_service_k8s_cluster_ha( errors, config_type, ha_data, + input_file_path, network_spec_data, all_service_tags, ha_node_vip_list @@ -374,6 +384,21 @@ def validate_k8s_head_node_ha( admin_dynamic_range = admin_network.get("dynamic_range", "N/A") admin_netmaskbits = network_spec_data.get("admin_netmaskbits") oim_admin_ip = network_spec_data["oim_admin_ip"] + + with open(os.path.join(input_file_path, "provision_config.yml"), "r", encoding="utf-8") as f: + prov_cfg = yaml.safe_load(f) + + with open(prov_cfg.get('pxe_mapping_file_path'), newline='', encoding='utf-8') as csvfile: + pxe_list = list(csv.DictReader(csvfile, delimiter=",")) + pxe_admin_ips = [item["ADMIN_IP"] for item in pxe_list] + pxe_bmc_ips = [item["BMC_IP"] for item in pxe_list] + + with open(os.path.join(input_file_path, "omnia_config.yml"), "r", encoding="utf-8") as omniacfg: + omnia_config = yaml.safe_load(omniacfg) + pod_external_ip_list = [item.get("pod_external_ip_range") + for item in omnia_config.get('service_k8s_cluster') + if item.get('deployment', False)] + if not isinstance(ha_data, list): ha_data = [ha_data] for hdata in ha_data: @@ -398,12 +423,19 @@ def validate_k8s_head_node_ha( ) if vip_address: + for ip_list in (ha_node_vip_list, pxe_admin_ips, pxe_bmc_ips): + if vip_address in ip_list: + errors.append( + create_error_msg( + f"{config_type} virtual_ip_duplicate", + vip_address, + en_us_validation_msg.DUPLICATE_VIRTUAL_IP)) validate_vip_address( errors, config_type, vip_address, - ha_node_vip_list, admin_network, + pod_external_ip_list, admin_netmaskbits, oim_admin_ip ) @@ -418,62 +450,62 @@ def validate_k8s_head_node_ha( ) - -def validate_slurm_head_node_ha( - errors, - config_type, - ha_data, - network_spec_data, - _roles_config_json, - all_service_tags, - ha_node_vip_list -): +def load_network_spec(input_file_path): """ - Validates the high availability configuration for a service node. + Loads network specification from a YAML file and returns it as a dictionary. - Parameters: - errors (list): A list to store error messages. - config_type (str): The type of high availability configuration. - ha_data (dict): A dictionary containing high availability data. - network_spec_data (dict): A dictionary containing network specification data. - _roles_config_json (dict): A dictionary containing roles configuration data. - all_service_tags (list): A list of all service tags. - ha_node_vip_list (list): A list of virtual IP addresses for high availability nodes. + Args: + input_file_path (str): The path to the directory containing the YAML file. Returns: - None + dict: A dictionary containing network specification information. """ - active_node_service_tag = ha_data.get("active_node_service_tag") - passive_nodes = ha_data.get("passive_nodes", []) - vip_address = ha_data.get("virtual_ip_address") + with open(os.path.join(input_file_path, "network_spec.yml"), "r", encoding="utf-8") as f: + network_spec_json = yaml.safe_load(f) + network_spec_info = { + "admin_network": get_admin_static_dynamic_ranges(network_spec_json), + "admin_nic_name": get_admin_nic_name(network_spec_json), + "bmc_network": get_bmc_network(network_spec_json), + "bmc_nic_name": get_bmc_nic_name(network_spec_json), + "admin_netmaskbits": get_admin_netmaskbits(network_spec_json), + "admin_uncorrelated_node_start_ip": get_admin_uncorrelated_node_start_ip( + network_spec_json + ), + "oim_admin_ip": get_primary_oim_admin_ip(network_spec_json) + } + return network_spec_info - # get network_spec data - admin_network = network_spec_data["admin_network"] - admin_netmaskbits = network_spec_data["admin_netmaskbits"] - oim_admin_ip = network_spec_data["oim_admin_ip"] +def validate_ha_config(ha_data, mandatory_fields, errors, config_type, + input_file_path, all_service_tags, ha_node_vip_list): + """ + Validates high availability configuration. - # validate active_node_service_tag and passive_node_service_tag - validate_service_tag_presence( - errors, config_type, all_service_tags, active_node_service_tag, passive_nodes - ) + Args: + ha_data (dict): The high availability configuration data. + mandatory_fields (list): The list of mandatory fields in the HA configuration. + errors (list): The list to store error messages. + config_type (str): The type of HA configuration. + input_file_path (str): The path to the directory containing the YAML file. + all_service_tags (list): The list of all service tags. + ha_node_vip_list (list): The list of HA node VIPs. - # validate if duplicate virtual ip address is present - if vip_address: - validate_vip_address( + Returns: + None + """ + ha_validation = { + "service_k8s_cluster_ha": validate_service_k8s_cluster_ha + } + network_spec_info = load_network_spec(input_file_path) + check_mandatory_fields(mandatory_fields, ha_data, errors) + if config_type in ha_validation: + ha_validation[config_type]( errors, config_type, - vip_address, - ha_node_vip_list, - admin_network, - admin_netmaskbits, - oim_admin_ip - ) - -# Dispatch table maps config_type to validation handler -ha_validation = { - "service_k8s_cluster_ha": validate_k8s_head_node_ha -} - + ha_data, + input_file_path, + network_spec_info, + all_service_tags, + ha_node_vip_list) def validate_high_availability_config( input_file_path, data, logger, module, omnia_base_dir, _module_utils_base, project_name @@ -496,63 +528,6 @@ def validate_high_availability_config( errors = [] ha_node_vip_list = [] all_service_tags = set() - network_spec_file_path = create_file_path(input_file_path, file_names["network_spec"]) - network_spec_json = validation_utils.load_yaml_as_json( - network_spec_file_path, omnia_base_dir, project_name, logger, module - ) - - # load roles_config for L2 validations - roles_config_json = get_roles_config_json( - input_file_path, logger, module, omnia_base_dir, project_name - ) - - network_spec_info = { - "admin_network": get_admin_static_dynamic_ranges(network_spec_json), - "admin_nic_name": get_admin_nic_name(network_spec_json), - "bmc_network": get_bmc_network(network_spec_json), - "bmc_nic_name": get_bmc_nic_name(network_spec_json), - "admin_netmaskbits": get_admin_netmaskbits(network_spec_json), - "admin_uncorrelated_node_start_ip": get_admin_uncorrelated_node_start_ip( - network_spec_json - ), - "oim_admin_ip": get_primary_oim_admin_ip(network_spec_json) - } - - # pylint: disable=too-many-branches - def validate_ha_config(ha_data, mandatory_fields, errors, config_type=None): - try: - check_mandatory_fields(mandatory_fields, ha_data, errors) - - if config_type in ha_validation: - ha_validation[config_type]( - errors, - config_type, - ha_data, - network_spec_info, - all_service_tags, - ha_node_vip_list, - ) - - # append all the active and passive node service tags to a set - if "active_node_service_tag" in ha_data: - all_service_tags.add(ha_data["active_node_service_tag"]) - elif "active_node_service_tags" in ha_data: - all_service_tags.update(ha_data.get("active_node_service_tags", [])) - - if "passive_nodes" in ha_data: - for node_service_tag in ha_data.get("passive_nodes", []): - all_service_tags.update(node_service_tag.get("node_service_tags", [])) - - if "virtual_ip_address" in ha_data: - ha_node_vip_list.append(ha_data["virtual_ip_address"]) - elif "admin_virtual_ip_address" in ha_data: - ha_node_vip_list.append(ha_data["admin_virtual_ip_address"]) - elif "bmc_virtual_ip_address" in ha_data: - ha_node_vip_list.append(ha_data["bmc_virtual_ip_address"]) - - except KeyError as e: - logger.error(f"Missing key in HA data: {e}") - errors.append(f"Missing key in HA data: {e}") ha_configs = [ ("service_k8s_cluster_ha", ["virtual_ip_address", "active_node_service_tags"], @@ -564,22 +539,24 @@ def validate_ha_config(ha_data, mandatory_fields, errors, config_type=None): if ha_data: ha_data = ha_data[0] if isinstance(ha_data, list) else ha_data if ha_data.get(enable_key): - if config_name == "oim_ha": - ha_role = "oim_ha_node" # expected role to be defined in roles_config - check_and_validate_ha_role_in_roles_config(errors, roles_config_json, ha_role) - validate_ha_config(ha_data, mandatory_fields, errors, config_type=config_name) - elif config_name == "service_node_ha": - ha_role = "service_node" # expected role to be defined in roles_config - check_and_validate_ha_role_in_roles_config(errors, roles_config_json, ha_role) - for service_node in ha_data["service_nodes"]: - validate_ha_config( - service_node, - ["virtual_ip_address", "active_node_service_tag", "passive_nodes"], - errors, - config_type=config_name, - ) - else: - validate_ha_config(ha_data, mandatory_fields, errors, config_type=config_name) + # append all the active and passive node service tags to a set + if "active_node_service_tag" in ha_data: + all_service_tags.add(ha_data["active_node_service_tag"]) + elif "active_node_service_tags" in ha_data: + all_service_tags.update(ha_data.get("active_node_service_tags", [])) + + if "passive_nodes" in ha_data: + for node_service_tag in ha_data.get("passive_nodes", []): + all_service_tags.update(node_service_tag.get("node_service_tags", [])) + + if "admin_virtual_ip_address" in ha_data: + ha_node_vip_list.append(ha_data["admin_virtual_ip_address"]) + elif "bmc_virtual_ip_address" in ha_data: + ha_node_vip_list.append(ha_data["bmc_virtual_ip_address"]) + # oim_ha and service_node_ha has been removed + validate_ha_config(ha_data, mandatory_fields, errors, config_name, + os.path.dirname(input_file_path), + all_service_tags, ha_node_vip_list) else: logger.warning(f"Configuration for {config_name} not found.") From 1642972613feac870ccc7fb74b4037d97e968f48 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Wed, 5 Nov 2025 17:26:45 +0530 Subject: [PATCH 16/20] Message update for pod_external_ip --- .../input_validation/common_utils/en_us_validation_msg.py | 2 ++ .../validation_flows/high_availability_validation.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py b/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py index d7af9d4792..7b494487e2 100644 --- a/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py +++ b/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py @@ -314,6 +314,8 @@ def server_spec_network_key_fail_msg(nic_device): "Check high_availability_config.yml and network_spec.yml") VIRTUAL_IP_NOT_VALID = ("should be outside the admin static and dynamic ranges. " "Check high_availability_config.yml and network_spec.yml") +VIRTUAL_IP_NOT_POD_EXT = ("should be outside the pod_external_ip ranges. " + "Check high_availability_config.yml and omnia_config.yml") BMC_VIRTUAL_IP_NOT_VALID = ("should be outside any bmc static and dynamic ranges. " "Check high_availability_config.yml, network_spec.yml, and " "roles_config.yml") diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index 79f563842f..af22106858 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -336,7 +336,7 @@ def validate_vip_address( create_error_msg( f"{config_type} vip in pod external", vip_address, - en_us_validation_msg.VIRTUAL_IP_NOT_VALID, + en_us_validation_msg.VIRTUAL_IP_NOT_POD_EXT, ) ) From 6c6bd88fdbb31d70477b0752cd1fa4398419be04 Mon Sep 17 00:00:00 2001 From: pullan1 Date: Thu, 6 Nov 2025 00:51:49 -0500 Subject: [PATCH 17/20] codeready builder reponame correction Signed-off-by: pullan1 --- .../input_validation/validation_flows/local_repo_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py b/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py index da856b5a17..558bf3b92a 100644 --- a/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py @@ -80,7 +80,7 @@ def validate_local_repo_config(input_file_path, data, arch_list = url_list + [url+'_'+arch for url in url_list] # define base repos dynamically for this arch if subscription registered if sub_result: - base_repo_names = [f"{arch}_baseos",f"{arch}_appstream",f"{arch}_codeready_builder"] + base_repo_names = [f"{arch}_baseos",f"{arch}_appstream",f"{arch}_codeready-builder"] for repurl in arch_list: repos = data.get(repurl) if repos: From 90dae623ecdca348ecfcb4899c8031c03a9965d1 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Thu, 6 Nov 2025 16:19:28 +0530 Subject: [PATCH 18/20] Issue fix for ipv4 fetching when default route not enabled --- utils/roles/node_repo_update/tasks/main.yml | 11 ++++++----- utils/set_pxe_boot.yml | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/utils/roles/node_repo_update/tasks/main.yml b/utils/roles/node_repo_update/tasks/main.yml index 4ea6bbb74d..c6d1755de1 100644 --- a/utils/roles/node_repo_update/tasks/main.yml +++ b/utils/roles/node_repo_update/tasks/main.yml @@ -30,8 +30,10 @@ - name: Set hosts dict ansible.builtin.set_fact: - hosts_dict: "{{ hosts_dict | default({}) | combine({item: hostvars[item]['ansible_default_ipv4']['address'] + ' ' - + hostvars[item]['ansible_fqdn'] + ' ' + hostvars[item]['ansible_fqdn'] | split('.') | first}) }}" + hosts_dict: "{{ hosts_dict | default({}) | + combine({item: (hostvars[item]['ansible_default_ipv4']['address'] | + default(hostvars[item]['ansible_all_ipv4_addresses'][0]) | default(hostvars[item]['inventory_hostname'])) + + ' ' + hostvars[item]['ansible_fqdn'] + ' ' + hostvars[item]['ansible_fqdn'] | split('.') | first}) }}" delegate_to: localhost run_once: true loop: "{{ ansible_play_hosts | default([]) }}" @@ -53,12 +55,11 @@ - name: Update hosts file ansible.builtin.lineinfile: dest: "{{ hosts_file_dest }}" - line: "{{ hostvars[item]['ansible_default_ipv4']['address'] }} {{ hostvars[item]['ansible_fqdn'] }} - {{ hostvars[item]['ansible_fqdn'] | split('.') | first }}" + line: "{{ item }}" state: present create: true mode: "{{ hosts_file_mode }}" - with_items: "{{ ansible_play_hosts | default([]) }}" + loop: "{{ hosts_dict.values() | default([]) }}" rescue: - name: Updating hosts file failed ansible.builtin.fail: diff --git a/utils/set_pxe_boot.yml b/utils/set_pxe_boot.yml index 86df0199fb..f6cf1258c7 100644 --- a/utils/set_pxe_boot.yml +++ b/utils/set_pxe_boot.yml @@ -16,11 +16,11 @@ # PXE PREREQUISITES # ------------------------------------------------------------------------- # 1. Dell iDRAC BMCs must be reachable from the Ansible controller -# 2. PXE (Pre‑boot eXecution Environment) support – the NIC’s +# 2. PXE (Pre‑boot eXecution Environment) support – the NIC's # firmware must implement the PXE option and must be enabled. # 3. The `dellemc.openmanage` Ansible collection must be installed: # ansible-galaxy collection install dellemc.openmanage -# 4. iDRAC firmware version must support the “Boot Source Override” +# 4. iDRAC firmware version must support the 'Boot Source Override' # API (most modern iDRAC9/10 firmware do). # 5. The TFTP/NFS/HTTP server that provides the PXE # boot image must be reachable by the target nodes once the iDRAC From 507094769f1a7fd8fcba3bece0067d5d9290f6b1 Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Mon, 10 Nov 2025 10:56:55 +0530 Subject: [PATCH 19/20] Updating build image with fix tag --- build_image_aarch64/build_image_aarch64.yml | 4 +-- build_image_x86_64/build_image_x86_64.yml | 15 +++++++-- .../image_creation/tasks/build_image_tag.yml | 33 +++++++++++++++++++ .../roles/image_creation/vars/main.yml | 7 +++- 4 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 build_image_x86_64/roles/image_creation/tasks/build_image_tag.yml diff --git a/build_image_aarch64/build_image_aarch64.yml b/build_image_aarch64/build_image_aarch64.yml index d93d0e31ae..9e62087ba4 100644 --- a/build_image_aarch64/build_image_aarch64.yml +++ b/build_image_aarch64/build_image_aarch64.yml @@ -54,11 +54,11 @@ oim_group: true tags: always -- name: Configure auth for openchami +- name: Configure auth for OpenCHAMI hosts: oim connection: ssh tasks: - - name: Openchami cluster authentication + - name: OpenCHAMI cluster authentication ansible.builtin.include_tasks: "{{ playbook_dir }}/../common/tasks/common/openchami_auth.yml" vars: oim_node_name: "{{ hostvars['localhost']['oim_node_name'] }}" diff --git a/build_image_x86_64/build_image_x86_64.yml b/build_image_x86_64/build_image_x86_64.yml index 97cc170c99..7bc7c6e8ac 100644 --- a/build_image_x86_64/build_image_x86_64.yml +++ b/build_image_x86_64/build_image_x86_64.yml @@ -44,11 +44,11 @@ oim_group: true tags: always -- name: Configure auth for openchami +- name: Configure auth for OpenCHAMI hosts: oim connection: ssh tasks: - - name: Openchami cluster authentication + - name: OpenCHAMI cluster authentication ansible.builtin.include_tasks: "{{ playbook_dir }}/../common/tasks/common/openchami_auth.yml" vars: oim_node_name: "{{ hostvars['localhost']['oim_node_name'] }}" @@ -69,7 +69,16 @@ roles: - fetch_packages -- name: Openchmi build image for x86_64 +- name: Tagging OpenCHAMI image + hosts: oim + connection: ssh + tasks: + - name: Tag OpenCHAMI image + ansible.builtin.include_role: + name: image_creation + tasks_from: build_image_tag.yml + +- name: OpenCHAMI build image for x86_64 hosts: localhost connection: local gather_facts: false diff --git a/build_image_x86_64/roles/image_creation/tasks/build_image_tag.yml b/build_image_x86_64/roles/image_creation/tasks/build_image_tag.yml new file mode 100644 index 0000000000..c344f59d8f --- /dev/null +++ b/build_image_x86_64/roles/image_creation/tasks/build_image_tag.yml @@ -0,0 +1,33 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +- name: Pull specific OpenCHAMI image by version tag + ansible.builtin.command: + cmd: "podman pull {{ openchami_image_sha }}" + register: pull_result + changed_when: "'Image is up to date' not in pull_result.stdout" + +- name: Fail if image not pulled successfully + ansible.builtin.fail: + msg: "{{ pull_result.stdout }}" + when: pull_result.rc != 0 + +- name: Tagging OpenCHAMI image with stable name + ansible.builtin.command: + cmd: "{{ ochami_stable_image_tag }}" + args: + creates: "{{ ochami_stable_image_path }}" + register: tag_result + changed_when: "'Tagged' in tag_result.stdout" diff --git a/build_image_x86_64/roles/image_creation/vars/main.yml b/build_image_x86_64/roles/image_creation/vars/main.yml index 4218deadab..d7518ad4b0 100644 --- a/build_image_x86_64/roles/image_creation/vars/main.yml +++ b/build_image_x86_64/roles/image_creation/vars/main.yml @@ -13,6 +13,7 @@ # limitations under the License. --- +openchami_image_sha: "ghcr.io/openchami/image-build@sha256:52dd9d546951ce4f2f6f9febd08a228cfcb5b9e8e204ca4f5ee232f6be65d3a4" input_project_dir: "{{ hostvars['localhost']['input_project_dir'] }}" omnia_metadata_file: "/opt/omnia/.data/oim_metadata.yml" dir_permissions_644: "0644" @@ -33,7 +34,7 @@ ochami_compute_mounts: ochami_x86_64_image: - --entrypoint /bin/bash - - ghcr.io/openchami/image-build:latest + - ghcr.io/openchami/image-build:stable ochami_base_command: - -c 'update-ca-trust extract && image-build --config /home/builder/config.yaml --log-level DEBUG' @@ -48,3 +49,7 @@ compute_image_failure_msg: "Compute aarch64 image build job did not completed su # build_compute_image.yml openchami_compute_image_vars_template: "{{ role_path }}/templates/compute_images_templates.j2" openchami_compute_image_vars_path: "/opt/omnia/openchami/compute_images_template.yaml" + +# build_image_tag.yml +ochami_stable_image_tag: "podman tag {{ openchami_image_sha }} ghcr.io/openchami/image-build:stable" +ochami_stable_image_path: "/var/lib/containers/storage/overlay-images/{{ openchami_image_sha }}" From 27bfc8f3bf6950b2f93738e5b3587caae69afd67 Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Mon, 10 Nov 2025 12:41:47 +0530 Subject: [PATCH 20/20] Replace ansible installation with ansible-core Signed-off-by: Kratika Patidar --- .github/workflows/ansible-lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ansible-lint.yml b/.github/workflows/ansible-lint.yml index 8b00e2229e..76979bfda2 100644 --- a/.github/workflows/ansible-lint.yml +++ b/.github/workflows/ansible-lint.yml @@ -27,7 +27,8 @@ jobs: - name: Install Ansible and Ansible Lint run: | python -m pip install --upgrade pip - pip install ansible + pip install ansible-core + - name: Install Ansible Collections from requirements.yml run: |