From db0d7a63a54ce8f150253e91b8dd52c9f1c2c26d Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 12:09:17 +0530 Subject: [PATCH 01/76] create telemetry group --- .../roles/telemetry_validation/tasks/add_host_goups.yml | 6 ++++++ telemetry/roles/telemetry_validation/vars/main.yml | 2 ++ 2 files changed, 8 insertions(+) diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index fe9b931754..993ff4d38f 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -65,3 +65,9 @@ when: - enable_oim_ha - host_inventory.oim_ha_hosts | length > 0 + +- name: Create telemetry group + ansible.builtin.add_host: + hostname: "{{ telemetry_host_group }}" + ansible_host: "{{ telemetry_host }}" + groups: "{{ telemetry_host_group }}" diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index 6207cbd7d6..c9e34af05b 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -119,6 +119,8 @@ invalid_parent_tags_message: | If service nodes are not provisioned or compute node provisioning not initiated, please run discovery_provision.yml to provision service nodes and compute nodes from service nodes. And then run `telemetry.yml` playbook again. 
+telemetry_host_group: "telemetry_group" +telemetry_host: "{{ 'kube_control_plane' if federated_idrac_telemetry_collection | default(false) else 'oim' }}" # Usage: include_high_availability_config.yml high_availability_config_path: "{{ input_project_dir }}/high_availability_config.yml" From 9314ffff948181164ce49219bdfe1daae3b27928 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 12:15:47 +0530 Subject: [PATCH 02/76] enable idrac telemetry k8s --- .../tasks/fetch_pods_details.yml | 79 +++++++++++++++++++ .../tasks/initiate_telemetry.yml | 6 +- telemetry/roles/idrac_telemetry/vars/main.yml | 9 +++ telemetry/telemetry.yml | 28 +++---- 4 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml diff --git a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml new file mode 100644 index 0000000000..7e766753c5 --- /dev/null +++ b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml @@ -0,0 +1,79 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Fetch mysqldb pod details + block: + - name: Wait for idrac-telemetry pod to come to ready state + block: + - name: Wait for idrac-telemetry pod to come to ready state + ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ idrac_telemetry_k8s_name }}" + changed_when: false + rescue: + - name: Failed - idrac-telemetry pod is not running + ansible.builtin.fail: + msg: "{{ idrac_telemetry_pod_wait_fail_msg }}" + + - name: Wait for mysqldb pod to come to ready state + block: + - name: Wait for mysqldb pod to come to ready state + ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ mysqldb_k8s_name }}" + changed_when: false + rescue: + - name: Failed - mysqldb pod is not running + ansible.builtin.fail: + msg: "{{ mysqldb_pod_wait_fail_msg }}" + + - name: Get mysqlDB svc IP + ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.clusterIP}' + changed_when: false + register: mysql_svc_ip + + - name: Get mysqlDB svc port + ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].port}' + changed_when: false + register: mysql_svc_port + +- name: Fetch idrac-telemetry pod details + block: + - name: Wait for idrac-telemetry pod to come to ready state + block: + - name: Wait for idrac-telemetry pod to come to ready state + ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ idrac_telemetry_k8s_name }}" + changed_when: false + rescue: + - name: Failed - idrac-telemetry pod is not running + ansible.builtin.fail: + msg: "{{ idrac_telemetry_pod_wait_fail_msg }}" + + - name: Wait for mysqldb pod to come to ready state + block: + - name: Wait for mysqldb pod to come to ready state + ansible.builtin.command: kubectl wait 
--for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ mysqldb_k8s_name }}" + changed_when: false + rescue: + - name: Failed - mysqldb pod is not running + ansible.builtin.fail: + msg: "{{ mysqldb_pod_wait_fail_msg }}" + + - name: Get idrac-telemetry pod name + ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ idrac_telemetry_k8s_name }}" -o jsonpath="{.items[0].metadata.name}" + changed_when: false + register: idrac_telemetry_pod + +- name: Set telemetry pod details + ansible.builtin.set_fact: + idrac_telemetry_pod_name: "{{ idrac_telemetry_pod.stdout }}" + mysqldb_host: "{{ mysql_svc_ip.stdout }}" + mysqldb_container_port: "{{ mysql_svc_port.stdout }}" diff --git a/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml b/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml index 44e98adf7c..48c48e70cb 100644 --- a/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml +++ b/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml @@ -30,6 +30,10 @@ ansible.builtin.include_vars: "{{ playbook_dir }}/roles/telemetry_validation/vars/main.yml" no_log: true + - name: Fetch pod details for federated telemetry + ansible.builtin.include_tasks: fetch_pods_details.yml + when: hostvars['localhost']['federated_idrac_telemetry_collection'] + - name: Initialize variables ansible.builtin.set_fact: telemetry_idrac: [] @@ -173,7 +177,7 @@ python3 ./ConfigurationScripts/EnableOrDisableAllTelemetryReports.py -ip "{{ item }}" -u "{{ hostvars['localhost']['bmc_username'] }}" -p "{{ hostvars['localhost']['bmc_password'] }}" -s Enabled args: - chdir: "{{ telemetry_dir_path }}/{{ idrac_telemetry_scripting_folder }}" + chdir: "{{ idrac_telemetry_scripting_git_clone_path }}" with_items: "{{ telemetry_idrac }}" changed_when: false no_log: true diff --git a/telemetry/roles/idrac_telemetry/vars/main.yml b/telemetry/roles/idrac_telemetry/vars/main.yml index 301e9f9939..97a3475b08 100644 --- 
a/telemetry/roles/idrac_telemetry/vars/main.yml +++ b/telemetry/roles/idrac_telemetry/vars/main.yml @@ -85,3 +85,12 @@ telemetry_report_sn: | {% for item in failed_idrac + invalid_idrac_list %} - {{ item }} {% endfor %} + +# Usage: fetch_pods_details.yml +telemetry_namespace: "telemetry" +idrac_telemetry_k8s_name: idrac-telemetry +mysqldb_k8s_name: mysqldb +idrac_telemetry_pod_wait_fail_msg: "Execution failed as the idrac-telemetry pods did not start within the expected time. + Please re-run the playbook after verifying that the idrac-telemetry pods are in running state by executing the command 'kubectl get pods -A.'" +mysqldb_pod_wait_fail_msg: "Execution failed as the mysqldb pods did not start within the expected time. + Please re-run the playbook after verifying that the mysqldb pods are in running state by executing the command 'kubectl get pods -A.'" diff --git a/telemetry/telemetry.yml b/telemetry/telemetry.yml index 1887b742f1..550c1d5351 100644 --- a/telemetry/telemetry.yml +++ b/telemetry/telemetry.yml @@ -124,26 +124,26 @@ name: service_k8s_telemetry - name: Enable idrac telemetry in OIM - hosts: localhost - connection: local + hosts: telemetry_group + connection: ssh gather_facts: false tasks: - name: Enable idrac telemetry ansible.builtin.include_role: name: idrac_telemetry -- name: Initiate telemetry collection on OIM - hosts: oim - connection: ssh - gather_facts: false - tasks: - - name: Initiate telemetry collection - ansible.builtin.include_role: - name: idrac_telemetry - tasks_from: trigger_telemetry_collection.yml - when: - - hostvars['localhost']['telemetry_idrac'] is defined - - (hostvars['localhost']['telemetry_idrac'] | length > 0) +# - name: Initiate telemetry collection on OIM +# hosts: oim +# connection: ssh +# gather_facts: false +# tasks: +# - name: Initiate telemetry collection +# ansible.builtin.include_role: +# name: idrac_telemetry +# tasks_from: trigger_telemetry_collection.yml +# when: +# - hostvars['localhost']['telemetry_idrac'] 
is defined +# - (hostvars['localhost']['telemetry_idrac'] | length > 0) # - name: Enable idrac telemetry in SN # hosts: sn_active From b8da3aa3ca835215ee73895627e9c5b0852ecc3f Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 12:38:47 +0530 Subject: [PATCH 03/76] telemetry host update --- telemetry/roles/telemetry_validation/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index c9e34af05b..402040092f 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -120,7 +120,7 @@ invalid_parent_tags_message: | please run discovery_provision.yml to provision service nodes and compute nodes from service nodes. And then run `telemetry.yml` playbook again. telemetry_host_group: "telemetry_group" -telemetry_host: "{{ 'kube_control_plane' if federated_idrac_telemetry_collection | default(false) else 'oim' }}" +telemetry_host: "{{ groups['kube_control_plane'] if federated_idrac_telemetry_collection | default(false) else 'oim' }}" # Usage: include_high_availability_config.yml high_availability_config_path: "{{ input_project_dir }}/high_availability_config.yml" From 9a89eebef9a794b1ee8c2c832b630559a3abb767 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 12:38:47 +0530 Subject: [PATCH 04/76] telemetry host update --- .../roles/telemetry_validation/tasks/add_host_goups.yml | 5 +++-- telemetry/roles/telemetry_validation/vars/main.yml | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index 993ff4d38f..895bcc659a 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -68,6 +68,7 @@ - name: Create telemetry 
group ansible.builtin.add_host: - hostname: "{{ telemetry_host_group }}" - ansible_host: "{{ telemetry_host }}" + hostname: "{{ item }}" + ansible_host: "{{ item }}" groups: "{{ telemetry_host_group }}" + loop: "{{ telemetry_host }}" diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index c9e34af05b..f661887dc9 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -120,8 +120,11 @@ invalid_parent_tags_message: | please run discovery_provision.yml to provision service nodes and compute nodes from service nodes. And then run `telemetry.yml` playbook again. telemetry_host_group: "telemetry_group" -telemetry_host: "{{ 'kube_control_plane' if federated_idrac_telemetry_collection | default(false) else 'oim' }}" - +telemetry_host: >- + {{ groups['kube_control_plane'] + if federated_idrac_telemetry_collection | default(false) + else ['oim'] + }} # Usage: include_high_availability_config.yml high_availability_config_path: "{{ input_project_dir }}/high_availability_config.yml" fail_msg_high_availability_config_file: "high_availability_config.yml file doesn't exist." From 077b45ea216d9bbf3237a577b11ce3bd50777bfe Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 13:57:13 +0530 Subject: [PATCH 05/76] update k8s pod check --- .../tasks/fetch_pods_details.yml | 78 +++++++------------ 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml index 7e766753c5..e46c2fc6a7 100644 --- a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml +++ b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml @@ -13,64 +13,40 @@ # limitations under the License. 
--- -- name: Fetch mysqldb pod details +- name: Wait for idrac-telemetry pod to come to ready state block: - name: Wait for idrac-telemetry pod to come to ready state - block: - - name: Wait for idrac-telemetry pod to come to ready state - ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ idrac_telemetry_k8s_name }}" - changed_when: false - rescue: - - name: Failed - idrac-telemetry pod is not running - ansible.builtin.fail: - msg: "{{ idrac_telemetry_pod_wait_fail_msg }}" - - - name: Wait for mysqldb pod to come to ready state - block: - - name: Wait for mysqldb pod to come to ready state - ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ mysqldb_k8s_name }}" - changed_when: false - rescue: - - name: Failed - mysqldb pod is not running - ansible.builtin.fail: - msg: "{{ mysqldb_pod_wait_fail_msg }}" - - - name: Get mysqlDB svc IP - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.clusterIP}' - changed_when: false - register: mysql_svc_ip - - - name: Get mysqlDB svc port - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].port}' + ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ idrac_telemetry_k8s_name }}" changed_when: false - register: mysql_svc_port + rescue: + - name: Failed - idrac-telemetry pod is not running + ansible.builtin.fail: + msg: "{{ idrac_telemetry_pod_wait_fail_msg }}" -- name: Fetch idrac-telemetry pod details +- name: Wait for mysqldb pod to come to ready state block: - - name: Wait for idrac-telemetry pod to come to ready state - block: - - name: Wait for idrac-telemetry pod to come to ready state - ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ 
telemetry_namespace }}" pod -l app="{{ idrac_telemetry_k8s_name }}" - changed_when: false - rescue: - - name: Failed - idrac-telemetry pod is not running - ansible.builtin.fail: - msg: "{{ idrac_telemetry_pod_wait_fail_msg }}" - - name: Wait for mysqldb pod to come to ready state - block: - - name: Wait for mysqldb pod to come to ready state - ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ mysqldb_k8s_name }}" - changed_when: false - rescue: - - name: Failed - mysqldb pod is not running - ansible.builtin.fail: - msg: "{{ mysqldb_pod_wait_fail_msg }}" - - - name: Get idrac-telemetry pod name - ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ idrac_telemetry_k8s_name }}" -o jsonpath="{.items[0].metadata.name}" + ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ mysqldb_k8s_name }}" changed_when: false - register: idrac_telemetry_pod + rescue: + - name: Failed - mysqldb pod is not running + ansible.builtin.fail: + msg: "{{ mysqldb_pod_wait_fail_msg }}" + +- name: Get mysqlDB svc IP + ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.clusterIP}' + changed_when: false + register: mysql_svc_ip + +- name: Get mysqlDB svc port + ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].port}' + changed_when: false + register: mysql_svc_port + +- name: Get idrac-telemetry pod name + ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ idrac_telemetry_k8s_name }}" -o jsonpath="{.items[0].metadata.name}" + changed_when: false + register: idrac_telemetry_pod - name: Set telemetry pod details ansible.builtin.set_fact: From 41c70d5bbb06e140e9842f48d0a5bcfa7f05f9c3 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 16:41:49 
+0530 Subject: [PATCH 06/76] mysql db pod node ip and nodeport --- .../tasks/fetch_pods_details.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml index e46c2fc6a7..f3736bf8e8 100644 --- a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml +++ b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml @@ -34,14 +34,19 @@ msg: "{{ mysqldb_pod_wait_fail_msg }}" - name: Get mysqlDB svc IP - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.clusterIP}' + ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}"-n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].nodePort}' changed_when: false - register: mysql_svc_ip + register: mysql_svc_port -- name: Get mysqlDB svc port - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].port}' +- name: Get mysqlDB pod node name + ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ mysqldb_k8s_name }}" -o jsonpath="{.items[0].spec.nodeName}" changed_when: false - register: mysql_svc_port + register: mysql_node_name + +- name: Get mysqlDB node IP + ansible.builtin.command: kubectl get nodes -o=jsonpath='{.items[?(@.metadata.name=="{{ mysql_node_name.stdout }}")].status.addresses[?(@.type=="InternalIP")].address}' + changed_when: false + register: mysql_node_ip - name: Get idrac-telemetry pod name ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ idrac_telemetry_k8s_name }}" -o jsonpath="{.items[0].metadata.name}" @@ -51,5 +56,5 @@ - name: Set telemetry pod details ansible.builtin.set_fact: idrac_telemetry_pod_name: "{{ idrac_telemetry_pod.stdout }}" - mysqldb_host: "{{ mysql_svc_ip.stdout }}" + mysqldb_host: "{{ mysql_node_ip.stdout }}" 
mysqldb_container_port: "{{ mysql_svc_port.stdout }}" From 635521b87ac3e40118f1219a484fddf4230a0161 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 16:47:54 +0530 Subject: [PATCH 07/76] Mysql pod svc nodeport --- .../roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml index bb55cee2dd..f9e6024eba 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml @@ -96,7 +96,7 @@ labels: app: "{{ mysqldb_k8s_name }}" spec: - type: ClusterIP + type: NodePort ports: - name: mysqldb-http-port-1 port: "{{ mysqldb_container_port1 }}" From 047eec3dd32de27b73a2d4feeec928d9f5a2d91a Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Mon, 7 Jul 2025 16:53:30 +0530 Subject: [PATCH 08/76] mysql svc update --- telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml index f3736bf8e8..993880e4a4 100644 --- a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml +++ b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml @@ -33,8 +33,8 @@ ansible.builtin.fail: msg: "{{ mysqldb_pod_wait_fail_msg }}" -- name: Get mysqlDB svc IP - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}"-n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].nodePort}' +- name: Get mysqlDB svc port + ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].nodePort}' changed_when: false register: mysql_svc_port From 6e5fce6243c07070fce975122d7d1e29e9e10719 Mon Sep 17 00:00:00 2001 From: 
aditya-deshpande Date: Tue, 8 Jul 2025 10:17:03 +0530 Subject: [PATCH 09/76] git repo dir path update --- telemetry/roles/idrac_telemetry/vars/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/telemetry/roles/idrac_telemetry/vars/main.yml b/telemetry/roles/idrac_telemetry/vars/main.yml index 97a3475b08..fbd1bf7fa2 100644 --- a/telemetry/roles/idrac_telemetry/vars/main.yml +++ b/telemetry/roles/idrac_telemetry/vars/main.yml @@ -56,6 +56,8 @@ unreachable_service_node_bmc_msg: > invalid_bmc_warning_msg: | [WARNING] Some BMC IPs are not valid. Kindly address the issues mentioned above and execute telemetry.yml. Telemetry feature wont be enabled for these BMC IPs from {{ bmc_group_data_filename }} file. +service_cluster_idrac_telemetry_dir_path: "{{ omnia_nfs_share }}/service_cluster/telemetry/idrac_telemetry" +idrac_telemetry_scripting_git_clone_path: "{{ service_cluster_idrac_telemetry_dir_path }}/iDRAC-Telemetry-Scripting" # Usage: trigger_telemetry_collection.yml idrac_telemetry_container: "idrac_telemetry_receiver" From 5f9cde1304ba3bac33ce687c09e3917f20e6b523 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 8 Jul 2025 10:54:09 +0530 Subject: [PATCH 10/76] lint fix --- .../roles/idrac_telemetry/tasks/fetch_pods_details.yml | 2 +- .../roles/telemetry_validation/tasks/add_host_goups.yml | 2 +- telemetry/roles/telemetry_validation/vars/main.yml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml index 993880e4a4..4ef1e5002a 100644 --- a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml +++ b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml @@ -44,7 +44,7 @@ register: mysql_node_name - name: Get mysqlDB node IP - ansible.builtin.command: kubectl get nodes -o=jsonpath='{.items[?(@.metadata.name=="{{ mysql_node_name.stdout }}")].status.addresses[?(@.type=="InternalIP")].address}' + 
ansible.builtin.command: kubectl get nodes -o=jsonpath='{.items[?(@.metadata.name=="{{ mysql_node_name.stdout }}")].status.addresses[?(@.type=="InternalIP")].address}' # noqa: yaml[line-length] changed_when: false register: mysql_node_ip diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index 895bcc659a..bcd558d8ff 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -69,6 +69,6 @@ - name: Create telemetry group ansible.builtin.add_host: hostname: "{{ item }}" - ansible_host: "{{ item }}" + ansible_host: "{{ item }}" groups: "{{ telemetry_host_group }}" loop: "{{ telemetry_host }}" diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index f661887dc9..719d969c9f 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -121,9 +121,9 @@ invalid_parent_tags_message: | And then run `telemetry.yml` playbook again. 
telemetry_host_group: "telemetry_group" telemetry_host: >- - {{ groups['kube_control_plane'] - if federated_idrac_telemetry_collection | default(false) - else ['oim'] + {{ groups['kube_control_plane'] + if federated_idrac_telemetry_collection | default(false) + else ['oim'] }} # Usage: include_high_availability_config.yml high_availability_config_path: "{{ input_project_dir }}/high_availability_config.yml" From 7e89f75193c891446ebc3681e9f7289ecfef0995 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 8 Jul 2025 10:56:31 +0530 Subject: [PATCH 11/76] telemetry update --- telemetry/telemetry.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/telemetry/telemetry.yml b/telemetry/telemetry.yml index 550c1d5351..df820edba5 100644 --- a/telemetry/telemetry.yml +++ b/telemetry/telemetry.yml @@ -132,18 +132,18 @@ ansible.builtin.include_role: name: idrac_telemetry -# - name: Initiate telemetry collection on OIM -# hosts: oim -# connection: ssh -# gather_facts: false -# tasks: -# - name: Initiate telemetry collection -# ansible.builtin.include_role: -# name: idrac_telemetry -# tasks_from: trigger_telemetry_collection.yml -# when: -# - hostvars['localhost']['telemetry_idrac'] is defined -# - (hostvars['localhost']['telemetry_idrac'] | length > 0) +- name: Initiate telemetry collection on OIM + hosts: oim + connection: ssh + gather_facts: false + tasks: + - name: Initiate telemetry collection + ansible.builtin.include_role: + name: idrac_telemetry + tasks_from: trigger_telemetry_collection.yml + when: + - hostvars['localhost']['telemetry_idrac'] is defined + - (hostvars['localhost']['telemetry_idrac'] | length > 0) # - name: Enable idrac telemetry in SN # hosts: sn_active From 42b2f476dd27198928d5930effdba1630a4339bc Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 8 Jul 2025 11:16:35 +0530 Subject: [PATCH 12/76] ansible host update --- telemetry/roles/telemetry_validation/tasks/add_host_goups.yml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index bcd558d8ff..38b8f39945 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -69,6 +69,5 @@ - name: Create telemetry group ansible.builtin.add_host: hostname: "{{ item }}" - ansible_host: "{{ item }}" groups: "{{ telemetry_host_group }}" loop: "{{ telemetry_host }}" From 143782d28f6e67e77b95fb71618acbfc26d249e9 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 8 Jul 2025 11:38:13 +0530 Subject: [PATCH 13/76] update addhost --- telemetry/roles/telemetry_validation/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index 719d969c9f..eeacb91934 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -123,7 +123,7 @@ telemetry_host_group: "telemetry_group" telemetry_host: >- {{ groups['kube_control_plane'] if federated_idrac_telemetry_collection | default(false) - else ['oim'] + else ['localhost'] }} # Usage: include_high_availability_config.yml high_availability_config_path: "{{ input_project_dir }}/high_availability_config.yml" From f9d4389f217baf153c0eb84f65ac5e65f972c706 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 8 Jul 2025 12:02:01 +0530 Subject: [PATCH 14/76] host update --- telemetry/roles/telemetry_validation/tasks/add_host_goups.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index 38b8f39945..0bf6c55595 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -70,4 
+70,5 @@ ansible.builtin.add_host: hostname: "{{ item }}" groups: "{{ telemetry_host_group }}" + ansible_connection: "{{ 'local' if item == 'localhost' else 'ssh' }}" loop: "{{ telemetry_host }}" From 27a04c3a2da9cd60cab92720b40efdf40bff82f0 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Thu, 10 Jul 2025 14:44:20 +0530 Subject: [PATCH 15/76] update telemetry restart --- .../templates/idrac_telemetry_receiver_init.sh.j2 | 11 +++++++++-- .../roles/deploy_containers/pcs/vars/main.yml | 2 +- .../tasks/trigger_telemetry_collection.yml | 14 ++++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 b/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 index bf9bbfd337..b278170b59 100644 --- a/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 +++ b/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 @@ -18,5 +18,12 @@ # Script to initialize idrac-telemetry-receiver -exec /go/src/github.com/telemetry-reference-tools/scripts/example/idrac-telemetry-receiver.sh -nohup go run cmd/redfishread/redfishread.go & +go run cmd/dbdiscauth/dbdiscauth.go & +PID1=$! +go run cmd/configui/configui.go & +PID2=$! +go run cmd/redfishread/redfishread.go & +PID3=$! 
+ +# Wait for all processes +wait $PID1 $PID2 $PID3 diff --git a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml index 1dd93646ff..1d097be9dc 100644 --- a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml @@ -76,7 +76,7 @@ stop_interval: "0s" stop_timeout: "60s" migration_threshold: 0 -ha_migration_threshold: 1 +ha_migration_threshold: 3 failure_timeout: "60s" pcs_group: omnia vip_group: omnia_vip diff --git a/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml b/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml index 05a21a2564..5217b844a5 100644 --- a/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml +++ b/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml @@ -13,10 +13,12 @@ # limitations under the License. --- -# Initiate iDRAC collection -- name: Initiate telemetry-collector - containers.podman.podman_container_exec: +# Restart iDRAC telemetry container +- name: Restart telemetry-collector + containers.podman.podman_container: name: "{{ idrac_telemetry_container }}" - command: "/bin/bash {{ idrac_telemetry_receiver_entry_script }}" - detach: true - when: hostvars['localhost']['idrac_telemetry_support'] + state: started + restart: true + when: + - hostvars['localhost']['idrac_telemetry_support'] + - not hostvars['localhost']['federated_idrac_telemetry_collection'] From 384c2f80e0492445c7c95ac683589a7d869a80f8 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Thu, 10 Jul 2025 16:01:28 +0530 Subject: [PATCH 16/76] update rescue --- .../tasks/trigger_telemetry_collection.yml | 16 +++++++++++----- telemetry/roles/idrac_telemetry/vars/main.yml | 2 ++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml b/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml index 
5217b844a5..dff1a493fa 100644 --- a/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml +++ b/telemetry/roles/idrac_telemetry/tasks/trigger_telemetry_collection.yml @@ -14,11 +14,17 @@ --- # Restart iDRAC telemetry container -- name: Restart telemetry-collector - containers.podman.podman_container: - name: "{{ idrac_telemetry_container }}" - state: started - restart: true +- name: Restart iDRAC telemetry container when: - hostvars['localhost']['idrac_telemetry_support'] - not hostvars['localhost']['federated_idrac_telemetry_collection'] + block: + - name: Restart telemetry-collector + containers.podman.podman_container: + name: "{{ idrac_telemetry_container }}" + state: started + restart: true + rescue: + - name: Telemetry container restart failed + ansible.builtin.fail: + msg: "{{ idrac_telemetry_restart_failure_msg.splitlines() | join(' ') }}" diff --git a/telemetry/roles/idrac_telemetry/vars/main.yml b/telemetry/roles/idrac_telemetry/vars/main.yml index 301e9f9939..7c5571f270 100644 --- a/telemetry/roles/idrac_telemetry/vars/main.yml +++ b/telemetry/roles/idrac_telemetry/vars/main.yml @@ -60,6 +60,8 @@ invalid_bmc_warning_msg: | # Usage: trigger_telemetry_collection.yml idrac_telemetry_container: "idrac_telemetry_receiver" idrac_telemetry_receiver_entry_script: "/usr/local/bin/idrac_telemetry_receiver_init.sh" +idrac_telemetry_restart_failure_msg: | + Failed to restart idrac_telemetry_receiver container. Please check the logs using the command `podman logs idrac_telemetry_receiver` and try again later. 
# Usage: prometheus_config_reload.yml prometheus_reload: From c9a20b16f3ef178fead9a1ba1b4539c18571f15e Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Thu, 10 Jul 2025 16:21:58 +0530 Subject: [PATCH 17/76] idrac telemetry stateful set --- .../tasks/idrac_telemetry_deployment.yml | 168 +++++++++++++++--- .../service_k8s_telemetry/tasks/main.yml | 4 +- .../tasks/read_software_config.yml | 2 +- .../templates/omnia_credential.j2 | 6 +- 4 files changed, 153 insertions(+), 27 deletions(-) diff --git a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml index b040b3d3a9..e101b7a20f 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml @@ -13,16 +13,6 @@ # limitations under the License. --- -- name: Get prometheus svc IP - ansible.builtin.command: kubectl get svc "{{ prometheus_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.clusterIP}' - changed_when: false - register: prometheus_svc_ip - -- name: Get mysqldb svc IP - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.clusterIP}' - changed_when: false - register: mysql_svc_ip - - name: Create directory for iDRAC telemetry ansible.builtin.file: path: "{{ service_cluster_idrac_telemetry_dir_path }}" @@ -72,24 +62,21 @@ msg: "{{ idrac_script_git_clone_error_msg.splitlines() | join(' ') }}" when: clone_idrac_script is failed -- name: Deploy idrac-telemetry pod +- name: idrac-telemetry StatefulSet kubernetes.core.k8s: state: present definition: apiVersion: apps/v1 - kind: Deployment + kind: StatefulSet metadata: name: "{{ idrac_telemetry_k8s_name }}" namespace: "{{ telemetry_namespace }}" - labels: - app: "{{ idrac_telemetry_k8s_name }}" spec: + serviceName: "{{ mysqldb_k8s_name }}" + replicas: "{{ statefulset_replicas }}" 
selector: matchLabels: app: "{{ idrac_telemetry_k8s_name }}" - replicas: 1 - strategy: - type: RollingUpdate template: metadata: labels: @@ -105,14 +92,42 @@ - ip: "127.0.0.1" hostnames: - "activemq" - - ip: "{{ prometheus_svc_ip.stdout }}" - hostnames: - - "prometheus" - - ip: "{{ mysql_svc_ip.stdout }}" + # - ip: "{{ prometheus_svc_ip.stdout }}" + # hostnames: + # - "prometheus" + - ip: "127.0.0.1" hostnames: - "mysqldb" containers: + - name: mysqldb + image: "{{ mysql_image }}" + imagePullPolicy: IfNotPresent + volumeMounts: + - name: mysqldb-pvc + mountPath: /var/lib/mysql/ + env: + - name: MYSQL_DATABASE + value: "{{ mysqldb_name }}" + - name: MYSQL_USER + valueFrom: + secretKeyRef: + name: "{{ mysqldb_secrets_name }}" + key: mysqldb_user + - name: MYSQL_PASSWORD + valueFrom: + secretKeyRef: + name: "{{ mysqldb_secrets_name }}" + key: mysqldb_password + - name: MYSQL_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: "{{ mysqldb_secrets_name }}" + key: mysqldb_root_password + ports: + - containerPort: "{{ mysqldb_container_port1 }}" + - containerPort: "{{ mysqldb_container_port2 }}" + - name: activemq image: "{{ activemq_image }}" imagePullPolicy: "IfNotPresent" @@ -173,3 +188,114 @@ - "/bin/sh" - "-c" args: ["go run cmd/prometheuspump/prometheuspump.go"] + + volumeClaimTemplates: + - metadata: + name: mysqldb-pvc + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: "{{ mysqldb_storage_size | default('10Gi') }}" + +# - name: Deploy idrac-telemetry pod +# kubernetes.core.k8s: +# state: present +# definition: +# apiVersion: apps/v1 +# kind: Deployment +# metadata: +# name: "{{ idrac_telemetry_k8s_name }}" +# namespace: "{{ telemetry_namespace }}" +# labels: +# app: "{{ idrac_telemetry_k8s_name }}" +# spec: +# selector: +# matchLabels: +# app: "{{ idrac_telemetry_k8s_name }}" +# replicas: 1 +# strategy: +# type: RollingUpdate +# template: +# metadata: +# labels: +# app: "{{ idrac_telemetry_k8s_name }}" +# spec: +# volumes: +# - name: 
telemetry-reference-tools +# hostPath: +# path: "{{ idrac_telemetry_reference_git_clone_path }}" +# type: Directory + +# hostAliases: +# - ip: "127.0.0.1" +# hostnames: +# - "activemq" +# # - ip: "{{ prometheus_svc_ip.stdout }}" +# # hostnames: +# # - "prometheus" +# - ip: "127.0.0.1" +# hostnames: +# - "mysqldb" + +# containers: +# - name: activemq +# image: "{{ activemq_image }}" +# imagePullPolicy: "IfNotPresent" +# ports: +# - containerPort: "{{ activemq_http_port_1 }}" +# - containerPort: "{{ activemq_http_port_2 }}" + +# - name: idrac-telemetry-receiver +# image: "{{ go_image }}" +# imagePullPolicy: "IfNotPresent" +# volumeMounts: +# - mountPath: /go/src/github.com/telemetry-reference-tools +# name: telemetry-reference-tools +# workingDir: /go/src/github.com/telemetry-reference-tools +# env: +# - name: MESSAGEBUS_HOST +# value: activemq +# - name: MESSAGEBUS_PORT +# value: "{{ messagebus_http_port | string }}" +# - name: CONFIGUI_HTTP_PORT +# value: "{{ configui_http_port | string }}" +# - name: MYSQL_DATABASE +# value: "{{ mysqldb_name }}" +# - name: MYSQL_USER +# valueFrom: +# secretKeyRef: +# name: "{{ mysqldb_secrets_name }}" +# key: mysqldb_user +# - name: MYSQL_PASSWORD +# valueFrom: +# secretKeyRef: +# name: "{{ mysqldb_secrets_name }}" +# key: mysqldb_password +# - name: MYSQL_HOST +# value: mysqldb +# - name: MYSQL_HOST_PORT +# value: "{{ mysql_svc_port.stdout | string }}" +# command: +# - "/bin/sh" +# - "-c" +# args: ["./scripts/example/idrac-telemetry-receiver.sh"] + +# - name: prometheus-pump +# image: "{{ go_image }}" +# imagePullPolicy: "IfNotPresent" +# volumeMounts: +# - mountPath: /go/src/github.com/telemetry-reference-tools +# name: telemetry-reference-tools +# workingDir: /go/src/github.com/telemetry-reference-tools +# env: +# - name: MESSAGEBUS_HOST +# value: activemq +# - name: MESSAGEBUS_PORT +# value: "{{ messagebus_http_port | string }}" +# - name: PROMETHEUSDB_SERVER +# value: prometheus +# command: +# - "/bin/sh" +# - "-c" +# args: 
["go run cmd/prometheuspump/prometheuspump.go"] diff --git a/telemetry/roles/service_k8s_telemetry/tasks/main.yml b/telemetry/roles/service_k8s_telemetry/tasks/main.yml index dd8eee92a8..622d691d1d 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/main.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/main.yml @@ -24,8 +24,8 @@ - name: Configure k8s secrets ansible.builtin.include_tasks: secrets_creation.yml - - name: Deployment of mysqldb pod - ansible.builtin.include_tasks: mysqldb_deployment.yml + # - name: Deployment of mysqldb pod + # ansible.builtin.include_tasks: mysqldb_deployment.yml - name: Deployment of prometheus pod ansible.builtin.include_tasks: prometheus_deployment.yml diff --git a/telemetry/roles/telemetry_validation/tasks/read_software_config.yml b/telemetry/roles/telemetry_validation/tasks/read_software_config.yml index 685aad8be3..696ff3d796 100644 --- a/telemetry/roles/telemetry_validation/tasks/read_software_config.yml +++ b/telemetry/roles/telemetry_validation/tasks/read_software_config.yml @@ -105,7 +105,7 @@ python_package_name: >- {{ telemetry_packages['service_k8s']['cluster'] | selectattr('type', 'equalto', 'rpm') - | selectattr('package', 'search', 'python3') + | selectattr('package', 'search', '^python3\.\d+$') | map(attribute='package') | join }} k8s_pip_packages: >- {{ telemetry_packages['service_k8s']['cluster'] diff --git a/utils/credential_utility/roles/create_config/templates/omnia_credential.j2 b/utils/credential_utility/roles/create_config/templates/omnia_credential.j2 index 10a8d38030..b05c1acdbc 100644 --- a/utils/credential_utility/roles/create_config/templates/omnia_credential.j2 +++ b/utils/credential_utility/roles/create_config/templates/omnia_credential.j2 @@ -13,7 +13,7 @@ pulp_password: "" docker_username: "" docker_password: "" -#Omnia credentials +# Omnia credentials slurm_db_password: "" # Security credentials @@ -25,11 +25,11 @@ openldap_monitor_password: "" kerberos_admin_password: "" 
directory_manager_password: "" -# idrac telemetry +# iDrac Telemetry credentials mysqldb_user: "" mysqldb_password: "" mysqldb_root_password: "" -#visualization +# Visualization credentials grafana_username: "" grafana_password: "" From b805ecdf86fdc98338d99f8d90f1c8045815bbc4 Mon Sep 17 00:00:00 2001 From: mcas Date: Thu, 10 Jul 2025 16:25:55 +0530 Subject: [PATCH 18/76] adding the base code for powerscale plugins --- .../rhel/9.6/csi_driver_powerscale.json | 84 +++++++ .../rhel/9.6/csi_driver_powerscale1.json | 84 +++++++ .../files/empty_certificate_template.yml | 8 + .../csi_powerscale_config_certificate.yml | 35 +++ .../tasks/csi_powerscale_config_secret.yml | 32 +++ .../tasks/csi_powerscale_image_pull.yml | 72 ++++++ .../tasks/csi_powerscale_install.yml | 72 ++++++ .../tasks/csi_powerscale_prereq.yml | 206 ++++++++++++++++++ .../k8s_csi_powerscale_plugin/tasks/main.yml | 34 +++ .../templates/ps_storage_class.j2 | 13 ++ .../k8s_csi_powerscale_plugin/vars/main.yml | 59 +++++ 11 files changed, 699 insertions(+) create mode 100644 input/config/rhel/9.6/csi_driver_powerscale.json create mode 100644 input/config/rhel/9.6/csi_driver_powerscale1.json create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/files/empty_certificate_template.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_certificate.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/templates/ps_storage_class.j2 create mode 100644 scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml diff 
--git a/input/config/rhel/9.6/csi_driver_powerscale.json b/input/config/rhel/9.6/csi_driver_powerscale.json new file mode 100644 index 0000000000..9a087c5382 --- /dev/null +++ b/input/config/rhel/9.6/csi_driver_powerscale.json @@ -0,0 +1,84 @@ +{ + "csi_driver_powerscale": { + "cluster": [ + { + "package": "csi-powerscale", + "url": "https://github.com/dell/csi-powerscale.git", + "type": "git", + "version": "v2.14.0" + }, + { + "package": "external-snapshotter", + "url": "https://github.com/kubernetes-csi/external-snapshotter.git", + "type": "git", + "version": "v8.3.0" + }, + { + "package": "helm-charts", + "url": "https://github.com/dell/helm-charts.git", + "type": "git", + "version": "csi-isilon-2.15.0" + }, + { + "package": "docker.io/dellemc/csi-isilon", + "tag": "v2.15.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-attacher", + "tag": "v4.9.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-provisioner", + "tag": "v5.3.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-snapshotter", + "tag": "v8.3.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-resizer", + "tag": "v1.14.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-node-driver-registrar", + "tag": "v2.14.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", + "tag": "v0.15.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/dell-csi-replicator", + "tag": "v1.13.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/podmon", + "tag": "v1.14.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/csm-authorization-sidecar", + "tag": "v2.3.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/csi-metadata-retriever", + "tag": "v1.12.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/csm-encryption", + "tag": "v0.6.0", + "type": "image" + } + ] + } +} diff --git 
a/input/config/rhel/9.6/csi_driver_powerscale1.json b/input/config/rhel/9.6/csi_driver_powerscale1.json new file mode 100644 index 0000000000..5abb7404f2 --- /dev/null +++ b/input/config/rhel/9.6/csi_driver_powerscale1.json @@ -0,0 +1,84 @@ +{ + "csi_driver_powerscale": { + "cluster": [ + { + "package": "csi-powerscale", + "url": "https://github.com/dell/csi-powerscale.git", + "type": "git", + "version": "v2.14.0" + }, + { + "package": "external-snapshotter", + "url": "https://github.com/kubernetes-csi/external-snapshotter.git", + "type": "git", + "version": "v8.3.0" + }, + { + "package": "helm-charts", + "url": "https://github.com/dell/helm-charts.git", + "type": "git", + "version": "csi-isilon-2.14.0" + }, + { + "package": "docker.io/dellemc/csi-isilon", + "tag": "v2.14.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-attacher", + "tag": "v4.8.1", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-provisioner", + "tag": "v5.2.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-snapshotter", + "tag": "v8.2.1", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-resizer", + "tag": "v1.13.2", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-node-driver-registrar", + "tag": "v2.13.0", + "type": "image" + }, + { + "package": "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", + "tag": "v0.14.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/dell-csi-replicator", + "tag": "v1.12.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/podmon", + "tag": "v1.13.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/csm-authorization-sidecar", + "tag": "v2.2.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/csi-metadata-retriever", + "tag": "v1.11.0", + "type": "image" + }, + { + "package": "docker.io/dellemc/csm-encryption", + "tag": "v0.6.0", + "type": "image" + } + ] + } +} diff --git 
a/scheduler/roles/k8s_csi_powerscale_plugin/files/empty_certificate_template.yml b/scheduler/roles/k8s_csi_powerscale_plugin/files/empty_certificate_template.yml new file mode 100644 index 0000000000..2462770283 --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/files/empty_certificate_template.yml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: isilon-certs-0 + namespace: isilon +type: Opaque +data: + cert-0: "" diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_certificate.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_certificate.yml new file mode 100644 index 0000000000..0a058067d7 --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_certificate.yml @@ -0,0 +1,35 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Empty certificate creation + block: + - name: Copy empty certificate yaml file + ansible.builtin.copy: + dest: "{{ empty_certificate_path }}" + src: "{{ empty_certificate_template_path }}" + mode: "{{ permission_644 }}" + + - name: Apply the Secret YAML to Kubernetes + block: + - name: Create empty certificate secret + ansible.builtin.command: + cmd: "kubectl apply -f {{ empty_certificate_path }}" + register: result + changed_when: result.changed + + rescue: + - name: Empty certificate secret creation failure + ansible.builtin.fail: + msg: "{{ fail_msg_empty_certificate }}" diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml new file mode 100644 index 0000000000..ad6cf6d85f --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml @@ -0,0 +1,32 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Remove existing isilon-creds secret if already present in isilon namespace + ansible.builtin.command: kubectl delete secret isilon-creds -n {{ powerscale_ns }} + failed_when: false + changed_when: false + +- name: Create isilon-creds secret in isilon namespace + ansible.builtin.command: kubectl create secret generic isilon-creds -n {{ powerscale_ns }} --from-file=config="{{ csi_powerscale_secret_path }}" + failed_when: false + register: apply_secret + changed_when: apply_secret.changed + +# Remove the secret file +- name: Remove secret file + ansible.builtin.file: + path: "{{ csi_powerscale_secret_path }}" + state: absent + failed_when: false diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml new file mode 100644 index 0000000000..8298544889 --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml @@ -0,0 +1,72 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Set empty image list + ansible.builtin.set_fact: + csi_powerscale_image_versions: [] + +- name: Fetch and store image versions + ansible.builtin.set_fact: + csi_powerscale_image_versions: "{{ csi_powerscale_image_versions + [item.package + ':' + item.tag] }}" + loop: "{{ hostvars['localhost']['csi_driver_powerscale_packages_json']['csi_driver_powerscale']['cluster'] }}" + when: item.type == 'image' + +#- name: Pull csi powerscale images + # ansible.builtin.command: nerdctl pull {{ item }} + #with_items: "{{ csi_powerscale_image_versions }}" + #changed_when: true + #failed_when: false + #environment: + # http_proxy: "{{ hostvars['localhost']['http_proxy'] }}" + #https_proxy: "{{ hostvars['localhost']['https_proxy'] }}" + #no_proxy: "{{ hostvars['localhost']['oim_hostname'] }},{{ hostvars['localhost']['admin_nic_ip'] }}" + +# Pulling images from pulp - always,partial, never +- name: Pull K8s services docker images from pulp + ansible.builtin.command: nerdctl pull {{ item }} + with_items: "{{ csi_powerscale_image_versions }}" + changed_when: true + failed_when: false + when: + - hostvars['localhost']['k8s_offline_install'] + +# Pulling images directly when k8s_offline_install: false, enable_routed_internet: true + +- name: Pull K8s services docker images from proxy + ansible.builtin.command: nerdctl pull {{ item }} + with_items: "{{ csi_powerscale_image_versions }}" + changed_when: true + failed_when: false + environment: + http_proxy: "http://{{ hostvars['localhost']['admin_nic_ip'] }}:{{ proxy_port }}" + https_proxy: "http://{{ hostvars['localhost']['admin_nic_ip'] }}:{{ proxy_port }}" + no_proxy: "localhost,127.0.0.1,{{ hostvars['localhost']['admin_nic_ip'] }},{{ hostvars['localhost']['oim_hostname'] }}" # noqa: yaml[line-length] + when: + - not hostvars['localhost']['k8s_offline_install'] + - hostvars['localhost']['enable_routed_internet'] + +# Pulling images directly when k8s_offline_install: false, enable_routed_internet: false + +- name: Pull 
K8s services docker images from dedicated internet + ansible.builtin.command: nerdctl pull {{ item }} + with_items: "{{ csi_powerscale_image_versions }}" + changed_when: true + failed_when: false + environment: + no_proxy: "localhost,127.0.0.1,{{ hostvars['localhost']['admin_nic_ip'] }},{{ hostvars['localhost']['oim_hostname'] }}" # noqa: yaml[line-length] + when: + - not hostvars['localhost']['k8s_offline_install'] + - not hostvars['localhost']['enable_routed_internet'] + diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml new file mode 100644 index 0000000000..da39443b04 --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml @@ -0,0 +1,72 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: Deploy external-snapshotter config CRDs + ansible.builtin.command: + cmd: "kubectl apply -f client/config/crd/" + chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" + register: install_result + failed_when: false + changed_when: install_result.changed + +- name: Deploy external-snapshotter snapshot-controller CRDs + ansible.builtin.command: + cmd: "kubectl apply -f deploy/kubernetes/snapshot-controller/" + chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" + register: install_result + failed_when: false + changed_when: install_result.changed + +- name: Execute CSI driver installation script with timeout of seconds {{ async_time }} + ansible.builtin.command: + cmd: "./csi-install.sh --namespace {{ isilon_ns }} --values {{ csi_powerscale_values_path }}" + chdir: "{{ csi_powerscale_path }}/{{ csi_powerscale_git | regex_replace('\\.tar\\.gz$', '') }}/dell-csi-helm-installer" + register: install_result + async: "{{ async_time }}" + poll: "{{ poll_time }}" + failed_when: false + changed_when: install_result.changed + +- name: Wait for csi pods to be in Running state + ansible.builtin.shell: > + set -o pipefail && \ + kubectl get pod -n {{ isilon_ns }} --no-headers | grep {{ powerscale_pod_indcator }} | grep -v "Running" + register: isilon_non_running_pods + failed_when: false + changed_when: false + until: isilon_non_running_pods.stdout_lines | length == 0 + retries: "{{ max_attempts }}" + delay: "{{ wait_time }}" + +- name: Verify csi driver installation + ansible.builtin.pause: + seconds: "{{ warning_wait_time }}" + prompt: "{{ fail_msg_csi_powerscale_driver }}" + when: isilon_non_running_pods.stdout_lines | length > 0 + +- name: Create powerscale storage class if deployment was successful + ansible.builtin.command: + cmd: "kubectl apply -f ps_storage_class.yml" + chdir: "{{ csi_powerscale_path }}" + register: sc_command_result + failed_when: false + changed_when: sc_command_result.changed + when: 
isilon_non_running_pods.stdout_lines | length == 0 + +- name: Remove ps_storage_class.yml file + ansible.builtin.file: + path: "{{ csi_powerscale_path }}/ps_storage_class.yml" + state: absent + force: true diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml new file mode 100644 index 0000000000..79efeabaca --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml @@ -0,0 +1,206 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +# Check Kubernetes is deployed on cluster +- name: Verify Kubernetes is deployed on cluster + ansible.builtin.command: kubectl get node + register: k8s_return_code + changed_when: false + failed_when: false + +- name: Fail if Kubernetes is not deployed + ansible.builtin.assert: + that: + - k8s_return_code.rc == 0 + fail_msg: "{{ k8s_not_deployed }}" + +# Check if powerscale is already deployed +- name: Verify powerscale is deployed on cluster + ansible.builtin.shell: > + set -o pipefail && \ + kubectl get pod -n {{ powerscale_ns }} --no-headers | grep {{ powerscale_pod_indcator }} + register: powerscale_precheck + changed_when: false + failed_when: false + +- name: Set flag if powerscale is already deployed + ansible.builtin.set_fact: + powerscale_already_deployed: "{{ powerscale_precheck.rc == 0 }}" + +- name: Pause to notify powerscale already deployed + ansible.builtin.pause: + seconds: "{{ warning_wait_time }}" + prompt: "{{ warning_msg_already_deployed }}" + when: powerscale_already_deployed + +- name: Proceed prereq if powerscale not already deployed + when: not powerscale_already_deployed + block: + # Check helm is deployed on cluster + - name: Verify helm is deployed on cluster + ansible.builtin.command: helm + register: helm_return_code + changed_when: false + failed_when: false + + - name: Fail if helm is not deployed + ansible.builtin.assert: + that: + - helm_return_code.rc == 0 + fail_msg: "{{ helm_not_deployed }}" + + - name: Remove /opt/omnia/csi-driver-powerscale directory if already present + ansible.builtin.file: + path: "{{ csi_powerscale_path }}" + state: absent + + - name: Create csi-driver-powerscale directory under /opt/omnia + ansible.builtin.file: + path: "{{ csi_powerscale_path }}" + mode: "{{ permission_644 }}" + state: directory + owner: "{{ owner_value }}" + group: "{{ group_value }}" + + - name: Check if secret file is encrypted + ansible.builtin.command: cat "{{ hostvars['localhost']['csi_powerscale_driver_secret_file_path'] 
}}" + changed_when: false + register: config_content + connection: local + delegate_to: localhost + + - name: Decrpyt secret file + ansible.builtin.command: >- + ansible-vault decrypt {{ hostvars['localhost']['csi_powerscale_driver_secret_file_path'] }} + --vault-password-file {{ role_path }}/files/{{ csi_powerscale_secret_vaultname }} + when: "'$ANSIBLE_VAULT;' in config_content.stdout" + changed_when: true + connection: local + delegate_to: localhost + + # Copy secret file to /opt/omnia + - name: Copy secret file + ansible.builtin.copy: + src: "{{ hostvars['localhost']['csi_powerscale_driver_secret_file_path'] }}" + dest: "{{ csi_powerscale_secret_path }}" + owner: "{{ owner_value }}" + group: "{{ group_value }}" + mode: "{{ permission_644 }}" + + # Check that PowerScale is reachable by pinging the endpoint value read from the secret file + - name: Load values.yaml file + ansible.builtin.include_vars: + file: "{{ hostvars['localhost']['csi_powerscale_driver_values_file_path'] }}" + name: csi_powerscale_values_file + + - name: Load secret file for input validation + ansible.builtin.include_vars: + file: "{{ hostvars['localhost']['csi_powerscale_driver_secret_file_path'] }}" + name: clusters + no_log: true + + - name: Extract PowerScale endpoint IP or Host from loaded secret data + ansible.builtin.set_fact: + powerscale_host: "{{ clusters.isilonClusters[0].endpoint | regex_replace('https?://', '') | regex_replace('/.*', '') }}" + + - name: Check if the extracted PowerScale IP or Host is reachable + ansible.builtin.command: + cmd: "ping -c 1 {{ powerscale_host }}" # powerscale_host is the endpoint host extracted from the secret data above + register: ping_result + ignore_errors: true # Continue even if the ping fails + changed_when: false + + - name: Print ping result or error if ping fails + ansible.builtin.debug: + msg: > + {% if ping_result.rc == 0 %} + Powerscale Host reachable! Output: {{ ping_result.stdout }} + {% else %} + Powerscale Host not reachable. Error: {{ ping_result.stderr
Error: {{ ping_result.stderr }} + {% endif %} + + - name: Encrypt secret file + ansible.builtin.command: >- + ansible-vault encrypt {{ hostvars['localhost']['csi_powerscale_driver_secret_file_path'] }} + --vault-password-file {{ role_path }}/files/{{ csi_powerscale_secret_vaultname }} + changed_when: false + connection: local + delegate_to: localhost + + # Copy values file to /opt/omnia + - name: Copy values file + ansible.builtin.copy: + src: "{{ hostvars['localhost']['csi_powerscale_driver_values_file_path'] }}" + dest: "{{ csi_powerscale_values_path }}" + owner: "{{ owner_value }}" + group: "{{ group_value }}" + mode: "{{ permission_644 }}" + + - name: Get dependencies from local repo + block: + - name: Get csi-powerscale git tar + ansible.builtin.get_url: + url: "{{ hostvars['localhost']['offline_git_path'] }}/{{ csi_powerscale_git }}" + dest: "{{ csi_powerscale_path }}/{{ csi_powerscale_git }}" + mode: "{{ permission_644 }}" + + - name: Extract csi-powerscale tar file + ansible.builtin.unarchive: + src: "{{ csi_powerscale_path }}/{{ csi_powerscale_git }}" + dest: "{{ csi_powerscale_path }}" + remote_src: true + + - name: Get dell/helm-charts git tar + ansible.builtin.get_url: + url: "{{ hostvars['localhost']['offline_git_path'] }}/{{ helm_charts_git }}" + dest: "{{ csi_powerscale_path }}/csi-powerscale/{{ helm_charts_git }}" + mode: "{{ permission_644 }}" + + - name: Get external-snapshotter git tar + ansible.builtin.get_url: + url: "{{ hostvars['localhost']['offline_git_path'] }}/{{ external_snapshotter_git }}" + dest: "{{ csi_powerscale_path }}/csi-powerscale/{{ external_snapshotter_git }}" + mode: "{{ permission_644 }}" + rescue: + - name: Handle dependency failure + ansible.builtin.fail: + msg: "{{ fail_msg_download }}" + + - name: Extract dell/helm-charts tar file under csi-powerscale directory + ansible.builtin.unarchive: + src: "{{ csi_powerscale_path }}/csi-powerscale/{{ helm_charts_git }}" + dest: "{{ csi_powerscale_path }}/csi-powerscale" + 
remote_src: true + + - name: Extract external snapshotter tar file under csi-powerscale directory + ansible.builtin.unarchive: + src: "{{ csi_powerscale_path }}/csi-powerscale/{{ external_snapshotter_git }}" + dest: "{{ csi_powerscale_path }}/csi-powerscale" + remote_src: true + + - name: Transfer storage class template to kube_control_plane + ansible.builtin.template: + src: ps_storage_class.j2 + dest: "{{ csi_powerscale_path }}/ps_storage_class.yml" + owner: "{{ owner_value }}" + group: "{{ group_value }}" + mode: "{{ permission_644 }}" + + - name: Create isilon namespace + ansible.builtin.command: + cmd: "kubectl create ns isilon" + register: command_result + failed_when: false + changed_when: command_result.changed diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml new file mode 100644 index 0000000000..2eb47e2911 --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml @@ -0,0 +1,34 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- + +- name: CSI powerscale driver installation + when: + - hostvars['localhost']['csi_driver_powerscale_precheck_pass'] + - hostvars['localhost']['omnia_config']['k8s_offline_install'] + block: + - name: Fetch required files to kube control plane + ansible.builtin.include_tasks: csi_powerscale_prereq.yml + + - name: Deploy powerscale if not already deployed + when: not powerscale_already_deployed + block: + - name: Configure secret + ansible.builtin.include_tasks: csi_powerscale_config_secret.yml + + - name: Configure certificate + ansible.builtin.include_tasks: csi_powerscale_config_certificate.yml + + - name: Install powerscale driver + ansible.builtin.include_tasks: csi_powerscale_install.yml diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/templates/ps_storage_class.j2 b/scheduler/roles/k8s_csi_powerscale_plugin/templates/ps_storage_class.j2 new file mode 100644 index 0000000000..a8158d410b --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/templates/ps_storage_class.j2 @@ -0,0 +1,13 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: ps01 +provisioner: csi-isilon.dellemc.com +reclaimPolicy: Delete +allowVolumeExpansion: true +volumeBindingMode: Immediate +parameters: + AccessZone: {{ ps_access_zone }} + Isipath: {{ ps_isipath }} + RootClientEnabled: "true" + csi.storage.k8s.io/fstype: "nfs" diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml b/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml new file mode 100644 index 0000000000..3b37fcef43 --- /dev/null +++ b/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml @@ -0,0 +1,59 @@ +# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# Usage: csi_powerscale_config_certificate.yml +empty_certificate_path: "{{ csi_powerscale_path }}/empty_isilon-certs.yaml" +fail_msg_empty_certificate: "Failed. Unable to create empty certificate." +empty_certificate_template_path: "{{ role_path }}/files/empty_certificate_template.yml" + +# Usage: csi_powerscale_config_secret.yml, csi_powerscale_prereq.yml +csi_powerscale_secret_path: "{{ csi_powerscale_path }}/csi_powerscale_secret.yaml" + +# Usage: csi_powerscale_install.yml, csi_powerscale_prereq.yml +csi_powerscale_path: "/opt/omnia/csi-driver-powerscale" + +# Usage: csi_powerscale_install.yml, csi_powerscale_prereq.yml +csi_powerscale_git: "csi-powerscale.tar.gz" + +# Usage: csi_powerscale_install.yml +fail_msg_csi_powerscale_driver: "Error. Deployment of csi driver was not successful. Please review the deployment. Run playbook with -vvv for more details" +pass_msg_csi_powerscale_driver: "CSI Powerscale driver installation completed successfully." +wait_time: 10 +warning_wait_time: 30 +max_attempts: 5 +isilon_ns: "isilon" +async_time: 180 +poll_time: 10 + +# Usage: csi_powerscale_prereq.yml +permission_644: "0644" +owner_value: "root" +group_value: "root" +powerscale_ns: "isilon" +powerscale_pod_indcator: "isilon-" +csi_powerscale_values_path: "{{ csi_powerscale_path }}/values.yaml" +fail_msg_download: "Failed to get required dependencies. Make sure to verify entries in csi_driver_powerscale.json and run local_repo.yml first." 
+helm_charts_git: "helm-charts.tar.gz" +external_snapshotter_git: "external-snapshotter.tar.gz" +k8s_not_deployed: "Failed, Kubernetes is not deployed on the cluster. Run omnia.yml with k8s entry in software_config.json to install kubernetes first." +helm_not_deployed: "Failed, Helm is not deployed on the cluster." +csi_powerscale_secret_vaultname: ".csi_powerscale_secret_vault" +vault_key_permission: "0644" +warning_msg_already_deployed: "Powerscale will not be deployed. Existing powerscale deployment is already present on the cluster. + Please remove the existing powerscale deployment first using steps mentioned in omnia document and rerun playbook to install powerscale." + +# Usage: template ps_storage_class.j2 +ps_isipath: "{{ hostvars['localhost']['csi_powerscale_values_file']['isiPath'] }}" +ps_access_zone: "{{ hostvars['localhost']['csi_powerscale_values_file']['isiAccessZone'] }}" From fdf7737a58092407ea514490ac10937a1f56429d Mon Sep 17 00:00:00 2001 From: mcas Date: Thu, 10 Jul 2025 19:35:30 +0530 Subject: [PATCH 19/76] initial chnages --- .../input_validation/schema/omnia_config.json | 48 +++++++++++ .../validation_flows/common_validation.py | 26 +++++- .../rhel/9.6/csi_driver_powerscale.json | 24 +++--- .../rhel/9.6/csi_driver_powerscale1.json | 84 ------------------- input/omnia_config.yml | 4 + .../cluster_validation/tasks/set_facts.yml | 2 + .../roles/cluster_validation/vars/main.yml | 4 +- 7 files changed, 93 insertions(+), 99 deletions(-) delete mode 100644 input/config/rhel/9.6/csi_driver_powerscale1.json diff --git a/common/library/module_utils/input_validation/schema/omnia_config.json b/common/library/module_utils/input_validation/schema/omnia_config.json index c2bd0f58eb..ee063a12a8 100644 --- a/common/library/module_utils/input_validation/schema/omnia_config.json +++ b/common/library/module_utils/input_validation/schema/omnia_config.json @@ -59,6 +59,16 @@ "k8s_offline_install": { "type": "boolean", "description": "Whether to pull 
packages/images from local repo." + }, + "csi_powerscale_driver_secret_file_path": { + "description": "Absolute file path for the secret.yaml file.", + "type": "string", + "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" + }, + "csi_powerscale_driver_values_file_path": { + "description": "File path for the values.yaml file.", + "type": "string", + "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" } }, "required": [ @@ -74,6 +84,20 @@ "then": { "required": ["topology_manager_scope"] } + }, + { + "if": { + "properties": { + "csi_powerscale_driver_secret_file_path": { + "type": "string", + "minLength": 1 + } + }, + "required": ["csi_powerscale_driver_secret_file_path"] + }, + "then": { + "required": ["csi_powerscale_driver_values_file_path"] + } } ] } @@ -122,6 +146,16 @@ "k8s_offline_install": { "type": "boolean", "description": "Whether to pull packages/images from local repo." + }, + "csi_powerscale_driver_secret_file_path": { + "description": "Absolute file path for the secret.yaml file.", + "type": "string", + "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" + }, + "csi_powerscale_driver_values_file_path": { + "description": "File path for the values.yaml file.", + "type": "string", + "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" } }, "required": [ @@ -137,6 +171,20 @@ "then": { "required": ["topology_manager_scope"] } + }, + { + "if": { + "properties": { + "csi_powerscale_driver_secret_file_path": { + "type": "string", + "minLength": 1 + } + }, + "required": ["csi_powerscale_driver_secret_file_path"] + }, + "then": { + "required": ["csi_powerscale_driver_values_file_path"] + } } ] } diff --git a/common/library/module_utils/input_validation/validation_flows/common_validation.py b/common/library/module_utils/input_validation/validation_flows/common_validation.py index a31667a036..c77e16ec11 100644 --- a/common/library/module_utils/input_validation/validation_flows/common_validation.py +++ 
b/common/library/module_utils/input_validation/validation_flows/common_validation.py @@ -962,7 +962,7 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro bmc_static_range = admin_bmc_networks["bmc_network"]["static_range"] bmc_dynamic_range = admin_bmc_networks["bmc_network"]["dynamic_range"] primary_oim_admin_ip = admin_bmc_networks["admin_network"]["primary_oim_admin_ip"] - + # service_k8s_cluster = data["service_k8s_cluster"] cluster_set = {} if "k8s" in softwares and "k8s" in tag_names: @@ -1032,6 +1032,30 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro "IP overlap -", None, en_us_validation_msg.ip_overlap_fail_msg)) + #csi validation + if "csi_driver_powerscale" in softwares and "k8s" in softwares: + csi_powerscale_driver_secret_file_path = kluster.get("csi_powerscale_driver_secret_file_path") + csi_powerscale_driver_values_file_path = kluster.get("csi_powerscale_driver_values_file_path") + # Validate if secret file path is empty + if not csi_powerscale_driver_secret_file_path: + errors.append( + create_error_msg( + "csi_powerscale_driver_secret_file_path", + csi_powerscale_driver_secret_file_path, + en_us_validation_msg.CSI_DRIVER_SECRET_FAIL_MSG, + ) + ) + + # Validate if values file path is empty + if not csi_powerscale_driver_values_file_path: + errors.append( + create_error_msg( + "csi_powerscale_driver_values_file_path", + csi_powerscale_driver_values_file_path, + en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG, + ) + ) + def validate_omnia_config( input_file_path, diff --git a/input/config/rhel/9.6/csi_driver_powerscale.json b/input/config/rhel/9.6/csi_driver_powerscale.json index 9a087c5382..5abb7404f2 100644 --- a/input/config/rhel/9.6/csi_driver_powerscale.json +++ b/input/config/rhel/9.6/csi_driver_powerscale.json @@ -17,61 +17,61 @@ "package": "helm-charts", "url": "https://github.com/dell/helm-charts.git", "type": "git", - "version": "csi-isilon-2.15.0" + "version": 
"csi-isilon-2.14.0" }, { "package": "docker.io/dellemc/csi-isilon", - "tag": "v2.15.0", + "tag": "v2.14.0", "type": "image" }, { "package": "registry.k8s.io/sig-storage/csi-attacher", - "tag": "v4.9.0", + "tag": "v4.8.1", "type": "image" }, { "package": "registry.k8s.io/sig-storage/csi-provisioner", - "tag": "v5.3.0", + "tag": "v5.2.0", "type": "image" }, { "package": "registry.k8s.io/sig-storage/csi-snapshotter", - "tag": "v8.3.0", + "tag": "v8.2.1", "type": "image" }, { "package": "registry.k8s.io/sig-storage/csi-resizer", - "tag": "v1.14.0", + "tag": "v1.13.2", "type": "image" }, { "package": "registry.k8s.io/sig-storage/csi-node-driver-registrar", - "tag": "v2.14.0", + "tag": "v2.13.0", "type": "image" }, { "package": "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", - "tag": "v0.15.0", + "tag": "v0.14.0", "type": "image" }, { "package": "docker.io/dellemc/dell-csi-replicator", - "tag": "v1.13.0", + "tag": "v1.12.0", "type": "image" }, { "package": "docker.io/dellemc/podmon", - "tag": "v1.14.0", + "tag": "v1.13.0", "type": "image" }, { "package": "docker.io/dellemc/csm-authorization-sidecar", - "tag": "v2.3.0", + "tag": "v2.2.0", "type": "image" }, { "package": "docker.io/dellemc/csi-metadata-retriever", - "tag": "v1.12.0", + "tag": "v1.11.0", "type": "image" }, { diff --git a/input/config/rhel/9.6/csi_driver_powerscale1.json b/input/config/rhel/9.6/csi_driver_powerscale1.json deleted file mode 100644 index 5abb7404f2..0000000000 --- a/input/config/rhel/9.6/csi_driver_powerscale1.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "csi_driver_powerscale": { - "cluster": [ - { - "package": "csi-powerscale", - "url": "https://github.com/dell/csi-powerscale.git", - "type": "git", - "version": "v2.14.0" - }, - { - "package": "external-snapshotter", - "url": "https://github.com/kubernetes-csi/external-snapshotter.git", - "type": "git", - "version": "v8.3.0" - }, - { - "package": "helm-charts", - "url": "https://github.com/dell/helm-charts.git", - "type": 
"git", - "version": "csi-isilon-2.14.0" - }, - { - "package": "docker.io/dellemc/csi-isilon", - "tag": "v2.14.0", - "type": "image" - }, - { - "package": "registry.k8s.io/sig-storage/csi-attacher", - "tag": "v4.8.1", - "type": "image" - }, - { - "package": "registry.k8s.io/sig-storage/csi-provisioner", - "tag": "v5.2.0", - "type": "image" - }, - { - "package": "registry.k8s.io/sig-storage/csi-snapshotter", - "tag": "v8.2.1", - "type": "image" - }, - { - "package": "registry.k8s.io/sig-storage/csi-resizer", - "tag": "v1.13.2", - "type": "image" - }, - { - "package": "registry.k8s.io/sig-storage/csi-node-driver-registrar", - "tag": "v2.13.0", - "type": "image" - }, - { - "package": "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", - "tag": "v0.14.0", - "type": "image" - }, - { - "package": "docker.io/dellemc/dell-csi-replicator", - "tag": "v1.12.0", - "type": "image" - }, - { - "package": "docker.io/dellemc/podmon", - "tag": "v1.13.0", - "type": "image" - }, - { - "package": "docker.io/dellemc/csm-authorization-sidecar", - "tag": "v2.2.0", - "type": "image" - }, - { - "package": "docker.io/dellemc/csi-metadata-retriever", - "tag": "v1.11.0", - "type": "image" - }, - { - "package": "docker.io/dellemc/csm-encryption", - "tag": "v0.6.0", - "type": "image" - } - ] - } -} diff --git a/input/omnia_config.yml b/input/omnia_config.yml index ee0b2deee8..3fad8558ee 100644 --- a/input/omnia_config.yml +++ b/input/omnia_config.yml @@ -82,6 +82,8 @@ service_k8s_cluster: topology_manager_policy: "none" topology_manager_scope: "container" k8s_offline_install: true + csi_powerscale_driver_secret_file_path: + csi_powerscale_driver_values_file_path: compute_k8s_cluster: - cluster_name: compute_cluster @@ -93,3 +95,5 @@ compute_k8s_cluster: topology_manager_policy: "none" topology_manager_scope: "container" k8s_offline_install: true + csi_powerscale_driver_secret_file_path: + csi_powerscale_driver_values_file_path: diff --git 
a/scheduler/roles/cluster_validation/tasks/set_facts.yml b/scheduler/roles/cluster_validation/tasks/set_facts.yml index 08665e4db1..6cd7e6690d 100644 --- a/scheduler/roles/cluster_validation/tasks/set_facts.yml +++ b/scheduler/roles/cluster_validation/tasks/set_facts.yml @@ -77,6 +77,8 @@ topology_manager_policy: "{{ selected_cluster.topology_manager_policy }}" topology_manager_scope: "{{ selected_cluster.topology_manager_scope }}" k8s_offline_install: "{{ selected_cluster.k8s_offline_install }}" + csi_powerscale_driver_secret_file_path: "{{selected_cluster.csi_powerscale_driver_secret_file_path}}" + csi_powerscale_driver_values_file_path: "{{selected_cluster.csi_powerscale_driver_values_file_path}}" - name: Create a directory to store kubespray log files ansible.builtin.file: diff --git a/scheduler/roles/cluster_validation/vars/main.yml b/scheduler/roles/cluster_validation/vars/main.yml index 14e7a9359a..2337e2f82c 100644 --- a/scheduler/roles/cluster_validation/vars/main.yml +++ b/scheduler/roles/cluster_validation/vars/main.yml @@ -123,8 +123,8 @@ To install k8s provide scheduler_type: k8s. To install slurm and k8s provide sch install_scheduler_msg: "Installing job scheduler:" # # Usage: Fetch_software_config.yml -# csi_driver_powerscale_packages_file: >- -# {{ role_path }}/../../../input/config/{{ software_config.cluster_os_type }}/{{ software_config.cluster_os_version }}/csi_driver_powerscale.json +csi_driver_powerscale_packages_file: >- + {{ input_project_dir }}/config/{{ software_config.cluster_os_type }}/{{ software_config.cluster_os_version }}/csi_driver_powerscale.json # Usage: fetch_omnia_inputs.yml csi_driver_secret_file_path_success_msg: "Success. 
csi_driver_secret_file_path is valid in omnia_config.yml" From 763dec1c8a71f2f9868ed669f9f98f693b7d2692 Mon Sep 17 00:00:00 2001 From: mcas Date: Mon, 14 Jul 2025 11:15:25 +0530 Subject: [PATCH 20/76] adding the callingbmethode for csi in scheduler --- scheduler/scheduler.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index c9f5469f6c..58e7ae7198 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -153,6 +153,23 @@ roles: - k8s_amd +- name: CSI powerscale image pulling + hosts: kube_node, kube_control_plane + tasks: + - name: Pull images + ansible.builtin.include_role: + name: k8s_csi_powerscale_plugin + tasks_from: csi_powerscale_image_pull.yml + when: + - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] | default(false) | bool + - hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] + +- name: Install CSI powerscale plugin on kube control nodes + hosts: kube_control_plane + gather_facts: false + roles: + - k8s_csi_powerscale_plugin + - name: Install Slurm hosts: slurm_control_node, slurm_node, login any_errors_fatal: true From 0f8c47687ede8f0440cafaa33c49dd51a07928b9 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 14 Jul 2025 23:15:50 +0530 Subject: [PATCH 21/76] update telemetry --- prepare_oim/roles/deploy_containers/pcs/vars/main.yml | 2 +- .../telemetry_validation/tasks/validate_telemetry_config.yml | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml index 1d097be9dc..d84972735b 100644 --- a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml @@ -77,7 +77,7 @@ stop_timeout: "60s" migration_threshold: 0 ha_migration_threshold: 3 -failure_timeout: "60s" +failure_timeout: "30s" pcs_group: omnia vip_group: omnia_vip diff --git 
a/telemetry/roles/telemetry_validation/tasks/validate_telemetry_config.yml b/telemetry/roles/telemetry_validation/tasks/validate_telemetry_config.yml index f3bfefdf52..2a8deca0b4 100644 --- a/telemetry/roles/telemetry_validation/tasks/validate_telemetry_config.yml +++ b/telemetry/roles/telemetry_validation/tasks/validate_telemetry_config.yml @@ -66,11 +66,6 @@ prompt: "{{ warning_idrac_telemetry_support_true }}" when: idrac_telemetry_support -- name: Failed, Federated iDRAC Telemetry Collection not supported - ansible.builtin.fail: - msg: "{{ warning_federated_telemetry_support }}" - when: federated_idrac_telemetry_collection - # - name: Validate k8s prometheus, scrape interval and prometheus gaudi # ansible.builtin.include_tasks: validate_k8s_prometheus_prometheus_gaudi.yml # when: k8s_prometheus_support or prometheus_gaudi_support From fc4429757b177b2527ac21b1c172046f5f2587c4 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 14 Jul 2025 23:17:18 +0530 Subject: [PATCH 22/76] Update main.yml --- telemetry/roles/telemetry_validation/vars/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index ee593c3e13..98d3845a96 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -34,7 +34,6 @@ warning_idrac_telemetry_support_true: | Confirm that all BMC IPs are reachable from the OIM and respective service cluster nodes for telemetry to function properly. Make sure that Redfish is enabled and the iDRAC has a datacenter license. Also, ensure that the firmware version is greater than 4 for iDRAC9 or greater than 1 for iDRAC10." -warning_federated_telemetry_support: "Failed: Federated iDRAC Telemetry Collection is not supported yet and will be available in future releases." 
# # Usage: include_provision_config.yml # provision_config_file: "{{ input_project_dir }}/provision_config.yml" From c115d3d3ddd78769bb1e116db9d3dcf8282c1078 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 15 Jul 2025 09:18:48 +0530 Subject: [PATCH 23/76] update commit id --- .../roles/deploy_containers/idrac_telemetry/vars/main.yml | 2 +- telemetry/roles/service_k8s_telemetry/vars/main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/idrac_telemetry/vars/main.yml b/prepare_oim/roles/deploy_containers/idrac_telemetry/vars/main.yml index 25f5c10ee9..4bb4a55f23 100644 --- a/prepare_oim/roles/deploy_containers/idrac_telemetry/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/idrac_telemetry/vars/main.yml @@ -112,7 +112,7 @@ idrac_telemetry_receiver_dir_path: "{{ idrac_telemetry_dir }}/idrac_telemetry_re idrac_telemetry_github: https://github.com/dell/iDRAC-Telemetry-Reference-Tools.git idrac_telemetry_reference_git_clone_path: "/opt/omnia/telemetry/idrac_telemetry/idrac_telemetry_receiver/iDRAC-Telemetry-Reference-Tools" idrac_telemetry_reference_path: "{{ idrac_telemetry_receiver_dir_path }}/iDRAC-Telemetry-Reference-Tools" -reference_tools_stable_commit: "94e7621" +reference_tools_stable_commit: "9a1c72b" max_retries: 10 delay_count: 5 idrac_git_clone_error_msg: | diff --git a/telemetry/roles/service_k8s_telemetry/vars/main.yml b/telemetry/roles/service_k8s_telemetry/vars/main.yml index f7e922c002..26f00024d2 100644 --- a/telemetry/roles/service_k8s_telemetry/vars/main.yml +++ b/telemetry/roles/service_k8s_telemetry/vars/main.yml @@ -37,7 +37,7 @@ omnia_nfs_share: "{{ hostvars['localhost']['oim_shared_path'] }}/omnia" service_cluster_idrac_telemetry_dir_path: "{{ omnia_nfs_share }}/service_cluster/telemetry/idrac_telemetry" dir_permissions_755: "0755" idrac_telemetry_github_repo: https://github.com/dell/iDRAC-Telemetry-Reference-Tools.git -reference_tools_stable_commit: "94e7621" 
+reference_tools_stable_commit: "9a1c72b" idrac_telemetry_reference_git_clone_path: "{{ service_cluster_idrac_telemetry_dir_path }}/iDRAC-Telemetry-Reference-Tools" idrac_git_clone_error_msg: | Failed to clone iDRAC Telemetry GitHub repository from {{ idrac_telemetry_github_repo }} From 3ecb28fd1b36fd75e6911633a7c70115d35aed87 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 15 Jul 2025 12:03:35 +0530 Subject: [PATCH 24/76] Update main.yml --- prepare_oim/roles/deploy_containers/pcs/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml index d84972735b..7e339da4b2 100644 --- a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml @@ -76,7 +76,7 @@ stop_interval: "0s" stop_timeout: "60s" migration_threshold: 0 -ha_migration_threshold: 3 +ha_migration_threshold: 1 failure_timeout: "30s" pcs_group: omnia vip_group: omnia_vip From 565816afb2299646dc7b973c1227ee470f7f1b1f Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 15 Jul 2025 12:58:25 +0530 Subject: [PATCH 25/76] telemetry pod update --- .../tasks/idrac_telemetry_deployment.yml | 125 ++++-------------- .../service_k8s_telemetry/tasks/main.yml | 3 - .../tasks/mysqldb_deployment.yml | 106 --------------- .../roles/service_k8s_telemetry/vars/main.yml | 18 +-- .../tasks/add_host_goups.yml | 1 - 5 files changed, 32 insertions(+), 221 deletions(-) delete mode 100644 telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml diff --git a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml index e101b7a20f..aa12fb6dd5 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml @@ -72,7 +72,7 @@ name: 
"{{ idrac_telemetry_k8s_name }}" namespace: "{{ telemetry_namespace }}" spec: - serviceName: "{{ mysqldb_k8s_name }}" + serviceName: "{{ idrac_telemetry_service_name }}" replicas: "{{ statefulset_replicas }}" selector: matchLabels: @@ -198,104 +198,25 @@ requests: storage: "{{ mysqldb_storage_size | default('10Gi') }}" -# - name: Deploy idrac-telemetry pod -# kubernetes.core.k8s: -# state: present -# definition: -# apiVersion: apps/v1 -# kind: Deployment -# metadata: -# name: "{{ idrac_telemetry_k8s_name }}" -# namespace: "{{ telemetry_namespace }}" -# labels: -# app: "{{ idrac_telemetry_k8s_name }}" -# spec: -# selector: -# matchLabels: -# app: "{{ idrac_telemetry_k8s_name }}" -# replicas: 1 -# strategy: -# type: RollingUpdate -# template: -# metadata: -# labels: -# app: "{{ idrac_telemetry_k8s_name }}" -# spec: -# volumes: -# - name: telemetry-reference-tools -# hostPath: -# path: "{{ idrac_telemetry_reference_git_clone_path }}" -# type: Directory - -# hostAliases: -# - ip: "127.0.0.1" -# hostnames: -# - "activemq" -# # - ip: "{{ prometheus_svc_ip.stdout }}" -# # hostnames: -# # - "prometheus" -# - ip: "127.0.0.1" -# hostnames: -# - "mysqldb" - -# containers: -# - name: activemq -# image: "{{ activemq_image }}" -# imagePullPolicy: "IfNotPresent" -# ports: -# - containerPort: "{{ activemq_http_port_1 }}" -# - containerPort: "{{ activemq_http_port_2 }}" - -# - name: idrac-telemetry-receiver -# image: "{{ go_image }}" -# imagePullPolicy: "IfNotPresent" -# volumeMounts: -# - mountPath: /go/src/github.com/telemetry-reference-tools -# name: telemetry-reference-tools -# workingDir: /go/src/github.com/telemetry-reference-tools -# env: -# - name: MESSAGEBUS_HOST -# value: activemq -# - name: MESSAGEBUS_PORT -# value: "{{ messagebus_http_port | string }}" -# - name: CONFIGUI_HTTP_PORT -# value: "{{ configui_http_port | string }}" -# - name: MYSQL_DATABASE -# value: "{{ mysqldb_name }}" -# - name: MYSQL_USER -# valueFrom: -# secretKeyRef: -# name: "{{ mysqldb_secrets_name 
}}" -# key: mysqldb_user -# - name: MYSQL_PASSWORD -# valueFrom: -# secretKeyRef: -# name: "{{ mysqldb_secrets_name }}" -# key: mysqldb_password -# - name: MYSQL_HOST -# value: mysqldb -# - name: MYSQL_HOST_PORT -# value: "{{ mysql_svc_port.stdout | string }}" -# command: -# - "/bin/sh" -# - "-c" -# args: ["./scripts/example/idrac-telemetry-receiver.sh"] - -# - name: prometheus-pump -# image: "{{ go_image }}" -# imagePullPolicy: "IfNotPresent" -# volumeMounts: -# - mountPath: /go/src/github.com/telemetry-reference-tools -# name: telemetry-reference-tools -# workingDir: /go/src/github.com/telemetry-reference-tools -# env: -# - name: MESSAGEBUS_HOST -# value: activemq -# - name: MESSAGEBUS_PORT -# value: "{{ messagebus_http_port | string }}" -# - name: PROMETHEUSDB_SERVER -# value: prometheus -# command: -# - "/bin/sh" -# - "-c" -# args: ["go run cmd/prometheuspump/prometheuspump.go"] +- name: Service for idrac telemetry + kubernetes.core.k8s: + state: present + definition: + apiVersion: v1 + kind: Service + metadata: + name: "{{ idrac_telemetry_service_name }}" + namespace: "{{ telemetry_namespace }}" + labels: + app: "{{ idrac_telemetry_service_name }}" + spec: + clusterIP: None + ports: + - name: mysql-port-1 + port: "{{ mysqldb_container_port1 }}" + - name: mysql-port-2 + port: "{{ mysqldb_container_port2 }}" + - name: pump-port + port: "{{ prometheus_pump_port }}" + selector: + app: "{{ idrac_telemetry_k8s_name }}" \ No newline at end of file diff --git a/telemetry/roles/service_k8s_telemetry/tasks/main.yml b/telemetry/roles/service_k8s_telemetry/tasks/main.yml index 622d691d1d..ee818b1ab6 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/main.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/main.yml @@ -24,9 +24,6 @@ - name: Configure k8s secrets ansible.builtin.include_tasks: secrets_creation.yml - # - name: Deployment of mysqldb pod - # ansible.builtin.include_tasks: mysqldb_deployment.yml - - name: Deployment of prometheus pod 
ansible.builtin.include_tasks: prometheus_deployment.yml diff --git a/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml deleted file mode 100644 index f9e6024eba..0000000000 --- a/telemetry/roles/service_k8s_telemetry/tasks/mysqldb_deployment.yml +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Persistent volume claim for mysqldb - kubernetes.core.k8s: - state: present - definition: - apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: "{{ mysqldb_pvc_name }}" - namespace: "{{ telemetry_namespace }}" - spec: - storageClassName: "{{ storage_class_name }}" - accessModes: - - ReadWriteOnce - resources: - requests: - storage: "{{ mysqldb_storage }}" - -- name: Mysqldb pod definition - kubernetes.core.k8s: - state: present - definition: - apiVersion: apps/v1 - kind: StatefulSet - metadata: - name: "{{ mysqldb_k8s_name }}" - namespace: "{{ telemetry_namespace }}" - spec: - selector: - matchLabels: - app: "{{ mysqldb_k8s_name }}" - serviceName: "{{ mysqldb_k8s_name }}" - replicas: "{{ statefulset_replicas }}" - template: - metadata: - labels: - app: "{{ mysqldb_k8s_name }}" - spec: - volumes: - - name: mysqldb-pvc - persistentVolumeClaim: - claimName: "{{ mysqldb_pvc_name }}" - - containers: - - name: mysqldb - image: "{{ mysql_image }}" - imagePullPolicy: 
"IfNotPresent" - volumeMounts: - - mountPath: /var/lib/mysql/ - name: mysqldb-pvc - env: - - name: MYSQL_DATABASE - value: "{{ mysqldb_name }}" - - name: MYSQL_USER - valueFrom: - secretKeyRef: - name: "{{ mysqldb_secrets_name }}" - key: mysqldb_user - - name: MYSQL_PASSWORD - valueFrom: - secretKeyRef: - name: "{{ mysqldb_secrets_name }}" - key: mysqldb_password - - name: MYSQL_ROOT_PASSWORD - valueFrom: - secretKeyRef: - name: "{{ mysqldb_secrets_name }}" - key: mysqldb_root_password - ports: - - containerPort: "{{ mysqldb_container_port1 }}" - - containerPort: "{{ mysqldb_container_port2 }}" - -- name: Service for mysqldb - kubernetes.core.k8s: - state: present - definition: - apiVersion: v1 - kind: Service - metadata: - name: "{{ mysqldb_k8s_name }}" - namespace: "{{ telemetry_namespace }}" - labels: - app: "{{ mysqldb_k8s_name }}" - spec: - type: NodePort - ports: - - name: mysqldb-http-port-1 - port: "{{ mysqldb_container_port1 }}" - - name: mysqldb-http-port-2 - port: "{{ mysqldb_container_port2 }}" - selector: - app: "{{ mysqldb_k8s_name }}" diff --git a/telemetry/roles/service_k8s_telemetry/vars/main.yml b/telemetry/roles/service_k8s_telemetry/vars/main.yml index f7e922c002..b0d75fd143 100644 --- a/telemetry/roles/service_k8s_telemetry/vars/main.yml +++ b/telemetry/roles/service_k8s_telemetry/vars/main.yml @@ -20,15 +20,6 @@ k8s_not_installed_fail_msg: "Failed. Kubernetes installation required. storage_class_missing_fail_msg: "Failed. StorageClass {{ storage_class_name }} is not present in the cluster. To resolve this, create the StorageClass by running scheduler/service_k8s_cluster.yml playbook." 
-# Usage: mysqldb_deployment.yml -mysqldb_storage: 1Gi -mysqldb_pvc_name: mysqldb-storage-claim -mysqldb_k8s_name: mysqldb -mysqldb_name: "idrac_telemetrydb" -mysqldb_container_port1: 3306 -mysqldb_container_port2: 33060 -mysql_image: "docker.io/library/mysql:9.3.0" - # Usage: k8s_secrets.yml mysqldb_secrets_name: mysqldb-credentials @@ -58,6 +49,15 @@ activemq_http_port_1: 8161 activemq_http_port_2: 61616 messagebus_http_port: 61613 configui_http_port: 8082 +mysqldb_storage: 1Gi +mysqldb_pvc_name: mysqldb-storage-claim +mysqldb_k8s_name: mysqldb +mysqldb_name: "idrac_telemetrydb" +idrac_telemetry_service_name: "idrac_telemetry_service" +mysqldb_container_port1: 3306 +mysqldb_container_port2: 33060 +prometheus_pump_port: 2112 +mysql_image: "docker.io/library/mysql:9.3.0" # Usage: prometheus_deployment.yml prometheus_pvc_name: "prometheus-pvc" diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index 32c7e4f4fe..3d3ddd0e79 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -73,7 +73,6 @@ when: - enable_oim_ha - - name: Create telemetry group ansible.builtin.add_host: hostname: "{{ item }}" From 62665459049d618b00185e0945ea7741d18efb07 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 15 Jul 2025 15:42:13 +0530 Subject: [PATCH 26/76] Update main.yml --- prepare_oim/roles/deploy_containers/pcs/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml index 7e339da4b2..1dd93646ff 100644 --- a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml @@ -77,7 +77,7 @@ stop_timeout: "60s" migration_threshold: 0 ha_migration_threshold: 1 -failure_timeout: "30s" +failure_timeout: "60s" pcs_group: omnia 
vip_group: omnia_vip From 2dbd598420226fedc5711afb99005ed93960fb55 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 15 Jul 2025 15:42:25 +0530 Subject: [PATCH 27/76] Update main.yml --- prepare_oim/roles/deploy_containers/pcs/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml index 1dd93646ff..1d097be9dc 100644 --- a/prepare_oim/roles/deploy_containers/pcs/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/pcs/vars/main.yml @@ -76,7 +76,7 @@ stop_interval: "0s" stop_timeout: "60s" migration_threshold: 0 -ha_migration_threshold: 1 +ha_migration_threshold: 3 failure_timeout: "60s" pcs_group: omnia vip_group: omnia_vip From aa4b43a44d00d964f5a5b32ae694adb2d03d71fc Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:31:19 +0530 Subject: [PATCH 28/76] Update ansible.cfg --- accelerator/ansible.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/accelerator/ansible.cfg b/accelerator/ansible.cfg index 22858e204c..18cb3ca935 100644 --- a/accelerator/ansible.cfg +++ b/accelerator/ansible.cfg @@ -5,6 +5,8 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +library = ../common/library/modules +module_utils = ../common/library/module_utils [persistent_connection] command_timeout = 180 @@ -12,4 +14,4 @@ connect_timeout = 180 [ssh_connection] retries = 3 -ssh_args = -o ControlMaster=auto -o ControlPersist=60 -o ConnectTimeout=60 \ No newline at end of file +ssh_args = -o ControlMaster=auto -o ControlPersist=60 -o ConnectTimeout=60 From 59318d592f03473dced0f2829b7c1b76e5d6c576 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:32:10 +0530 Subject: [PATCH 29/76] Update ansible.cfg --- provision/ansible.cfg | 1 + 1 file changed, 1 insertion(+) diff 
--git a/provision/ansible.cfg b/provision/ansible.cfg index a74f898989..de81339c87 100644 --- a/provision/ansible.cfg +++ b/provision/ansible.cfg @@ -6,6 +6,7 @@ forks = 5 timeout = 180 executable = /bin/bash library = ../common/library/modules +module_utils = ../common/library/module_utils [persistent_connection] command_timeout = 180 From 46eabf9cc23e85011739527c99b3b170917e216f Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:32:47 +0530 Subject: [PATCH 30/76] Update ansible.cfg --- tools/ansible.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ansible.cfg b/tools/ansible.cfg index 9ef8540c34..c8a8ee13bb 100644 --- a/tools/ansible.cfg +++ b/tools/ansible.cfg @@ -5,6 +5,8 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +library = ../common/library/modules +module_utils = ../common/library/module_utils [persistent_connection] command_timeout = 180 From ff212a900e68f9abfb4f339483e50150aec5c998 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 15 Jul 2025 17:16:41 +0530 Subject: [PATCH 31/76] restore enable telemetry changes --- .../tasks/fetch_pods_details.yml | 60 ------------------- .../tasks/initiate_telemetry.yml | 6 +- .../tasks/add_host_goups.yml | 7 --- .../roles/telemetry_validation/vars/main.yml | 7 +-- telemetry/telemetry.yml | 4 +- 5 files changed, 4 insertions(+), 80 deletions(-) delete mode 100644 telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml diff --git a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml b/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml deleted file mode 100644 index 4ef1e5002a..0000000000 --- a/telemetry/roles/idrac_telemetry/tasks/fetch_pods_details.yml +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -- name: Wait for idrac-telemetry pod to come to ready state - block: - - name: Wait for idrac-telemetry pod to come to ready state - ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ idrac_telemetry_k8s_name }}" - changed_when: false - rescue: - - name: Failed - idrac-telemetry pod is not running - ansible.builtin.fail: - msg: "{{ idrac_telemetry_pod_wait_fail_msg }}" - -- name: Wait for mysqldb pod to come to ready state - block: - - name: Wait for mysqldb pod to come to ready state - ansible.builtin.command: kubectl wait --for=condition=ready --timeout=10m -n "{{ telemetry_namespace }}" pod -l app="{{ mysqldb_k8s_name }}" - changed_when: false - rescue: - - name: Failed - mysqldb pod is not running - ansible.builtin.fail: - msg: "{{ mysqldb_pod_wait_fail_msg }}" - -- name: Get mysqlDB svc port - ansible.builtin.command: kubectl get svc "{{ mysqldb_k8s_name }}" -n "{{ telemetry_namespace }}" -o=jsonpath='{.spec.ports[0].nodePort}' - changed_when: false - register: mysql_svc_port - -- name: Get mysqlDB pod node name - ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ mysqldb_k8s_name }}" -o jsonpath="{.items[0].spec.nodeName}" - changed_when: false - register: mysql_node_name - -- name: Get mysqlDB node IP - ansible.builtin.command: kubectl get nodes -o=jsonpath='{.items[?(@.metadata.name=="{{ 
mysql_node_name.stdout }}")].status.addresses[?(@.type=="InternalIP")].address}' # noqa: yaml[line-length] - changed_when: false - register: mysql_node_ip - -- name: Get idrac-telemetry pod name - ansible.builtin.command: kubectl get pods -n "{{ telemetry_namespace }}" -l app="{{ idrac_telemetry_k8s_name }}" -o jsonpath="{.items[0].metadata.name}" - changed_when: false - register: idrac_telemetry_pod - -- name: Set telemetry pod details - ansible.builtin.set_fact: - idrac_telemetry_pod_name: "{{ idrac_telemetry_pod.stdout }}" - mysqldb_host: "{{ mysql_node_ip.stdout }}" - mysqldb_container_port: "{{ mysql_svc_port.stdout }}" diff --git a/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml b/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml index 48c48e70cb..44e98adf7c 100644 --- a/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml +++ b/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry.yml @@ -30,10 +30,6 @@ ansible.builtin.include_vars: "{{ playbook_dir }}/roles/telemetry_validation/vars/main.yml" no_log: true - - name: Fetch pod details for federated telemetry - ansible.builtin.include_tasks: fetch_pods_details.yml - when: hostvars['localhost']['federated_idrac_telemetry_collection'] - - name: Initialize variables ansible.builtin.set_fact: telemetry_idrac: [] @@ -177,7 +173,7 @@ python3 ./ConfigurationScripts/EnableOrDisableAllTelemetryReports.py -ip "{{ item }}" -u "{{ hostvars['localhost']['bmc_username'] }}" -p "{{ hostvars['localhost']['bmc_password'] }}" -s Enabled args: - chdir: "{{ idrac_telemetry_scripting_git_clone_path }}" + chdir: "{{ telemetry_dir_path }}/{{ idrac_telemetry_scripting_folder }}" with_items: "{{ telemetry_idrac }}" changed_when: false no_log: true diff --git a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml index 3d3ddd0e79..429c8524c7 100644 --- a/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml +++ 
b/telemetry/roles/telemetry_validation/tasks/add_host_goups.yml @@ -72,10 +72,3 @@ groups: "active_oim_node" when: - enable_oim_ha - -- name: Create telemetry group - ansible.builtin.add_host: - hostname: "{{ item }}" - groups: "{{ telemetry_host_group }}" - ansible_connection: "{{ 'local' if item == 'localhost' else 'ssh' }}" - loop: "{{ telemetry_host }}" diff --git a/telemetry/roles/telemetry_validation/vars/main.yml b/telemetry/roles/telemetry_validation/vars/main.yml index b11c770d1d..ee593c3e13 100644 --- a/telemetry/roles/telemetry_validation/vars/main.yml +++ b/telemetry/roles/telemetry_validation/vars/main.yml @@ -120,12 +120,7 @@ invalid_parent_tags_message: | If service nodes are not provisioned or compute node provisioning not initiated, please run discovery_provision.yml to provision service nodes and compute nodes from service nodes. And then run `telemetry.yml` playbook again. -telemetry_host_group: "telemetry_group" -telemetry_host: >- - {{ groups['kube_control_plane'] - if federated_idrac_telemetry_collection | default(false) - else ['localhost'] - }} + # Usage: include_high_availability_config.yml high_availability_config_path: "{{ input_project_dir }}/high_availability_config.yml" fail_msg_high_availability_config_file: "high_availability_config.yml file doesn't exist." 
diff --git a/telemetry/telemetry.yml b/telemetry/telemetry.yml index 8c74d3fa2e..0b5c9a14d9 100644 --- a/telemetry/telemetry.yml +++ b/telemetry/telemetry.yml @@ -124,8 +124,8 @@ name: service_k8s_telemetry - name: Enable idrac telemetry in OIM - hosts: telemetry_group - connection: ssh + hosts: localhost + connection: local gather_facts: false tasks: - name: Enable idrac telemetry From 34419883bbfb69cbf95251487faf2c202820b950 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Tue, 15 Jul 2025 17:17:35 +0530 Subject: [PATCH 32/76] Update idrac_telemetry_deployment.yml --- .../service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml index aa12fb6dd5..9d1e1d2012 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml @@ -219,4 +219,4 @@ - name: pump-port port: "{{ prometheus_pump_port }}" selector: - app: "{{ idrac_telemetry_k8s_name }}" \ No newline at end of file + app: "{{ idrac_telemetry_k8s_name }}" From 4f34cca3ee62a01e677a792c2ac05ae833c387ec Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 15 Jul 2025 18:25:48 +0530 Subject: [PATCH 33/76] Update idrac_telemetry_receiver_init.sh.j2 --- .../templates/idrac_telemetry_receiver_init.sh.j2 | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 b/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 index b278170b59..7f2c9a6904 100644 --- a/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 +++ 
b/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 @@ -18,12 +18,5 @@ # Script to initialize idrac-telemetry-receiver -go run cmd/dbdiscauth/dbdiscauth.go & -PID1=$! -go run cmd/configui/configui.go & -PID2=$! -go run cmd/redfishread/redfishread.go & -PID3=$! - -# Wait for all processes -wait $PID1 $PID2 $PID3 +exec /go/src/github.com/telemetry-reference-tools/scripts/example/idrac-telemetry-receiver.sh +nohup go run cmd/redfishread/redfishread.go & \ No newline at end of file From 63c0368d09d44ddc5224173254da2b42c64becc7 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 15 Jul 2025 18:26:19 +0530 Subject: [PATCH 34/76] Update idrac_telemetry_receiver_init.sh.j2 --- .../templates/idrac_telemetry_receiver_init.sh.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 b/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 index 7f2c9a6904..bf9bbfd337 100644 --- a/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 +++ b/prepare_oim/roles/deploy_containers/idrac_telemetry/templates/idrac_telemetry_receiver_init.sh.j2 @@ -19,4 +19,4 @@ # Script to initialize idrac-telemetry-receiver exec /go/src/github.com/telemetry-reference-tools/scripts/example/idrac-telemetry-receiver.sh -nohup go run cmd/redfishread/redfishread.go & \ No newline at end of file +nohup go run cmd/redfishread/redfishread.go & From 518020062670ffcf4df2eb1b81b9fc69b8862d72 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Wed, 16 Jul 2025 11:22:21 +0530 Subject: [PATCH 35/76] Dynamically updating replica count usnig node count --- .../roles/service_k8s_telemetry/tasks/prereq_checks.yml | 6 ++++++ telemetry/roles/service_k8s_telemetry/vars/main.yml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/telemetry/roles/service_k8s_telemetry/tasks/prereq_checks.yml b/telemetry/roles/service_k8s_telemetry/tasks/prereq_checks.yml index 35cec831e2..0f61c58de9 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/prereq_checks.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/prereq_checks.yml @@ -57,3 +57,9 @@ ansible.builtin.fail: msg: "{{ storage_class_missing_fail_msg }}" when: storage_class_name not in (sc_info.resources | map(attribute='metadata.name') | list) + +- name: Get Kubernetes service cluster node count + kubernetes.core.k8s_info: + api_version: v1 + kind: Node + register: node_count diff --git a/telemetry/roles/service_k8s_telemetry/vars/main.yml b/telemetry/roles/service_k8s_telemetry/vars/main.yml index b0d75fd143..01ff2011c4 100644 --- a/telemetry/roles/service_k8s_telemetry/vars/main.yml +++ b/telemetry/roles/service_k8s_telemetry/vars/main.yml @@ -65,6 +65,6 @@ prometheus_k8s_name: "prometheus" prometheus_configmap_name: "prometheus-config" prometheus_image: "docker.io/prom/prometheus:v3.4.1" prometheus_container_port: 9090 -statefulset_replicas: 1 +statefulset_replicas: "{{ node_count.resources | length + 1 }}" prometheus_storage: 1Gi prometheus_service_port: 30090 From 69c2111da68d8de7f6b871a70fc5111028492303 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Wed, 16 Jul 2025 11:24:33 +0530 Subject: [PATCH 36/76] lint fix --- .../tasks/idrac_telemetry_deployment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml index 9d1e1d2012..59f236ff27 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml @@ -62,7 +62,7 @@ msg: "{{ idrac_script_git_clone_error_msg.splitlines() | join(' ') }}" when: clone_idrac_script is failed -- name: idrac-telemetry 
StatefulSet +- name: Deployment definition for idrac-telemetry StatefulSet kubernetes.core.k8s: state: present definition: @@ -193,7 +193,7 @@ - metadata: name: mysqldb-pvc spec: - accessModes: [ "ReadWriteOnce" ] + accessModes: ["ReadWriteOnce"] resources: requests: storage: "{{ mysqldb_storage_size | default('10Gi') }}" From dee408720f5a615a49c1f8950e5663cf7d494631 Mon Sep 17 00:00:00 2001 From: mcas Date: Wed, 16 Jul 2025 11:32:17 +0530 Subject: [PATCH 37/76] Added the validation code for the secret and value yaml file for csi --- .../validation_flows/common_validation.py | 181 ++++++++++++++++-- scheduler/service_k8s_cluster.yml | 17 ++ 2 files changed, 185 insertions(+), 13 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/common_validation.py b/common/library/module_utils/input_validation/validation_flows/common_validation.py index c77e16ec11..a789cac015 100644 --- a/common/library/module_utils/input_validation/validation_flows/common_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/common_validation.py @@ -32,7 +32,9 @@ ) from ansible.module_utils.input_validation.validation_flows import scheduler_validation - +from ansible.module_utils.local_repo.common_functions import ( + load_yaml_file +) from ansible.module_utils.local_repo.software_utils import ( load_json, set_version_variables, @@ -947,6 +949,149 @@ def is_ip_in_range(ip_str, ip_range_str): except ValueError: return False + +def validate_secret_isilon_clusters(data): + cluster_errors = [] + + clusters = data.get("isilonClusters") + + # Check if isilonClusters is a defined, non-empty list + if not isinstance(clusters, list) or len(clusters) == 0: + cluster_errors.append("isilonClusters must be a non-empty list.") + return cluster_errors # Stop further checks + + for idx, item in enumerate(clusters): + cluster_prefix = f"Cluster {idx + 1}" + + # Validate clusterName + if not item.get("clusterName") or not 
isinstance(item["clusterName"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'clusterName'.") + + # Validate username + if not item.get("username") or not isinstance(item["username"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'username'.") + + # Validate password + if not item.get("password") or not isinstance(item["password"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'password'.") + + # Validate endpoint + if not item.get("endpoint") or not isinstance(item["endpoint"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'endpoint'.") + + # Validate endpointPort if defined + if "endpointPort" in item: + if not isinstance(item["endpointPort"], int) or not (0 < item["endpointPort"] < 65536): + cluster_errors.append(f"{cluster_prefix}: 'endpointPort' must be an integer between 1 and 65535.") + + # Validate isDefault + if "isDefault" not in item or not isinstance(item["isDefault"], bool): + cluster_errors.append(f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") + + # Validate skipCertificateValidation if defined + if "skipCertificateValidation" in item: + if item["skipCertificateValidation"] is not True: + cluster_errors.append(f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") + + # Validate isiPath if defined + if "isiPath" in item: + if not isinstance(item["isiPath"], str) or not item["isiPath"].startswith('/'): + cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a valid Unix absolute path.") + + # Validate isiVolumePathPermissions if defined + if "isiVolumePathPermissions" in item: + perms = item["isiVolumePathPermissions"] + if not isinstance(perms, str) or not perms.strip().isdigit(): + cluster_errors.append(f"{cluster_prefix}: 'isiVolumePathPermissions' must be a non-empty string of digits.") + + return cluster_errors + +def validate_value_file_inputs(values_data): + value_errors = [] + + def 
add_error(field_path, value, msg): + value_errors.append( + f"Validation Error - {field_path}: '{value}' -> {msg}" + ) + + # Helper to safely get nested values + def get_nested(data, keys, default=None): + for key in keys: + if not isinstance(data, dict) or key not in data: + return default + data = data[key] + return data + + # 1. controller.controllerCount == 1 + controller_count = get_nested(values_data, ["controller", "controllerCount"]) + if controller_count != 1: + add_error("controller.controllerCount", controller_count, "Must be 1") + + # 2. controller.replication.enabled == false + replication_enabled = get_nested(values_data, ["controller", "replication", "enabled"]) + if replication_enabled is None or replication_enabled is not False: + add_error("controller.replication.enabled", replication_enabled, "Must be false") + + # 3. controller.resizer.enabled in [true, false] + resizer_enabled = get_nested(values_data, ["controller", "resizer", "enabled"]) + if resizer_enabled not in [True, False]: + add_error("controller.resizer.enabled", resizer_enabled, "Must be true or false") + + # 4. controller.snapshot.enabled == true + snapshot_enabled = get_nested(values_data, ["controller", "snapshot", "enabled"]) + if snapshot_enabled is not True: + add_error("controller.snapshot.enabled", snapshot_enabled, "Must be true") + + # 5. endpointPort is int in 1..65535 + endpoint_port = values_data.get("endpointPort") + if endpoint_port is None or not isinstance(endpoint_port, int) or not (1 <= endpoint_port <= 65535): + add_error("endpointPort", endpoint_port, "Must be between 1 and 65535") + + # 6. skipCertificateValidation == true + skip_cert = values_data.get("skipCertificateValidation") + if skip_cert is not True: + add_error("skipCertificateValidation", skip_cert, "Must be true") + + # 7. isiAuthType in [0, 1] + isi_auth = values_data.get("isiAuthType") + if isi_auth not in [0, 1]: + add_error("isiAuthType", isi_auth, "Must be 0 or 1") + + # 8. 
isiAccessZone is non-empty string + isi_access = values_data.get("isiAccessZone") + if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): + add_error("isiAccessZone", isi_access, "Must be a non-empty string") + + # 9. isiPath is Unix absolute path + isi_path = values_data.get("isiPath") + if not isinstance(isi_path, str) or not isi_path.startswith("/"): + add_error("isiPath", isi_path, "Must be a valid Unix absolute path") + + # 10. isiVolumePathPermissions is a non-empty string + permissions = values_data.get("isiVolumePathPermissions") + if not permissions or not isinstance(permissions, str) or not permissions.strip(): + add_error("isiVolumePathPermissions", permissions, "Must be a valid octal string") + + return value_errors + +def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path): + #valiadte secret file inputs + secret_filename = os.path.basename(secret_file_path) + secret_data = load_yaml_file(secret_file_path.strip()) + secret_validation_errors = validate_secret_isilon_clusters(secret_data) + if secret_validation_errors: + for err in secret_validation_errors: + errors.append(create_error_msg(f"Powerscale Secret File Validation Error: {err}")) + + #validate values file input + values_filename= os.path.basename(values_file_path) + values_data = load_yaml_file(values_file_path.strip()) + values_validation_errros = validate_value_file_inputs(values_data) + if values_validation_errros: + for value_err in values_validation_errros: + errors.append(create_error_msg(f"Powerscale Secret File Validation Error: {value_err}")) + + def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors): """ Validates Kubernetes cluster configurations. 
@@ -1033,11 +1178,18 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro None, en_us_validation_msg.ip_overlap_fail_msg)) #csi validation - if "csi_driver_powerscale" in softwares and "k8s" in softwares: + if ( + "csi_driver_powerscale" in softwares + and ("k8s" in softwares or "service_k8s" in softwares) + ): + csi_powerscale_driver_secret_file_path = kluster.get("csi_powerscale_driver_secret_file_path") csi_powerscale_driver_values_file_path = kluster.get("csi_powerscale_driver_values_file_path") - # Validate if secret file path is empty - if not csi_powerscale_driver_secret_file_path: + + # Validate secret file path + if not csi_powerscale_driver_secret_file_path or \ + not csi_powerscale_driver_secret_file_path.strip() or \ + not os.path.exists(csi_powerscale_driver_secret_file_path.strip()): errors.append( create_error_msg( "csi_powerscale_driver_secret_file_path", @@ -1045,16 +1197,19 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro en_us_validation_msg.CSI_DRIVER_SECRET_FAIL_MSG, ) ) - - # Validate if values file path is empty - if not csi_powerscale_driver_values_file_path: - errors.append( - create_error_msg( - "csi_powerscale_driver_values_file_path", - csi_powerscale_driver_values_file_path, - en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG, + else: + # If secret path is valid, ensure values path is also valid + if not csi_powerscale_driver_values_file_path or \ + not csi_powerscale_driver_values_file_path.strip() or \ + not os.path.exists(csi_powerscale_driver_values_file_path.strip()): + errors.append( + create_error_msg( + "csi_powerscale_driver_values_file_path", + csi_powerscale_driver_values_file_path, + en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG, + ) ) - ) + validate_powerscale_secret_and_values_file(csi_powerscale_driver_secret_file_path,csi_powerscale_driver_values_file_path) def validate_omnia_config( diff --git a/scheduler/service_k8s_cluster.yml 
b/scheduler/service_k8s_cluster.yml index f6f71987b7..ec68d90b0b 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -124,3 +124,20 @@ gather_facts: false roles: - update_containerd_config + +- name: CSI powerscale image pulling + hosts: kube_node, kube_control_plane + tasks: + - name: Pull images + ansible.builtin.include_role: + name: k8s_csi_powerscale_plugin + tasks_from: csi_powerscale_image_pull.yml + when: + - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] | default(false) | bool + - hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] + +- name: Install CSI powerscale plugin on kube control nodes + hosts: kube_control_plane + gather_facts: false + roles: + - k8s_csi_powerscale_plugin From 8bf87364139a276f9d017c54b458ba5a09c793c5 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Wed, 16 Jul 2025 11:32:37 +0530 Subject: [PATCH 38/76] Update main.yml --- telemetry/roles/service_k8s_telemetry/vars/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/service_k8s_telemetry/vars/main.yml b/telemetry/roles/service_k8s_telemetry/vars/main.yml index 01ff2011c4..43eff2331c 100644 --- a/telemetry/roles/service_k8s_telemetry/vars/main.yml +++ b/telemetry/roles/service_k8s_telemetry/vars/main.yml @@ -65,6 +65,6 @@ prometheus_k8s_name: "prometheus" prometheus_configmap_name: "prometheus-config" prometheus_image: "docker.io/prom/prometheus:v3.4.1" prometheus_container_port: 9090 -statefulset_replicas: "{{ node_count.resources | length + 1 }}" +statefulset_replicas: "{{ node_count.resources | length }}" prometheus_storage: 1Gi prometheus_service_port: 30090 From ce3982652f037629a3aa93a1c1510414f0937879 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Wed, 16 Jul 2025 11:36:31 +0530 Subject: [PATCH 39/76] Update idrac_telemetry_deployment.yml --- 
.../service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml index 59f236ff27..8bb148e583 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/idrac_telemetry_deployment.yml @@ -196,7 +196,7 @@ accessModes: ["ReadWriteOnce"] resources: requests: - storage: "{{ mysqldb_storage_size | default('10Gi') }}" + storage: "{{ mysqldb_storage }}" - name: Service for idrac telemetry kubernetes.core.k8s: From 53bef19fc6854efb840951a3c84777ab4fb6daef Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Wed, 16 Jul 2025 13:00:34 +0530 Subject: [PATCH 40/76] removing unused vars --- telemetry/roles/idrac_telemetry/vars/main.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/vars/main.yml b/telemetry/roles/idrac_telemetry/vars/main.yml index fbd1bf7fa2..1031dc006a 100644 --- a/telemetry/roles/idrac_telemetry/vars/main.yml +++ b/telemetry/roles/idrac_telemetry/vars/main.yml @@ -87,12 +87,3 @@ telemetry_report_sn: | {% for item in failed_idrac + invalid_idrac_list %} - {{ item }} {% endfor %} - -# Usage: fetch_pods_details.yml -telemetry_namespace: "telemetry" -idrac_telemetry_k8s_name: idrac-telemetry -mysqldb_k8s_name: mysqldb -idrac_telemetry_pod_wait_fail_msg: "Execution failed as the idrac-telemetry pods did not start within the expected time. - Please re-run the playbook after verifying that the idrac-telemetry pods are in running state by executing the command 'kubectl get pods -A.'" -mysqldb_pod_wait_fail_msg: "Execution failed as the mysqldb pods did not start within the expected time. 
- Please re-run the playbook after verifying that the mysqldb pods are in running state by executing the command 'kubectl get pods -A.'" From 560514b93842e6bf2027593806291ef0fe35d58d Mon Sep 17 00:00:00 2001 From: mcas Date: Wed, 16 Jul 2025 16:43:31 +0530 Subject: [PATCH 41/76] Modified the input validation for secret file as it will be encrypted --- .../input_validation/schema/omnia_config.json | 8 +- .../validation_flows/common_validation.py | 110 ++++++--- ...csi_powerscale_driver_input_validation.yml | 219 ------------------ .../roles/cluster_validation/vars/main.yml | 10 - scheduler/scheduler.yml | 6 + scheduler/service_k8s_cluster.yml | 2 +- 6 files changed, 91 insertions(+), 264 deletions(-) diff --git a/common/library/module_utils/input_validation/schema/omnia_config.json b/common/library/module_utils/input_validation/schema/omnia_config.json index ee063a12a8..d1303df6b3 100644 --- a/common/library/module_utils/input_validation/schema/omnia_config.json +++ b/common/library/module_utils/input_validation/schema/omnia_config.json @@ -63,12 +63,12 @@ "csi_powerscale_driver_secret_file_path": { "description": "Absolute file path for the secret.yaml file.", "type": "string", - "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" + "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" }, "csi_powerscale_driver_values_file_path": { "description": "File path for the values.yaml file.", "type": "string", - "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" + "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" } }, "required": [ @@ -150,12 +150,12 @@ "csi_powerscale_driver_secret_file_path": { "description": "Absolute file path for the secret.yaml file.", "type": "string", - "pattern": "^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" + "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" }, "csi_powerscale_driver_values_file_path": { "description": "File path for the values.yaml file.", "type": "string", - "pattern": 
"^$|^/?([a-zA-Z0-9_-]+(/?[a-zA-Z0-9_-]+)*)$" + "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" } }, "required": [ diff --git a/common/library/module_utils/input_validation/validation_flows/common_validation.py b/common/library/module_utils/input_validation/validation_flows/common_validation.py index a789cac015..66d04d1702 100644 --- a/common/library/module_utils/input_validation/validation_flows/common_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/common_validation.py @@ -19,11 +19,10 @@ import os import ipaddress import yaml +import subprocess from ast import literal_eval import ansible.module_utils.input_validation.common_utils.data_fetch as get import ansible.module_utils.input_validation.common_utils.data_validation as validate -from ansible.modules.validate_input import generate_log_failure_message - from ansible.module_utils.input_validation.common_utils import ( validation_utils, config, @@ -32,9 +31,6 @@ ) from ansible.module_utils.input_validation.validation_flows import scheduler_validation -from ansible.module_utils.local_repo.common_functions import ( - load_yaml_file -) from ansible.module_utils.local_repo.software_utils import ( load_json, set_version_variables, @@ -1074,25 +1070,78 @@ def get_nested(data, keys, default=None): return value_errors -def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path): +def encrypt_file(secret_file_path, vault_secret_file_path): + cmd = [ + "ansible-vault", + "encrypt", + secret_file_path, + "--vault-password-file", + vault_secret_file_path, + ] + return validation_utils.run_subprocess(cmd) + +def decrypt_file(secret_file_path, vault_secret_file_path): + cmd = [ + "ansible-vault", + "decrypt", + secret_file_path, + "--vault-password-file", + vault_secret_file_path, + ] + return validation_utils.run_subprocess(cmd) + +def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): + decrypted_file = decrypt_file(secret_file_path, 
vault_secret_file_path,) + + if decrypted_file: + try: + with open(secret_file_path, "r") as f: + data = yaml.safe_load(f) + encrypt_file(secret_file_path, vault_secret_file_path) + return data + except FileNotFoundError: + errors.append(create_error_msg("File not found", + secret_file_path, "Please check the associated file exists")) + except yaml.YAMLError as e: + errors.append(create_error_msg("Error loading yaml file", + secret_file_path, "Please check the associated file syntax")) + else: + errors.append(create_error_msg("Error occured when attempting to decrypt file.", + secret_file_path, "Please check that the assoicated vault file exists")) + return decrypted_file + +def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path,errors): #valiadte secret file inputs - secret_filename = os.path.basename(secret_file_path) - secret_data = load_yaml_file(secret_file_path.strip()) - secret_validation_errors = validate_secret_isilon_clusters(secret_data) - if secret_validation_errors: - for err in secret_validation_errors: - errors.append(create_error_msg(f"Powerscale Secret File Validation Error: {err}")) - + secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) + vault_secret_file_path= "/root/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" + #check if secret file exists + file_exists = os.path.exists(vault_secret_file_path.strip()) + + if secrets_file_encrypted: + secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) + if secret_data is None or secret_data is False: + errors.append(create_error_msg( + "Secret File Load", + secret_file_path, + "Failed to load or parse secret.yaml file. It may be invalid or empty." 
+ )) + else: + secret_validation_errors = validate_secret_isilon_clusters(secret_data) + if secret_validation_errors: + for err in secret_validation_errors: + errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) + #validate values file input - values_filename= os.path.basename(values_file_path) - values_data = load_yaml_file(values_file_path.strip()) + with open(values_file_path, "r") as f: + values_data = yaml.safe_load(f) values_validation_errros = validate_value_file_inputs(values_data) if values_validation_errros: for value_err in values_validation_errros: - errors.append(create_error_msg(f"Powerscale Secret File Validation Error: {value_err}")) + errors.append(create_error_msg(f"Powerscale Value File Validation Error: ",value_err, None)) -def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors): +def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors, + omnia_base_dir, project_name, logger, module): """ Validates Kubernetes cluster configurations. 
@@ -1142,7 +1191,7 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro f"{cluster_name} not found in high_availability_config.yml" )) pod_external_ip_range = kluster.get("pod_external_ip_range") - if not pod_external_ip_range: + if not pod_external_ip_range or str(pod_external_ip_range).strip() == "": errors.append( create_error_msg( "Pod External IP Range -", @@ -1183,33 +1232,33 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro and ("k8s" in softwares or "service_k8s" in softwares) ): - csi_powerscale_driver_secret_file_path = kluster.get("csi_powerscale_driver_secret_file_path") - csi_powerscale_driver_values_file_path = kluster.get("csi_powerscale_driver_values_file_path") + csi_secret_file_path = kluster.get("csi_powerscale_driver_secret_file_path") + csi_values_file_path = kluster.get("csi_powerscale_driver_values_file_path") # Validate secret file path - if not csi_powerscale_driver_secret_file_path or \ - not csi_powerscale_driver_secret_file_path.strip() or \ - not os.path.exists(csi_powerscale_driver_secret_file_path.strip()): + if not csi_secret_file_path or \ + not csi_secret_file_path.strip() or \ + not os.path.exists(csi_secret_file_path.strip()): errors.append( create_error_msg( "csi_powerscale_driver_secret_file_path", - csi_powerscale_driver_secret_file_path, + csi_secret_file_path, en_us_validation_msg.CSI_DRIVER_SECRET_FAIL_MSG, ) ) else: # If secret path is valid, ensure values path is also valid - if not csi_powerscale_driver_values_file_path or \ - not csi_powerscale_driver_values_file_path.strip() or \ - not os.path.exists(csi_powerscale_driver_values_file_path.strip()): + if not csi_values_file_path or \ + not csi_values_file_path.strip() or \ + not os.path.exists(csi_values_file_path.strip()): errors.append( create_error_msg( "csi_powerscale_driver_values_file_path", - csi_powerscale_driver_values_file_path, + csi_values_file_path, 
en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG, ) ) - validate_powerscale_secret_and_values_file(csi_powerscale_driver_secret_file_path,csi_powerscale_driver_values_file_path) + validate_powerscale_secret_and_values_file(csi_secret_file_path,csi_values_file_path, errors) def validate_omnia_config( @@ -1266,7 +1315,8 @@ def validate_omnia_config( ha_config = yaml.safe_load(f) for k in ["service_k8s_cluster_ha", "compute_k8s_cluster_ha"]: ha_config[k] = [xha["cluster_name"] for xha in ha_config.get(k, [])] - validate_k8s(data, admin_bmc_networks, sw_list, ha_config, tag_names, errors) + validate_k8s(data, admin_bmc_networks, sw_list, ha_config, tag_names, + errors, omnia_base_dir, project_name, logger, module) return errors def validate_telemetry_config( diff --git a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml index b681fa20b6..d382695ce4 100644 --- a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml +++ b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml @@ -57,231 +57,12 @@ --vault-password-file {{ role_path }}/../k8s_csi_powerscale_plugin/files/{{ csi_powerscale_secret_vaultname }} changed_when: false -# Validate secret file -- name: Validate isilonClusters configuration - block: - - name: Ensure isilonClusters is a list - ansible.builtin.assert: - that: - - clusters.isilonClusters is defined - - clusters.isilonClusters is iterable - - clusters.isilonClusters | length > 0 - msg: "{{ fail_msg_isilon_clusters }}" - - - name: Validate each cluster entry - block: - - name: Validate clusterName in secret.yaml - block: - - name: Validate clusterName is a non-empty string - ansible.builtin.assert: - that: - - item.clusterName is defined - - item.clusterName | length > 0 - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid clusterName - 
ansible.builtin.fail: - msg: "{{ fail_msg_cluster_name }}" - - - name: Validate username in secret.yaml - block: - - name: Validate username is a non-empty string - ansible.builtin.assert: - that: - - item.username is defined - - item.username | length > 0 - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid username - ansible.builtin.fail: - msg: "{{ fail_msg_user_name }}" - - - name: Validate password in secret.yaml - block: - - name: Validate password is a non-empty string - ansible.builtin.assert: - that: - - item.password is defined - - item.password | length > 0 - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid password - ansible.builtin.fail: - msg: "{{ fail_msg_password }}" - - - name: Validate endpoint in secret.yaml - block: - - name: Validate endpoint is a non-empty string - ansible.builtin.assert: - that: - - item.endpoint is defined - - item.endpoint | length > 0 - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid endpoint - ansible.builtin.fail: - msg: "{{ fail_msg_endpoint }}" - - - name: Validate endpointPort in secret.yaml - block: - - name: Validate endpointPort is a non-empty string - when: item.endpointPort is defined - ansible.builtin.assert: - that: - - item.endpointPort is integer - - item.endpointPort > 0 and item.endpointPort < 65536 - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid endpointPort - ansible.builtin.fail: - msg: "{{ fail_msg_endpoint_port }}" - - - name: Validate isDefault in secret.yaml - block: - - name: Validate isDefault is boolean - ansible.builtin.assert: - that: - - item.isDefault is defined - - item.isDefault is boolean - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid isDefault - ansible.builtin.fail: - msg: "{{ fail_msg_isdefault }}" - - - name: Validate skipCertificateValidation in secret.yaml - block: - - name: Validate skipCertificateValidation is true - 
when: item.skipCertificateValidation is defined - ansible.builtin.assert: - that: - - item.skipCertificateValidation in [true] - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid skipCertificateValidation - ansible.builtin.fail: - msg: "{{ fail_msg_skip_certificate_validation }}" - - - name: Validate isiPath in secret.yaml - block: - - name: Validate isiPath is a valid Unix absolute path - when: item.isiPath is defined - ansible.builtin.assert: - that: - - item.isiPath is match('^/[^/].*') - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid isiPath - ansible.builtin.fail: - msg: "{{ fail_msg_isipath }}" - - - name: Validate isiVolumePathPermissions in secret.yaml - block: - - name: Validate isiVolumePathPermissions is a valid octal mode number - when: item.isiVolumePathPermissions is defined - ansible.builtin.assert: - that: - - item.isiVolumePathPermissions is string - - item.isiVolumePathPermissions | length > 0 - loop: "{{ clusters.isilonClusters }}" - no_log: true - rescue: - - name: Invalid isiVolumePathPermissions - ansible.builtin.fail: - msg: "{{ fail_msg_isi_volume_path_permissions }}" - # Validate mandate user input in values file for csi driver - name: Load values.yaml file ansible.builtin.include_vars: file: "{{ hostvars['localhost']['csi_powerscale_driver_values_file_path'] }}" name: csi_powerscale_values_file -- name: Validate controller count - ansible.builtin.assert: - that: - - csi_powerscale_values_file.controller.controllerCount == 1 - msg: | - "Invalid controllerCount value: {{ csi_powerscale_values_file.controller.controllerCount }}. It must be 1 in values.yaml file." 
- -- name: Validate replication enabled - ansible.builtin.assert: - that: - - csi_powerscale_values_file.controller.replication.enabled is defined - - csi_powerscale_values_file.controller.replication.enabled in [false] - msg: | - "Invalid replication enabled value: {{ csi_powerscale_values_file.controller.replication.enabled }}. It must be false in values.yaml file." - -- name: Validate resizer enabled - ansible.builtin.assert: - that: - - csi_powerscale_values_file.controller.resizer.enabled is defined - - csi_powerscale_values_file.controller.resizer.enabled in [false, true] - msg: "Invalid resizer enabled value: {{ csi_powerscale_values_file.controller.resizer.enabled }}. It must be true or false in values.yaml file." - -- name: Validate snapshot enabled - ansible.builtin.assert: - that: - - csi_powerscale_values_file.controller.snapshot.enabled is defined - - csi_powerscale_values_file.controller.snapshot.enabled in [true] - msg: "Invalid snapshot enabled value: {{ csi_powerscale_values_file.controller.snapshot.enabled }}. It must be true in values.yaml file." - -- name: Validate endpointPort - ansible.builtin.assert: - that: - - csi_powerscale_values_file.endpointPort is defined - - csi_powerscale_values_file.endpointPort | int >= 1 - - csi_powerscale_values_file.endpointPort | int <= 65535 - msg: "Invalid endpointPort: {{ csi_powerscale_values_file.endpointPort }}. It must be between 1 and 65535 in values.yaml file." - -- name: Validate skipCertificateValidation - ansible.builtin.assert: - that: - - csi_powerscale_values_file.skipCertificateValidation is defined - - csi_powerscale_values_file.skipCertificateValidation in [true] - msg: "Invalid skipCertificateValidation value: {{ csi_powerscale_values_file.skipCertificateValidation }}. It must be true in values.yaml file." 
- -- name: Set skipCertificateValidation to be used later - ansible.builtin.set_fact: - skip_certificate_validation_value: csi_powerscale_values_file.skipCertificateValidation - -- name: Validate isiAuthType - ansible.builtin.assert: - that: - - csi_powerscale_values_file.isiAuthType is defined - - csi_powerscale_values_file.isiAuthType in [0, 1] - msg: | - "Invalid isiAuthType: {{ csi_powerscale_values_file.isiAuthType }}. - It must be 0 (basic authentication) or 1 (session-based authentication) in values.yaml file." - -- name: Validate isiAccessZone - ansible.builtin.assert: - that: - - csi_powerscale_values_file.isiAccessZone is defined - - csi_powerscale_values_file.isiAccessZone | length > 0 - msg: "Invalid isiAccessZone: {{ csi_powerscale_values_file.isiAccessZone }}. It must be a non-empty string in values.yaml file." - -- name: Validate isiPath - ansible.builtin.assert: - that: - - csi_powerscale_values_file.isiPath is defined - - csi_powerscale_values_file.isiPath | regex_search('^/[^/].*') # Basic validation for Unix absolute path - msg: "Invalid isiPath: {{ csi_powerscale_values_file.isiPath }}. It must be a valid Unix absolute path in values.yaml file." - -- name: Validate isiVolumePathPermissions - ansible.builtin.assert: - that: - - csi_powerscale_values_file.isiVolumePathPermissions is defined - - csi_powerscale_values_file.isiVolumePathPermissions | length > 0 - msg: "Invalid isiVolumePathPermissions: {{ csi_powerscale_values_file.isiVolumePathPermissions }}. It must be valid octal mode in values.yaml file." 
- - name: Validate powerscale ip and credential in secret.yaml file using API call to powerscale ansible.builtin.include_tasks: csi_powerscale_driver_api_validation.yml loop: "{{ clusters.isilonClusters }}" diff --git a/scheduler/roles/cluster_validation/vars/main.yml b/scheduler/roles/cluster_validation/vars/main.yml index 2337e2f82c..ac579f792f 100644 --- a/scheduler/roles/cluster_validation/vars/main.yml +++ b/scheduler/roles/cluster_validation/vars/main.yml @@ -135,16 +135,6 @@ csi_driver_values_file_path_fail_msg: "Failed. csi_driver_values_file_path is no # Usage: csi_powerscale_driver_input_validation.yml csi_powerscale_secret_vaultname: ".csi_powerscale_secret_vault" -fail_msg_isilon_clusters: "isilonClusters must be a valid list of powerscale details in secret.yaml file." -fail_msg_cluster_name: "clusterName is not valid. Provide powerscale cluster name in secret.yaml file." -fail_msg_user_name: "userName is not valid. Provide powerscale user name in secret.yaml file." -fail_msg_password: "Password is not valid. Provide powerscale password in secret.yaml file." -fail_msg_endpoint: "Endpoint is not valid. Provide powerscale IP or hostname in secret.yaml file." -fail_msg_endpoint_port: "endpointPort is not valid. Provide valid port number in secret.yaml file." -fail_msg_isdefault: "isDefault value should be true or false in secret.yaml file." -fail_msg_skip_certificate_validation: "skipCertificateValidation must be true in secret.yaml file." -fail_msg_isipath: "isiPath must be a valid Unix absolute path in secret.yaml file." -fail_msg_isi_volume_path_permissions: "isiVolumePathPermissions must be a valid directory permission (example: 0777) in secret.yaml file." fail_msg_api_call: "Please recheck powerscale username, password, endpoint and endpointPort details provided in secret.yaml and values.yaml (if endpointPort is provided only in values.yaml) file. 
API call to powerscale was not successful" vault_key_permission: "0644" diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index 58e7ae7198..3cfd65987d 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -170,6 +170,12 @@ roles: - k8s_csi_powerscale_plugin +- name: Install CSI powerscale plugin on kube control nodes + hosts: kube_control_plane + gather_facts: false + roles: + - k8s_csi_powerscale_plugin + - name: Install Slurm hosts: slurm_control_node, slurm_node, login any_errors_fatal: true diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index ec68d90b0b..0c78792989 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -134,7 +134,7 @@ tasks_from: csi_powerscale_image_pull.yml when: - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] | default(false) | bool - - hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] + #- hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] - name: Install CSI powerscale plugin on kube control nodes hosts: kube_control_plane From 6e9243e45b7716bcdbca071e11c6fd6c62999465 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 21 Jul 2025 12:39:11 +0530 Subject: [PATCH 42/76] Update telemetry.yml Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- telemetry/telemetry.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/telemetry.yml b/telemetry/telemetry.yml index 1f0e9407d6..e7c848f215 100644 --- a/telemetry/telemetry.yml +++ b/telemetry/telemetry.yml @@ -74,7 +74,7 @@ ansible.builtin.include_role: name: service_k8s_telemetry tasks_from: prereq_checks.yml - when: hostvars['localhost']['kube_prometheus_support'] + when: hostvars['localhost']['kube_prometheus_support'] or hostvars['localhost']['federated_idrac_telemetry_collection'] # - name: Update Repositories/Registries on 
nodes # ansible.builtin.import_playbook: ../utils/update_user_repo.yml From bb6c50fcd5651a3f5a4e94b3d334e24e93aed89a Mon Sep 17 00:00:00 2001 From: mcas Date: Mon, 21 Jul 2025 14:11:48 +0530 Subject: [PATCH 43/76] changed code to use ansible standard k8s libraries --- .../tasks/csi_powerscale_config_secret.yml | 39 +++++++++++-- .../tasks/csi_powerscale_install.yml | 58 ++++++++++++++----- .../tasks/csi_powerscale_prereq.yml | 28 +++++++-- scheduler/scheduler.yml | 6 -- scheduler/service_k8s_cluster.yml | 4 +- 5 files changed, 102 insertions(+), 33 deletions(-) diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml index ad6cf6d85f..eb150d9541 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml @@ -13,15 +13,44 @@ # limitations under the License. 
--- -- name: Remove existing isilon-creds secret if already present in isilon namespace - ansible.builtin.command: kubectl delete secret isilon-creds -n {{ powerscale_ns }} +# - name: Remove existing isilon-creds secret if already present in isilon namespace +# ansible.builtin.command: kubectl delete secret isilon-creds -n {{ powerscale_ns }} +# failed_when: false +# changed_when: false + +- name: Remove existing isilon-creds secret if present + kubernetes.core.k8s: + api_version: v1 + kind: Secret + name: isilon-creds + namespace: "{{ powerscale_ns }}" + state: absent + register: delete_secret_result failed_when: false - changed_when: false + changed_when: delete_secret_result.changed + +# - name: Create isilon-creds secret in isilon namespace +# ansible.builtin.command: kubectl create secret generic isilon-creds -n {{ powerscale_ns }} --from-file=config="{{ csi_powerscale_secret_path }}" +# failed_when: false +# register: apply_secret +# changed_when: apply_secret.changed - name: Create isilon-creds secret in isilon namespace - ansible.builtin.command: kubectl create secret generic isilon-creds -n {{ powerscale_ns }} --from-file=config="{{ csi_powerscale_secret_path }}" - failed_when: false + kubernetes.core.k8s: + api_version: v1 + kind: Secret + name: isilon-creds + namespace: "{{ powerscale_ns }}" + state: present + definition: + metadata: + name: isilon-creds + namespace: "{{ powerscale_ns }}" + data: + config: "{{ lookup('file', csi_powerscale_secret_path) | b64encode }}" + type: Opaque register: apply_secret + failed_when: false changed_when: apply_secret.changed # Remove the secret file diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml index da39443b04..04918e8c1f 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml @@ -13,21 
+13,40 @@ # limitations under the License. --- -- name: Deploy external-snapshotter config CRDs - ansible.builtin.command: - cmd: "kubectl apply -f client/config/crd/" - chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" +#- name: Deploy external-snapshotter config CRDs + # ansible.builtin.command: + # cmd: "kubectl apply -f client/config/crd/" + # chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" + #register: install_result + #failed_when: false + #changed_when: install_result.changed + +- name: Deploy external-snapshotter config CRDs using k8s module + kubernetes.core.k8s: + state: present + definition: "{{ lookup('file', item) | from_yaml }}" + loop: "{{ lookup('fileglob', csi_powerscale_path + '/csi-powerscale/external-snapshotter/client/config/crd/*.yaml', wantlist=True) }}" register: install_result failed_when: false - changed_when: install_result.changed + changed_when: install_result is changed -- name: Deploy external-snapshotter snapshot-controller CRDs - ansible.builtin.command: - cmd: "kubectl apply -f deploy/kubernetes/snapshot-controller/" - chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" +# - name: Deploy external-snapshotter snapshot-controller CRDs +# ansible.builtin.command: +# cmd: "kubectl apply -f deploy/kubernetes/snapshot-controller/" +# chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" +# register: install_result +# failed_when: false +# changed_when: install_result.changed + +- name: Deploy external-snapshotter snapshot-controller CRDs using k8s module + kubernetes.core.k8s: + state: present + definition: "{{ lookup('file', item) | from_yaml }}" + loop: "{{ lookup('fileglob', csi_powerscale_path + '/csi-powerscale/external-snapshotter/deploy/kubernetes/snapshot-controller/*.yaml', wantlist=True) }}" register: install_result failed_when: false - changed_when: install_result.changed + changed_when: install_result is changed + - name: Execute CSI driver 
installation script with timeout of seconds {{ async_time }} ansible.builtin.command: @@ -56,13 +75,22 @@ prompt: "{{ fail_msg_csi_powerscale_driver }}" when: isilon_non_running_pods.stdout_lines | length > 0 -- name: Create powerscale storage class if deployment was successful - ansible.builtin.command: - cmd: "kubectl apply -f ps_storage_class.yml" - chdir: "{{ csi_powerscale_path }}" +# - name: Create powerscale storage class if deployment was successful +# ansible.builtin.command: +# cmd: "kubectl apply -f ps_storage_class.yml" +# chdir: "{{ csi_powerscale_path }}" +# register: sc_command_result +# failed_when: false +# changed_when: sc_command_result.changed +# when: isilon_non_running_pods.stdout_lines | length == 0 + +- name: Create PowerScale storage class if deployment was successful + kubernetes.core.k8s: + state: present + definition: "{{ lookup('file', csi_powerscale_path + '/ps_storage_class.yml') | from_yaml }}" register: sc_command_result failed_when: false - changed_when: sc_command_result.changed + changed_when: sc_command_result is changed when: isilon_non_running_pods.stdout_lines | length == 0 - name: Remove ps_storage_class.yml file diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml index 79efeabaca..6fce44d24b 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml @@ -14,11 +14,18 @@ --- # Check Kubernetes is deployed on cluster +# - name: Verify Kubernetes is deployed on cluster +# ansible.builtin.command: kubectl get node +# register: k8s_return_code +# changed_when: false +# failed_when: false + - name: Verify Kubernetes is deployed on cluster - ansible.builtin.command: kubectl get node + kubernetes.core.k8s_info: + kind: Node register: k8s_return_code - changed_when: false failed_when: false + changed_when: false - 
name: Fail if Kubernetes is not deployed ansible.builtin.assert: @@ -198,9 +205,20 @@ group: "{{ group_value }}" mode: "{{ permission_644 }}" + # - name: Create isilon namespace + # ansible.builtin.command: + # cmd: "kubectl create ns isilon" + # register: command_result + # failed_when: false + # changed_when: command_result.changed + - name: Create isilon namespace - ansible.builtin.command: - cmd: "kubectl create ns isilon" + kubernetes.core.k8s: + api_version: v1 + kind: Namespace + name: isilon + state: present register: command_result - failed_when: false changed_when: command_result.changed + failed_when: false + diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index 3cfd65987d..58e7ae7198 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -170,12 +170,6 @@ roles: - k8s_csi_powerscale_plugin -- name: Install CSI powerscale plugin on kube control nodes - hosts: kube_control_plane - gather_facts: false - roles: - - k8s_csi_powerscale_plugin - - name: Install Slurm hosts: slurm_control_node, slurm_node, login any_errors_fatal: true diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index 0c78792989..2b9c2cad3f 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -126,7 +126,7 @@ - update_containerd_config - name: CSI powerscale image pulling - hosts: kube_node, kube_control_plane + hosts: kube_node, kube_control_plane[0] tasks: - name: Pull images ansible.builtin.include_role: @@ -137,7 +137,7 @@ #- hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] - name: Install CSI powerscale plugin on kube control nodes - hosts: kube_control_plane + hosts: kube_control_plane[0] gather_facts: false roles: - k8s_csi_powerscale_plugin From f16947916570b81b522b220d1347d6389f316aa7 Mon Sep 17 00:00:00 2001 From: mcas Date: Tue, 22 Jul 2025 14:15:10 +0530 Subject: [PATCH 44/76] chnages wrt powerscale service_k8s_deployment --- 
.../input_validation/schema/omnia_config.json | 8 ++-- input/omnia_config.yml | 8 ++-- .../tasks/csi_powerscale_config_secret.yml | 27 +---------- .../tasks/csi_powerscale_image_pull.yml | 10 ----- .../tasks/csi_powerscale_install.yml | 38 +++------------- .../tasks/csi_powerscale_prereq.yml | 45 +++++++------------ .../k8s_csi_powerscale_plugin/tasks/main.yml | 3 +- .../k8s_csi_powerscale_plugin/vars/main.yml | 1 + scheduler/service_k8s_cluster.yml | 33 +++++++------- 9 files changed, 48 insertions(+), 125 deletions(-) diff --git a/common/library/module_utils/input_validation/schema/omnia_config.json b/common/library/module_utils/input_validation/schema/omnia_config.json index d1303df6b3..0f5c359d1f 100644 --- a/common/library/module_utils/input_validation/schema/omnia_config.json +++ b/common/library/module_utils/input_validation/schema/omnia_config.json @@ -63,12 +63,12 @@ "csi_powerscale_driver_secret_file_path": { "description": "Absolute file path for the secret.yaml file.", "type": "string", - "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" + "pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$" }, "csi_powerscale_driver_values_file_path": { "description": "File path for the values.yaml file.", "type": "string", - "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" + "pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$" } }, "required": [ @@ -150,12 +150,12 @@ "csi_powerscale_driver_secret_file_path": { "description": "Absolute file path for the secret.yaml file.", "type": "string", - "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" + "pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$" }, "csi_powerscale_driver_values_file_path": { "description": "File path for the values.yaml file.", "type": "string", - "pattern": "^/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml$" + "pattern": "^(|/?([a-zA-Z0-9._-]+/)*[a-zA-Z0-9._-]+\\.yaml)$" } }, "required": [ diff --git a/input/omnia_config.yml b/input/omnia_config.yml 
index 3fad8558ee..8418ecd94b 100644 --- a/input/omnia_config.yml +++ b/input/omnia_config.yml @@ -82,8 +82,8 @@ service_k8s_cluster: topology_manager_policy: "none" topology_manager_scope: "container" k8s_offline_install: true - csi_powerscale_driver_secret_file_path: - csi_powerscale_driver_values_file_path: + csi_powerscale_driver_secret_file_path: "" + csi_powerscale_driver_values_file_path: "" compute_k8s_cluster: - cluster_name: compute_cluster @@ -95,5 +95,5 @@ compute_k8s_cluster: topology_manager_policy: "none" topology_manager_scope: "container" k8s_offline_install: true - csi_powerscale_driver_secret_file_path: - csi_powerscale_driver_values_file_path: + csi_powerscale_driver_secret_file_path: "" + csi_powerscale_driver_values_file_path: "" diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml index eb150d9541..23ac13fc0e 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml @@ -12,12 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- - -# - name: Remove existing isilon-creds secret if already present in isilon namespace -# ansible.builtin.command: kubectl delete secret isilon-creds -n {{ powerscale_ns }} -# failed_when: false -# changed_when: false - - name: Remove existing isilon-creds secret if present kubernetes.core.k8s: api_version: v1 @@ -30,27 +24,10 @@ changed_when: delete_secret_result.changed -# - name: Create isilon-creds secret in isilon namespace -# ansible.builtin.command: kubectl create secret generic isilon-creds -n {{ powerscale_ns }} --from-file=config="{{ csi_powerscale_secret_path }}" -# failed_when: false -# register: apply_secret -# changed_when: apply_secret.changed - name: Create isilon-creds secret in isilon namespace - kubernetes.core.k8s: - api_version: v1 - kind: Secret - name: isilon-creds - namespace: "{{ powerscale_ns }}" - state: present - definition: - metadata: - name: isilon-creds - namespace: "{{ powerscale_ns }}" - data: - config: "{{ lookup('file', csi_powerscale_secret_path) | b64encode }}" - type: Opaque - register: apply_secret + ansible.builtin.command: kubectl create secret generic isilon-creds -n {{ powerscale_ns }} --from-file=config="{{ csi_powerscale_secret_path }}" failed_when: false + register: apply_secret changed_when: apply_secret.changed # Remove the secret file diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml index 8298544889..c3cf655564 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml @@ -23,16 +23,6 @@ loop: "{{ hostvars['localhost']['csi_driver_powerscale_packages_json']['csi_driver_powerscale']['cluster'] }}" when: item.type == 'image' -#- name: Pull csi powerscale images - # ansible.builtin.command: nerdctl pull {{ item }} - #with_items: "{{ csi_powerscale_image_versions }}" - 
#changed_when: true - #failed_when: false - #environment: - # http_proxy: "{{ hostvars['localhost']['http_proxy'] }}" - #https_proxy: "{{ hostvars['localhost']['https_proxy'] }}" - #no_proxy: "{{ hostvars['localhost']['oim_hostname'] }},{{ hostvars['localhost']['admin_nic_ip'] }}" - # Pulling images from pulp - always,partial, never - name: Pull K8s services docker images from pulp ansible.builtin.command: nerdctl pull {{ item }} diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml index 04918e8c1f..98cd4a195d 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml @@ -12,15 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. --- - -#- name: Deploy external-snapshotter config CRDs - # ansible.builtin.command: - # cmd: "kubectl apply -f client/config/crd/" - # chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" - #register: install_result - #failed_when: false - #changed_when: install_result.changed - - name: Deploy external-snapshotter config CRDs using k8s module kubernetes.core.k8s: state: present @@ -30,23 +21,13 @@ failed_when: false changed_when: install_result is changed -# - name: Deploy external-snapshotter snapshot-controller CRDs -# ansible.builtin.command: -# cmd: "kubectl apply -f deploy/kubernetes/snapshot-controller/" -# chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" -# register: install_result -# failed_when: false -# changed_when: install_result.changed - -- name: Deploy external-snapshotter snapshot-controller CRDs using k8s module - kubernetes.core.k8s: - state: present - definition: "{{ lookup('file', item) | from_yaml }}" - loop: "{{ lookup('fileglob', csi_powerscale_path + 
'/csi-powerscale/external-snapshotter/deploy/kubernetes/snapshot-controller/*.yaml', wantlist=True) }}" +- name: Deploy external-snapshotter snapshot-controller CRDs + ansible.builtin.command: + cmd: "kubectl apply -f deploy/kubernetes/snapshot-controller/" + chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" register: install_result failed_when: false - changed_when: install_result is changed - + changed_when: install_result.changed - name: Execute CSI driver installation script with timeout of seconds {{ async_time }} ansible.builtin.command: @@ -75,15 +56,6 @@ prompt: "{{ fail_msg_csi_powerscale_driver }}" when: isilon_non_running_pods.stdout_lines | length > 0 -# - name: Create powerscale storage class if deployment was successful -# ansible.builtin.command: -# cmd: "kubectl apply -f ps_storage_class.yml" -# chdir: "{{ csi_powerscale_path }}" -# register: sc_command_result -# failed_when: false -# changed_when: sc_command_result.changed -# when: isilon_non_running_pods.stdout_lines | length == 0 - - name: Create PowerScale storage class if deployment was successful kubernetes.core.k8s: state: present diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml index 6fce44d24b..af2e1efaa5 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml @@ -12,26 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- - -# Check Kubernetes is deployed on cluster -# - name: Verify Kubernetes is deployed on cluster -# ansible.builtin.command: kubectl get node -# register: k8s_return_code -# changed_when: false -# failed_when: false - -- name: Verify Kubernetes is deployed on cluster - kubernetes.core.k8s_info: - kind: Node - register: k8s_return_code - failed_when: false - changed_when: false - -- name: Fail if Kubernetes is not deployed - ansible.builtin.assert: - that: - - k8s_return_code.rc == 0 - fail_msg: "{{ k8s_not_deployed }}" +- name: Check if k8s is running + block: + - name: Check if Kubernetes is running + kubernetes.core.k8s_info: + api_version: v1 + kind: Node + register: node_info + failed_when: node_info.resources is not defined or node_info.resources | length == 0 + rescue: + - name: Kubernetes is not running + ansible.builtin.fail: + msg: "{{ k8s_not_deployed }}" # Check if powerscale is already deployed - name: Verify powerscale is deployed on cluster @@ -159,7 +151,7 @@ block: - name: Get csi-powerscale git tar ansible.builtin.get_url: - url: "{{ hostvars['localhost']['offline_git_path'] }}/{{ csi_powerscale_git }}" + url: "{{ hostvars['localhost']['offline_git_path'] }}/csi-powerscale/{{ csi_powerscale_git }}" dest: "{{ csi_powerscale_path }}/{{ csi_powerscale_git }}" mode: "{{ permission_644 }}" @@ -171,13 +163,13 @@ - name: Get dell/helm-charts git tar ansible.builtin.get_url: - url: "{{ hostvars['localhost']['offline_git_path'] }}/{{ helm_charts_git }}" + url: "{{ hostvars['localhost']['offline_git_path'] }}/helm-charts/{{ helm_charts_git }}" dest: "{{ csi_powerscale_path }}/csi-powerscale/{{ helm_charts_git }}" mode: "{{ permission_644 }}" - name: Get external-snapshotter git tar ansible.builtin.get_url: - url: "{{ hostvars['localhost']['offline_git_path'] }}/{{ external_snapshotter_git }}" + url: "{{ hostvars['localhost']['offline_git_path'] }}/external-snapshotter/{{ external_snapshotter_git }}" dest: "{{ csi_powerscale_path }}/csi-powerscale/{{ 
external_snapshotter_git }}" mode: "{{ permission_644 }}" rescue: @@ -204,13 +196,6 @@ owner: "{{ owner_value }}" group: "{{ group_value }}" mode: "{{ permission_644 }}" - - # - name: Create isilon namespace - # ansible.builtin.command: - # cmd: "kubectl create ns isilon" - # register: command_result - # failed_when: false - # changed_when: command_result.changed - name: Create isilon namespace kubernetes.core.k8s: diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml index 2eb47e2911..475966e328 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/main.yml @@ -15,8 +15,7 @@ - name: CSI powerscale driver installation when: - - hostvars['localhost']['csi_driver_powerscale_precheck_pass'] - - hostvars['localhost']['omnia_config']['k8s_offline_install'] + - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] block: - name: Fetch required files to kube control plane ansible.builtin.include_tasks: csi_powerscale_prereq.yml diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml b/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml index 3b37fcef43..8e51cbd782 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/vars/main.yml @@ -23,6 +23,7 @@ csi_powerscale_secret_path: "{{ csi_powerscale_path }}/csi_powerscale_secret.yam # Usage: csi_powerscale_install.yml, csi_powerscale_prereq.yml csi_powerscale_path: "/opt/omnia/csi-driver-powerscale" +ansible_python_interpreter: "/usr/bin/{{ hostvars['localhost']['python_package'] }}" # Usage: csi_powerscale_install.yml, csi_powerscale_prereq.yml csi_powerscale_git: "csi-powerscale.tar.gz" diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index 9e28f83eba..a5fe24047d 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -125,23 
+125,6 @@ roles: - update_containerd_config -- name: CSI powerscale image pulling - hosts: kube_node, kube_control_plane[0] - tasks: - - name: Pull images - ansible.builtin.include_role: - name: k8s_csi_powerscale_plugin - tasks_from: csi_powerscale_image_pull.yml - when: - - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] | default(false) | bool - #- hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] - -- name: Install CSI powerscale plugin on kube control nodes - hosts: kube_control_plane[0] - gather_facts: false - roles: - - k8s_csi_powerscale_plugin - - name: Pull k8s images hosts: kube_control_plane, kube_node, etcd gather_facts: false @@ -163,3 +146,19 @@ whereabouts-device-plugin: true roles: - common_plugins + +- name: CSI powerscale image pulling + hosts: kube_node, kube_control_plane[0] + tasks: + - name: Pull images + ansible.builtin.include_role: + name: k8s_csi_powerscale_plugin + tasks_from: csi_powerscale_image_pull.yml + when: + - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] | default(false) | bool + +- name: Install CSI powerscale plugin on kube control nodes + hosts: kube_control_plane[0] + gather_facts: false + roles: + - k8s_csi_powerscale_plugin From a11e524275392ef772095b906bd37f6db3353861 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Tue, 22 Jul 2025 08:52:13 +0000 Subject: [PATCH 45/76] ansible lint fixes --- scheduler/roles/cluster_validation/tasks/set_facts.yml | 4 ++-- .../tasks/csi_powerscale_image_pull.yml | 1 - .../k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/scheduler/roles/cluster_validation/tasks/set_facts.yml b/scheduler/roles/cluster_validation/tasks/set_facts.yml index 6cd7e6690d..1ad78a1836 100644 --- a/scheduler/roles/cluster_validation/tasks/set_facts.yml +++ b/scheduler/roles/cluster_validation/tasks/set_facts.yml @@ -77,8 +77,8 @@ topology_manager_policy: "{{ 
selected_cluster.topology_manager_policy }}" topology_manager_scope: "{{ selected_cluster.topology_manager_scope }}" k8s_offline_install: "{{ selected_cluster.k8s_offline_install }}" - csi_powerscale_driver_secret_file_path: "{{selected_cluster.csi_powerscale_driver_secret_file_path}}" - csi_powerscale_driver_values_file_path: "{{selected_cluster.csi_powerscale_driver_values_file_path}}" + csi_powerscale_driver_secret_file_path: "{{ selected_cluster.csi_powerscale_driver_secret_file_path }}" + csi_powerscale_driver_values_file_path: "{{ selected_cluster.csi_powerscale_driver_values_file_path }}" - name: Create a directory to store kubespray log files ansible.builtin.file: diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml index c3cf655564..1bf9718882 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_image_pull.yml @@ -59,4 +59,3 @@ when: - not hostvars['localhost']['k8s_offline_install'] - not hostvars['localhost']['enable_routed_internet'] - diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml index af2e1efaa5..10677289df 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_prereq.yml @@ -196,7 +196,7 @@ owner: "{{ owner_value }}" group: "{{ group_value }}" mode: "{{ permission_644 }}" - + - name: Create isilon namespace kubernetes.core.k8s: api_version: v1 @@ -206,4 +206,3 @@ register: command_result changed_when: command_result.changed failed_when: false - From b03e0dc60af1a5ddf483b8fc287a3485295d29b4 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Tue, 22 Jul 2025 09:06:13 +0000 Subject: [PATCH 46/76] chnaging vault 
path --- .../input_validation/validation_flows/common_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/library/module_utils/input_validation/validation_flows/common_validation.py b/common/library/module_utils/input_validation/validation_flows/common_validation.py index e953bd9ef3..09e82c0780 100644 --- a/common/library/module_utils/input_validation/validation_flows/common_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/common_validation.py @@ -1113,7 +1113,7 @@ def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path,errors): #valiadte secret file inputs secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) - vault_secret_file_path= "/root/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" + vault_secret_file_path= "/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" #check if secret file exists file_exists = os.path.exists(vault_secret_file_path.strip()) From ae3dd2cde74d254a90d265e5e84b69a9b2dc38bf Mon Sep 17 00:00:00 2001 From: mcas Date: Tue, 22 Jul 2025 15:07:23 +0530 Subject: [PATCH 47/76] separate file for csi validation --- .../validation_flows/common_validation.py | 195 +------------- .../validation_flows/csi_driver_validation.py | 248 ++++++++++++++++++ 2 files changed, 250 insertions(+), 193 deletions(-) create mode 100644 common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py diff --git a/common/library/module_utils/input_validation/validation_flows/common_validation.py b/common/library/module_utils/input_validation/validation_flows/common_validation.py index e953bd9ef3..2d0a8ffb5a 100644 --- a/common/library/module_utils/input_validation/validation_flows/common_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/common_validation.py 
@@ -22,6 +22,7 @@ import subprocess from ast import literal_eval import ansible.module_utils.input_validation.common_utils.data_fetch as get +from ansible.module_utils.input_validation.validation_flows import csi_driver_validation import ansible.module_utils.input_validation.common_utils.data_validation as validate from ansible.module_utils.input_validation.common_utils import ( validation_utils, @@ -946,198 +947,6 @@ def is_ip_in_range(ip_str, ip_range_str): return False -def validate_secret_isilon_clusters(data): - cluster_errors = [] - - clusters = data.get("isilonClusters") - - # Check if isilonClusters is a defined, non-empty list - if not isinstance(clusters, list) or len(clusters) == 0: - cluster_errors.append("isilonClusters must be a non-empty list.") - return cluster_errors # Stop further checks - - for idx, item in enumerate(clusters): - cluster_prefix = f"Cluster {idx + 1}" - - # Validate clusterName - if not item.get("clusterName") or not isinstance(item["clusterName"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'clusterName'.") - - # Validate username - if not item.get("username") or not isinstance(item["username"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'username'.") - - # Validate password - if not item.get("password") or not isinstance(item["password"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'password'.") - - # Validate endpoint - if not item.get("endpoint") or not isinstance(item["endpoint"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'endpoint'.") - - # Validate endpointPort if defined - if "endpointPort" in item: - if not isinstance(item["endpointPort"], int) or not (0 < item["endpointPort"] < 65536): - cluster_errors.append(f"{cluster_prefix}: 'endpointPort' must be an integer between 1 and 65535.") - - # Validate isDefault - if "isDefault" not in item or not isinstance(item["isDefault"], bool): - 
cluster_errors.append(f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") - - # Validate skipCertificateValidation if defined - if "skipCertificateValidation" in item: - if item["skipCertificateValidation"] is not True: - cluster_errors.append(f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") - - # Validate isiPath if defined - if "isiPath" in item: - if not isinstance(item["isiPath"], str) or not item["isiPath"].startswith('/'): - cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a valid Unix absolute path.") - - # Validate isiVolumePathPermissions if defined - if "isiVolumePathPermissions" in item: - perms = item["isiVolumePathPermissions"] - if not isinstance(perms, str) or not perms.strip().isdigit(): - cluster_errors.append(f"{cluster_prefix}: 'isiVolumePathPermissions' must be a non-empty string of digits.") - - return cluster_errors - -def validate_value_file_inputs(values_data): - value_errors = [] - - def add_error(field_path, value, msg): - value_errors.append( - f"Validation Error - {field_path}: '{value}' -> {msg}" - ) - - # Helper to safely get nested values - def get_nested(data, keys, default=None): - for key in keys: - if not isinstance(data, dict) or key not in data: - return default - data = data[key] - return data - - # 1. controller.controllerCount == 1 - controller_count = get_nested(values_data, ["controller", "controllerCount"]) - if controller_count != 1: - add_error("controller.controllerCount", controller_count, "Must be 1") - - # 2. controller.replication.enabled == false - replication_enabled = get_nested(values_data, ["controller", "replication", "enabled"]) - if replication_enabled is None or replication_enabled is not False: - add_error("controller.replication.enabled", replication_enabled, "Must be false") - - # 3. 
controller.resizer.enabled in [true, false] - resizer_enabled = get_nested(values_data, ["controller", "resizer", "enabled"]) - if resizer_enabled not in [True, False]: - add_error("controller.resizer.enabled", resizer_enabled, "Must be true or false") - - # 4. controller.snapshot.enabled == true - snapshot_enabled = get_nested(values_data, ["controller", "snapshot", "enabled"]) - if snapshot_enabled is not True: - add_error("controller.snapshot.enabled", snapshot_enabled, "Must be true") - - # 5. endpointPort is int in 1..65535 - endpoint_port = values_data.get("endpointPort") - if endpoint_port is None or not isinstance(endpoint_port, int) or not (1 <= endpoint_port <= 65535): - add_error("endpointPort", endpoint_port, "Must be between 1 and 65535") - - # 6. skipCertificateValidation == true - skip_cert = values_data.get("skipCertificateValidation") - if skip_cert is not True: - add_error("skipCertificateValidation", skip_cert, "Must be true") - - # 7. isiAuthType in [0, 1] - isi_auth = values_data.get("isiAuthType") - if isi_auth not in [0, 1]: - add_error("isiAuthType", isi_auth, "Must be 0 or 1") - - # 8. isiAccessZone is non-empty string - isi_access = values_data.get("isiAccessZone") - if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): - add_error("isiAccessZone", isi_access, "Must be a non-empty string") - - # 9. isiPath is Unix absolute path - isi_path = values_data.get("isiPath") - if not isinstance(isi_path, str) or not isi_path.startswith("/"): - add_error("isiPath", isi_path, "Must be a valid Unix absolute path") - - # 10. 
isiVolumePathPermissions is a non-empty string - permissions = values_data.get("isiVolumePathPermissions") - if not permissions or not isinstance(permissions, str) or not permissions.strip(): - add_error("isiVolumePathPermissions", permissions, "Must be a valid octal string") - - return value_errors - -def encrypt_file(secret_file_path, vault_secret_file_path): - cmd = [ - "ansible-vault", - "encrypt", - secret_file_path, - "--vault-password-file", - vault_secret_file_path, - ] - return validation_utils.run_subprocess(cmd) - -def decrypt_file(secret_file_path, vault_secret_file_path): - cmd = [ - "ansible-vault", - "decrypt", - secret_file_path, - "--vault-password-file", - vault_secret_file_path, - ] - return validation_utils.run_subprocess(cmd) - -def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): - decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path,) - - if decrypted_file: - try: - with open(secret_file_path, "r") as f: - data = yaml.safe_load(f) - encrypt_file(secret_file_path, vault_secret_file_path) - return data - except FileNotFoundError: - errors.append(create_error_msg("File not found", - secret_file_path, "Please check the associated file exists")) - except yaml.YAMLError as e: - errors.append(create_error_msg("Error loading yaml file", - secret_file_path, "Please check the associated file syntax")) - else: - errors.append(create_error_msg("Error occured when attempting to decrypt file.", - secret_file_path, "Please check that the assoicated vault file exists")) - return decrypted_file - -def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path,errors): - #valiadte secret file inputs - secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) - vault_secret_file_path= "/root/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" - #check if secret file exists - file_exists = os.path.exists(vault_secret_file_path.strip()) - - if 
secrets_file_encrypted: - secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) - if secret_data is None or secret_data is False: - errors.append(create_error_msg( - "Secret File Load", - secret_file_path, - "Failed to load or parse secret.yaml file. It may be invalid or empty." - )) - else: - secret_validation_errors = validate_secret_isilon_clusters(secret_data) - if secret_validation_errors: - for err in secret_validation_errors: - errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) - - #validate values file input - with open(values_file_path, "r") as f: - values_data = yaml.safe_load(f) - values_validation_errros = validate_value_file_inputs(values_data) - if values_validation_errros: - for value_err in values_validation_errros: - errors.append(create_error_msg(f"Powerscale Value File Validation Error: ",value_err, None)) def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors, @@ -1259,7 +1068,7 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG, ) ) - validate_powerscale_secret_and_values_file(csi_secret_file_path,csi_values_file_path, errors) + csi_driver_validation.validate_powerscale_secret_and_values_file(csi_secret_file_path,csi_values_file_path, errors) def validate_omnia_config( input_file_path, diff --git a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py new file mode 100644 index 0000000000..8757d00deb --- /dev/null +++ b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py @@ -0,0 +1,248 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=import-error,no-name-in-module,too-many-positional-arguments,too-many-arguments,unused-argument +""" +Validates csi driver configuration files for Omnia. +""" +import os +import yaml +from ansible.module_utils.input_validation.common_utils import validation_utils +from ansible.module_utils.input_validation.common_utils import config + +file_names = config.files +create_error_msg = validation_utils.create_error_msg +create_file_path = validation_utils.create_file_path +contains_software = validation_utils.contains_software +check_mandatory_fields = validation_utils.check_mandatory_fields +flatten_sub_groups = validation_utils.flatten_sub_groups + + +def validate_secret_isilon_clusters(data): + """ + Validates csi secret file inputs for Omnia. 
+ """ + + cluster_errors = [] + + clusters = data.get("isilonClusters") + + # Check if isilonClusters is a defined, non-empty list + if not isinstance(clusters, list) or len(clusters) == 0: + cluster_errors.append("isilonClusters must be a non-empty list.") + return cluster_errors # Stop further checks + + for idx, item in enumerate(clusters): + cluster_prefix = f"Cluster {idx + 1}" + + # Validate clusterName + if not item.get("clusterName") or not isinstance(item["clusterName"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'clusterName'.") + + # Validate username + if not item.get("username") or not isinstance(item["username"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'username'.") + + # Validate password + if not item.get("password") or not isinstance(item["password"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'password'.") + + # Validate endpoint + if not item.get("endpoint") or not isinstance(item["endpoint"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'endpoint'.") + + # Validate endpointPort if defined + if "endpointPort" in item: + if not isinstance(item["endpointPort"], int) or not (0 < item["endpointPort"] < 65536): + cluster_errors.append(f"{cluster_prefix}: 'endpointPort' must be an integer between 1 and 65535.") + + # Validate isDefault + if "isDefault" not in item or not isinstance(item["isDefault"], bool): + cluster_errors.append(f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") + + # Validate skipCertificateValidation if defined + if "skipCertificateValidation" in item: + if item["skipCertificateValidation"] is not True: + cluster_errors.append(f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") + + # Validate isiPath if defined + if "isiPath" in item: + if not isinstance(item["isiPath"], str) or not item["isiPath"].startswith('/'): + cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a valid 
Unix absolute path.") + + # Validate isiVolumePathPermissions if defined + if "isiVolumePathPermissions" in item: + perms = item["isiVolumePathPermissions"] + if not isinstance(perms, str) or not perms.strip().isdigit(): + cluster_errors.append(f"{cluster_prefix}: 'isiVolumePathPermissions' must be a non-empty string of digits.") + + return cluster_errors + +def validate_value_file_inputs(values_data): + """ + Validates csi value file inputs for Omnia. + """ + + value_errors = [] + + def add_error(field_path, value, msg): + value_errors.append( + f"Validation Error - {field_path}: '{value}' -> {msg}" + ) + + # Helper to safely get nested values + def get_nested(data, keys, default=None): + for key in keys: + if not isinstance(data, dict) or key not in data: + return default + data = data[key] + return data + + # 1. controller.controllerCount == 1 + controller_count = get_nested(values_data, ["controller", "controllerCount"]) + if controller_count != 1: + add_error("controller.controllerCount", controller_count, "Must be 1") + + # 2. controller.replication.enabled == false + replication_enabled = get_nested(values_data, ["controller", "replication", "enabled"]) + if replication_enabled is None or replication_enabled is not False: + add_error("controller.replication.enabled", replication_enabled, "Must be false") + + # 3. controller.resizer.enabled in [true, false] + resizer_enabled = get_nested(values_data, ["controller", "resizer", "enabled"]) + if resizer_enabled not in [True, False]: + add_error("controller.resizer.enabled", resizer_enabled, "Must be true or false") + + # 4. controller.snapshot.enabled == true + snapshot_enabled = get_nested(values_data, ["controller", "snapshot", "enabled"]) + if snapshot_enabled is not True: + add_error("controller.snapshot.enabled", snapshot_enabled, "Must be true") + + # 5. 
endpointPort is int in 1..65535 + endpoint_port = values_data.get("endpointPort") + if endpoint_port is None or not isinstance(endpoint_port, int) or not (1 <= endpoint_port <= 65535): + add_error("endpointPort", endpoint_port, "Must be between 1 and 65535") + + # 6. skipCertificateValidation == true + skip_cert = values_data.get("skipCertificateValidation") + if skip_cert is not True: + add_error("skipCertificateValidation", skip_cert, "Must be true") + + # 7. isiAuthType in [0, 1] + isi_auth = values_data.get("isiAuthType") + if isi_auth not in [0, 1]: + add_error("isiAuthType", isi_auth, "Must be 0 or 1") + + # 8. isiAccessZone is non-empty string + isi_access = values_data.get("isiAccessZone") + if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): + add_error("isiAccessZone", isi_access, "Must be a non-empty string") + + # 9. isiPath is Unix absolute path + isi_path = values_data.get("isiPath") + if not isinstance(isi_path, str) or not isi_path.startswith("/"): + add_error("isiPath", isi_path, "Must be a valid Unix absolute path") + + # 10. 
isiVolumePathPermissions is a non-empty string + permissions = values_data.get("isiVolumePathPermissions") + if not permissions or not isinstance(permissions, str) or not permissions.strip(): + add_error("isiVolumePathPermissions", permissions, "Must be a valid octal string") + + return value_errors + +def encrypt_file(secret_file_path, vault_secret_file_path): + """ + encrypt the secret file + """ + + cmd = [ + "ansible-vault", + "encrypt", + secret_file_path, + "--vault-password-file", + vault_secret_file_path, + ] + return validation_utils.run_subprocess(cmd) + +def decrypt_file(secret_file_path, vault_secret_file_path): + """ + encrypt the secret file + Takes 2 inputs: file name and secret file path + """ + + cmd = [ + "ansible-vault", + "decrypt", + secret_file_path, + "--vault-password-file", + vault_secret_file_path, + ] + return validation_utils.run_subprocess(cmd) + +def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): + """ + Process the secret file + decrypt the file first then parse it to get data + """ + + decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path,) + + if decrypted_file: + try: + with open(secret_file_path, "r") as f: + data = yaml.safe_load(f) + encrypt_file(secret_file_path, vault_secret_file_path) + return data + except FileNotFoundError: + errors.append(create_error_msg("File not found", + secret_file_path, "Please check the associated file exists")) + except yaml.YAMLError as e: + errors.append(create_error_msg("Error loading yaml file", + secret_file_path, "Please check the associated file syntax")) + else: + errors.append(create_error_msg("Error occured when attempting to decrypt file.", + secret_file_path, "Please check that the assoicated vault file exists")) + return decrypted_file + +def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path, errors): + """ + Driver code to initiate the powerscale secret and values file input validation + """ + + #valiadte secret 
file inputs + secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) + vault_secret_file_path= "/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" + #check if secret file exists + file_exists = os.path.exists(vault_secret_file_path.strip()) + + if secrets_file_encrypted: + secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) + if secret_data is None or secret_data is False: + errors.append(create_error_msg( + "Secret File Load", + secret_file_path, + "Failed to load or parse secret.yaml file. It may be invalid or empty." + )) + else: + secret_validation_errors = validate_secret_isilon_clusters(secret_data) + if secret_validation_errors: + for err in secret_validation_errors: + errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) + + #validate values file input + with open(values_file_path, "r") as f: + values_data = yaml.safe_load(f) + values_validation_errros = validate_value_file_inputs(values_data) + if values_validation_errros: + for value_err in values_validation_errros: + errors.append(create_error_msg(f"Powerscale Value File Validation Error: ",value_err, None)) From 04554ccfb23b6fa9a7d1b3d2c54200884355f8d0 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Tue, 22 Jul 2025 09:42:10 +0000 Subject: [PATCH 48/76] separating csi validation --- .../validation_flows/common_validation.py | 198 +------------- .../validation_flows/csi_driver_validation.py | 248 ++++++++++++++++++ 2 files changed, 252 insertions(+), 194 deletions(-) create mode 100644 common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py diff --git a/common/library/module_utils/input_validation/validation_flows/common_validation.py b/common/library/module_utils/input_validation/validation_flows/common_validation.py index 09e82c0780..71d0ac51f3 100644 --- a/common/library/module_utils/input_validation/validation_flows/common_validation.py +++ 
b/common/library/module_utils/input_validation/validation_flows/common_validation.py @@ -17,11 +17,12 @@ """ import json import os -import ipaddress import yaml +import ipaddress import subprocess from ast import literal_eval import ansible.module_utils.input_validation.common_utils.data_fetch as get +from ansible.module_utils.input_validation.validation_flows import csi_driver_validation import ansible.module_utils.input_validation.common_utils.data_validation as validate from ansible.module_utils.input_validation.common_utils import ( validation_utils, @@ -946,198 +947,6 @@ def is_ip_in_range(ip_str, ip_range_str): return False -def validate_secret_isilon_clusters(data): - cluster_errors = [] - - clusters = data.get("isilonClusters") - - # Check if isilonClusters is a defined, non-empty list - if not isinstance(clusters, list) or len(clusters) == 0: - cluster_errors.append("isilonClusters must be a non-empty list.") - return cluster_errors # Stop further checks - - for idx, item in enumerate(clusters): - cluster_prefix = f"Cluster {idx + 1}" - - # Validate clusterName - if not item.get("clusterName") or not isinstance(item["clusterName"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'clusterName'.") - - # Validate username - if not item.get("username") or not isinstance(item["username"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'username'.") - - # Validate password - if not item.get("password") or not isinstance(item["password"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'password'.") - - # Validate endpoint - if not item.get("endpoint") or not isinstance(item["endpoint"], str): - cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'endpoint'.") - - # Validate endpointPort if defined - if "endpointPort" in item: - if not isinstance(item["endpointPort"], int) or not (0 < item["endpointPort"] < 65536): - cluster_errors.append(f"{cluster_prefix}: 'endpointPort' must be an integer 
between 1 and 65535.") - - # Validate isDefault - if "isDefault" not in item or not isinstance(item["isDefault"], bool): - cluster_errors.append(f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") - - # Validate skipCertificateValidation if defined - if "skipCertificateValidation" in item: - if item["skipCertificateValidation"] is not True: - cluster_errors.append(f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") - - # Validate isiPath if defined - if "isiPath" in item: - if not isinstance(item["isiPath"], str) or not item["isiPath"].startswith('/'): - cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a valid Unix absolute path.") - - # Validate isiVolumePathPermissions if defined - if "isiVolumePathPermissions" in item: - perms = item["isiVolumePathPermissions"] - if not isinstance(perms, str) or not perms.strip().isdigit(): - cluster_errors.append(f"{cluster_prefix}: 'isiVolumePathPermissions' must be a non-empty string of digits.") - - return cluster_errors - -def validate_value_file_inputs(values_data): - value_errors = [] - - def add_error(field_path, value, msg): - value_errors.append( - f"Validation Error - {field_path}: '{value}' -> {msg}" - ) - - # Helper to safely get nested values - def get_nested(data, keys, default=None): - for key in keys: - if not isinstance(data, dict) or key not in data: - return default - data = data[key] - return data - - # 1. controller.controllerCount == 1 - controller_count = get_nested(values_data, ["controller", "controllerCount"]) - if controller_count != 1: - add_error("controller.controllerCount", controller_count, "Must be 1") - - # 2. controller.replication.enabled == false - replication_enabled = get_nested(values_data, ["controller", "replication", "enabled"]) - if replication_enabled is None or replication_enabled is not False: - add_error("controller.replication.enabled", replication_enabled, "Must be false") - - # 3. 
controller.resizer.enabled in [true, false] - resizer_enabled = get_nested(values_data, ["controller", "resizer", "enabled"]) - if resizer_enabled not in [True, False]: - add_error("controller.resizer.enabled", resizer_enabled, "Must be true or false") - - # 4. controller.snapshot.enabled == true - snapshot_enabled = get_nested(values_data, ["controller", "snapshot", "enabled"]) - if snapshot_enabled is not True: - add_error("controller.snapshot.enabled", snapshot_enabled, "Must be true") - - # 5. endpointPort is int in 1..65535 - endpoint_port = values_data.get("endpointPort") - if endpoint_port is None or not isinstance(endpoint_port, int) or not (1 <= endpoint_port <= 65535): - add_error("endpointPort", endpoint_port, "Must be between 1 and 65535") - - # 6. skipCertificateValidation == true - skip_cert = values_data.get("skipCertificateValidation") - if skip_cert is not True: - add_error("skipCertificateValidation", skip_cert, "Must be true") - - # 7. isiAuthType in [0, 1] - isi_auth = values_data.get("isiAuthType") - if isi_auth not in [0, 1]: - add_error("isiAuthType", isi_auth, "Must be 0 or 1") - - # 8. isiAccessZone is non-empty string - isi_access = values_data.get("isiAccessZone") - if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): - add_error("isiAccessZone", isi_access, "Must be a non-empty string") - - # 9. isiPath is Unix absolute path - isi_path = values_data.get("isiPath") - if not isinstance(isi_path, str) or not isi_path.startswith("/"): - add_error("isiPath", isi_path, "Must be a valid Unix absolute path") - - # 10. 
isiVolumePathPermissions is a non-empty string - permissions = values_data.get("isiVolumePathPermissions") - if not permissions or not isinstance(permissions, str) or not permissions.strip(): - add_error("isiVolumePathPermissions", permissions, "Must be a valid octal string") - - return value_errors - -def encrypt_file(secret_file_path, vault_secret_file_path): - cmd = [ - "ansible-vault", - "encrypt", - secret_file_path, - "--vault-password-file", - vault_secret_file_path, - ] - return validation_utils.run_subprocess(cmd) - -def decrypt_file(secret_file_path, vault_secret_file_path): - cmd = [ - "ansible-vault", - "decrypt", - secret_file_path, - "--vault-password-file", - vault_secret_file_path, - ] - return validation_utils.run_subprocess(cmd) - -def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): - decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path,) - - if decrypted_file: - try: - with open(secret_file_path, "r") as f: - data = yaml.safe_load(f) - encrypt_file(secret_file_path, vault_secret_file_path) - return data - except FileNotFoundError: - errors.append(create_error_msg("File not found", - secret_file_path, "Please check the associated file exists")) - except yaml.YAMLError as e: - errors.append(create_error_msg("Error loading yaml file", - secret_file_path, "Please check the associated file syntax")) - else: - errors.append(create_error_msg("Error occured when attempting to decrypt file.", - secret_file_path, "Please check that the assoicated vault file exists")) - return decrypted_file - -def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path,errors): - #valiadte secret file inputs - secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) - vault_secret_file_path= "/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" - #check if secret file exists - file_exists = os.path.exists(vault_secret_file_path.strip()) - - if 
secrets_file_encrypted: - secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) - if secret_data is None or secret_data is False: - errors.append(create_error_msg( - "Secret File Load", - secret_file_path, - "Failed to load or parse secret.yaml file. It may be invalid or empty." - )) - else: - secret_validation_errors = validate_secret_isilon_clusters(secret_data) - if secret_validation_errors: - for err in secret_validation_errors: - errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) - - #validate values file input - with open(values_file_path, "r") as f: - values_data = yaml.safe_load(f) - values_validation_errros = validate_value_file_inputs(values_data) - if values_validation_errros: - for value_err in values_validation_errros: - errors.append(create_error_msg(f"Powerscale Value File Validation Error: ",value_err, None)) def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, errors, @@ -1259,7 +1068,7 @@ def validate_k8s(data, admin_bmc_networks, softwares, ha_config, tag_names, erro en_us_validation_msg.CSI_DRIVER_VALUES_FAIL_MSG, ) ) - validate_powerscale_secret_and_values_file(csi_secret_file_path,csi_values_file_path, errors) + csi_driver_validation.validate_powerscale_secret_and_values_file(csi_secret_file_path,csi_values_file_path, errors) def validate_omnia_config( input_file_path, @@ -1462,3 +1271,4 @@ def validate_additional_software( ) ) return errors + diff --git a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py new file mode 100644 index 0000000000..5b4e55f74a --- /dev/null +++ b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py @@ -0,0 +1,248 @@ +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=import-error,no-name-in-module,too-many-positional-arguments,too-many-arguments,unused-argument +""" +Validates csi driver configuration files for Omnia. +""" +import os +import yaml +from ansible.module_utils.input_validation.common_utils import validation_utils +from ansible.module_utils.input_validation.common_utils import config + +file_names = config.files +create_error_msg = validation_utils.create_error_msg +create_file_path = validation_utils.create_file_path +contains_software = validation_utils.contains_software +check_mandatory_fields = validation_utils.check_mandatory_fields +flatten_sub_groups = validation_utils.flatten_sub_groups + + +def validate_secret_isilon_clusters(data): + """ + Validates csi secret file inputs for Omnia. 
+ """ + + cluster_errors = [] + + clusters = data.get("isilonClusters") + + # Check if isilonClusters is a defined, non-empty list + if not isinstance(clusters, list) or len(clusters) == 0: + cluster_errors.append("isilonClusters must be a non-empty list.") + return cluster_errors # Stop further checks + + for idx, item in enumerate(clusters): + cluster_prefix = f"Cluster {idx + 1}" + + # Validate clusterName + if not item.get("clusterName") or not isinstance(item["clusterName"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'clusterName'.") + + # Validate username + if not item.get("username") or not isinstance(item["username"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'username'.") + + # Validate password + if not item.get("password") or not isinstance(item["password"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'password'.") + + # Validate endpoint + if not item.get("endpoint") or not isinstance(item["endpoint"], str): + cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'endpoint'.") + + # Validate endpointPort if defined + if "endpointPort" in item: + if not isinstance(item["endpointPort"], int) or not (0 < item["endpointPort"] < 65536): + cluster_errors.append(f"{cluster_prefix}: 'endpointPort' must be an integer between 1 and 65535.") + + # Validate isDefault + if "isDefault" not in item or not isinstance(item["isDefault"], bool): + cluster_errors.append(f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") + + # Validate skipCertificateValidation if defined + if "skipCertificateValidation" in item: + if item["skipCertificateValidation"] is not True: + cluster_errors.append(f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") + + # Validate isiPath if defined + if "isiPath" in item: + if not isinstance(item["isiPath"], str) or not item["isiPath"].startswith('/'): + cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a valid 
Unix absolute path.") + + # Validate isiVolumePathPermissions if defined + if "isiVolumePathPermissions" in item: + perms = item["isiVolumePathPermissions"] + if not isinstance(perms, str) or not perms.strip().isdigit(): + cluster_errors.append(f"{cluster_prefix}: 'isiVolumePathPermissions' must be a non-empty string of digits.") + + return cluster_errors + +def validate_value_file_inputs(values_data): + """ + Validates csi value file inputs for Omnia. + """ + + value_errors = [] + + def add_error(field_path, value, msg): + value_errors.append( + f"Validation Error - {field_path}: '{value}' -> {msg}" + ) + + # Helper to safely get nested values + def get_nested(data, keys, default=None): + for key in keys: + if not isinstance(data, dict) or key not in data: + return default + data = data[key] + return data + + # 1. controller.controllerCount == 1 + controller_count = get_nested(values_data, ["controller", "controllerCount"]) + if controller_count != 1: + add_error("controller.controllerCount", controller_count, "Must be 1") + + # 2. controller.replication.enabled == false + replication_enabled = get_nested(values_data, ["controller", "replication", "enabled"]) + if replication_enabled is None or replication_enabled is not False: + add_error("controller.replication.enabled", replication_enabled, "Must be false") + + # 3. controller.resizer.enabled in [true, false] + resizer_enabled = get_nested(values_data, ["controller", "resizer", "enabled"]) + if resizer_enabled not in [True, False]: + add_error("controller.resizer.enabled", resizer_enabled, "Must be true or false") + + # 4. controller.snapshot.enabled == true + snapshot_enabled = get_nested(values_data, ["controller", "snapshot", "enabled"]) + if snapshot_enabled is not True: + add_error("controller.snapshot.enabled", snapshot_enabled, "Must be true") + + # 5. 
endpointPort is int in 1..65535 + endpoint_port = values_data.get("endpointPort") + if endpoint_port is None or not isinstance(endpoint_port, int) or not (1 <= endpoint_port <= 65535): + add_error("endpointPort", endpoint_port, "Must be between 1 and 65535") + + # 6. skipCertificateValidation == true + skip_cert = values_data.get("skipCertificateValidation") + if skip_cert is not True: + add_error("skipCertificateValidation", skip_cert, "Must be true") + + # 7. isiAuthType in [0, 1] + isi_auth = values_data.get("isiAuthType") + if isi_auth not in [0, 1]: + add_error("isiAuthType", isi_auth, "Must be 0 or 1") + + # 8. isiAccessZone is non-empty string + isi_access = values_data.get("isiAccessZone") + if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): + add_error("isiAccessZone", isi_access, "Must be a non-empty string") + + # 9. isiPath is Unix absolute path + isi_path = values_data.get("isiPath") + if not isinstance(isi_path, str) or not isi_path.startswith("/"): + add_error("isiPath", isi_path, "Must be a valid Unix absolute path") + + # 10. 
isiVolumePathPermissions is a non-empty string + permissions = values_data.get("isiVolumePathPermissions") + if not permissions or not isinstance(permissions, str) or not permissions.strip(): + add_error("isiVolumePathPermissions", permissions, "Must be a valid octal string") + + return value_errors + +def encrypt_file(secret_file_path, vault_secret_file_path): + """ + encrypt the secret file + """ + + cmd = [ + "ansible-vault", + "encrypt", + secret_file_path, + "--vault-password-file", + vault_secret_file_path, + ] + return validation_utils.run_subprocess(cmd) + +def decrypt_file(secret_file_path, vault_secret_file_path): + """ + encrypt the secret file + Takes 2 inputs: file name and secret file path + """ + + cmd = [ + "ansible-vault", + "decrypt", + secret_file_path, + "--vault-password-file", + vault_secret_file_path, + ] + return validation_utils.run_subprocess(cmd) + +def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): + """ + Process the secret file + decrypt the file first then parse it to get data + """ + + decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path,) + + if decrypted_file: + try: + with open(secret_file_path, "r") as f: + data = yaml.safe_load(f) + encrypt_file(secret_file_path, vault_secret_file_path) + return data + except FileNotFoundError: + errors.append(create_error_msg("File not found", + secret_file_path, "Please check the associated file exists")) + except yaml.YAMLError as e: + errors.append(create_error_msg("Error loading yaml file", + secret_file_path, "Please check the associated file syntax")) + else: + errors.append(create_error_msg("Error occured when attempting to decrypt file.", + secret_file_path, "Please check that the assoicated vault file exists")) + return decrypted_file + +def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path, errors): + """ + Driver code to initiate the powerscale secret and values file input validation + """ + + #valiadte secret 
file inputs + secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) + vault_secret_file_path= "/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" + #check if secret file exists + file_exists = os.path.exists(vault_secret_file_path.strip()) + + if secrets_file_encrypted: + secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) + if secret_data is None or secret_data is False: + errors.append(create_error_msg( + "Secret File Load", + secret_file_path, + "Failed to load or parse secret.yaml file. It may be invalid or empty." + )) + else: + secret_validation_errors = validate_secret_isilon_clusters(secret_data) + if secret_validation_errors: + for err in secret_validation_errors: + errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) + + #validate values file input + with open(values_file_path, "r") as f: + values_data = yaml.safe_load(f) + values_validation_errros = validate_value_file_inputs(values_data) + if values_validation_errros: + for value_err in values_validation_errros: + errors.append(create_error_msg(f"Powerscale Value File Validation Error: ",value_err, None)) From 75644bcff020923d4edcad72b00760bfcf0005ce Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Wed, 23 Jul 2025 11:59:06 +0000 Subject: [PATCH 49/76] added code for storage class --- .../tasks/csi_powerscale_install.yml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml index 98cd4a195d..da39443b04 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_install.yml @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -- name: Deploy external-snapshotter config CRDs using k8s module - kubernetes.core.k8s: - state: present - definition: "{{ lookup('file', item) | from_yaml }}" - loop: "{{ lookup('fileglob', csi_powerscale_path + '/csi-powerscale/external-snapshotter/client/config/crd/*.yaml', wantlist=True) }}" + +- name: Deploy external-snapshotter config CRDs + ansible.builtin.command: + cmd: "kubectl apply -f client/config/crd/" + chdir: "{{ csi_powerscale_path }}/csi-powerscale/external-snapshotter/" register: install_result failed_when: false - changed_when: install_result is changed + changed_when: install_result.changed - name: Deploy external-snapshotter snapshot-controller CRDs ansible.builtin.command: @@ -56,13 +56,13 @@ prompt: "{{ fail_msg_csi_powerscale_driver }}" when: isilon_non_running_pods.stdout_lines | length > 0 -- name: Create PowerScale storage class if deployment was successful - kubernetes.core.k8s: - state: present - definition: "{{ lookup('file', csi_powerscale_path + '/ps_storage_class.yml') | from_yaml }}" +- name: Create powerscale storage class if deployment was successful + ansible.builtin.command: + cmd: "kubectl apply -f ps_storage_class.yml" + chdir: "{{ csi_powerscale_path }}" register: sc_command_result failed_when: false - changed_when: sc_command_result is changed + changed_when: sc_command_result.changed when: isilon_non_running_pods.stdout_lines | length == 0 - name: Remove ps_storage_class.yml file From 475acab76c5d515b39af2ce92c656c56e9bb5948 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Thu, 24 Jul 2025 10:31:20 +0000 Subject: [PATCH 50/76] adding the csi innexamples file --- examples/rhel_software_config.json | 3 ++- .../template_rhel_9.6_software_config.json | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/rhel_software_config.json b/examples/rhel_software_config.json index 8e3fe97df5..e53ab67e36 100644 --- a/examples/rhel_software_config.json +++ b/examples/rhel_software_config.json @@ -20,7 
+20,8 @@ {"name": "utils"}, {"name": "ucx", "version": "1.15.0"}, {"name": "openmpi", "version": "4.1.6"}, - {"name": "racadm"} + {"name": "racadm"}, + {"name": "csi_driver_powerscale", "version":"v2.14.0"} ], "amdgpu": [ diff --git a/examples/software_config_template/template_rhel_9.6_software_config.json b/examples/software_config_template/template_rhel_9.6_software_config.json index 8e3fe97df5..69d158e5ca 100644 --- a/examples/software_config_template/template_rhel_9.6_software_config.json +++ b/examples/software_config_template/template_rhel_9.6_software_config.json @@ -20,7 +20,8 @@ {"name": "utils"}, {"name": "ucx", "version": "1.15.0"}, {"name": "openmpi", "version": "4.1.6"}, - {"name": "racadm"} + {"name": "racadm"}, + {"name": "csi_driver_powerscale", "version":"v2.14.0"} ], "amdgpu": [ From cb54d3d683fa5867e77ac7281f613320ec7168ba Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Mon, 28 Jul 2025 12:54:37 +0530 Subject: [PATCH 51/76] Update scheduler.yml adding the [0] index Signed-off-by: sakshi-singla-1735 --- scheduler/scheduler.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index ed4453db94..0698a9f0be 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -171,10 +171,9 @@ tasks_from: csi_powerscale_image_pull.yml when: - hostvars['127.0.0.1']['csi_driver_powerscale_precheck_pass'] | default(false) | bool - - hostvars['127.0.0.1']['omnia_config']['k8s_offline_install'] - name: Install CSI powerscale plugin on kube control nodes - hosts: kube_control_plane + hosts: kube_control_plane[0] gather_facts: false roles: - k8s_csi_powerscale_plugin From 7add8e1836f4d112ec61882be54398f31bc63247 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Mon, 28 Jul 2025 12:55:14 +0530 Subject: [PATCH 52/76] Update service_k8s_cluster.yml Signed-off-by: sakshi-singla-1735 --- scheduler/service_k8s_cluster.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index a5fe24047d..d575a20698 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -148,7 +148,7 @@ - common_plugins - name: CSI powerscale image pulling - hosts: kube_node, kube_control_plane[0] + hosts: kube_node, kube_control_plane tasks: - name: Pull images ansible.builtin.include_role: From 61e6cbdaf50b7ccdbc111f86e8a9a3e04cb2dd8b Mon Sep 17 00:00:00 2001 From: balajikumaran-c-s Date: Thu, 17 Jul 2025 09:45:46 +0000 Subject: [PATCH 53/76] fix: fixed ROCm package installation Signed-off-by: balajikumaran-c-s --- .../rocm_installation/tasks/install_rocm.yml | 19 +----- utils/roles/rocm_installation/vars/main.yml | 9 +-- .../rocm_validation/tasks/validate_amd.yml | 65 +++++-------------- utils/roles/rocm_validation/vars/main.yml | 15 +++-- utils/roles/rocm_validation/vars/redhat.yml | 18 ----- utils/roles/rocm_validation/vars/rocky.yml | 1 - utils/roles/rocm_validation/vars/ubuntu.yml | 17 ----- 7 files changed, 28 insertions(+), 116 deletions(-) delete mode 100644 utils/roles/rocm_validation/vars/redhat.yml delete mode 120000 utils/roles/rocm_validation/vars/rocky.yml delete mode 100644 utils/roles/rocm_validation/vars/ubuntu.yml diff --git a/utils/roles/rocm_installation/tasks/install_rocm.yml b/utils/roles/rocm_installation/tasks/install_rocm.yml index 6d7f28bbfc..c29bc5e5e5 100644 --- a/utils/roles/rocm_installation/tasks/install_rocm.yml +++ b/utils/roles/rocm_installation/tasks/install_rocm.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,23 +15,6 @@ - name: Verify Repo and Install ROCm packages block: - - name: Local local_repo_access.yml file - ansible.builtin.include_vars: "{{ local_repo_access_path }}" - - - name: Check if the ROCm preference source file exists - ansible.builtin.stat: - path: "{{ rocm_prefrence_src }}" - register: rocm_preference_src_stat - - - name: Create ROCm preference file - ansible.builtin.template: - src: "{{ rocm_prefrence_src }}" - dest: "{{ rocm_prefrence_dst }}" - mode: "{{ prefrence_file_mode }}" - when: - - ansible_distribution | lower in ubuntu_os - - not rocm_preference_src_stat.stat.exists - - name: Install ROCm packages ansible.builtin.package: name: "{{ rocm_packages }}" diff --git a/utils/roles/rocm_installation/vars/main.yml b/utils/roles/rocm_installation/vars/main.yml index d0dc9ceb30..8f4d11451a 100644 --- a/utils/roles/rocm_installation/vars/main.yml +++ b/utils/roles/rocm_installation/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,7 @@ # limitations under the License. --- -# Used: install_rocm.yml -local_repo_access_path: "/opt/omnia/offline/local_repo_access.yml" -rocm_prefrence_dst: "/etc/apt/preferences.d/rocm-pin-600" -rocm_prefrence_src: "rocm_preferences_ubuntu.j2" -ubuntu_os: "ubuntu" -prefrence_file_mode: '0644' +# Usage: install_rocm.yml rocm_packages: - "rocm" - "rocm-validation-suite" diff --git a/utils/roles/rocm_validation/tasks/validate_amd.yml b/utils/roles/rocm_validation/tasks/validate_amd.yml index 32bf826916..c093faafe3 100644 --- a/utils/roles/rocm_validation/tasks/validate_amd.yml +++ b/utils/roles/rocm_validation/tasks/validate_amd.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. 
or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,8 +23,9 @@ file: "{{ software_config_json_file }}" name: user_config -- name: Include vars for {{ oim_os }} - ansible.builtin.include_vars: "{{ role_path }}/vars/{{ oim_os }}.yml" +- name: Set fact provision_os + ansible.builtin.set_fact: + provision_os: "{{ user_config.cluster_os_type }}" - name: Get rocm status only if amdgpu present amd_status is true ansible.builtin.set_fact: @@ -41,62 +42,26 @@ seconds: "{{ warning_time }}" when: not rocm_input_status -- name: Check if the rocm offline repo exists - ansible.builtin.stat: - path: "{{ offline_rocm_directory }}/rocm/" - register: check_rocm_repo - when: rocm_input_status - - name: Set rocm_config_status when: - rocm_input_status - - user_config.repo_config == 'always' or user_config.repo_config == 'partial' - - check_rocm_repo.stat.exists block: - name: Fetch rocm_version ansible.builtin.set_fact: rocm_version: "{{ user_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" - - name: Set rocm_directory - ansible.builtin.set_fact: - rocm_directory: "{{ offline_rocm_directory }}/rocm/{{ rocm_version }}/" - - - name: Check rocm_directory exists or not - ansible.builtin.stat: - path: "{{ rocm_directory }}" - register: check_rocm_dir - - - name: Warning, rocm directory repo not found - ansible.builtin.pause: - prompt: "{{ rocm_repo_msg }}" - seconds: "{{ warning_time }}" - when: not check_rocm_dir.stat.exists - - - name: Set rocm_config_status to true - ansible.builtin.set_fact: - rocm_config_status: true - when: check_rocm_dir.stat.exists - rescue: - - name: Warning, rocm version not found - ansible.builtin.pause: - prompt: "{{ rocm_version_msg }}" - seconds: "{{ warning_time }}" - -- name: Set rocm_config_status - when: - - rocm_input_status - - user_config.repo_config == 'never' or 
user_config.repo_config == 'partial' - - not check_rocm_repo.stat.exists - block: - - name: Fetch rocm_version - ansible.builtin.set_fact: - rocm_version: "{{ user_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" + - name: Get ROCm repository details from Pulp + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[provision_os] }} distribution list --name rocm_{{ rocm_version }}" + delegate_to: localhost + register: check_rocm_repo + changed_when: false + no_log: true - - name: Set rocm_config_status to true + - name: Set rocm_config_status based on pulp rpm distribution ansible.builtin.set_fact: rocm_config_status: true + when: check_rocm_repo.stdout | from_json | length > 0 rescue: - - name: Warning, rocm version not found - ansible.builtin.pause: - prompt: "{{ rocm_version_msg }}" - seconds: "{{ warning_time }}" + - name: Log an error message + ansible.builtin.debug: + msg: " {{ rocm_repo_fail_msg }} " \ No newline at end of file diff --git a/utils/roles/rocm_validation/vars/main.yml b/utils/roles/rocm_validation/vars/main.yml index 9b3432d4ca..56dcdc45ef 100644 --- a/utils/roles/rocm_validation/vars/main.yml +++ b/utils/roles/rocm_validation/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,13 +13,18 @@ # limitations under the License. --- input_project_dir: "{{ hostvars['localhost']['input_project_dir'] }}" -# Usage: rocm_validation.yml + +# Usage: validate_amd.yml software_config_json_file: "{{ input_project_dir }}/software_config.json" -rocm_version_msg: "Warning, ROCm will not be installed. software_config.json does not have the version for ROCM." -rocm_repo_msg: "Warning, ROCm will not be installed. 
local_repo.yml is not executed for downloading ROCM packages." +rocm_repo_fail_msg: "Warning, ROCm will not be installed. local_repo.yml is not executed for downloading ROCM packages." rocm_stack_msg: "AMDGPU ROCm software stack not present in software_config.json" warning_time: 10 +pulp_bin_path: "/usr/local/bin/pulp" +os_package_map: + rhel: rpm + rocky: rpm + ubuntu: deb # Usage: include_local_repo_config.yml local_repo_config_file: "{{ input_project_dir }}/local_repo_config.yml" -local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." +local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." \ No newline at end of file diff --git a/utils/roles/rocm_validation/vars/redhat.yml b/utils/roles/rocm_validation/vars/redhat.yml deleted file mode 100644 index ca1a4ce480..0000000000 --- a/utils/roles/rocm_validation/vars/redhat.yml +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- - -# Usage: validate_amd.yml -# offline_rocm_directory: "{{ repo_store_path }}/cluster/yum" -offline_rocm_directory: "/opt/omnia/offline_repo/cluster/yum" diff --git a/utils/roles/rocm_validation/vars/rocky.yml b/utils/roles/rocm_validation/vars/rocky.yml deleted file mode 120000 index ba2f905fb1..0000000000 --- a/utils/roles/rocm_validation/vars/rocky.yml +++ /dev/null @@ -1 +0,0 @@ -redhat.yml \ No newline at end of file diff --git a/utils/roles/rocm_validation/vars/ubuntu.yml b/utils/roles/rocm_validation/vars/ubuntu.yml deleted file mode 100644 index fd5b6b94a9..0000000000 --- a/utils/roles/rocm_validation/vars/ubuntu.yml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- - -# Usage: validate_amd.yml -offline_rocm_directory: "{{ repo_store_path }}/cluster/apt" From a406bfa4ce256b53c3e4cb8615f93beae8be460a Mon Sep 17 00:00:00 2001 From: balajikumaran-c-s Date: Fri, 18 Jul 2025 04:23:25 +0000 Subject: [PATCH 54/76] lint fix Signed-off-by: balajikumaran-c-s --- utils/roles/rocm_validation/tasks/validate_amd.yml | 2 +- utils/roles/rocm_validation/vars/main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/roles/rocm_validation/tasks/validate_amd.yml b/utils/roles/rocm_validation/tasks/validate_amd.yml index c093faafe3..33e81da4ad 100644 --- a/utils/roles/rocm_validation/tasks/validate_amd.yml +++ b/utils/roles/rocm_validation/tasks/validate_amd.yml @@ -64,4 +64,4 @@ rescue: - name: Log an error message ansible.builtin.debug: - msg: " {{ rocm_repo_fail_msg }} " \ No newline at end of file + msg: " {{ rocm_repo_fail_msg }} " diff --git a/utils/roles/rocm_validation/vars/main.yml b/utils/roles/rocm_validation/vars/main.yml index 56dcdc45ef..cbf367be66 100644 --- a/utils/roles/rocm_validation/vars/main.yml +++ b/utils/roles/rocm_validation/vars/main.yml @@ -27,4 +27,4 @@ os_package_map: # Usage: include_local_repo_config.yml local_repo_config_file: "{{ input_project_dir }}/local_repo_config.yml" -local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." \ No newline at end of file +local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." From 3c7239a590bcdd190fa460ba9b5c0523dff274e9 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Sun, 20 Jul 2025 02:10:28 -0500 Subject: [PATCH 55/76] Defect fixes 1. ha_config validating even when enable_ha is false 2. 
slurm db password not prompted while running scheduler.yml alone --- scheduler/scheduler.yml | 2 +- .../roles/inventory_validation/tasks/fetch_omnia_config.yml | 4 ++-- .../roles/inventory_validation/tasks/validate_inventory.yml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index cbca483c06..d234c051f5 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -19,7 +19,7 @@ tasks: - name: Set dynamic run tags including k8s ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['k8s']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['k8s', 'slurm']) | unique }}" cacheable: true - name: Invoke get_config_credentials.yml diff --git a/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml b/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml index 50d8f201c0..5cc1f70011 100644 --- a/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml +++ b/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml @@ -77,9 +77,9 @@ {{ vars[cluster_ha_var_name] | selectattr('cluster_name', 'equalto', cluster_name) | list - | first }} + | default([]) }} - name: Set HA-related facts when: selected_cluster_ha is defined ansible.builtin.set_fact: - enable_k8s_ha: "{{ selected_cluster_ha.enable_k8s_ha | default(false) }}" + enable_k8s_ha: "{{ selected_cluster_ha[0].enable_k8s_ha | default(false) }}" diff --git a/utils/roles/inventory_validation/tasks/validate_inventory.yml b/utils/roles/inventory_validation/tasks/validate_inventory.yml index 728e4de32d..311e0f4f96 100644 --- a/utils/roles/inventory_validation/tasks/validate_inventory.yml +++ b/utils/roles/inventory_validation/tasks/validate_inventory.yml @@ -55,9 +55,9 @@ when: service_k8s_support and service_k8s_playbook is defined ansible.builtin.include_tasks: fetch_omnia_config.yml -- name: Validate service k8s nodes requirements - when: service_k8s_support and 
service_k8s_playbook is defined - ansible.builtin.include_tasks: k8s_validations.yml +# - name: Validate service k8s nodes requirements +# when: service_k8s_support and service_k8s_playbook is defined +# ansible.builtin.include_tasks: k8s_validations.yml - name: Validate inventory in all playbooks except service_k8s_cluster when: service_k8s_playbook is not defined From 384395e280c1133840d002fd7f85212c8fb9cde0 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Mon, 21 Jul 2025 05:56:18 -0500 Subject: [PATCH 56/76] input config handling ha case --- input/omnia_config.yml | 1 + scheduler/service_k8s_cluster.yml | 6 +++--- .../roles/inventory_validation/tasks/validate_inventory.yml | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/input/omnia_config.yml b/input/omnia_config.yml index ee0b2deee8..c006061727 100644 --- a/input/omnia_config.yml +++ b/input/omnia_config.yml @@ -45,6 +45,7 @@ run_intel_gaudi_tests: false # - cluster_name is required field # - deployment: Exactly one entry in both the service_k8s_cluster and compute_k8s_cluster lists must have deployment set to true to indicate where Kubernetes should be deployed. +# Please ensure corresponding cluster entry is added to high_availability_config.yml if deployment is set to true. # - Kubernetes SDN network.K8s_cni (Mandatory) - It can either be "calico" or "flannel".Default value assigned is "calico". 
# While setting up Kubernetes plugin for RoCE NIC, ensure that this value is set to "flannel" diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index ead3a8e793..a13f606a96 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -43,6 +43,9 @@ ansible.builtin.set_fact: service_k8s_playbook: true +- name: Invoke validate_config.yml to perform L1 and L2 validations for k8s + ansible.builtin.import_playbook: ../input_validation/validate_config.yml + - name: Validate inventory ansible.builtin.import_playbook: ../utils/inventory_validation.yml when: not ( hostvars['127.0.0.1']['inventory_validation_executed'] | default(false) | bool ) @@ -61,9 +64,6 @@ name: cluster_validation tasks_from: validation_status_check.yml -- name: Invoke validate_config.yml to perform L1 and L2 validations for k8s - ansible.builtin.import_playbook: ../input_validation/validate_config.yml - - name: Update Repositories/Registries on nodes ansible.builtin.import_playbook: ../utils/update_user_repo.yml diff --git a/utils/roles/inventory_validation/tasks/validate_inventory.yml b/utils/roles/inventory_validation/tasks/validate_inventory.yml index 311e0f4f96..728e4de32d 100644 --- a/utils/roles/inventory_validation/tasks/validate_inventory.yml +++ b/utils/roles/inventory_validation/tasks/validate_inventory.yml @@ -55,9 +55,9 @@ when: service_k8s_support and service_k8s_playbook is defined ansible.builtin.include_tasks: fetch_omnia_config.yml -# - name: Validate service k8s nodes requirements -# when: service_k8s_support and service_k8s_playbook is defined -# ansible.builtin.include_tasks: k8s_validations.yml +- name: Validate service k8s nodes requirements + when: service_k8s_support and service_k8s_playbook is defined + ansible.builtin.include_tasks: k8s_validations.yml - name: Validate inventory in all playbooks except service_k8s_cluster when: service_k8s_playbook is not defined From 6a85aef1f8933a5d1eac05f6cd8128b2964d0d05 Mon 
Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 22 Jul 2025 19:13:46 +0530 Subject: [PATCH 57/76] ansible lint fix --- .config/ansible-lint.yml | 2 ++ .../common/tasks/configure_postscripts_additional_softwares.yml | 2 +- .../common/tasks/generate_role_based_postscripts.yml | 2 +- .../roles/postscripts/rhel/tasks/configure_postbootscripts.yml | 2 +- .../credential_utility/roles/validation/tasks/pre_requisite.yml | 2 +- utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.config/ansible-lint.yml b/.config/ansible-lint.yml index ece2e3f9a9..991883a21b 100644 --- a/.config/ansible-lint.yml +++ b/.config/ansible-lint.yml @@ -21,6 +21,8 @@ exclude_paths: - utils/server_spec_update/roles/os_update/tasks/kcmdline_update_rocky.yml - utils/roles/oim_cleanup/vars/rocky.yml - scheduler/roles/k8s_start_services/files/k8s_dashboard_admin.yaml + - scheduler/playbooks/k8s_add_node.yml + - scheduler/playbooks/k8s_install.yml - "*ubuntu*" - "*rocky*" diff --git a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml index 3589a4d0bc..b13c62f735 100644 --- a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml +++ b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml @@ -15,7 +15,7 @@ - name: Create a list of discovered groups ansible.builtin.set_fact: - discovered_groups: "{{ discovered_groups | default([]) + item.split(',') | map('trim') | list }}" + discovered_groups: "{{ (discovered_groups | default([])) + (item.split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='group_name') | list }}" - name: Fetch list of unique groups from discovered groups diff --git a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml 
b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml index fe25f0576e..d28a1cb3da 100644 --- a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml +++ b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml @@ -15,7 +15,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - unique_roles: "{{ unique_roles | default([]) + item.split(',') | map('trim') | list }}" + unique_roles: "{{ (unique_roles | default([])) + (item.split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml index f5f15e899c..7c766fcd86 100644 --- a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml +++ b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml @@ -28,7 +28,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - all_roles: "{{ all_roles | default([]) + item.split(',') | map('trim') | list }}" + all_roles: "{{ (all_roles | default([])) + (item.split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml index a9da8d1c8a..a0c062bf22 100644 --- a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml +++ b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml @@ -83,7 +83,7 @@ - name: Set run tags for telemetry ansible.builtin.set_fact: - omnia_run_tags: "{{ (omnia_run_tags | default([])) + result.telemetry_status_list | unique }}" + omnia_run_tags: "{{ ((omnia_run_tags | default([])) + result.telemetry_status_list) | unique }}" when: - not result.skipped | default(false) - 
result.telemetry_status_list | length > 0 diff --git a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml index dec22b3002..f937ea7313 100644 --- a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml +++ b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml @@ -81,7 +81,7 @@ - name: Set Complete query based on user input ansible.builtin.set_fact: - query: "{{ query_part1 + query_part2 }}" + query: "{{ query_part1 ~ ' ' ~ query_part2 }}" rescue: - name: Handle the rescue condition From dd314f4b459467f5530ca9dd121e388863c3eaae Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Tue, 22 Jul 2025 19:24:14 +0530 Subject: [PATCH 58/76] Update configure_postscripts_additional_softwares.yml --- .../common/tasks/configure_postscripts_additional_softwares.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml index b13c62f735..a788938dc3 100644 --- a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml +++ b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml @@ -15,7 +15,7 @@ - name: Create a list of discovered groups ansible.builtin.set_fact: - discovered_groups: "{{ (discovered_groups | default([])) + (item.split(',') | map('trim') | list) }}" + discovered_groups: "{{ (discovered_groups | default([])) + ((item | default('') | string).split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='group_name') | list }}" - name: Fetch list of unique groups from discovered groups From 4a9a7743c089029d72a4d47e9d22cef90574ef5d Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 22 Jul 2025 19:29:59 +0530 Subject: [PATCH 59/76] lint fix --- 
.../common/tasks/generate_role_based_postscripts.yml | 2 +- .../roles/postscripts/rhel/tasks/configure_postbootscripts.yml | 2 +- .../credential_utility/roles/validation/tasks/pre_requisite.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml index d28a1cb3da..0bfc565c88 100644 --- a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml +++ b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml @@ -15,7 +15,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - unique_roles: "{{ (unique_roles | default([])) + (item.split(',') | map('trim') | list) }}" + unique_roles: "{{ (unique_roles | default([])) + ((item | default('') | string).split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml index 7c766fcd86..ff5356abb1 100644 --- a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml +++ b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml @@ -28,7 +28,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - all_roles: "{{ (all_roles | default([])) + (item.split(',') | map('trim') | list) }}" + all_roles: "{{ (all_roles | default([])) + ((item | default('') | string).split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml index a0c062bf22..0a562cc480 100644 --- 
a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml +++ b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml @@ -83,7 +83,7 @@ - name: Set run tags for telemetry ansible.builtin.set_fact: - omnia_run_tags: "{{ ((omnia_run_tags | default([])) + result.telemetry_status_list) | unique }}" + omnia_run_tags: "{{ (omnia_run_tags | default([])) + (result.telemetry_status_list | default([])) | unique }}" when: - not result.skipped | default(false) - result.telemetry_status_list | length > 0 From 0a3ddc6d3b7e7fb6ae55670452b1bc0933268f85 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 22 Jul 2025 19:31:56 +0530 Subject: [PATCH 60/76] ignore lint issue --- utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml | 2 +- utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml index 89f6570881..66a1632438 100644 --- a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml +++ b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml @@ -13,7 +13,7 @@ # limitations under the License. 
--- -- name: Extract query data from omniadb cluster.nodeinfo for {{ filename }} +- name: Extract query data from omniadb cluster.nodeinfo for {{ filename }} # noqa jinja[invalid] community.postgresql.postgresql_copy: db: "{{ nodeinfo_db_omniadb }}" login_user: "{{ nodeinfo_db_login_user }}" diff --git a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml index f937ea7313..dec22b3002 100644 --- a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml +++ b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml @@ -81,7 +81,7 @@ - name: Set Complete query based on user input ansible.builtin.set_fact: - query: "{{ query_part1 ~ ' ' ~ query_part2 }}" + query: "{{ query_part1 + query_part2 }}" rescue: - name: Handle the rescue condition From 6d0f01edd802c6296ec91a9c75823549ed314c37 Mon Sep 17 00:00:00 2001 From: balajikumaran-c-s Date: Thu, 17 Jul 2025 09:45:46 +0000 Subject: [PATCH 61/76] fix: fixed ROCm package installation Signed-off-by: balajikumaran-c-s --- .../rocm_installation/tasks/install_rocm.yml | 19 +----- utils/roles/rocm_installation/vars/main.yml | 9 +-- .../rocm_validation/tasks/validate_amd.yml | 65 +++++-------------- utils/roles/rocm_validation/vars/main.yml | 15 +++-- utils/roles/rocm_validation/vars/redhat.yml | 18 ----- utils/roles/rocm_validation/vars/rocky.yml | 1 - utils/roles/rocm_validation/vars/ubuntu.yml | 17 ----- 7 files changed, 28 insertions(+), 116 deletions(-) delete mode 100644 utils/roles/rocm_validation/vars/redhat.yml delete mode 120000 utils/roles/rocm_validation/vars/rocky.yml delete mode 100644 utils/roles/rocm_validation/vars/ubuntu.yml diff --git a/utils/roles/rocm_installation/tasks/install_rocm.yml b/utils/roles/rocm_installation/tasks/install_rocm.yml index 6d7f28bbfc..c29bc5e5e5 100644 --- a/utils/roles/rocm_installation/tasks/install_rocm.yml +++ b/utils/roles/rocm_installation/tasks/install_rocm.yml @@ -1,4 +1,4 @@ -# Copyright 2024 
Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,23 +15,6 @@ - name: Verify Repo and Install ROCm packages block: - - name: Local local_repo_access.yml file - ansible.builtin.include_vars: "{{ local_repo_access_path }}" - - - name: Check if the ROCm preference source file exists - ansible.builtin.stat: - path: "{{ rocm_prefrence_src }}" - register: rocm_preference_src_stat - - - name: Create ROCm preference file - ansible.builtin.template: - src: "{{ rocm_prefrence_src }}" - dest: "{{ rocm_prefrence_dst }}" - mode: "{{ prefrence_file_mode }}" - when: - - ansible_distribution | lower in ubuntu_os - - not rocm_preference_src_stat.stat.exists - - name: Install ROCm packages ansible.builtin.package: name: "{{ rocm_packages }}" diff --git a/utils/roles/rocm_installation/vars/main.yml b/utils/roles/rocm_installation/vars/main.yml index d0dc9ceb30..8f4d11451a 100644 --- a/utils/roles/rocm_installation/vars/main.yml +++ b/utils/roles/rocm_installation/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,7 @@ # limitations under the License. 
--- -# Used: install_rocm.yml -local_repo_access_path: "/opt/omnia/offline/local_repo_access.yml" -rocm_prefrence_dst: "/etc/apt/preferences.d/rocm-pin-600" -rocm_prefrence_src: "rocm_preferences_ubuntu.j2" -ubuntu_os: "ubuntu" -prefrence_file_mode: '0644' +# Usage: install_rocm.yml rocm_packages: - "rocm" - "rocm-validation-suite" diff --git a/utils/roles/rocm_validation/tasks/validate_amd.yml b/utils/roles/rocm_validation/tasks/validate_amd.yml index 32bf826916..c093faafe3 100644 --- a/utils/roles/rocm_validation/tasks/validate_amd.yml +++ b/utils/roles/rocm_validation/tasks/validate_amd.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,8 +23,9 @@ file: "{{ software_config_json_file }}" name: user_config -- name: Include vars for {{ oim_os }} - ansible.builtin.include_vars: "{{ role_path }}/vars/{{ oim_os }}.yml" +- name: Set fact provision_os + ansible.builtin.set_fact: + provision_os: "{{ user_config.cluster_os_type }}" - name: Get rocm status only if amdgpu present amd_status is true ansible.builtin.set_fact: @@ -41,62 +42,26 @@ seconds: "{{ warning_time }}" when: not rocm_input_status -- name: Check if the rocm offline repo exists - ansible.builtin.stat: - path: "{{ offline_rocm_directory }}/rocm/" - register: check_rocm_repo - when: rocm_input_status - - name: Set rocm_config_status when: - rocm_input_status - - user_config.repo_config == 'always' or user_config.repo_config == 'partial' - - check_rocm_repo.stat.exists block: - name: Fetch rocm_version ansible.builtin.set_fact: rocm_version: "{{ user_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" - - name: Set rocm_directory - ansible.builtin.set_fact: - rocm_directory: "{{ offline_rocm_directory }}/rocm/{{ 
rocm_version }}/" - - - name: Check rocm_directory exists or not - ansible.builtin.stat: - path: "{{ rocm_directory }}" - register: check_rocm_dir - - - name: Warning, rocm directory repo not found - ansible.builtin.pause: - prompt: "{{ rocm_repo_msg }}" - seconds: "{{ warning_time }}" - when: not check_rocm_dir.stat.exists - - - name: Set rocm_config_status to true - ansible.builtin.set_fact: - rocm_config_status: true - when: check_rocm_dir.stat.exists - rescue: - - name: Warning, rocm version not found - ansible.builtin.pause: - prompt: "{{ rocm_version_msg }}" - seconds: "{{ warning_time }}" - -- name: Set rocm_config_status - when: - - rocm_input_status - - user_config.repo_config == 'never' or user_config.repo_config == 'partial' - - not check_rocm_repo.stat.exists - block: - - name: Fetch rocm_version - ansible.builtin.set_fact: - rocm_version: "{{ user_config.amdgpu | selectattr('name', 'equalto', 'rocm') | map(attribute='version') | first }}" + - name: Get ROCm repository details from Pulp + ansible.builtin.command: "{{ pulp_bin_path }} {{ os_package_map[provision_os] }} distribution list --name rocm_{{ rocm_version }}" + delegate_to: localhost + register: check_rocm_repo + changed_when: false + no_log: true - - name: Set rocm_config_status to true + - name: Set rocm_config_status based on pulp rpm distribution ansible.builtin.set_fact: rocm_config_status: true + when: check_rocm_repo.stdout | from_json | length > 0 rescue: - - name: Warning, rocm version not found - ansible.builtin.pause: - prompt: "{{ rocm_version_msg }}" - seconds: "{{ warning_time }}" + - name: Log an error message + ansible.builtin.debug: + msg: " {{ rocm_repo_fail_msg }} " \ No newline at end of file diff --git a/utils/roles/rocm_validation/vars/main.yml b/utils/roles/rocm_validation/vars/main.yml index 9b3432d4ca..56dcdc45ef 100644 --- a/utils/roles/rocm_validation/vars/main.yml +++ b/utils/roles/rocm_validation/vars/main.yml @@ -1,4 +1,4 @@ -# Copyright 2024 Dell Inc. 
or its subsidiaries. All Rights Reserved. +# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,13 +13,18 @@ # limitations under the License. --- input_project_dir: "{{ hostvars['localhost']['input_project_dir'] }}" -# Usage: rocm_validation.yml + +# Usage: validate_amd.yml software_config_json_file: "{{ input_project_dir }}/software_config.json" -rocm_version_msg: "Warning, ROCm will not be installed. software_config.json does not have the version for ROCM." -rocm_repo_msg: "Warning, ROCm will not be installed. local_repo.yml is not executed for downloading ROCM packages." +rocm_repo_fail_msg: "Warning, ROCm will not be installed. local_repo.yml is not executed for downloading ROCM packages." rocm_stack_msg: "AMDGPU ROCm software stack not present in software_config.json" warning_time: 10 +pulp_bin_path: "/usr/local/bin/pulp" +os_package_map: + rhel: rpm + rocky: rpm + ubuntu: deb # Usage: include_local_repo_config.yml local_repo_config_file: "{{ input_project_dir }}/local_repo_config.yml" -local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." +local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." \ No newline at end of file diff --git a/utils/roles/rocm_validation/vars/redhat.yml b/utils/roles/rocm_validation/vars/redhat.yml deleted file mode 100644 index ca1a4ce480..0000000000 --- a/utils/roles/rocm_validation/vars/redhat.yml +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -# Usage: validate_amd.yml -# offline_rocm_directory: "{{ repo_store_path }}/cluster/yum" -offline_rocm_directory: "/opt/omnia/offline_repo/cluster/yum" diff --git a/utils/roles/rocm_validation/vars/rocky.yml b/utils/roles/rocm_validation/vars/rocky.yml deleted file mode 120000 index ba2f905fb1..0000000000 --- a/utils/roles/rocm_validation/vars/rocky.yml +++ /dev/null @@ -1 +0,0 @@ -redhat.yml \ No newline at end of file diff --git a/utils/roles/rocm_validation/vars/ubuntu.yml b/utils/roles/rocm_validation/vars/ubuntu.yml deleted file mode 100644 index fd5b6b94a9..0000000000 --- a/utils/roles/rocm_validation/vars/ubuntu.yml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2024 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- - -# Usage: validate_amd.yml -offline_rocm_directory: "{{ repo_store_path }}/cluster/apt" From 774fb9ded3a7485f76f8e0347511ecf5c03501eb Mon Sep 17 00:00:00 2001 From: balajikumaran-c-s Date: Fri, 18 Jul 2025 04:23:25 +0000 Subject: [PATCH 62/76] lint fix Signed-off-by: balajikumaran-c-s --- utils/roles/rocm_validation/tasks/validate_amd.yml | 2 +- utils/roles/rocm_validation/vars/main.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/roles/rocm_validation/tasks/validate_amd.yml b/utils/roles/rocm_validation/tasks/validate_amd.yml index c093faafe3..33e81da4ad 100644 --- a/utils/roles/rocm_validation/tasks/validate_amd.yml +++ b/utils/roles/rocm_validation/tasks/validate_amd.yml @@ -64,4 +64,4 @@ rescue: - name: Log an error message ansible.builtin.debug: - msg: " {{ rocm_repo_fail_msg }} " \ No newline at end of file + msg: " {{ rocm_repo_fail_msg }} " diff --git a/utils/roles/rocm_validation/vars/main.yml b/utils/roles/rocm_validation/vars/main.yml index 56dcdc45ef..cbf367be66 100644 --- a/utils/roles/rocm_validation/vars/main.yml +++ b/utils/roles/rocm_validation/vars/main.yml @@ -27,4 +27,4 @@ os_package_map: # Usage: include_local_repo_config.yml local_repo_config_file: "{{ input_project_dir }}/local_repo_config.yml" -local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." \ No newline at end of file +local_repo_config_syntax_fail_msg: "Failed. Syntax errors present in local_repo_config.yml. Fix errors and re-run playbook again." From 514e59a8eec4e4bdce329dc6a48353edacd91060 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Sun, 20 Jul 2025 02:10:28 -0500 Subject: [PATCH 63/76] Defect fixes 1. ha_config validating even when enable_ha is false 2. 
slurm db password not prompted while running scheduler.yml alone --- scheduler/scheduler.yml | 2 +- .../roles/inventory_validation/tasks/fetch_omnia_config.yml | 4 ++-- .../roles/inventory_validation/tasks/validate_inventory.yml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index cbca483c06..d234c051f5 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -19,7 +19,7 @@ tasks: - name: Set dynamic run tags including k8s ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['k8s']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['k8s', 'slurm']) | unique }}" cacheable: true - name: Invoke get_config_credentials.yml diff --git a/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml b/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml index 50d8f201c0..5cc1f70011 100644 --- a/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml +++ b/utils/roles/inventory_validation/tasks/fetch_omnia_config.yml @@ -77,9 +77,9 @@ {{ vars[cluster_ha_var_name] | selectattr('cluster_name', 'equalto', cluster_name) | list - | first }} + | default([]) }} - name: Set HA-related facts when: selected_cluster_ha is defined ansible.builtin.set_fact: - enable_k8s_ha: "{{ selected_cluster_ha.enable_k8s_ha | default(false) }}" + enable_k8s_ha: "{{ selected_cluster_ha[0].enable_k8s_ha | default(false) }}" diff --git a/utils/roles/inventory_validation/tasks/validate_inventory.yml b/utils/roles/inventory_validation/tasks/validate_inventory.yml index 728e4de32d..311e0f4f96 100644 --- a/utils/roles/inventory_validation/tasks/validate_inventory.yml +++ b/utils/roles/inventory_validation/tasks/validate_inventory.yml @@ -55,9 +55,9 @@ when: service_k8s_support and service_k8s_playbook is defined ansible.builtin.include_tasks: fetch_omnia_config.yml -- name: Validate service k8s nodes requirements - when: service_k8s_support and 
service_k8s_playbook is defined - ansible.builtin.include_tasks: k8s_validations.yml +# - name: Validate service k8s nodes requirements +# when: service_k8s_support and service_k8s_playbook is defined +# ansible.builtin.include_tasks: k8s_validations.yml - name: Validate inventory in all playbooks except service_k8s_cluster when: service_k8s_playbook is not defined From f1067cfabc4633302b3dae7c4c61aa3ec4f020aa Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Mon, 21 Jul 2025 05:56:18 -0500 Subject: [PATCH 64/76] input config handling ha case --- input/omnia_config.yml | 1 + scheduler/service_k8s_cluster.yml | 6 +++--- .../roles/inventory_validation/tasks/validate_inventory.yml | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/input/omnia_config.yml b/input/omnia_config.yml index ee0b2deee8..c006061727 100644 --- a/input/omnia_config.yml +++ b/input/omnia_config.yml @@ -45,6 +45,7 @@ run_intel_gaudi_tests: false # - cluster_name is required field # - deployment: Exactly one entry in both the service_k8s_cluster and compute_k8s_cluster lists must have deployment set to true to indicate where Kubernetes should be deployed. +# Please ensure corresponding cluster entry is added to high_availability_config.yml if deployment is set to true. # - Kubernetes SDN network.K8s_cni (Mandatory) - It can either be "calico" or "flannel".Default value assigned is "calico". 
# While setting up Kubernetes plugin for RoCE NIC, ensure that this value is set to "flannel" diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index ead3a8e793..a13f606a96 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -43,6 +43,9 @@ ansible.builtin.set_fact: service_k8s_playbook: true +- name: Invoke validate_config.yml to perform L1 and L2 validations for k8s + ansible.builtin.import_playbook: ../input_validation/validate_config.yml + - name: Validate inventory ansible.builtin.import_playbook: ../utils/inventory_validation.yml when: not ( hostvars['127.0.0.1']['inventory_validation_executed'] | default(false) | bool ) @@ -61,9 +64,6 @@ name: cluster_validation tasks_from: validation_status_check.yml -- name: Invoke validate_config.yml to perform L1 and L2 validations for k8s - ansible.builtin.import_playbook: ../input_validation/validate_config.yml - - name: Update Repositories/Registries on nodes ansible.builtin.import_playbook: ../utils/update_user_repo.yml diff --git a/utils/roles/inventory_validation/tasks/validate_inventory.yml b/utils/roles/inventory_validation/tasks/validate_inventory.yml index 311e0f4f96..728e4de32d 100644 --- a/utils/roles/inventory_validation/tasks/validate_inventory.yml +++ b/utils/roles/inventory_validation/tasks/validate_inventory.yml @@ -55,9 +55,9 @@ when: service_k8s_support and service_k8s_playbook is defined ansible.builtin.include_tasks: fetch_omnia_config.yml -# - name: Validate service k8s nodes requirements -# when: service_k8s_support and service_k8s_playbook is defined -# ansible.builtin.include_tasks: k8s_validations.yml +- name: Validate service k8s nodes requirements + when: service_k8s_support and service_k8s_playbook is defined + ansible.builtin.include_tasks: k8s_validations.yml - name: Validate inventory in all playbooks except service_k8s_cluster when: service_k8s_playbook is not defined From 77daabf2cbdb318e2f67039612e4d68dd3e9e591 Mon 
Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 22 Jul 2025 19:13:46 +0530 Subject: [PATCH 65/76] ansible lint fix --- .config/ansible-lint.yml | 2 ++ .../common/tasks/configure_postscripts_additional_softwares.yml | 2 +- .../common/tasks/generate_role_based_postscripts.yml | 2 +- .../roles/postscripts/rhel/tasks/configure_postbootscripts.yml | 2 +- .../credential_utility/roles/validation/tasks/pre_requisite.yml | 2 +- utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.config/ansible-lint.yml b/.config/ansible-lint.yml index ece2e3f9a9..991883a21b 100644 --- a/.config/ansible-lint.yml +++ b/.config/ansible-lint.yml @@ -21,6 +21,8 @@ exclude_paths: - utils/server_spec_update/roles/os_update/tasks/kcmdline_update_rocky.yml - utils/roles/oim_cleanup/vars/rocky.yml - scheduler/roles/k8s_start_services/files/k8s_dashboard_admin.yaml + - scheduler/playbooks/k8s_add_node.yml + - scheduler/playbooks/k8s_install.yml - "*ubuntu*" - "*rocky*" diff --git a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml index 3589a4d0bc..b13c62f735 100644 --- a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml +++ b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml @@ -15,7 +15,7 @@ - name: Create a list of discovered groups ansible.builtin.set_fact: - discovered_groups: "{{ discovered_groups | default([]) + item.split(',') | map('trim') | list }}" + discovered_groups: "{{ (discovered_groups | default([])) + (item.split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='group_name') | list }}" - name: Fetch list of unique groups from discovered groups diff --git a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml 
b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml index fe25f0576e..d28a1cb3da 100644 --- a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml +++ b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml @@ -15,7 +15,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - unique_roles: "{{ unique_roles | default([]) + item.split(',') | map('trim') | list }}" + unique_roles: "{{ (unique_roles | default([])) + (item.split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml index f5f15e899c..7c766fcd86 100644 --- a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml +++ b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml @@ -28,7 +28,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - all_roles: "{{ all_roles | default([]) + item.split(',') | map('trim') | list }}" + all_roles: "{{ (all_roles | default([])) + (item.split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml index a9da8d1c8a..a0c062bf22 100644 --- a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml +++ b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml @@ -83,7 +83,7 @@ - name: Set run tags for telemetry ansible.builtin.set_fact: - omnia_run_tags: "{{ (omnia_run_tags | default([])) + result.telemetry_status_list | unique }}" + omnia_run_tags: "{{ ((omnia_run_tags | default([])) + result.telemetry_status_list) | unique }}" when: - not result.skipped | default(false) - 
result.telemetry_status_list | length > 0 diff --git a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml index dec22b3002..f937ea7313 100644 --- a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml +++ b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml @@ -81,7 +81,7 @@ - name: Set Complete query based on user input ansible.builtin.set_fact: - query: "{{ query_part1 + query_part2 }}" + query: "{{ query_part1 ~ ' ' ~ query_part2 }}" rescue: - name: Handle the rescue condition From 00344e78476e68a6ea91c1a797d7226b9e6be833 Mon Sep 17 00:00:00 2001 From: Aditya Deshpande <115771515+Aditya-DP@users.noreply.github.com> Date: Tue, 22 Jul 2025 19:24:14 +0530 Subject: [PATCH 66/76] Update configure_postscripts_additional_softwares.yml --- .../common/tasks/configure_postscripts_additional_softwares.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml index b13c62f735..a788938dc3 100644 --- a/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml +++ b/discovery/roles/postscripts/common/tasks/configure_postscripts_additional_softwares.yml @@ -15,7 +15,7 @@ - name: Create a list of discovered groups ansible.builtin.set_fact: - discovered_groups: "{{ (discovered_groups | default([])) + (item.split(',') | map('trim') | list) }}" + discovered_groups: "{{ (discovered_groups | default([])) + ((item | default('') | string).split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='group_name') | list }}" - name: Fetch list of unique groups from discovered groups From addcb337d6dc053cb06f9868f05764b0d55b71a5 Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 22 Jul 2025 19:29:59 +0530 Subject: [PATCH 67/76] lint fix --- 
.../common/tasks/generate_role_based_postscripts.yml | 2 +- .../roles/postscripts/rhel/tasks/configure_postbootscripts.yml | 2 +- .../credential_utility/roles/validation/tasks/pre_requisite.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml index d28a1cb3da..0bfc565c88 100644 --- a/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml +++ b/discovery/roles/postscripts/common/tasks/generate_role_based_postscripts.yml @@ -15,7 +15,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - unique_roles: "{{ (unique_roles | default([])) + (item.split(',') | map('trim') | list) }}" + unique_roles: "{{ (unique_roles | default([])) + ((item | default('') | string).split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml index 7c766fcd86..ff5356abb1 100644 --- a/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml +++ b/discovery/roles/postscripts/rhel/tasks/configure_postbootscripts.yml @@ -28,7 +28,7 @@ - name: Create a list of unique roles ansible.builtin.set_fact: - all_roles: "{{ (all_roles | default([])) + (item.split(',') | map('trim') | list) }}" + all_roles: "{{ (all_roles | default([])) + ((item | default('') | string).split(',') | map('trim') | list) }}" loop: "{{ discovered_nodes | map(attribute='role') | select('defined') | list }}" when: item | length > 0 diff --git a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml index a0c062bf22..0a562cc480 100644 --- 
a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml +++ b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml @@ -83,7 +83,7 @@ - name: Set run tags for telemetry ansible.builtin.set_fact: - omnia_run_tags: "{{ ((omnia_run_tags | default([])) + result.telemetry_status_list) | unique }}" + omnia_run_tags: "{{ (omnia_run_tags | default([])) + (result.telemetry_status_list | default([])) | unique }}" when: - not result.skipped | default(false) - result.telemetry_status_list | length > 0 From 46248481d8ce1fe53127c99f905422d017dc478d Mon Sep 17 00:00:00 2001 From: aditya-deshpande Date: Tue, 22 Jul 2025 19:31:56 +0530 Subject: [PATCH 68/76] ignore lint issue --- utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml | 2 +- utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml index 89f6570881..66a1632438 100644 --- a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml +++ b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/db_dump.yml @@ -13,7 +13,7 @@ # limitations under the License. 
--- -- name: Extract query data from omniadb cluster.nodeinfo for {{ filename }} +- name: Extract query data from omniadb cluster.nodeinfo for {{ filename }} # noqa jinja[invalid] community.postgresql.postgresql_copy: db: "{{ nodeinfo_db_omniadb }}" login_user: "{{ nodeinfo_db_login_user }}" diff --git a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml index f937ea7313..dec22b3002 100644 --- a/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml +++ b/utils/nodeinfo_db/roles/nodeinfo_db/tasks/validate_inputs.yml @@ -81,7 +81,7 @@ - name: Set Complete query based on user input ansible.builtin.set_fact: - query: "{{ query_part1 ~ ' ' ~ query_part2 }}" + query: "{{ query_part1 + query_part2 }}" rescue: - name: Handle the rescue condition From e1d3de6fc065aedf30cdb79d478c2950da0f949d Mon Sep 17 00:00:00 2001 From: Nagachandan-P Date: Tue, 29 Jul 2025 12:22:45 +0000 Subject: [PATCH 69/76] pulp with docker creds --- common/library/module_utils/local_repo/config.py | 2 +- common/library/module_utils/local_repo/download_image.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/library/module_utils/local_repo/config.py b/common/library/module_utils/local_repo/config.py index 22b8d26357..d2e1580966 100644 --- a/common/library/module_utils/local_repo/config.py +++ b/common/library/module_utils/local_repo/config.py @@ -101,7 +101,7 @@ "create_container_remote_auth": "pulp container remote create --name %s --url %s --upstream-name %s --policy %s --include-tags '%s' --username %s --password '%s'", - "update_container_remote_auth": "pulp container remote update --name %s --url %s --upstream-name %s --policy %s --include-tags '[\"%s\"]' --username %s --password '%s'" + "update_container_remote_auth": "pulp container remote update --name %s --url %s --upstream-name %s --policy %s --include-tags '%s' --username %s --password '%s'" } OMNIA_CREDENTIALS_YAML_PATH = 
"/opt/omnia/input/project_default/omnia_config_credentials.yml" diff --git a/common/library/module_utils/local_repo/download_image.py b/common/library/module_utils/local_repo/download_image.py index 07293d9c6c..c9b3020a7b 100644 --- a/common/library/module_utils/local_repo/download_image.py +++ b/common/library/module_utils/local_repo/download_image.py @@ -82,7 +82,8 @@ def create_container_remote_with_auth(remote_name, remote_url, package, policy_t return True new_tags = existing_tags + [tag] - tags_str = ",".join(new_tags) + tags_str = json.dumps(new_tags) + update_command = pulp_container_commands["update_container_remote_auth"] % ( remote_name, remote_url, package, policy_type, tags_str, docker_username, docker_password From 47aabfad4738ccc240c5955682afca9e4fee0e0c Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Thu, 31 Jul 2025 13:26:33 +0530 Subject: [PATCH 70/76] Update csi_driver_validation.py Signed-off-by: sakshi-singla-1735 --- .../validation_flows/csi_driver_validation.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py index 1d7fa824e3..cfb4ded5b0 100644 --- a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py @@ -214,23 +214,15 @@ def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): secret_file_path, "Please check that the assoicated vault file exists")) return decrypted_file -<<<<<<< HEAD -def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path, errors): -======= def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path, errors, input_file_path): ->>>>>>> 7e4a9b9647004e002ab4a861e825557dbde303ee """ Driver code to initiate the powerscale secret and values 
file input validation """ #valiadte secret file inputs secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) -<<<<<<< HEAD - vault_secret_file_path= "/omnia/scheduler/roles/k8s_csi_powerscale_plugin/files/.csi_powerscale_secret_vault" -======= file_path = os.path.dirname(input_file_path) vault_secret_file_path = os.path.join(file_path, ".csi_powerscale_secret_vault") ->>>>>>> 7e4a9b9647004e002ab4a861e825557dbde303ee #check if secret file exists file_exists = os.path.exists(vault_secret_file_path.strip()) @@ -247,11 +239,7 @@ def validate_powerscale_secret_and_values_file(secret_file_path, values_file_pat if secret_validation_errors: for err in secret_validation_errors: errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) -<<<<<<< HEAD -======= - ->>>>>>> 7e4a9b9647004e002ab4a861e825557dbde303ee #validate values file input with open(values_file_path, "r") as f: values_data = yaml.safe_load(f) From c4302b91991ea1fdcce27b61dfe971732cf56f2e Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Tue, 5 Aug 2025 12:45:25 +0000 Subject: [PATCH 71/76] csi patch fix Signed-off-by: sakshi-singla-1735 --- .../schema/credential_rules.json | 12 ++++++ .../validation_flows/csi_driver_validation.py | 37 ++++++++++++------- scheduler/inv | 9 +++++ .../tasks/csi_powerscale_config_secret.yml | 12 +++++- scheduler/scheduler.yml | 2 +- scheduler/service_k8s_cluster.yml | 2 +- 6 files changed, 57 insertions(+), 17 deletions(-) create mode 100644 scheduler/inv diff --git a/common/library/module_utils/input_validation/schema/credential_rules.json b/common/library/module_utils/input_validation/schema/credential_rules.json index b5c10d041b..6ffa66cf9b 100644 --- a/common/library/module_utils/input_validation/schema/credential_rules.json +++ b/common/library/module_utils/input_validation/schema/credential_rules.json @@ -130,5 +130,17 @@ "maxLength": 128, "pattern": "^(?!admin$)[^\\\\\\-'\"]+$", "description": "Password for grafana 
UI. Should not be kept 'admin. Length must be at least 5 characters and must not contain backslashes (\\), hyphens (-), single quotes ('), or double quotes (\\\")." + }, + "csi_username": { + "minLength": 4, + "maxLength": 64, + "description": "Username for Powerscale UI. Must not contain semicolons (;), square brackets ([]), or backticks (`).", + "pattern": "^[^;\\[\\]`]+$" + }, + "csi_password": { + "description": "Password for Powerscale UI. Must not contain hyphens (-), single quotes ('), double quotes (\"), at symbols (@), or backslashes (\\).", + "minLength": 5, + "maxLength": 32, + "pattern": "^[^\\-\\'\\\"@\\\\]*$" } } diff --git a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py index cfb4ded5b0..5657475b7d 100644 --- a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py @@ -34,7 +34,6 @@ def validate_secret_isilon_clusters(data): """ cluster_errors = [] - clusters = data.get("isilonClusters") # Check if isilonClusters is a defined, non-empty list @@ -46,19 +45,19 @@ def validate_secret_isilon_clusters(data): cluster_prefix = f"Cluster {idx + 1}" # Validate clusterName - if not item.get("clusterName") or not isinstance(item["clusterName"], str): + if not isinstance(item.get("clusterName"), str) or not item["clusterName"].strip(): cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'clusterName'.") # Validate username - if not item.get("username") or not isinstance(item["username"], str): + if not isinstance(item.get("username"), str) or not item["username"].strip(): cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'username'.") # Validate password - if not item.get("password") or not isinstance(item["password"], str): + if not isinstance(item.get("password"), str) or not 
item["password"].strip(): cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'password'.") # Validate endpoint - if not item.get("endpoint") or not isinstance(item["endpoint"], str): + if not isinstance(item.get("endpoint"), str) or not item["endpoint"].strip(): cluster_errors.append(f"{cluster_prefix}: Invalid or missing 'endpoint'.") # Validate endpointPort if defined @@ -77,8 +76,13 @@ def validate_secret_isilon_clusters(data): # Validate isiPath if defined if "isiPath" in item: - if not isinstance(item["isiPath"], str) or not item["isiPath"].startswith('/'): - cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a valid Unix absolute path.") + isi_path = item["isiPath"] + if ( + not isinstance(isi_path, str) or + not isi_path.strip() or + not isi_path.lstrip().startswith('/') + ): + cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a non-empty valid Unix absolute path.") # Validate isiVolumePathPermissions if defined if "isiVolumePathPermissions" in item: @@ -148,10 +152,16 @@ def get_nested(data, keys, default=None): if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): add_error("isiAccessZone", isi_access, "Must be a non-empty string") - # 9. isiPath is Unix absolute path - isi_path = values_data.get("isiPath") - if not isinstance(isi_path, str) or not isi_path.startswith("/"): - add_error("isiPath", isi_path, "Must be a valid Unix absolute path") + # Validate isiPath if defined + if "isiPath" in item: + isi_path = item["isiPath"] + if ( + not isinstance(isi_path, str) or + not isi_path.strip() or + not isi_path.lstrip().startswith('/') + ): + add_error(f"{cluster_prefix}: 'isiPath' must be a non-empty valid Unix absolute path.") + # 10. 
isiVolumePathPermissions is a non-empty string permissions = values_data.get("isiVolumePathPermissions") @@ -195,8 +205,8 @@ def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): decrypt the file first then parse it to get data """ - decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path,) - + decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path) + errors.append(create_error_msg("decrypted_file",decrypted_file,"decrypted_file")) if decrypted_file: try: with open(secret_file_path, "r") as f: @@ -228,6 +238,7 @@ def validate_powerscale_secret_and_values_file(secret_file_path, values_file_pat if secrets_file_encrypted: secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) + errors.append(create_error_msg("secrete_data",secret_data,"secret")) if secret_data is None or secret_data is False: errors.append(create_error_msg( "Secret File Load", diff --git a/scheduler/inv b/scheduler/inv new file mode 100644 index 0000000000..d77420f09c --- /dev/null +++ b/scheduler/inv @@ -0,0 +1,9 @@ +[etcd] +10.19.0.3 + +[kube_control_plane] +10.19.0.3 + +[kube_node] +10.19.0.2 +10.19.0.4 diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml index e7e47b7c54..a1f7724c5c 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml @@ -41,14 +41,22 @@ set_fact: decoded_config: "{{ existing_secret.resources[0].data.config | b64decode | from_yaml }}" +- name: Debug username + debug: + var: hostvars['127.0.0.1']['csi_username'] + +- name: Debug username + debug: + var: hostvars['127.0.0.1']['csi_password'] + - name: Update username and password in decoded config set_fact: updated_config: >- {{ decoded_config | combine({ 'isilonClusters': [ 
decoded_config.isilonClusters[0] | combine({ - 'username': correct_username, - 'password': correct_password + 'username': hostvars['127.0.0.1']['csi_username'], + 'password': hostvars['127.0.0.1']['csi_password'] }) ] }) }} diff --git a/scheduler/scheduler.yml b/scheduler/scheduler.yml index 88b89aab61..ba31541ea0 100644 --- a/scheduler/scheduler.yml +++ b/scheduler/scheduler.yml @@ -19,7 +19,7 @@ tasks: - name: Set dynamic run tags including k8s ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['compute_k8s', 'slurm']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['compute_k8s', 'slurm', 'csi_driver_powerscale']) | unique }}" cacheable: true - name: Invoke get_config_credentials.yml diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index c311194a02..fccc375402 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -19,7 +19,7 @@ tasks: - name: Set dynamic run tags including k8s ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['service_k8s']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['service_k8s' , 'csi_driver_powerscale']) | unique }}" cacheable: true - name: Invoke get_config_credentials.yml From 6be21d5653c1fabcff953c04050abe06ac3616cf Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Tue, 5 Aug 2025 18:23:29 +0530 Subject: [PATCH 72/76] Delete scheduler/inv Signed-off-by: sakshi-singla-1735 --- scheduler/inv | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 scheduler/inv diff --git a/scheduler/inv b/scheduler/inv deleted file mode 100644 index d77420f09c..0000000000 --- a/scheduler/inv +++ /dev/null @@ -1,9 +0,0 @@ -[etcd] -10.19.0.3 - -[kube_control_plane] -10.19.0.3 - -[kube_node] -10.19.0.2 -10.19.0.4 From 94492c5049b5253312355fa4ab0a9c7cc6dfb461 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Tue, 5 Aug 2025 08:35:33 -0500 Subject: [PATCH 73/76] 
input validation with updated compute_k8s for HA --- .../module_utils/input_validation/schema/roles_config.json | 2 +- .../validation_flows/high_availability_validation.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/library/module_utils/input_validation/schema/roles_config.json b/common/library/module_utils/input_validation/schema/roles_config.json index 2e2ee1aae9..e8c0162ca2 100644 --- a/common/library/module_utils/input_validation/schema/roles_config.json +++ b/common/library/module_utils/input_validation/schema/roles_config.json @@ -58,7 +58,7 @@ "architecture": { "type": "string", "description": "Architecture of the nodes - x86 or ARM", - "enum": ["x86", "ARM"], + "enum": ["x86_64", "aarch64"], "required": ["architecture"] } }, diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index 1529875d6b..e6ab242401 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -655,7 +655,8 @@ def validate_ha_config(ha_data, mandatory_fields, errors, config_type=None): ("oim_ha", ["admin_virtual_ip_address", "active_node_service_tag", "passive_nodes"]), ("service_node_ha", ["service_nodes"]), ("slurm_head_node_ha", ["virtual_ip_address", "active_node_service_tag", "passive_nodes"]), - ("k8s_head_node_ha", ["virtual_ip_address", "active_node_service_tags"]) + ("compute_k8s_head_node_ha", ["virtual_ip_address", "active_node_service_tags"]), + ("service_k8s_head_node_ha", ["virtual_ip_address", "active_node_service_tags"]) ] for config_name, mandatory_fields in ha_configs: From 6c624224340f978eca17bd9a316cc18330bf1c41 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Tue, 5 Aug 2025 14:02:18 +0000 Subject: [PATCH 74/76] csi creds update 
Signed-off-by: sakshi-singla-1735 --- .../validation_flows/csi_driver_validation.py | 2 -- .../tasks/csi_powerscale_driver_api_validation.yml | 2 +- .../tasks/csi_powerscale_driver_input_validation.yml | 1 + .../tasks/csi_powerscale_config_secret.yml | 8 -------- 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py index 5657475b7d..dc24a4b705 100644 --- a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py @@ -206,7 +206,6 @@ def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): """ decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path) - errors.append(create_error_msg("decrypted_file",decrypted_file,"decrypted_file")) if decrypted_file: try: with open(secret_file_path, "r") as f: @@ -238,7 +237,6 @@ def validate_powerscale_secret_and_values_file(secret_file_path, values_file_pat if secrets_file_encrypted: secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) - errors.append(create_error_msg("secrete_data",secret_data,"secret")) if secret_data is None or secret_data is False: errors.append(create_error_msg( "Secret File Load", diff --git a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml index 54909d4186..43a19aad6f 100644 --- a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml +++ b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml @@ -16,7 +16,7 @@ - name: Generate Base64 authentication token ansible.builtin.shell: > set -o pipefail && \ - echo -n "{{ item.username }}:{{ item.password }}" | base64 + echo -n 
"{{ hostvars['127.0.0.1']['csi_username'] }}:{{ hostvars['127.0.0.1']['csi_password'] }}" | base64 register: auth_token changed_when: false no_log: true diff --git a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml index 673dbc5839..f6faed2d89 100644 --- a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml +++ b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml @@ -66,3 +66,4 @@ - name: Validate powerscale ip and credential in secret.yaml file using API call to powerscale ansible.builtin.include_tasks: csi_powerscale_driver_api_validation.yml loop: "{{ clusters.isilonClusters }}" + no_log: true \ No newline at end of file diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml index a1f7724c5c..a89b02770a 100644 --- a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml @@ -40,14 +40,6 @@ - name: Decode the config from secret set_fact: decoded_config: "{{ existing_secret.resources[0].data.config | b64decode | from_yaml }}" - -- name: Debug username - debug: - var: hostvars['127.0.0.1']['csi_username'] - -- name: Debug username - debug: - var: hostvars['127.0.0.1']['csi_password'] - name: Update username and password in decoded config set_fact: From d31417e6be54a89b76f9fe7d2d2025920692fc42 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Wed, 6 Aug 2025 06:09:50 +0000 Subject: [PATCH 75/76] ansible lint and pylint fixes Signed-off-by: sakshi-singla-1735 --- .../validation_flows/csi_driver_validation.py | 61 ++++++++++--------- ...csi_powerscale_driver_input_validation.yml | 2 +- .../tasks/csi_powerscale_config_secret.yml | 8 +-- 
scheduler/service_k8s_cluster.yml | 2 +- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py index dc24a4b705..27066c0b0a 100644 --- a/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/csi_driver_validation.py @@ -62,17 +62,20 @@ def validate_secret_isilon_clusters(data): # Validate endpointPort if defined if "endpointPort" in item: - if not isinstance(item["endpointPort"], int) or not (0 < item["endpointPort"] < 65536): - cluster_errors.append(f"{cluster_prefix}: 'endpointPort' must be an integer between 1 and 65535.") + if not isinstance(item["endpointPort"], int) or not 0 < item["endpointPort"] < 65536: + cluster_errors.append( + f"{cluster_prefix}: 'endpointPort' must be an integer between 1 and 65535.") # Validate isDefault if "isDefault" not in item or not isinstance(item["isDefault"], bool): - cluster_errors.append(f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") + cluster_errors.append( + f"{cluster_prefix}: 'isDefault' must be a boolean and must be defined.") # Validate skipCertificateValidation if defined if "skipCertificateValidation" in item: if item["skipCertificateValidation"] is not True: - cluster_errors.append(f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") + cluster_errors.append( + f"{cluster_prefix}: 'skipCertificateValidation' must be true if defined.") # Validate isiPath if defined if "isiPath" in item: @@ -82,14 +85,18 @@ def validate_secret_isilon_clusters(data): not isi_path.strip() or not isi_path.lstrip().startswith('/') ): - cluster_errors.append(f"{cluster_prefix}: 'isiPath' must be a non-empty valid Unix absolute path.") + cluster_errors.append( + f"{cluster_prefix}: 'isiPath' must be a non-empty 
valid Unix absolute path.") # Validate isiVolumePathPermissions if defined if "isiVolumePathPermissions" in item: perms = item["isiVolumePathPermissions"] if not isinstance(perms, str) or not perms.strip().isdigit(): - cluster_errors.append(f"{cluster_prefix}: 'isiVolumePathPermissions' must be a non-empty string of digits.") - + msg = ( + f"{cluster_prefix}: 'isiVolumePathPermissions' must be a " + "non-empty string of digits." + ) + cluster_errors.append(msg) return cluster_errors def validate_value_file_inputs(values_data): @@ -134,7 +141,7 @@ def get_nested(data, keys, default=None): # 5. endpointPort is int in 1..65535 endpoint_port = values_data.get("endpointPort") - if endpoint_port is None or not isinstance(endpoint_port, int) or not (1 <= endpoint_port <= 65535): + if endpoint_port is None or not isinstance(endpoint_port, int) or not 1 <= endpoint_port <= 65535: add_error("endpointPort", endpoint_port, "Must be between 1 and 65535") # 6. skipCertificateValidation == true @@ -152,16 +159,10 @@ def get_nested(data, keys, default=None): if not isi_access or not isinstance(isi_access, str) or not isi_access.strip(): add_error("isiAccessZone", isi_access, "Must be a non-empty string") - # Validate isiPath if defined - if "isiPath" in item: - isi_path = item["isiPath"] - if ( - not isinstance(isi_path, str) or - not isi_path.strip() or - not isi_path.lstrip().startswith('/') - ): - add_error(f"{cluster_prefix}: 'isiPath' must be a non-empty valid Unix absolute path.") - + # 9. isiPath is Unix absolute path + isi_path = values_data.get("isiPath") + if not isinstance(isi_path, str) or not isi_path.startswith("/"): + add_error("isiPath", isi_path, "Must be a valid Unix absolute path") # 10. 
isiVolumePathPermissions is a non-empty string permissions = values_data.get("isiVolumePathPermissions") @@ -208,14 +209,14 @@ def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): decrypted_file = decrypt_file(secret_file_path, vault_secret_file_path) if decrypted_file: try: - with open(secret_file_path, "r") as f: + with open(secret_file_path, "r", encoding="utf-8") as f: data = yaml.safe_load(f) encrypt_file(secret_file_path, vault_secret_file_path) return data except FileNotFoundError: errors.append(create_error_msg("File not found", secret_file_path, "Please check the associated file exists")) - except yaml.YAMLError as e: + except yaml.YAMLError: errors.append(create_error_msg("Error loading yaml file", secret_file_path, "Please check the associated file syntax")) else: @@ -223,7 +224,9 @@ def process_encrypted_file(secret_file_path,vault_secret_file_path,errors): secret_file_path, "Please check that the assoicated vault file exists")) return decrypted_file -def validate_powerscale_secret_and_values_file(secret_file_path, values_file_path, errors, input_file_path): +def validate_powerscale_secret_and_values_file( + secret_file_path, values_file_path, + errors, input_file_path): """ Driver code to initiate the powerscale secret and values file input validation """ @@ -232,13 +235,11 @@ def validate_powerscale_secret_and_values_file(secret_file_path, values_file_pat secrets_file_encrypted = validation_utils.is_file_encrypted(secret_file_path) file_path = os.path.dirname(input_file_path) vault_secret_file_path = os.path.join(file_path, ".csi_powerscale_secret_vault") - #check if secret file exists - file_exists = os.path.exists(vault_secret_file_path.strip()) if secrets_file_encrypted: secret_data = process_encrypted_file(secret_file_path, vault_secret_file_path,errors) if secret_data is None or secret_data is False: - errors.append(create_error_msg( + errors.append(create_error_msg( "Secret File Load", secret_file_path, "Failed to load or 
parse secret.yaml file. It may be invalid or empty." @@ -246,13 +247,15 @@ def validate_powerscale_secret_and_values_file(secret_file_path, values_file_pat else: secret_validation_errors = validate_secret_isilon_clusters(secret_data) if secret_validation_errors: - for err in secret_validation_errors: - errors.append(create_error_msg("Powerscale Secret File Validation Error:", err, None)) + for err in secret_validation_errors: + errors.append( + create_error_msg("Powerscale Secret File Validation Error:", err, None)) #validate values file input - with open(values_file_path, "r") as f: - values_data = yaml.safe_load(f) + with open(values_file_path, "r", encoding="utf-8") as f: + values_data = yaml.safe_load(f) values_validation_errros = validate_value_file_inputs(values_data) if values_validation_errros: for value_err in values_validation_errros: - errors.append(create_error_msg(f"Powerscale Value File Validation Error: ",value_err, None)) + errors.append( + create_error_msg("Powerscale Value File Validation Error: ",value_err, None)) diff --git a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml index f6faed2d89..2095022bcf 100644 --- a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml +++ b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_input_validation.yml @@ -66,4 +66,4 @@ - name: Validate powerscale ip and credential in secret.yaml file using API call to powerscale ansible.builtin.include_tasks: csi_powerscale_driver_api_validation.yml loop: "{{ clusters.isilonClusters }}" - no_log: true \ No newline at end of file + no_log: true diff --git a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml index a89b02770a..280406d1a2 100644 --- 
a/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml +++ b/scheduler/roles/k8s_csi_powerscale_plugin/tasks/csi_powerscale_config_secret.yml @@ -38,11 +38,11 @@ register: existing_secret - name: Decode the config from secret - set_fact: + ansible.builtin.set_fact: decoded_config: "{{ existing_secret.resources[0].data.config | b64decode | from_yaml }}" - + - name: Update username and password in decoded config - set_fact: + ansible.builtin.set_fact: updated_config: >- {{ decoded_config | combine({ @@ -54,7 +54,7 @@ }} - name: Encode updated config to base64 - set_fact: + ansible.builtin.set_fact: encoded_config: "{{ updated_config | to_nice_yaml(indent=2) | b64encode }}" - name: Patch isilon-creds secret with updated credentials diff --git a/scheduler/service_k8s_cluster.yml b/scheduler/service_k8s_cluster.yml index fccc375402..08bddc4c33 100644 --- a/scheduler/service_k8s_cluster.yml +++ b/scheduler/service_k8s_cluster.yml @@ -19,7 +19,7 @@ tasks: - name: Set dynamic run tags including k8s ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['service_k8s' , 'csi_driver_powerscale']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['service_k8s', 'csi_driver_powerscale']) | unique }}" cacheable: true - name: Invoke get_config_credentials.yml From 2c1a7ced6cf08eb15c26965f8339b02fa6a7bc26 Mon Sep 17 00:00:00 2001 From: sakshi-singla-1735 Date: Wed, 6 Aug 2025 08:30:53 +0000 Subject: [PATCH 76/76] review comment Signed-off-by: sakshi-singla-1735 --- .../tasks/csi_powerscale_driver_api_validation.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml index 43a19aad6f..9d2a567d0c 100644 --- a/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml +++ 
b/scheduler/roles/cluster_validation/tasks/csi_powerscale_driver_api_validation.yml @@ -14,11 +14,8 @@ --- - name: Generate Base64 authentication token - ansible.builtin.shell: > - set -o pipefail && \ - echo -n "{{ hostvars['127.0.0.1']['csi_username'] }}:{{ hostvars['127.0.0.1']['csi_password'] }}" | base64 - register: auth_token - changed_when: false + ansible.builtin.set_fact: + auth_token: "{{ (hostvars['127.0.0.1']['csi_username'] + ':' + hostvars['127.0.0.1']['csi_password']) | b64encode }}" no_log: true - name: Set the URL for the API request @@ -34,7 +31,7 @@ url: "{{ api_url }}" method: GET headers: - Authorization: "Basic {{ auth_token.stdout }}" + Authorization: "Basic {{ auth_token }}" validate_certs: false register: response ignore_errors: true