From cdc944efcccfd29a30d0379e21e4df756299e2be Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Sat, 30 May 2026 11:33:20 +0530 Subject: [PATCH 01/78] Merge pull request #4605 from abhishek-sa1/pub/q2_ansible Ansible core 2.20 support for omnia 2.2 release --- build_image_aarch64/ansible.cfg | 1 + build_image_aarch64/build_image_aarch64.yml | 2 +- build_image_x86_64/ansible.cfg | 1 + build_image_x86_64/build_image_x86_64.yml | 2 +- build_stream/requirements-dev.txt | 2 +- build_stream/requirements.txt | 2 +- .../validation_flows/local_repo_validation.py | 77 ++++++++++--------- .../modules/functional_group_parser.py | 4 +- discovery/ansible.cfg | 1 + discovery/discovery.yml | 2 +- gitlab/ansible.cfg | 1 + gitlab/cleanup_gitlab.yml | 2 +- gitlab/gitlab.yml | 2 +- input_validation/ansible.cfg | 1 + .../roles/validate_input/tasks/main.yml | 2 +- .../tasks/configure_rhel_os_urls.yml | 8 +- input_validation/validate_config.yml | 4 +- local_repo/ansible.cfg | 1 + local_repo/local_repo.yml | 2 +- .../tasks/validate_software_config_json.yml | 11 ++- .../roles/log_collector/tasks/bundle.yml | 2 +- prepare_oim/ansible.cfg | 1 + prepare_oim/prepare_oim.yml | 2 +- .../deploy_containers/auth/vars/main.yml | 2 +- .../tasks/check_k8s_support.yml | 2 +- provision/ansible.cfg | 1 + provision/provision.yml | 2 +- ...-group-login_compiler_node_aarch64.yaml.j2 | 6 +- ...i-group-login_compiler_node_x86_64.yaml.j2 | 6 +- .../ci-group-login_node_aarch64.yaml.j2 | 6 +- .../ci-group-login_node_x86_64.yaml.j2 | 6 +- ...ce_kube_control_plane_first_x86_64.yaml.j2 | 14 ++-- ...-service_kube_control_plane_x86_64.yaml.j2 | 14 ++-- .../ci-group-service_kube_node_x86_64.yaml.j2 | 12 +-- ...ci-group-slurm_control_node_x86_64.yaml.j2 | 6 +- .../ci-group-slurm_node_aarch64.yaml.j2 | 6 +- .../ci-group-slurm_node_x86_64.yaml.j2 | 6 +- .../tasks/validate_telemetry_config.yml | 2 +- .../slurm_config/tasks/build_slurm_conf.yml | 10 +-- .../slurm_config/tasks/check_ctld_running.yml | 4 +- 
provision/roles/slurm_config/tasks/confs.yml | 12 +-- .../slurm_config/tasks/create_slurm_dir.yml | 14 ++-- provision/roles/slurm_config/tasks/main.yml | 2 +- rollback/ansible.cfg | 1 + rollback/rollback.yml | 4 +- telemetry/ansible.cfg | 1 + .../roles/telemetry_disable/tasks/main.yml | 2 +- .../roles/telemetry_enable/tasks/main.yml | 2 +- telemetry/telemetry.yml | 2 +- upgrade/ansible.cfg | 1 + .../scripts/transform_software_config.py | 9 ++- upgrade/upgrade.yml | 12 +-- utils/ansible.cfg | 1 + utils/credential_utility/ansible.cfg | 1 + .../update_config/tasks/credential_status.yml | 32 ++++---- .../tasks/update_bs_credential_file.yml | 6 +- .../roles/validation/tasks/main.yml | 2 +- .../roles/validation/tasks/pre_requisite.yml | 2 +- utils/set_pxe_boot.yml | 2 +- 59 files changed, 188 insertions(+), 158 deletions(-) diff --git a/build_image_aarch64/ansible.cfg b/build_image_aarch64/ansible.cfg index dba4437510..2ddcf7b8bc 100644 --- a/build_image_aarch64/ansible.cfg +++ b/build_image_aarch64/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/build_image_aarch64/build_image_aarch64.yml b/build_image_aarch64/build_image_aarch64.yml index a30f66e471..cd01d809d7 100644 --- a/build_image_aarch64/build_image_aarch64.yml +++ b/build_image_aarch64/build_image_aarch64.yml @@ -24,7 +24,7 @@ - name: Set dynamic run tags including 'build_aarch_image' when: not config_file_status | default(false) | bool ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['build_aarch_image']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['build_aarch_image']) | unique }}" cacheable: true - name: Invoke validate_config.yml to perform L1 and L2 validations with build_image tag diff --git a/build_image_x86_64/ansible.cfg b/build_image_x86_64/ansible.cfg 
index eec7b1c4cf..4d5729de9a 100644 --- a/build_image_x86_64/ansible.cfg +++ b/build_image_x86_64/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/build_image_x86_64/build_image_x86_64.yml b/build_image_x86_64/build_image_x86_64.yml index 1909803155..28029298f4 100644 --- a/build_image_x86_64/build_image_x86_64.yml +++ b/build_image_x86_64/build_image_x86_64.yml @@ -24,7 +24,7 @@ - name: Set dynamic run tags including 'build_image' when: not config_file_status | default(false) | bool ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['build_image']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['build_image']) | unique }}" cacheable: true - name: Invoke validate_config.yml to perform L1 and L2 validations with build_image tag diff --git a/build_stream/requirements-dev.txt b/build_stream/requirements-dev.txt index 6cae6350c7..f7abb54867 100644 --- a/build_stream/requirements-dev.txt +++ b/build_stream/requirements-dev.txt @@ -11,5 +11,5 @@ httpx>=0.25.0 # Code quality pylint>=3.0.0 -black>=23.0.0 +black>=26.5.0 isort>=5.12.0 diff --git a/build_stream/requirements.txt b/build_stream/requirements.txt index 631dbb182e..b47e1cf326 100644 --- a/build_stream/requirements.txt +++ b/build_stream/requirements.txt @@ -8,7 +8,7 @@ pydantic>=2.5.0 # Authentication PyJWT>=2.8.0 -cryptography>=41.0.0 +cryptography>=48.0.0 argon2-cffi>=23.1.0 # Dependency injection diff --git a/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py b/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py index e81ecd8be7..8167984fb1 100644 --- a/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py +++ 
b/common/library/module_utils/input_validation/validation_flows/local_repo_validation.py @@ -43,39 +43,43 @@ def check_subscription_status(logger=None): # 1. Check system entitlement certs first system_entitlement_certs = glob.glob(config.SYSTEM_ENTITLEMENT_PATH) has_system_entitlement = len(system_entitlement_certs) > 0 - + if has_system_entitlement: # System entitlement found - use system paths only entitlement_certs = system_entitlement_certs has_entitlement = True repo_file_to_check = config.SYSTEM_REDHAT_REPO - + if logger: - logger.info(f"Found {len(system_entitlement_certs)} system entitlement certs - using system paths only") + logger.info( + f"Found {len(system_entitlement_certs)} system entitlement certs" + " - using system paths only") else: # No system entitlement - check Omnia paths omnia_entitlement_certs = glob.glob(config.OMNIA_ENTITLEMENT_PATH) entitlement_certs = omnia_entitlement_certs has_entitlement = len(omnia_entitlement_certs) > 0 repo_file_to_check = config.OMNIA_REDHAT_REPO - + if logger: - logger.info(f"No system entitlement found - checking Omnia paths: {len(omnia_entitlement_certs)} certs found") + logger.info( + f"No system entitlement found - checking Omnia paths:" + f" {len(omnia_entitlement_certs)} certs found") # 2. Check repos based on which entitlement path was used has_repos = False repo_urls = [] redhat_repo_used = None - + if os.path.exists(repo_file_to_check): try: - with open(repo_file_to_check, "r") as f: + with open(repo_file_to_check, "r", encoding="utf-8") as f: for line in f: if line.startswith("baseurl ="): url = line.split("=", 1)[1].strip() if re.search(r"(codeready-builder|baseos|appstream)", url, re.IGNORECASE): repo_urls.append(url) - + if repo_urls: has_repos = True redhat_repo_used = repo_file_to_check @@ -91,7 +95,7 @@ def check_subscription_status(logger=None): # 3. 
Subscription enabled if entitlement and repos are found in the same source subscription_enabled = has_entitlement and has_repos - + if logger: logger.info( f"Subscription enabled: {subscription_enabled} " @@ -111,24 +115,24 @@ def validate_local_repo_config(input_file_path, data, omnia_repo_url_rhel fields are present and accessible. """ errors = [] - base_repo_names = [] local_repo_yml = create_file_path(input_file_path, file_names["local_repo_config"]) - - user_registry = data.get("user_registry") + + user_registry = data.get("user_registry") if user_registry: for registry in user_registry: - host = registry.get("host") cert_path = registry.get("cert_path") key_path = registry.get("key_path") - + # Validate user_registry certificate and key paths if cert_path and not os.path.exists(cert_path): - errors.append(create_error_msg(local_repo_yml, "user_registry", - f"Certificate file not found: {cert_path}")) - + errors.append(create_error_msg( + local_repo_yml, "user_registry", + f"Certificate file not found: {cert_path}")) + if key_path and not os.path.exists(key_path): - errors.append(create_error_msg(local_repo_yml, "user_registry", - f"Key file not found: {key_path}")) + errors.append(create_error_msg( + local_repo_yml, "user_registry", + f"Key file not found: {key_path}")) # Validate user_repo_url entries have a 'name' field for repo_key in ("user_repo_url_x86_64", "user_repo_url_aarch64"): @@ -156,11 +160,12 @@ def validate_local_repo_config(input_file_path, data, for arch in all_archs: arch_repo_names = [] arch_list = url_list + [url+'_'+arch for url in url_list] - # define base repos dynamically for this arch if subscription registered + base_subscription_repos = [] + # define base repos dynamically for this arch if subscription registered if sub_result: base_subscription_repos = ["baseos", "appstream", "codeready-builder"] logger.info(f"Base subscription repos for {arch}: {base_subscription_repos}") - + # Collect repo names from standard repo lists # Names 
are kept as-is (short format); build_repo_name() is applied at runtime for repurl in arch_list: @@ -194,11 +199,11 @@ def validate_local_repo_config(input_file_path, data, raw_name = x.get('name') if raw_name: arch_repo_names.append(raw_name) - + # Add base subscription repos to the final list (they will be dynamically generated) if sub_result: arch_repo_names = arch_repo_names + base_subscription_repos - + repo_names[arch] = arch_repo_names logger.info(f"Total repos for {arch}: {repo_names[arch]}") @@ -232,7 +237,6 @@ def validate_local_repo_config(input_file_path, data, ) ) - os_ver_path = f"/{software_config_json['cluster_os_type']}/{software_config_json['cluster_os_version']}/" supported_subgroups = config.ADDITIONAL_PACKAGES_SUPPORTED_SUBGROUPS additional_packages_warnings = False @@ -244,7 +248,7 @@ def validate_local_repo_config(input_file_path, data, for arch in arch_list: # Use get_json_file_path for proper versioned JSON file resolution json_path = get_json_file_path( - sw, cluster_os_type, cluster_os_version, + sw, cluster_os_type, cluster_os_version, software_config_file_path, arch, software_version=software_version) if not json_path or not os.path.exists(json_path): @@ -254,7 +258,10 @@ def validate_local_repo_config(input_file_path, data, else: expected_file = f"{sw}.json" errors.append( - create_error_msg(sw + '/' + arch, f"{sw} JSON file not found for architecture {arch}.", expected_file)) + create_error_msg( + sw + '/' + arch, + f"{sw} JSON file not found for architecture {arch}.", + expected_file)) else: curr_json = load_json(json_path) pkg_list = curr_json[sw]['cluster'] @@ -278,7 +285,8 @@ def validate_local_repo_config(input_file_path, data, elif json_key not in user_subgroups: logger.warning( f"{sw}/{arch}: {json_path} - " - f"Subgroup '{json_key}' is present in JSON but not listed under additional_packages in software_config.json.") + f"Subgroup '{json_key}' is present in JSON but not listed" + f" under additional_packages in 
software_config.json.") additional_packages_warnings = True if sw in software_config_json: for sub_pkg in software_config_json[sw]: @@ -289,12 +297,11 @@ def validate_local_repo_config(input_file_path, data, if sw == "additional_packages": if sub_sw not in supported_subgroups.get(arch, []): continue - else: - logger.warning( - f"{sw}/{arch}: {json_path} - " - f"Software {sub_sw} not found in {sw}.") - additional_packages_warnings = True - continue + logger.warning( + f"{sw}/{arch}: {json_path} - " + f"Software {sub_sw} not found in {sw}.") + additional_packages_warnings = True + continue errors.append( create_error_msg(sw + '/' + arch, json_path, @@ -317,10 +324,10 @@ def validate_local_repo_config(input_file_path, data, create_error_msg(sw + '/' + arch, f"Repo name {repo_name} not found.", json_path)) - + if additional_packages_warnings: logger.info( "[INFO] Additional packages validation completed with warnings. " "Please review the log file for additional_packages configuration details.") - + return errors diff --git a/common/library/modules/functional_group_parser.py b/common/library/modules/functional_group_parser.py index 418ba79fe4..9c5cc6d107 100644 --- a/common/library/modules/functional_group_parser.py +++ b/common/library/modules/functional_group_parser.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/python from ansible.module_utils.basic import AnsibleModule import yaml @@ -37,7 +37,7 @@ def normalize_functional_groups(data): def get_functional_groups(config_path): - with open(config_path, "r") as f: + with open(config_path, "r", encoding="utf-8") as f: data = yaml.safe_load(f) return normalize_functional_groups(data) diff --git a/discovery/ansible.cfg b/discovery/ansible.cfg index a30bba0211..b7f517ca21 100644 --- a/discovery/ansible.cfg +++ b/discovery/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = library:../common/library/modules 
module_utils = ../common/library/module_utils diff --git a/discovery/discovery.yml b/discovery/discovery.yml index 8adfa0e5d1..1dec83db67 100644 --- a/discovery/discovery.yml +++ b/discovery/discovery.yml @@ -27,7 +27,7 @@ tasks: - name: Set dynamic run tags for discovery validation ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['discovery']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['discovery']) | unique }}" cacheable: true - name: Invoke validate_config.yml to perform L1 and L2 validations with discovery tag diff --git a/gitlab/ansible.cfg b/gitlab/ansible.cfg index bbc7c851d0..2e53b46c4a 100644 --- a/gitlab/ansible.cfg +++ b/gitlab/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/gitlab/cleanup_gitlab.yml b/gitlab/cleanup_gitlab.yml index ee2250201e..a622151d95 100644 --- a/gitlab/cleanup_gitlab.yml +++ b/gitlab/cleanup_gitlab.yml @@ -26,7 +26,7 @@ - name: Set dynamic run tags including 'gitlab' when: not config_file_status | default(false) | bool ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['provision']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['provision']) | unique }}" cacheable: true - name: Include input project directory diff --git a/gitlab/gitlab.yml b/gitlab/gitlab.yml index 8ef3378313..3034c6e23e 100644 --- a/gitlab/gitlab.yml +++ b/gitlab/gitlab.yml @@ -22,7 +22,7 @@ - name: Set dynamic run tags including 'gitlab' when: not config_file_status | default(false) | bool ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['gitlab'] + ['provision']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['gitlab'] + ['provision']) | unique }}" cacheable: true - name: Include input 
project directory diff --git a/input_validation/ansible.cfg b/input_validation/ansible.cfg index 183d086b85..f5adeaf305 100644 --- a/input_validation/ansible.cfg +++ b/input_validation/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/input_validation/roles/validate_input/tasks/main.yml b/input_validation/roles/validate_input/tasks/main.yml index cb852a74e9..a4e80fb6e2 100644 --- a/input_validation/roles/validate_input/tasks/main.yml +++ b/input_validation/roles/validate_input/tasks/main.yml @@ -14,7 +14,7 @@ --- - name: Initialize list of tags ansible.builtin.set_fact: - omnia_run_tags: "{{ ansible_run_tags | default([]) }}" + omnia_run_tags: "{{ ansible_run_tags | default([]) | list }}" when: omnia_run_tags is not defined - name: Set validation messages diff --git a/input_validation/roles/validate_subscription/tasks/configure_rhel_os_urls.yml b/input_validation/roles/validate_subscription/tasks/configure_rhel_os_urls.yml index ba2220204b..d4a83e6d81 100644 --- a/input_validation/roles/validate_subscription/tasks/configure_rhel_os_urls.yml +++ b/input_validation/roles/validate_subscription/tasks/configure_rhel_os_urls.yml @@ -191,9 +191,13 @@ {%- endfor -%} {{ result }} - - name: Identify non-matching aarch64 override repos + - name: Create name mapping for aarch64 dynamic repos ansible.builtin.set_fact: aarch64_dynamic_names: "{{ sub_rhel_aarch64_urls | map(attribute='name') | list }}" + when: "'aarch64' in archs" + + - name: Identify non-matching aarch64 override repos + ansible.builtin.set_fact: additional_aarch64_repos: >- {{ sub_aarch64_override_config | rejectattr('name', 'in', aarch64_dynamic_names) | list @@ -248,7 +252,7 @@ Expected: {{ required_repos | join(', ') }}. 
Found: {{ present_repos | join(', ') }} vars: - present_repos: "{{ vars['rhel_url_' ~ arch_item] | map(attribute='name') | list }}" + present_repos: "{{ lookup('vars', 'rhel_url_' ~ arch_item) | map(attribute='name') | list }}" when: present_repos is not superset(required_repos) loop: "{{ archs }}" loop_control: diff --git a/input_validation/validate_config.yml b/input_validation/validate_config.yml index e30491fb82..dc9dfa3913 100644 --- a/input_validation/validate_config.yml +++ b/input_validation/validate_config.yml @@ -36,7 +36,7 @@ - name: Create oim group when: - not oim_group_status | default(false) | bool - - "'local_repo' in (omnia_run_tags | default(ansible_run_tags) | default([])) or 'all' in (ansible_run_tags | default([]))" + - "'local_repo' in (omnia_run_tags | default(ansible_run_tags | default([]) | list)) or 'all' in (ansible_run_tags | default([]) | list)" ansible.builtin.import_playbook: ../utils/create_container_group.yml vars: oim_group: true @@ -51,7 +51,7 @@ - always tasks: - name: Run subscription validation tasks - when: "'local_repo' in (omnia_run_tags | default(ansible_run_tags) | default([])) or 'all' in (ansible_run_tags | default([]))" + when: "'local_repo' in (omnia_run_tags | default(ansible_run_tags | default([]) | list)) or 'all' in (ansible_run_tags | default([]) | list)" block: - name: Include metadata vars ansible.builtin.include_vars: "/opt/omnia/.data/oim_metadata.yml" diff --git a/local_repo/ansible.cfg b/local_repo/ansible.cfg index 60bb2a3005..57097c487c 100644 --- a/local_repo/ansible.cfg +++ b/local_repo/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = roles/parse_and_download/library:../common/library/modules module_utils = ../common/library/module_utils diff --git a/local_repo/local_repo.yml b/local_repo/local_repo.yml index b9400ab89b..d4bb1d488d 100644 --- a/local_repo/local_repo.yml +++ b/local_repo/local_repo.yml @@ 
-26,7 +26,7 @@ - name: Set dynamic run tags including 'local_repo' ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['local_repo']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['local_repo']) | unique }}" cacheable: true - name: Include metadata vars diff --git a/local_repo/roles/validation/tasks/validate_software_config_json.yml b/local_repo/roles/validation/tasks/validate_software_config_json.yml index 190904eb95..6e734957f1 100644 --- a/local_repo/roles/validation/tasks/validate_software_config_json.yml +++ b/local_repo/roles/validation/tasks/validate_software_config_json.yml @@ -32,16 +32,19 @@ ansible.builtin.set_fact: service_k8s_support: "{{ software_config.softwares | selectattr('name', 'equalto', 'service_k8s') | list | length > 0 }}" software_names: "{{ software_config.softwares | map(attribute='name') | select('defined') | list }}" + +- name: Build software JSON file list + ansible.builtin.set_fact: software_json_list: "{{ software_names | map('regex_replace', '$', '.json') | list }}" - name: Get k8s archs ansible.builtin.set_fact: - service_k8s_arch: "{{ (software_config.softwares | selectattr('name', 'equalto', 'service_k8s') | first).get('arch', default_archs) }}" + service_k8s_arch: "{{ (software_config.softwares | selectattr('name', 'equalto', 'service_k8s') | first).arch | default(['x86_64']) }}" when: service_k8s_support - name: Get k8s archs ansible.builtin.set_fact: - k8s_arch: "{{ (software_config.softwares | selectattr('name', 'equalto', 'k8s') | first).get('arch', default_archs) }}" + k8s_arch: "{{ (software_config.softwares | selectattr('name', 'equalto', 'k8s') | first).arch | default(['x86_64']) }}" when: k8s_support - name: Validation for version property for softwares mentioned in software_config.json @@ -50,7 +53,7 @@ ansible.builtin.assert: that: - item.name not in specific_softwares or (item.version is defined and item.version != "") - loop: "{{ software_config.softwares 
+ software_config.amdgpu + software_config.bcm_roce | default([]) }}" + loop: "{{ software_config.softwares + (software_config.amdgpu | default([])) + (software_config.bcm_roce | default([])) }}" when: item.name is defined loop_control: loop_var: item @@ -61,7 +64,7 @@ ansible.builtin.fail: msg: "{{ item.msg }}" loop: "{{ version_result.results }}" - when: item.evaluated_to is false + when: item.msg is defined and item.msg == 'Assertion failed' rescue: - name: Versions were not defined for softwares diff --git a/log_collector/roles/log_collector/tasks/bundle.yml b/log_collector/roles/log_collector/tasks/bundle.yml index 8d2757d729..21d67574e1 100644 --- a/log_collector/roles/log_collector/tasks/bundle.yml +++ b/log_collector/roles/log_collector/tasks/bundle.yml @@ -22,7 +22,7 @@ - name: Override collection mode to curated_support when tag is active ansible.builtin.set_fact: collection_mode: "curated_support" - when: "'curated_support' in ansible_run_tags" + when: "'curated_support' in (ansible_run_tags | list)" - name: Set ISO-8601 warning timestamp for this run ansible.builtin.set_fact: diff --git a/prepare_oim/ansible.cfg b/prepare_oim/ansible.cfg index e3d0666533..84969280c3 100644 --- a/prepare_oim/ansible.cfg +++ b/prepare_oim/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/prepare_oim/prepare_oim.yml b/prepare_oim/prepare_oim.yml index 51f9dc1f0c..4a9c969499 100644 --- a/prepare_oim/prepare_oim.yml +++ b/prepare_oim/prepare_oim.yml @@ -31,7 +31,7 @@ omnia_run_tags: >- {{ ( - ansible_run_tags | default([]) + + ansible_run_tags | default([]) | list + ['prepare_oim', 'local_repo', 'discovery', 'provision'] ) | unique }} diff --git a/prepare_oim/roles/deploy_containers/auth/vars/main.yml b/prepare_oim/roles/deploy_containers/auth/vars/main.yml index 49c72cbc13..01ceb2007a 100644 
--- a/prepare_oim/roles/deploy_containers/auth/vars/main.yml +++ b/prepare_oim/roles/deploy_containers/auth/vars/main.yml @@ -23,7 +23,7 @@ openldap_ports: - 636 wait_time: 10 auth_service_image_name: omnia_auth -auth_service_image_tag: "1.0" +auth_service_image_tag: "1.1" auth_service_registry: "docker.io/dellhpcomniaaisolution" auth_service_container_name: omnia_auth auth_service_image_pull_fail_msg: diff --git a/prepare_oim/roles/prepare_oim_validation/tasks/check_k8s_support.yml b/prepare_oim/roles/prepare_oim_validation/tasks/check_k8s_support.yml index b3b4c76fb9..f9c1e701bd 100644 --- a/prepare_oim/roles/prepare_oim_validation/tasks/check_k8s_support.yml +++ b/prepare_oim/roles/prepare_oim_validation/tasks/check_k8s_support.yml @@ -29,7 +29,7 @@ - name: Extract service k8s version ansible.builtin.set_fact: k8s_versions: "{{ software_config.softwares | selectattr('name', 'in', ['compute_k8s', 'service_k8s']) | map(attribute='version') | list | unique }}" # noqa: yaml[line-length] - k8s_arch: "{{ (software_config.softwares | selectattr('name', 'in', ['compute_k8s', 'service_k8s']) | first).get('arch', default_archs) }}" + k8s_arch: "{{ (software_config.softwares | selectattr('name', 'in', ['compute_k8s', 'service_k8s']) | first).arch | default(['x86_64']) }}" - name: Set k8s_support_check to false if any k8s version is not in supported_k8s_versions ansible.builtin.set_fact: diff --git a/provision/ansible.cfg b/provision/ansible.cfg index 3d1500dd3b..b65edbf7ab 100644 --- a/provision/ansible.cfg +++ b/provision/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = library:../common/library/modules module_utils = ../common/library/module_utils diff --git a/provision/provision.yml b/provision/provision.yml index 5aac96f3dc..e476c50835 100644 --- a/provision/provision.yml +++ b/provision/provision.yml @@ -62,7 +62,7 @@ omnia_run_tags: >- {{ ( - ansible_run_tags | 
default([]) + + ansible_run_tags | default([]) | list + ['provision', 'slurm', 'slurm_custom', 'security', 'csi_driver_powerscale', 'ldms', 'telemetry'] + ( ['service_k8s'] diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 index 548129dc17..4580f03ca3 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 @@ -187,7 +187,7 @@ permissions: '{{ file_mode_755 }}' content: | {{ lookup('template', 'templates/hpc_tools/configure_nvhpc_env.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -208,7 +208,7 @@ - bash /usr/local/bin/doca-install.sh || true - bash /usr/local/bin/configure-ib-network.sh {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -233,7 +233,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash 
/usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # VAST storage: create subdirectory structure for login+compiler node diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 index cb3e7940af..f65e284b3f 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 @@ -186,7 +186,7 @@ permissions: '{{ file_mode_755 }}' content: | {{ lookup('template', 'templates/hpc_tools/configure_nvhpc_env.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -207,7 +207,7 @@ - bash /usr/local/bin/doca-install.sh || true - bash /usr/local/bin/configure-ib-network.sh {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -232,7 +232,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ 
pv_entry.name }}.sh {% endfor %} # VAST storage: create subdirectory structure for login+compiler node diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 index df8389b3e1..5410ac2d5a 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_aarch64.yaml.j2 @@ -145,7 +145,7 @@ permissions: '0644' content: | {{ lookup('template', 'templates/nodes/apptainer_mirror.conf.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -166,7 +166,7 @@ - bash /usr/local/bin/doca-install.sh || true - bash /usr/local/bin/configure-ib-network.sh {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -191,7 +191,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # VAST storage: create subdirectory structure for login node diff 
--git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 index af74a044c9..93cac94165 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_node_x86_64.yaml.j2 @@ -144,7 +144,7 @@ permissions: '0644' content: | {{ lookup('template', 'templates/nodes/apptainer_mirror.conf.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -165,7 +165,7 @@ - bash /usr/local/bin/doca-install.sh || true - bash /usr/local/bin/configure-ib-network.sh {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -190,7 +190,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # VAST storage: create subdirectory structure for login node diff --git 
a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 index 8bbaa7ca74..d86341fbd2 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 @@ -178,8 +178,8 @@ location = "gcr.io" [[registry.mirror]] location = "{{ pulp_mirror }}" -{% if user_registry | default([]) | length > 0 %} -{% for registry in user_registry %} +{% if user_registry | default([], true) | length > 0 %} +{% for registry in user_registry | default([], true) %} [[registry]] prefix = "{{ registry.host }}" @@ -348,7 +348,7 @@ - "chronyc sources" - "chronyc -a makestep" {# Mount-specific runcmd entries #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -370,7 +370,7 @@ {% endif %} {% endif %} {% endraw %} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # K8s NFS mount entries @@ -445,7 +445,7 @@ [ -n "$search_line" ] && echo "$search_line" > "$tmpfile" # Add your new nameserver entries - {% for ns in dns %} + {% for ns in dns | default([], true) %} echo "nameserver {{ ns }}" >> "$tmpfile" {% endfor %} @@ -480,7 +480,7 @@ echo "Installing Necessary Python pip packages" python3 -m ensurepip - PACKAGES=({% for pkg in k8s_pip_packages %}"{{ pkg }}"{% if 
not loop.last %} {% endif %}{% endfor %}) + PACKAGES=({% for pkg in k8s_pip_packages | default([], true) %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %}) for pkg in "${PACKAGES[@]}"; do echo "Installing $pkg from offline repo..." @@ -630,7 +630,7 @@ kubectl -n kube-system get configmap coredns -o yaml > "$cfg" # Patch: append nameservers after /etc/resolv.conf using Jinja list "dns" - sed -i 's|/etc/resolv.conf|/etc/resolv.conf{% for ns in dns %} {{ ns }}{% endfor %}|' "$cfg" + sed -i 's|/etc/resolv.conf|/etc/resolv.conf{% for ns in dns | default([], true) %} {{ ns }}{% endfor %}|' "$cfg" {% if dns_enabled | default(false) | bool %} # Forward cluster-internal DNS domain to OIM CoreDNS diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 index f381046222..37ef9f69d2 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 @@ -149,8 +149,8 @@ location = "gcr.io" [[registry.mirror]] location = "{{ pulp_mirror }}" -{% if user_registry | default([]) | length > 0 %} -{% for registry in user_registry %} +{% if user_registry | default([], true) | length > 0 %} +{% for registry in user_registry | default([], true) %} [[registry]] prefix = "{{ registry.host }}" @@ -248,7 +248,7 @@ # Optional: Set up bash completion /usr/local/bin/helm completion bash > /etc/bash_completion.d/helm.sh chmod 0755 /etc/bash_completion.d/helm.sh -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 
}}' content: | @@ -262,7 +262,7 @@ - "chronyc sources" - "chronyc -a makestep" {# Mount-specific runcmd entries #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -284,7 +284,7 @@ {% endif %} {% endif %} {% endraw %} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # K8s NFS mount entries @@ -360,7 +360,7 @@ [ -n "$search_line" ] && echo "$search_line" > "$tmpfile" # Add your new nameserver entries - {% for ns in dns %} + {% for ns in dns | default([], true) %} echo "nameserver {{ ns }}" >> "$tmpfile" {% endfor %} @@ -394,7 +394,7 @@ echo "Installing Necessary Python pip packages" python3 -m ensurepip - PACKAGES=({% for pkg in k8s_pip_packages %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %}) + PACKAGES=({% for pkg in k8s_pip_packages | default([], true) %}"{{ pkg }}"{% if not loop.last %} {% endif %}{% endfor %}) for pkg in "${PACKAGES[@]}"; do echo "Installing $pkg from offline repo..." 
diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 index 1683736916..843be05a37 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 @@ -137,8 +137,8 @@ location = "gcr.io" [[registry.mirror]] location = "{{ pulp_mirror }}" -{% if user_registry | default([]) | length > 0 %} -{% for registry in user_registry %} +{% if user_registry | default([], true) | length > 0 %} +{% for registry in user_registry | default([], true) %} [[registry]] prefix = "{{ registry.host }}" @@ -147,7 +147,7 @@ location = "{{ pulp_mirror }}" {% endfor %} {% endif %} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -161,7 +161,7 @@ - "chronyc sources" - "chronyc -a makestep" {# Mount-specific runcmd entries #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -183,7 +183,7 @@ {% endif %} {% endif %} {% endraw %} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # K8s NFS mount entries @@ -246,7 +246,7 @@ [ -n 
"$search_line" ] && echo "$search_line" > "$tmpfile" # Add your new nameserver entries - {% for ns in dns %} + {% for ns in dns | default([], true) %} echo "nameserver {{ ns }}" >> "$tmpfile" {% endfor %} diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 index 3face481e1..c979d0795d 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_control_node_x86_64.yaml.j2 @@ -77,7 +77,7 @@ IdentityFile {{ client_mount_path }}/slurm/ssh/oim_rsa IdentitiesOnly yes {% if cloud_init_groups_dict[functional_group_name].powervault_scripts is defined %} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -297,7 +297,7 @@ - bash /usr/local/bin/doca-install.sh || true - bash /usr/local/bin/configure-ib-network.sh {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -322,7 +322,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in 
cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # Ensure Slurm NFS root is mounted at client_mount_path (e.g. /share_omnia) diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 index cf689051ff..a150b6af9f 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 @@ -397,7 +397,7 @@ permissions: '{{ file_mode_755 }}' content: | {{ lookup('template', 'templates/hpc_tools/export_nvhpc_env.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -415,7 +415,7 @@ - bash /usr/local/bin/doca-install.sh || true - bash /usr/local/bin/configure-ib-network.sh {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -440,7 +440,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | 
default([], true) %} - bash /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # VAST storage: create subdirectory structure for compute node diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 index 317b67272c..b9ac1db5ec 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 @@ -402,7 +402,7 @@ content: | {{ lookup('template', 'templates/hpc_tools/export_nvhpc_env.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | @@ -421,7 +421,7 @@ - bash /usr/local/bin/doca-install.sh || echo "DOCA install failed (non-critical)" - bash /usr/local/bin/configure-ib-network.sh || echo "IB network configuration failed (non-critical)" {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} -{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined %} +{%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} - {{ cmd }} {% endfor %} @@ -446,7 +446,7 @@ {% set fg_swap = cloud_init_groups_dict.get(functional_group_name, {}).get('swap', {}) %} {% include 'configure_swap.yaml.j2' %} - mount -av -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - bash 
/usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh {% endfor %} # VAST storage: create subdirectory structure for compute node diff --git a/provision/roles/provision_validations/tasks/validate_telemetry_config.yml b/provision/roles/provision_validations/tasks/validate_telemetry_config.yml index 9deada8961..a8e7a4d09d 100644 --- a/provision/roles/provision_validations/tasks/validate_telemetry_config.yml +++ b/provision/roles/provision_validations/tasks/validate_telemetry_config.yml @@ -28,7 +28,7 @@ - name: Get k8s cluster details ansible.builtin.set_fact: service_cluster_info: >- - {{ vars[k8s_cluster_name] + {{ lookup('vars', k8s_cluster_name) | selectattr('deployment', 'equalto', true) | list | first }} diff --git a/provision/roles/slurm_config/tasks/build_slurm_conf.yml b/provision/roles/slurm_config/tasks/build_slurm_conf.yml index d930f64418..beeb14f4c8 100644 --- a/provision/roles/slurm_config/tasks/build_slurm_conf.yml +++ b/provision/roles/slurm_config/tasks/build_slurm_conf.yml @@ -17,7 +17,7 @@ apply_config: "{{ apply_config | default({}) | combine({'slurm': (apply_config['slurm'] | combine({'NodeName': (apply_config['slurm'].NodeName | default([])) + (node_params | default([]))}))}) }}" - when: node_params is defined and node_params + when: node_params is defined and (node_params | length > 0) no_log: "{{ _no_log }}" - name: Append login nodes to NodeName list @@ -26,7 +26,7 @@ | combine({'slurm': (apply_config['slurm'] | combine({'NodeName': (apply_config['slurm'].NodeName | default([])) + [{'NodeName': item}]}))}) }}" loop: "{{ login_list }}" - when: login_list is defined and login_list + when: login_list is defined and (login_list | length > 0) no_log: "{{ _no_log }}" - name: Append compiler login nodes to NodeName list @@ -35,7 +35,7 @@ | combine({'slurm': (apply_config['slurm'] | combine({'NodeName': (apply_config['slurm'].NodeName | default([])) + [{'NodeName': item}]}))}) }}" loop: "{{ compiler_login_list }}" - when: compiler_login_list is 
defined and compiler_login_list + when: compiler_login_list is defined and (compiler_login_list | length > 0) no_log: "{{ _no_log }}" - name: Append Partition @@ -43,11 +43,11 @@ apply_config: "{{ apply_config | default({}) | combine({'slurm': (apply_config['slurm'] | combine({'PartitionName': (apply_config['slurm'].PartitionName | default([])) + [partition_params]}))}) }}" - when: node_params is defined and node_params + when: node_params is defined and (node_params | length > 0) no_log: "{{ _no_log }}" - name: Add dbd parameters to slurm conf ansible.builtin.set_fact: apply_config: "{{ apply_config | default({}) | combine({'slurm': (apply_config['slurm'] | combine(dbd_slurm_conf))}) }}" - when: dbd_list is defined and dbd_list + when: dbd_list is defined and (dbd_list | length > 0) no_log: "{{ _no_log }}" diff --git a/provision/roles/slurm_config/tasks/check_ctld_running.yml b/provision/roles/slurm_config/tasks/check_ctld_running.yml index a8bacada0f..a6380573ad 100644 --- a/provision/roles/slurm_config/tasks/check_ctld_running.yml +++ b/provision/roles/slurm_config/tasks/check_ctld_running.yml @@ -81,7 +81,7 @@ register: scontrol_reconfig delegate_to: "{{ ctld }}" when: - - ctld_state[ctld] is true + - ctld_state[ctld] | bool - name: Undrain if any nodes are drain ansible.builtin.command: @@ -94,7 +94,7 @@ register: scontrol_node_resume delegate_to: "{{ ctld }}" when: - - ctld_state[ctld] is true + - ctld_state[ctld] | bool rescue: - name: Fail if slurmctld is not running on any host diff --git a/provision/roles/slurm_config/tasks/confs.yml b/provision/roles/slurm_config/tasks/confs.yml index 2378682b7d..7b188c3fe3 100644 --- a/provision/roles/slurm_config/tasks/confs.yml +++ b/provision/roles/slurm_config/tasks/confs.yml @@ -148,13 +148,13 @@ apply_config: "{{ apply_config | default({}) | combine({'slurmdbd': (apply_config['slurmdbd'] | combine({'DbdHost': ctld_list[0], 'StorageHost': ctld_list[0]}))}) }}" - when: ctld_list + when: ctld_list | length > 0 
no_log: "{{ _no_log }}" - name: Check .conf files existence ansible.builtin.stat: path: "{{ slurm_config_path }}/{{ item.0 }}/etc/slurm/{{ item.1 }}.conf" - when: ctld_list + when: ctld_list | length > 0 loop: "{{ ctld_list | product(conf_files | default([])) }}" register: ctld_conf_files @@ -169,7 +169,7 @@ no_log: "{{ _no_log }}" when: - configs_input is defined - - configs_input + - configs_input | length > 0 - item.value is string - item.key in conf_files @@ -189,7 +189,7 @@ no_log: "{{ _no_log }}" when: - configs_input is defined - - configs_input + - configs_input | length > 0 - item.value is mapping - name: Create lists for conf_merge @@ -367,7 +367,7 @@ register: ctld_conf_files no_log: "{{ _no_log }}" when: - - item.ini_lines + - item.ini_lines | length > 0 - name: Add extra confs which are not handled ansible.builtin.include_tasks: handle_extra_confs.yml @@ -384,7 +384,7 @@ - name: Check if cluster running ansible.builtin.include_tasks: check_ctld_running.yml when: - - ctld_list + - ctld_list | length > 0 - ctld_conf_files is changed loop: "{{ ctld_list }}" loop_control: diff --git a/provision/roles/slurm_config/tasks/create_slurm_dir.yml b/provision/roles/slurm_config/tasks/create_slurm_dir.yml index 7ae0ae88af..0864bd2b6a 100644 --- a/provision/roles/slurm_config/tasks/create_slurm_dir.yml +++ b/provision/roles/slurm_config/tasks/create_slurm_dir.yml @@ -85,11 +85,11 @@ ansible.builtin.file: path: "{{ slurm_config_path }}/{{ slurm_item }}" state: absent - loop: "{{ (ctld_list | default([]) - + cmpt_list | default([]) - + login_list | default([]) - + compiler_login_list | default([]) - + dbd_list | default([]) + loop: "{{ ((ctld_list | default([])) + + (cmpt_list | default([])) + + (login_list | default([])) + + (compiler_login_list | default([])) + + (dbd_list | default([])) + ['munge.key']) | flatten }}" loop_control: loop_var: slurm_item @@ -180,7 +180,7 @@ owner: "{{ root_user }}" group: "{{ root_group }}" mode: "{{ conf_file_mode }}" - when: 
ctld_list + when: ctld_list | length > 0 loop: "{{ ctld_list }}" - name: Generate slurmd opts for Configless @@ -194,7 +194,7 @@ owner: "{{ root_user }}" group: "{{ root_group }}" mode: "{{ common_mode }}" - when: cmpt_list + when: cmpt_list | length > 0 loop: "{{ cmpt_list | product(['logout_user.sh']) }}" - name: Get the slurm NFS path diff --git a/provision/roles/slurm_config/tasks/main.yml b/provision/roles/slurm_config/tasks/main.yml index f300c5da5d..2540c6f95a 100644 --- a/provision/roles/slurm_config/tasks/main.yml +++ b/provision/roles/slurm_config/tasks/main.yml @@ -37,4 +37,4 @@ ansible.builtin.include_tasks: create_slurm_dir.yml when: - slurm_support - - ctld_list or (cmpt_list or login_list or compiler_login_list) + - (ctld_list | length > 0) or (cmpt_list | length > 0) or (login_list | length > 0) or (compiler_login_list | length > 0) diff --git a/rollback/ansible.cfg b/rollback/ansible.cfg index 1e6688a5cf..c7116f280a 100644 --- a/rollback/ansible.cfg +++ b/rollback/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 roles_path = roles:../upgrade/roles:../utils/roles library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/rollback/rollback.yml b/rollback/rollback.yml index 6530fd21ec..ab409f1719 100644 --- a/rollback/rollback.yml +++ b/rollback/rollback.yml @@ -181,8 +181,8 @@ ansible.builtin.set_fact: requested_tags: >- {{ all_rollback_components - if (ansible_run_tags is not defined or 'all' in ansible_run_tags) - else ansible_run_tags }} + if (ansible_run_tags is not defined or 'all' in (ansible_run_tags | list)) + else ansible_run_tags | list }} # ─── Initialize rollback_manifest.yml using oim_metadata as source-of-truth ─── - name: Initialize rollback_manifest.yml (first invocation) diff --git a/telemetry/ansible.cfg b/telemetry/ansible.cfg index ef1156baf9..4b6f270146 100644 --- a/telemetry/ansible.cfg +++ 
b/telemetry/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/telemetry/roles/telemetry_disable/tasks/main.yml b/telemetry/roles/telemetry_disable/tasks/main.yml index ef0122c36f..79bbc62bb4 100644 --- a/telemetry/roles/telemetry_disable/tasks/main.yml +++ b/telemetry/roles/telemetry_disable/tasks/main.yml @@ -19,7 +19,7 @@ - name: Fail if no tags provided ansible.builtin.fail: msg: "{{ tags_required_msg }}" - when: ansible_run_tags | default(['all']) | length == 1 and 'all' in ansible_run_tags | default(['all']) + when: ansible_run_tags | default(['all']) | list | length == 1 and 'all' in (ansible_run_tags | default(['all']) | list) - name: Load telemetry configuration ansible.builtin.include_vars: diff --git a/telemetry/roles/telemetry_enable/tasks/main.yml b/telemetry/roles/telemetry_enable/tasks/main.yml index 2897085dee..0914a08bc2 100644 --- a/telemetry/roles/telemetry_enable/tasks/main.yml +++ b/telemetry/roles/telemetry_enable/tasks/main.yml @@ -19,7 +19,7 @@ - name: Fail if no tags provided ansible.builtin.fail: msg: "{{ tags_required_msg }}" - when: ansible_run_tags | default(['all']) | length == 1 and 'all' in ansible_run_tags | default(['all']) + when: ansible_run_tags | default(['all']) | list | length == 1 and 'all' in (ansible_run_tags | default(['all']) | list) - name: Load telemetry configuration ansible.builtin.include_vars: diff --git a/telemetry/telemetry.yml b/telemetry/telemetry.yml index d535c475ce..1aeb03a2fe 100644 --- a/telemetry/telemetry.yml +++ b/telemetry/telemetry.yml @@ -19,7 +19,7 @@ tasks: - name: Set dynamic run tags including 'telemetry' ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['telemetry']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['telemetry']) | unique }}" cacheable: 
true - name: Invoke validate_config.yml to perform L1 and L2 validations diff --git a/upgrade/ansible.cfg b/upgrade/ansible.cfg index 5a6d4bcf28..ff4775869e 100644 --- a/upgrade/ansible.cfg +++ b/upgrade/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 roles_path = roles:../utils/roles:../prepare_oim/roles library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/upgrade/roles/import_input_parameters/scripts/transform_software_config.py b/upgrade/roles/import_input_parameters/scripts/transform_software_config.py index 4073e89f3a..9a314e7614 100644 --- a/upgrade/roles/import_input_parameters/scripts/transform_software_config.py +++ b/upgrade/roles/import_input_parameters/scripts/transform_software_config.py @@ -26,7 +26,7 @@ "csi_driver_powerscale": "v2.16.0" } -with open(backup_file, 'r') as f: +with open(backup_file, 'r', encoding='utf-8') as f: backup = json.load(f) # Start with a copy of the backup (preserves user's configuration exactly) @@ -50,13 +50,16 @@ # If additional_packages exists as a TOP-LEVEL key in backup, append "os" if not present # This is the array like: "additional_packages": [{"name": "..."}, ...] 
if 'additional_packages' in result and isinstance(result['additional_packages'], list): - existing_names = {item.get('name') for item in result['additional_packages'] if isinstance(item, dict) and 'name' in item} + existing_names = { + item.get('name') for item in result['additional_packages'] + if isinstance(item, dict) and 'name' in item + } if 'os' not in existing_names: result['additional_packages'].append({"name": "os"}) print("Added {'name': 'os'} to additional_packages array", file=sys.stderr) # Write the result with compact formatting (no extra whitespace in arrays) -with open(target_file, 'w') as f: +with open(target_file, 'w', encoding='utf-8') as f: json.dump(result, f, indent=4, separators=(',', ': ')) f.write('\n') diff --git a/upgrade/upgrade.yml b/upgrade/upgrade.yml index d0f0d40041..9cc2852a5a 100644 --- a/upgrade/upgrade.yml +++ b/upgrade/upgrade.yml @@ -245,8 +245,8 @@ ansible.builtin.set_fact: requested_tags: >- {{ all_components - if (ansible_run_tags is not defined or 'all' in ansible_run_tags) - else ansible_run_tags }} + if (ansible_run_tags is not defined or 'all' in (ansible_run_tags | list)) + else ansible_run_tags | list }} - name: Validate tag dependency order ansible.builtin.fail: @@ -312,7 +312,7 @@ omnia_run_tags: >- {{ ( - ansible_run_tags | default([]) + + ansible_run_tags | default([]) | list + ['build_stream', 'gitlab'] ) | unique }} @@ -532,7 +532,7 @@ omnia_run_tags: >- {{ ( - ansible_run_tags | default([]) + + ansible_run_tags | default([]) | list + ['prepare_oim', 'local_repo', 'ufm_telemetry', 'vast_telemetry', 'ldms', 'idrac_telemetry'] ) | unique }} @@ -631,8 +631,8 @@ ansible.builtin.set_fact: finalize_requested_tags: >- {{ all_components - if (ansible_run_tags is not defined or 'all' in ansible_run_tags) - else ansible_run_tags | intersect(all_components) }} + if (ansible_run_tags is not defined or 'all' in (ansible_run_tags | list)) + else ansible_run_tags | list | intersect(all_components) }} - name: Build cleaned 
component_status (only update requested tags) ansible.builtin.set_fact: diff --git a/utils/ansible.cfg b/utils/ansible.cfg index c2472ef518..2aec0b37f9 100644 --- a/utils/ansible.cfg +++ b/utils/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/utils/credential_utility/ansible.cfg b/utils/credential_utility/ansible.cfg index 3b339cbaf5..505802ff42 100644 --- a/utils/credential_utility/ansible.cfg +++ b/utils/credential_utility/ansible.cfg @@ -5,6 +5,7 @@ host_key_checking = false forks = 5 timeout = 180 executable = /bin/bash +interpreter_python = /usr/bin/python3 library = ../../common/library/modules module_utils = ../../common/library/module_utils diff --git a/utils/credential_utility/roles/update_config/tasks/credential_status.yml b/utils/credential_utility/roles/update_config/tasks/credential_status.yml index af2568f4df..d56e6b761e 100644 --- a/utils/credential_utility/roles/update_config/tasks/credential_status.yml +++ b/utils/credential_utility/roles/update_config/tasks/credential_status.yml @@ -34,18 +34,18 @@ ( ((field.file is not defined or field.file != credential_files[1].file_path) and - (vars[field.username] is not defined or - vars[field.username] == "" or - (vars[field.username] | length == 0)) and + (lookup('vars', field.username, default='') is not defined or + lookup('vars', field.username, default='') == "" or + (lookup('vars', field.username, default='') | length == 0)) and (mandatory_credentials_status or conditional_mandatory_credentials_status or optional_credentials_status)) or ((field.file is defined and field.file == credential_files[1].file_path) and - (vars['build_stream_auth_username'] is not defined or - vars['build_stream_auth_username'] == "" or - (vars['build_stream_auth_username'] | length == 0))) + (lookup('vars', 'build_stream_auth_username', 
default='') is not defined or + lookup('vars', 'build_stream_auth_username', default='') == "" or + (lookup('vars', 'build_stream_auth_username', default='') | length == 0))) ) }} @@ -62,9 +62,9 @@ ( ((field.file is not defined or field.file != credential_files[1].file_path) and - (vars[field.password] is not defined or - vars[field.password] == "" or - (vars[field.password] | length == 0)) and + (lookup('vars', field.password, default='') is not defined or + lookup('vars', field.password, default='') == "" or + (lookup('vars', field.password, default='') | length == 0)) and ( (mandatory_credentials_status | default(false) | bool or conditional_mandatory_credentials_status | @@ -72,15 +72,15 @@ or (optional_credentials_status | default(false) | bool and field.username is defined and - ((vars[field.username] is defined and - vars[field.username] != "") or + ((lookup('vars', field.username, default='') is defined and + lookup('vars', field.username, default='') != "") or (username_status | default(false) | bool))))) or ((field.file is defined and field.file == credential_files[1].file_path) and - (vars['build_stream_auth_password_hash'] is not defined or - vars['build_stream_auth_password_hash'] == "" or - (vars['build_stream_auth_password_hash'] | length == 0))) + (lookup('vars', 'build_stream_auth_password_hash', default='') is not defined or + lookup('vars', 'build_stream_auth_password_hash', default='') == "" or + (lookup('vars', 'build_stream_auth_password_hash', default='') | length == 0))) ) }} @@ -94,8 +94,8 @@ - field.username is defined - optional_credentials_status | default(false) | bool - username_status | default(false) | bool - - vars[field.username] is not defined or - vars[field.username] == "" + - lookup('vars', field.username, default='') is not defined or + lookup('vars', field.username, default='') == "" # Reset credential status after processing - name: Reset credentials status diff --git 
a/utils/credential_utility/roles/update_config/tasks/update_bs_credential_file.yml b/utils/credential_utility/roles/update_config/tasks/update_bs_credential_file.yml index b01cdfb9fd..898f7189a3 100644 --- a/utils/credential_utility/roles/update_config/tasks/update_bs_credential_file.yml +++ b/utils/credential_utility/roles/update_config/tasks/update_bs_credential_file.yml @@ -27,7 +27,7 @@ - name: Use existing username when username not updated ansible.builtin.set_fact: - build_stream_auth_username: "{{ vars['build_stream_auth_username'] | default('') }}" + build_stream_auth_username: "{{ lookup('vars', 'build_stream_auth_username', default='') }}" no_log: true when: not username_status | default(false) @@ -43,7 +43,7 @@ - name: Use existing password when password not updated ansible.builtin.set_fact: - build_stream_auth_password: "{{ vars['build_stream_auth_password'] | default('') }}" + build_stream_auth_password: "{{ lookup('vars', 'build_stream_auth_password', default='') }}" no_log: true when: not password_status | default(false) @@ -76,7 +76,7 @@ - name: Use existing password hash when password not updated ansible.builtin.set_fact: - build_stream_auth_password_hash: "{{ vars['build_stream_auth_password_hash'] | default('') }}" + build_stream_auth_password_hash: "{{ lookup('vars', 'build_stream_auth_password_hash', default='') }}" no_log: true when: not password_status | default(false) or password_hash is not defined or password_hash is not succeeded diff --git a/utils/credential_utility/roles/validation/tasks/main.yml b/utils/credential_utility/roles/validation/tasks/main.yml index 63929bcaf3..223126008d 100644 --- a/utils/credential_utility/roles/validation/tasks/main.yml +++ b/utils/credential_utility/roles/validation/tasks/main.yml @@ -15,7 +15,7 @@ - name: Initialize list of tags ansible.builtin.set_fact: - omnia_run_tags: "{{ ansible_run_tags | default([]) }}" + omnia_run_tags: "{{ ansible_run_tags | default([]) | list }}" when: omnia_run_tags is not 
defined - name: Load build_stream_config.yml to check if enabled diff --git a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml index 572dd5f79a..14804973b3 100644 --- a/utils/credential_utility/roles/validation/tasks/pre_requisite.yml +++ b/utils/credential_utility/roles/validation/tasks/pre_requisite.yml @@ -44,7 +44,7 @@ - name: Set run tags for telemetry ansible.builtin.set_fact: - omnia_run_tags: "{{ (omnia_run_tags | default([])) + (result.telemetry_status_list | default([])) | unique }}" + omnia_run_tags: "{{ ((omnia_run_tags | default([])) + (result.telemetry_status_list | default([]))) | unique }}" when: - not result.skipped | default(false) - result.telemetry_status_list | length > 0 diff --git a/utils/set_pxe_boot.yml b/utils/set_pxe_boot.yml index fba614a5da..bad598b0e7 100644 --- a/utils/set_pxe_boot.yml +++ b/utils/set_pxe_boot.yml @@ -39,7 +39,7 @@ - name: Set dynamic run tags including 'provision' when: not config_file_status | default(false) | bool ansible.builtin.set_fact: - omnia_run_tags: "{{ (ansible_run_tags | default([]) + ['provision']) | unique }}" + omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['provision']) | unique }}" cacheable: true - name: Invoke get_config_credentials.yml From 923c43b03324236b63ba3806f51e8255fc34f3d0 Mon Sep 17 00:00:00 2001 From: venu <236371043+Venu-p1@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:44:52 +0530 Subject: [PATCH 02/78] Add catalog generator documentation. 
Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> --- .../core/catalog/CATALOG_GENERATOR.md | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 build_stream/core/catalog/CATALOG_GENERATOR.md diff --git a/build_stream/core/catalog/CATALOG_GENERATOR.md b/build_stream/core/catalog/CATALOG_GENERATOR.md new file mode 100644 index 0000000000..22ef3613a1 --- /dev/null +++ b/build_stream/core/catalog/CATALOG_GENERATOR.md @@ -0,0 +1,106 @@ +# Catalog generator + +This directory contains utility tools for catalog generation and validation. + +## Tools + +### 1. generate_catalog_examples.py + +Generates example catalog JSON files from input configuration by cycling through different mapping/software_config combinations. + +**Location:** `build_stream/generate_catalog_examples.py` + +**Usage:** +```bash +cd /omnia/build_stream +python3 generate_catalog_examples.py --base-dir /omnia/input/project_default/ +``` + +**What it does:** +- Copies mapping files from `examples/catalog/mapping_file_software_config/` to the input directory +- Generates catalogs for each mapping variant (slurm-only, nfs-provisioner, etc.) +- Outputs generated catalogs to `examples/catalog/` directory +- Provides a summary of packages and layers generated + +**Generated catalogs:** +- `catalog_rhel_aarch64_with_slurm_only.json` +- `catalog_rhel_x86_64_with_slurm_only.json` +- `catalog_rhel_with_nfs_provisioner.json` +- `catalog_rhel_x86_64.json` +- `catalog_rhel.json` + +--- + +### 2. diff_input_configs.py + +Compares two input directories (expected vs actual) and reports per-file, per-cluster package differences. +This can be used independently or after running the catalog generator to check the differences.
+ +**Location:** `build_stream/core/catalog/tests/diff_input_configs.py` + +**Usage:** +```bash +cd /omnia/build_stream/core/catalog/tests +python3 diff_input_configs.py \ + --expected /omnia/input \ + --actual /tmp/adapter_output_test/input +``` + +**Optional arguments:** +- `--file-level`: Compare packages at file level (flatten all clusters) instead of per-cluster +- `--report `: Write a human-readable table report to the given file +- `--pxe-mapping `: Path to PXE mapping CSV file for information display +- `--catalog `: Path to catalog file for information display + +**What it does:** +1. Compares `software_config.json` (softwares list and versions) +2. Walks `config////*.json` in both directories +3. For each matching JSON, compares packages per cluster section +4. Reports missing files, extra files, and per-cluster diffs +5. Handles versioned filenames (e.g., `service_k8s_v1.35.1.json` matches `service_k8s.json`) +6. Ignores common package extraction and `_first` cluster merging artifacts + +**Programmatic usage (for tests):** +```python +from diff_input_configs import run_diff_for_test + +passed, issue_count, report_path = run_diff_for_test( + expected_dir="/path/to/expected", + actual_dir="/path/to/actual", + report_file="/path/to/report.txt" # optional, uses temp file if not provided +) +# Returns: (passed: bool, issue_count: int, report_path: str) +``` + +**Exit codes:** +- `0`: No differences found +- `1`: Differences found + +--- + +### 3. test_catalog_diff_regression.py + +Regression test suite that validates catalog generation and adapter policy output. + +**Location:** `build_stream/core/catalog/tests/test_catalog_diff_regression.py` + +**Core idea:** +Validates the end-to-end flow: catalog → adapter policy → input configs, ensuring the generated output matches the expected input configuration files. + +**Steps:** +1. Loads example catalog (`catalog_rhel.json`) +2. 
Runs generator to create root JSONs (functional_layer.json, infrastructure.json, etc.) +3. Runs adapter policy to generate input configs from root JSONs +4. Uses `diff_input_configs.py` to compare generated output with expected input configs +5. Validates functional layers match PXE mapping expectations +6. Checks specific package routing and architecture constraints + +**Usage:** +```bash +cd /omnia/build_stream/core/catalog/tests +python3 -m pytest test_catalog_diff_regression.py -v +``` + +**Test classes:** +- `TestAdapterDiffReport`: Verifies adapter output matches expected configs using diff tool +- `TestCatalogFunctionalLayers`: Validates functional layers against PXE mapping and architecture constraints From 990220d175455bd41c138da99caf53d49a4b4262 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 2 Jun 2026 19:43:14 +0530 Subject: [PATCH 03/78] ansible 2.20 fixes Signed-off-by: Abhishek S A --- .../module_utils/discovery/standard_functions.py | 4 ++-- .../input_validation/common_utils/validation_utils.py | 8 ++++---- .../module_utils/local_repo/process_metadata.py | 6 +++--- .../module_utils/local_repo/process_parallel.py | 2 +- .../library/module_utils/local_repo/software_utils.py | 10 +++++----- .../module_utils/upgrade/upgrade_hop_calculator_lib.py | 8 ++++---- common/library/modules/create_upgrade_staging.py | 8 ++++---- common/library/modules/fetch_credential_rule.py | 2 +- common/library/modules/generate_discovery_report.py | 2 +- common/library/modules/generate_functional_groups.py | 6 +++--- common/library/modules/get_service_cluster_info.py | 2 +- .../ci-group-login_compiler_node_aarch64.yaml.j2 | 2 +- .../ci-group-login_compiler_node_x86_64.yaml.j2 | 2 +- .../cloud_init/ci-group-slurm_node_aarch64.yaml.j2 | 2 +- .../cloud_init/ci-group-slurm_node_x86_64.yaml.j2 | 2 +- .../mount_config/tasks/process_single_powervault.yml | 2 +- .../files/nersc-ldms-aggr/make_host_map.dell.py | 2 +- .../telemetry/files/nersc-ldms-aggr/mkmanifest.py | 6 +++--- 
18 files changed, 38 insertions(+), 38 deletions(-) diff --git a/common/library/module_utils/discovery/standard_functions.py b/common/library/module_utils/discovery/standard_functions.py index 3af21a4c79..111f06de4d 100644 --- a/common/library/module_utils/discovery/standard_functions.py +++ b/common/library/module_utils/discovery/standard_functions.py @@ -82,7 +82,7 @@ def update_json(new_data, filepath): """ if os.path.exists(filepath): # Load existing data - with open(filepath, 'r') as f: + with open(filepath, 'r', encoding='utf-8') as f: try: existing_data = json.load(f) except json.JSONDecodeError: @@ -94,5 +94,5 @@ def update_json(new_data, filepath): existing_data.update(new_data) # Write back to file - with open(filepath, 'w') as f: + with open(filepath, 'w', encoding='utf-8') as f: json.dump(existing_data, f, indent=2) diff --git a/common/library/module_utils/input_validation/common_utils/validation_utils.py b/common/library/module_utils/input_validation/common_utils/validation_utils.py index 395da87ec9..df1a5ed8e7 100644 --- a/common/library/module_utils/input_validation/common_utils/validation_utils.py +++ b/common/library/module_utils/input_validation/common_utils/validation_utils.py @@ -45,7 +45,7 @@ def load_yaml_as_json(yaml_file, omnia_base_dir, project_name, logger, module): if is_file_encrypted(yaml_file): data = process_encrypted_file(yaml_file, omnia_base_dir, project_name, logger, module) return data - with open(yaml_file, "r") as f: + with open(yaml_file, "r", encoding="utf-8") as f: data = yaml.safe_load(f) return data except FileNotFoundError: @@ -167,7 +167,7 @@ def load_json(file_path): ValueError: If the JSON parsing fails. 
""" try: - with open(file_path, 'r') as file: + with open(file_path, 'r', encoding='utf-8') as file: return json.load(file) except FileNotFoundError as exc: raise FileNotFoundError(f"Error: File '{file_path}' not found.") from exc @@ -221,7 +221,7 @@ def is_file_encrypted(file_path): bool: True if the file is encrypted, False otherwise. """ try: - with open(file_path, 'r') as file: + with open(file_path, 'r', encoding='utf-8') as file: first_line = file.readline().strip() return first_line.startswith('$ANSIBLE_VAULT') except (IOError, OSError): @@ -245,7 +245,7 @@ def process_encrypted_file(yaml_file, omnia_base_dir, project_name, logger, modu decrypted_file = decrypt_file(omnia_base_dir, project_name, yaml_file, vault_password_file) if decrypted_file: try: - with open(yaml_file, "r") as f: + with open(yaml_file, "r", encoding="utf-8") as f: data = yaml.safe_load(f) encrypt_file(omnia_base_dir, project_name, yaml_file, vault_password_file) return data diff --git a/common/library/module_utils/local_repo/process_metadata.py b/common/library/module_utils/local_repo/process_metadata.py index 936788dda4..9ddbaef320 100644 --- a/common/library/module_utils/local_repo/process_metadata.py +++ b/common/library/module_utils/local_repo/process_metadata.py @@ -32,7 +32,7 @@ def load_yaml(path): """ if not os.path.isfile(path): return {} - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: return yaml.safe_load(f) or {} def write_yaml(path, data): @@ -42,7 +42,7 @@ def write_yaml(path, data): Uses block-style formatting (not flow style). 
""" os.makedirs(os.path.dirname(path), exist_ok=True) - with open(path, 'w') as f: + with open(path, 'w', encoding='utf-8') as f: yaml.dump(data, f, default_flow_style=False) def load_config(config_path: str) -> dict: @@ -53,7 +53,7 @@ def load_config(config_path: str) -> dict: """ if not os.path.exists(config_path): raise FileNotFoundError(f"Config file not found: {config_path}") - with open(config_path) as f: + with open(config_path, encoding='utf-8') as f: return json.load(f) def generate_policy_dict(repo_list, default_policy): diff --git a/common/library/module_utils/local_repo/process_parallel.py b/common/library/module_utils/local_repo/process_parallel.py index 2c55098c98..0530943379 100644 --- a/common/library/module_utils/local_repo/process_parallel.py +++ b/common/library/module_utils/local_repo/process_parallel.py @@ -142,7 +142,7 @@ def log_table_output(table_output, log_file): # Ensure the directory for the log file exists os.makedirs(os.path.dirname(log_file), exist_ok=True) # Write the table output to the log file - with open(log_file, "w") as file: + with open(log_file, "w", encoding="utf-8") as file: file.write("Command Execution Results Table:\n") # Add a header to the table file.write(table_output) # Write the actual table content except Exception as e: diff --git a/common/library/module_utils/local_repo/software_utils.py b/common/library/module_utils/local_repo/software_utils.py index d3306d58b8..4bff9c8fc0 100644 --- a/common/library/module_utils/local_repo/software_utils.py +++ b/common/library/module_utils/local_repo/software_utils.py @@ -92,7 +92,7 @@ def load_json(file_path): ValueError: If the JSON parsing fails. 
""" try: - with open(file_path, 'r') as file: + with open(file_path, 'r', encoding='utf-8') as file: return json.load(file) except FileNotFoundError as exc: raise FileNotFoundError(f"Error: File '{file_path}' not found.") from exc @@ -573,7 +573,7 @@ def get_csv_software(file_name): if not os.path.isfile(file_name): return csv_software - with open(file_name, mode='r') as csv_file: + with open(file_name, mode='r', encoding='utf-8') as csv_file: reader = csv.DictReader(csv_file) csv_software = [row.get(CSV_COLUMNS["column1"], "").strip() for row in reader] @@ -596,7 +596,7 @@ def get_failed_software(file_path): if not os.path.isfile(file_path): return failed_software - with open(file_path, mode='r') as csv_file: + with open(file_path, mode='r', encoding='utf-8') as csv_file: reader = csv.DictReader(csv_file) failed_software = [ str(row.get(CSV_COLUMNS["column1"]) or "").strip() @@ -796,7 +796,7 @@ def check_csv_existence(path): def read_status_csv(csv_path): """Reads the status.csv file and returns a list of row dictionaries.""" - with open(csv_path, mode='r', newline='') as file: + with open(csv_path, mode='r', newline='', encoding='utf-8') as file: reader = csv.DictReader(file) return [row for row in reader] @@ -911,7 +911,7 @@ def process_software(software, fresh_installation, json_path, csv_path, subgroup return combined, failed_packages def get_software_names(json_file_path): - with open(json_file_path, "r") as f: + with open(json_file_path, "r", encoding="utf-8") as f: data = json.load(f) softwares = data.get("softwares", []) diff --git a/common/library/module_utils/upgrade/upgrade_hop_calculator_lib.py b/common/library/module_utils/upgrade/upgrade_hop_calculator_lib.py index e7b77fea58..d888ebdd56 100644 --- a/common/library/module_utils/upgrade/upgrade_hop_calculator_lib.py +++ b/common/library/module_utils/upgrade/upgrade_hop_calculator_lib.py @@ -253,7 +253,7 @@ def update_software_config( - total_hops: Total number of hops """ # Load current config - with 
open(input_file) as f: + with open(input_file, encoding='utf-8') as f: config = json.load(f) # Find final version for each software (last hop in chain) @@ -293,7 +293,7 @@ def update_software_config( }) # Write updated config - with open(input_file, 'w') as f: + with open(input_file, 'w', encoding='utf-8') as f: json.dump(config, f, indent=4) # Output result @@ -372,7 +372,7 @@ def update_component_json_repos( # Read JSON file try: - with open(json_path, 'r') as f: + with open(json_path, 'r', encoding='utf-8') as f: json_data = json.load(f) except Exception as e: msg = f" Error reading {json_path}: {e}" @@ -396,7 +396,7 @@ def update_component_json_repos( # Write updated JSON file if updated: try: - with open(json_path, 'w') as f: + with open(json_path, 'w', encoding='utf-8') as f: json.dump(json_data, f, indent=4) msg = f" Success: Updated {json_path}" messages.append(msg) diff --git a/common/library/modules/create_upgrade_staging.py b/common/library/modules/create_upgrade_staging.py index 2e98a4ffbf..66edc14954 100644 --- a/common/library/modules/create_upgrade_staging.py +++ b/common/library/modules/create_upgrade_staging.py @@ -93,7 +93,7 @@ def create_staging( else: print(f"Preserved unchanged: {name}") - with open(os.path.join(staging_dir, 'software_config.json'), 'w') as f: + with open(os.path.join(staging_dir, 'software_config.json'), 'w', encoding='utf-8') as f: json.dump(sw_config, f, indent=4) print(f"software_config.json written to staging ({sw_delta_count} version(s) updated)") @@ -102,7 +102,7 @@ def create_staging( local_repo_config_path = os.path.join(input_dir, 'local_repo_config.yml') base_config = {} if os.path.exists(local_repo_config_path): - with open(local_repo_config_path) as f: + with open(local_repo_config_path, encoding='utf-8') as f: base_config = yaml.safe_load(f) or {} print(f"Loaded base local_repo_config.yml from {local_repo_config_path}") else: @@ -123,7 +123,7 @@ def create_staging( repos_added = 0 if os.path.exists(repos_file): - 
with open(repos_file) as f: + with open(repos_file, encoding='utf-8') as f: repos = yaml.safe_load(f) or {} # Collect Omnia versions whose repos need to be merged. @@ -186,7 +186,7 @@ def create_staging( print(f"local_repo_config.yml: {repos_added} repo(s) added from repos.yml to base config") - with open(os.path.join(staging_dir, 'local_repo_config.yml'), 'w') as f: + with open(os.path.join(staging_dir, 'local_repo_config.yml'), 'w', encoding='utf-8') as f: yaml.dump(base_config, f, default_flow_style=False, sort_keys=False) # --- 4. Copy vault credentials files if they exist --- diff --git a/common/library/modules/fetch_credential_rule.py b/common/library/modules/fetch_credential_rule.py index 1038b3d9c0..4142ba1a35 100644 --- a/common/library/modules/fetch_credential_rule.py +++ b/common/library/modules/fetch_credential_rule.py @@ -24,7 +24,7 @@ def load_rules(file_path): """Loads validation rules from JSON file.""" - with open(file_path, 'r') as file: + with open(file_path, 'r', encoding='utf-8') as file: return json.load(file) def fetch_rule(field, rules): diff --git a/common/library/modules/generate_discovery_report.py b/common/library/modules/generate_discovery_report.py index 8b23d43f8e..24581022c2 100644 --- a/common/library/modules/generate_discovery_report.py +++ b/common/library/modules/generate_discovery_report.py @@ -85,7 +85,7 @@ def generate_report(servers, output_file): os.makedirs(output_dir, exist_ok=True) server_count = 0 - with open(output_file, 'w', newline='') as csvfile: + with open(output_file, 'w', newline='', encoding='utf-8') as csvfile: writer = csv.writer(csvfile) writer.writerow(REPORT_HEADERS) diff --git a/common/library/modules/generate_functional_groups.py b/common/library/modules/generate_functional_groups.py index f6c3c0857b..5d8e820866 100644 --- a/common/library/modules/generate_functional_groups.py +++ b/common/library/modules/generate_functional_groups.py @@ -29,7 +29,7 @@ def load_omnia_config(omnia_config_path, module): 
module.fail_json(msg=f"omnia_config.yml not found: {omnia_config_path}") try: - with open(omnia_config_path) as f: + with open(omnia_config_path, encoding='utf-8') as f: config = yaml.safe_load(f) or {} kube_name = None @@ -61,7 +61,7 @@ def parse_csv(filename, module): kube_control_seen = False try: - with open(filename, newline="") as f: + with open(filename, newline="", encoding="utf-8") as f: cleaned_lines = [line.strip() for line in f if line.strip()] header = cleaned_lines[0].split(",") expected_columns = len(header) @@ -131,7 +131,7 @@ def build_yaml(new_groups, new_func_groups, kube_cluster_name, slurm_cluster_nam def dump_yaml_with_comments(data, filename): """Write YAML data to file with custom formatting and comments.""" - with open(filename, "w") as f: + with open(filename, "w", encoding="utf-8") as f: f.write("# ---------------------------------------------------------------------------\n") f.write("# Groups definition\n") f.write("# ---------------------------------------------------------------------------\n") diff --git a/common/library/modules/get_service_cluster_info.py b/common/library/modules/get_service_cluster_info.py index bcb0f3af3e..5fcb73520a 100644 --- a/common/library/modules/get_service_cluster_info.py +++ b/common/library/modules/get_service_cluster_info.py @@ -24,7 +24,7 @@ def load_functional_groups_yaml(path, module): """Load functional group names from YAML.""" try: - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) return data.get("groups", {}) except ValueError as e: diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 index c815bb1152..5a5b73da5e 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 +++ 
b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 @@ -189,7 +189,7 @@ content: | {{ lookup('template', 'templates/hpc_tools/setup_nvhpc_sdk.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 index 070bbd0f86..7cedbbbc00 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_x86_64.yaml.j2 @@ -189,7 +189,7 @@ content: | {{ lookup('template', 'templates/hpc_tools/setup_nvhpc_sdk.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 index 74182bdca8..2f7f52c930 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 @@ -399,7 +399,7 @@ content: | {{ lookup('template', 'templates/hpc_tools/setup_nvhpc_sdk.sh.j2') | indent(12) }} -{% for pv_entry in 
cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 index 541098e1ad..427703257c 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_x86_64.yaml.j2 @@ -404,7 +404,7 @@ content: | {{ lookup('template', 'templates/hpc_tools/setup_nvhpc_sdk.sh.j2') | indent(12) }} -{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([]) %} +{% for pv_entry in cloud_init_groups_dict[functional_group_name].powervault_scripts | default([], true) %} - path: /usr/local/bin/setup_iscsi_storage_{{ pv_entry.name }}.sh permissions: '{{ file_mode_755 }}' content: | diff --git a/provision/roles/mount_config/tasks/process_single_powervault.yml b/provision/roles/mount_config/tasks/process_single_powervault.yml index 9d83e1b5b7..e611696156 100644 --- a/provision/roles/mount_config/tasks/process_single_powervault.yml +++ b/provision/roles/mount_config/tasks/process_single_powervault.yml @@ -30,7 +30,7 @@ cloud_init_groups_dict: >- {{ cloud_init_groups_dict | combine({ item: (cloud_init_groups_dict[item] | default({}) | combine({ - 'powervault_scripts': (cloud_init_groups_dict[item].powervault_scripts | default([])) + [{ + 'powervault_scripts': (cloud_init_groups_dict[item].powervault_scripts | default([], true)) + [{ 'name': pv_item.name, 'content': pv_rendered_script }] diff --git a/provision/roles/telemetry/files/nersc-ldms-aggr/make_host_map.dell.py 
b/provision/roles/telemetry/files/nersc-ldms-aggr/make_host_map.dell.py index 4fadf264c4..c3f1151846 100644 --- a/provision/roles/telemetry/files/nersc-ldms-aggr/make_host_map.dell.py +++ b/provision/roles/telemetry/files/nersc-ldms-aggr/make_host_map.dell.py @@ -23,7 +23,7 @@ def load_config(config_path): """Load the json config file given a file path.""" if not os.path.exists(config_path): return {} - with open(config_path, 'r') as f: + with open(config_path, 'r', encoding='utf-8') as f: return json.load(f) diff --git a/provision/roles/telemetry/files/nersc-ldms-aggr/mkmanifest.py b/provision/roles/telemetry/files/nersc-ldms-aggr/mkmanifest.py index 91f842f5ce..febf274d73 100644 --- a/provision/roles/telemetry/files/nersc-ldms-aggr/mkmanifest.py +++ b/provision/roles/telemetry/files/nersc-ldms-aggr/mkmanifest.py @@ -63,7 +63,7 @@ def load_yaml_file(path, required=True): else: logging.warning(f"Optional YAML file missing: {path}") return None - with open(path, 'r') as fh: + with open(path, 'r', encoding='utf-8') as fh: return yaml.safe_load(fh) def load_json_file(path, required=True): @@ -75,7 +75,7 @@ def load_json_file(path, required=True): else: logging.warning(f"Optional JSON file missing: {path}") return None - with open(path, 'r') as fh: + with open(path, 'r', encoding='utf-8') as fh: return json.load(fh) def harvest_cluster_info(cluster_file): @@ -303,7 +303,7 @@ def update_manifest(manifest, aggs, store_stateful_replicas, replicas_exporter, def write_yaml_file(path, data, description=None): """Write YAML data to file.""" try: - with open(path, 'w') as fh: + with open(path, 'w', encoding='utf-8') as fh: yaml.dump(data, fh, indent=2) if description: logging.info(f"Wrote {description} to {path}") From f9e0d8c84ae1194ca60a58c9acb7a1b2d0282201 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 2 Jun 2026 19:46:18 +0530 Subject: [PATCH 04/78] lint update Signed-off-by: Abhishek S A --- .github/workflows/ansible-lint.yml | 1 + .github/workflows/pylint.yml | 1 
+ 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ansible-lint.yml b/.github/workflows/ansible-lint.yml index fbe78977fa..aea0698715 100644 --- a/.github/workflows/ansible-lint.yml +++ b/.github/workflows/ansible-lint.yml @@ -10,6 +10,7 @@ on: - pub/q2_dev - pub/telemetry - pub/q2_upgrade + - pub/q2_ansible jobs: build: diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 6b2b1f4d3d..3aaded93be 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -10,6 +10,7 @@ on: - pub/q2_dev - pub/telemetry - pub/q2_upgrade + - pub/q2_ansible jobs: build: From 0b20c0cda9533fc6a1b264ce6d27a16537bb3599 Mon Sep 17 00:00:00 2001 From: Nagachandan-P Date: Wed, 3 Jun 2026 10:29:17 +0000 Subject: [PATCH 05/78] ib ip port matching enhanced logic Signed-off-by: Nagachandan-P --- .../doca-ofed/configure-ib-network.sh.j2 | 124 +++++++----------- 1 file changed, 47 insertions(+), 77 deletions(-) diff --git a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 index 0bc535918e..a1fef8447d 100644 --- a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 +++ b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 @@ -65,8 +65,8 @@ if [[ "$IB_NIC_NAME" =~ ^(InfiniBand\.PCIe\.Slot\.|InfiniBand\.Slot\.|NIC\.Infin SLOT_NUMBER="${BASH_REMATCH[2]}" PORT_NUMBER="${BASH_REMATCH[3]}" echo "INFO: Successfully parsed slot-based format" - echo "INFO: Extracted slot number: $SLOT_NUMBER" - echo "INFO: Extracted port number: $PORT_NUMBER" + echo "INFO: Extracted slot number: $SLOT_NUMBER (will be ignored for device selection)" + echo "INFO: Extracted port number: $PORT_NUMBER (used for device selection)" elif [[ "$IB_NIC_NAME" =~ ^InfiniBand\.Single-([0-9]+)$ ]]; then PORT_NUMBER="${BASH_REMATCH[1]}" echo "INFO: Successfully parsed single-port format" @@ -86,10 +86,6 @@ else 
exit 1 fi -echo "INFO: Successfully parsed slot-based format" - echo "INFO: Extracted slot number: $SLOT_NUMBER" - echo "INFO: Extracted port number: $PORT_NUMBER" - # Check if IB hardware is present if ! ls /sys/class/infiniband/ >/dev/null 2>&1; then echo "ERROR: No IB hardware found in /sys/class/infiniband/" @@ -117,40 +113,7 @@ for mlx_device in /sys/class/infiniband/mlx5_*; do fi done -# Simple slot-to-device mapping function -find_device_by_slot() { - local target_slot="$1" - - echo "DEBUG: Starting slot-based device search for slot '$target_slot'" >&2 - - for mlx_device in /sys/class/infiniband/mlx5_*; do - [ -e "$mlx_device" ] || continue - device_name=$(basename "$mlx_device") - - # Get PCI address and extract slot number - pci_address=$(basename "$(readlink -f "/sys/class/infiniband/$device_name/device" 2>/dev/null)") - if [ -n "$pci_address" ]; then - # Extract slot from PCI address: 0000:17:00.0 → "17" - pci_slot=$(echo "$pci_address" | cut -d: -f2 | sed 's/^0*//') - target_clean=$(echo "$target_slot" | sed 's/^0*//') - - echo "DEBUG: Checking device $device_name: PCI $pci_address → slot $pci_slot (target: $target_clean)" >&2 - - if [[ "$pci_slot" == "$target_clean" ]]; then - echo "SUCCESS: Found matching device! 
Slot $target_clean matches PCI slot $pci_slot for device $device_name" >&2 - echo "$device_name" - return 0 - else - echo "DEBUG: Slot mismatch - device $device_name has PCI slot $pci_slot, looking for slot $target_clean" >&2 - fi - else - echo "DEBUG: Could not get PCI address for device $device_name" >&2 - fi - done - - echo "ERROR: No device found matching slot '$target_slot'" >&2 - return 1 -} + # === DEVICE COUNT ANALYSIS AND SELECTION LOGIC === echo "INFO: DEVICE ANALYSIS: Analyzing mlx5 device count for configuration strategy" @@ -162,7 +125,7 @@ if [ "$MLX5_DEVICE_COUNT" -eq 0 ]; then elif [ "$MLX5_DEVICE_COUNT" -eq 1 ]; then echo "INFO: SINGLE-DEVICE MODE: Only one mlx5 device available - no slot-based mapping needed" - echo "INFO: SINGLE-DEVICE MODE: Using available device regardless of slot number in PXE mapping" + echo "INFO: SINGLE-DEVICE MODE: Using available device (slot number from PXE mapping ignored)" echo "INFO: SINGLE-DEVICE MODE: This ensures robustness for single-device deployments" MLX5_DEVICE=$(ls /sys/class/infiniband/ | grep mlx5 | head -1) @@ -174,54 +137,61 @@ elif [ "$MLX5_DEVICE_COUNT" -eq 1 ]; then echo "SUCCESS: SINGLE-DEVICE MODE: Selected device '$MLX5_DEVICE' (only available device)" elif [ "$MLX5_DEVICE_COUNT" -gt 1 ]; then - echo "INFO: MULTI-DEVICE MODE: Found $MLX5_DEVICE_COUNT active mlx5 devices - using slot-based mapping" - echo "INFO: MULTI-DEVICE MODE: Slot-based mapping required for precise device selection" + echo "INFO: MULTI-DEVICE MODE: Found $MLX5_DEVICE_COUNT active mlx5 devices" + echo "INFO: MULTI-DEVICE MODE: Checking all devices for port $PORT_NUMBER with GUID validation" - if [ -z "$SLOT_NUMBER" ]; then - echo "ERROR: MULTI-DEVICE MODE: Multiple devices found but no slot number specified in PXE mapping" - echo "ERROR: MULTI-DEVICE MODE: PXE mapping contains: '$IB_NIC_NAME'" - echo "ERROR: MULTI-DEVICE MODE: Expected format: 'InfiniBand.PCIe.Slot.X-Y' where X is the slot number" - echo "ERROR: MULTI-DEVICE 
MODE: Please update PXE mapping file with correct slot numbers" - echo "ERROR: MULTI-DEVICE MODE: Available mlx5 devices with their PCI slots:" - for mlx_device in /sys/class/infiniband/mlx5_*; do - [ -e "$mlx_device" ] || continue - device_name=$(basename "$mlx_device") - pci_address=$(basename "$(readlink -f "/sys/class/infiniband/$device_name/device" 2>/dev/null)") - if [ -n "$pci_address" ]; then - pci_slot=$(echo "$pci_address" | cut -d: -f2 | sed 's/^0*//') - echo " - $device_name: PCI $pci_address (slot $pci_slot)" - else - echo " - $device_name: PCI information not available" - fi - done - exit 1 - fi + # Array to store devices that have the specified port with valid GUID + MATCHING_DEVICES=() - echo "INFO: MULTI-DEVICE MODE: Looking for device matching slot '$SLOT_NUMBER'" - MLX5_DEVICE=$(find_device_by_slot "$SLOT_NUMBER") + for mlx_device in /sys/class/infiniband/mlx5_*; do + [ -e "$mlx_device" ] || continue + device_name=$(basename "$mlx_device") + + # Check if port exists on this device + PORT_PATH="/sys/class/infiniband/${device_name}/ports/${PORT_NUMBER}" + if [ ! -d "$PORT_PATH" ]; then + echo "DEBUG: Device $device_name - Port $PORT_NUMBER does not exist" + continue + fi + + # Check if GUID exists for this port + GUID_PATH="${PORT_PATH}/gids/0" + if [ ! 
-f "$GUID_PATH" ]; then + echo "DEBUG: Device $device_name - Port $PORT_NUMBER exists but GUID not found" + continue + fi + + # Device has both port and GUID + MATCHING_DEVICES+=("$device_name") + echo "DEBUG: Device $device_name - Port $PORT_NUMBER with GUID found" + done - if [ -z "$MLX5_DEVICE" ]; then - echo "ERROR: MULTI-DEVICE MODE: Found $MLX5_DEVICE_COUNT active mlx5 devices but none match slot '$SLOT_NUMBER'" - echo "ERROR: MULTI-DEVICE MODE: Slot '$SLOT_NUMBER' from PXE mapping doesn't match any hardware PCI slot" - echo "ERROR: MULTI-DEVICE MODE: Available mlx5 devices with their PCI slots:" + # Analyze matching results + if [ ${#MATCHING_DEVICES[@]} -eq 0 ]; then + echo "ERROR: MULTI-DEVICE MODE: Port $PORT_NUMBER not found on any device (or no GUID available)" + echo "ERROR: MULTI-DEVICE MODE: Available devices and their ports:" for mlx_device in /sys/class/infiniband/mlx5_*; do [ -e "$mlx_device" ] || continue device_name=$(basename "$mlx_device") - pci_address=$(basename "$(readlink -f "/sys/class/infiniband/$device_name/device" 2>/dev/null)") - if [ -n "$pci_address" ]; then - pci_slot=$(echo "$pci_address" | cut -d: -f2 | sed 's/^0*//') - echo " - $device_name: PCI $pci_address (slot $pci_slot)" + echo " - $device_name:" + if [ -d "/sys/class/infiniband/${device_name}/ports" ]; then + ls /sys/class/infiniband/${device_name}/ports/ 2>/dev/null | sed 's/^/ Port: /' else - echo " - $device_name: PCI information not available" + echo " No ports directory found" fi done - echo "ERROR: MULTI-DEVICE MODE: Please update PXE mapping file with correct PCI slot numbers" - echo "ERROR: MULTI-DEVICE MODE: Use format: 'InfiniBand.PCIe.Slot.-'" + exit 1 + elif [ ${#MATCHING_DEVICES[@]} -gt 1 ]; then + echo "ERROR: MULTI-DEVICE MODE: Port $PORT_NUMBER found on multiple devices" + echo "ERROR: MULTI-DEVICE MODE: Matching devices: ${MATCHING_DEVICES[*]}" + echo "ERROR: MULTI-DEVICE MODE: Cannot determine which device to use" + echo "ERROR: MULTI-DEVICE MODE: Please 
update PXE mapping to specify device uniquely" exit 1 fi - echo "SUCCESS: MULTI-DEVICE MODE: Slot '$SLOT_NUMBER' matches device '$MLX5_DEVICE'" - echo "SUCCESS: MULTI-DEVICE MODE: Slot-based device selection completed successfully" + # Use the single matching device + MLX5_DEVICE="${MATCHING_DEVICES[0]}" + echo "SUCCESS: MULTI-DEVICE MODE: Selected device '$MLX5_DEVICE' (only device with port $PORT_NUMBER and valid GUID)" fi echo "INFO: FINAL DEVICE SELECTION: Using mlx5 device '$MLX5_DEVICE'" From bd6009908c8ddd3a8809c59cc69593f55b33d65f Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Wed, 3 Jun 2026 17:08:54 +0530 Subject: [PATCH 06/78] Update software_utils.py Signed-off-by: Abhishek S A --- .../module_utils/local_repo/software_utils.py | 72 +++++++++++++++++-- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/common/library/module_utils/local_repo/software_utils.py b/common/library/module_utils/local_repo/software_utils.py index 4bff9c8fc0..2c8a3b3da0 100644 --- a/common/library/module_utils/local_repo/software_utils.py +++ b/common/library/module_utils/local_repo/software_utils.py @@ -17,14 +17,18 @@ This module util contains all custom software utilities used across custom modules """ from collections import defaultdict +import logging import os import json import csv import re import shlex +import ssl import yaml from jinja2 import Template import requests +from requests.adapters import HTTPAdapter +from urllib3.poolmanager import PoolManager from ansible.module_utils.local_repo.standard_logger import setup_standard_logger from ansible.module_utils.local_repo.common_functions import is_encrypted, process_file, get_arch_from_sw_config from ansible.module_utils.local_repo.parse_and_download import execute_command @@ -170,6 +174,35 @@ def get_csv_file_path(software_name, user_csv_dir, arch): return status_csv_file_path +class _RelaxedCAAdapter(HTTPAdapter): + """HTTPAdapter that loads a custom CA but clears VERIFY_X509_STRICT. 
+ + Python 3.13+ enforces strict RFC 5280 Basic Constraints validation, + rejecting CA certs where the extension is not marked critical. Some + vendor CAs (e.g. Red Hat redhat-uep.pem) have non-critical Basic + Constraints which OpenSSL/curl accept. This adapter restores the + Python 3.12 behavior while keeping full chain and hostname validation. + + Remove this workaround once the upstream CA is reissued with the + Basic Constraints extension marked critical. + """ + + def __init__(self, ca_cert, client_cert, client_key, *args, **kwargs): + self._ca_cert = ca_cert + self._client_cert = client_cert + self._client_key = client_key + super().__init__(*args, **kwargs) + + def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs): + ctx = ssl.create_default_context(cafile=self._ca_cert) + ctx.verify_flags &= ~ssl.VERIFY_X509_STRICT + if self._client_cert and self._client_key: + ctx.load_cert_chain(self._client_cert, self._client_key) + self.poolmanager = PoolManager( + num_pools=connections, maxsize=maxsize, + block=block, ssl_context=ctx, **pool_kwargs) + + def is_remote_url_reachable(remote_url, timeout=10, client_cert=None, client_key=None, ca_cert=None): """ @@ -186,20 +219,41 @@ def is_remote_url_reachable(remote_url, timeout=10, Returns: bool: True if the URL is reachable (HTTP status 200), False otherwise. """ + logger = logging.getLogger(__name__) try: # Check if SSL certs are provided and handle accordingly if client_cert and client_key and ca_cert: - response = requests.get( - remote_url, - cert=(client_cert, client_key), - verify=ca_cert, - timeout=timeout - ) + try: + response = requests.get( + remote_url, + cert=(client_cert, client_key), + verify=ca_cert, + timeout=timeout + ) + except requests.exceptions.SSLError as ssl_exc: + # Python 3.13+ rejects CA certs with non-critical Basic + # Constraints (RFC 5280 strict mode). 
Retry against the + # SAME CA with VERIFY_X509_STRICT cleared — still validates + # the full chain and hostname, just relaxes the one check. + logger.warning( + f"Strict SSL verification failed for {remote_url}: " + f"{ssl_exc}. Retrying with VERIFY_X509_STRICT cleared.") + session = requests.Session() + adapter = _RelaxedCAAdapter( + ca_cert, client_cert, client_key) + session.mount("https://", adapter) + response = session.get(remote_url, timeout=timeout) else: # Proceed with a regular HTTP request if no SSL certs are provided response = requests.get(remote_url, timeout=timeout) + if response.status_code != 200: + logger.error( + f"URL {remote_url} returned HTTP {response.status_code}") return response.status_code == 200 - except Exception: + except Exception as exc: + logger.error( + f"URL reachability exception for {remote_url}: " + f"{type(exc).__name__}: {exc}") return False def transform_package_dict(data, arch_val,logger): @@ -396,6 +450,10 @@ def parse_repo_urls(repo_config, local_repo_config_path, ) logger.info(f"Processing RHEL repo '{name}' for arch '{arch}' - URL: {url}") + logger.info(f"RHEL SSL paths: ca_cert={ca_cert}, client_key={client_key}, client_cert={client_cert}") + logger.info(f"RHEL SSL files exist: ca_cert={os.path.exists(ca_cert) if ca_cert else 'N/A'}, " + f"client_key={os.path.exists(client_key) if client_key else 'N/A'}, " + f"client_cert={os.path.exists(client_cert) if client_cert else 'N/A'}") for path in [ca_cert, client_key, client_cert]: mode = "decrypt" From 51f8e71737ffe2ea9619845d9e18d70ae8bd01a4 Mon Sep 17 00:00:00 2001 From: Nagachandan P Date: Thu, 4 Jun 2026 13:22:01 +0530 Subject: [PATCH 07/78] Update configure-ib-network.sh.j2 Signed-off-by: Nagachandan P --- .../doca-ofed/configure-ib-network.sh.j2 | 140 +++++++++++++----- 1 file changed, 106 insertions(+), 34 deletions(-) diff --git a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 
b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 index a1fef8447d..9ecc30d17f 100644 --- a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 +++ b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 @@ -65,8 +65,8 @@ if [[ "$IB_NIC_NAME" =~ ^(InfiniBand\.PCIe\.Slot\.|InfiniBand\.Slot\.|NIC\.Infin SLOT_NUMBER="${BASH_REMATCH[2]}" PORT_NUMBER="${BASH_REMATCH[3]}" echo "INFO: Successfully parsed slot-based format" - echo "INFO: Extracted slot number: $SLOT_NUMBER (will be ignored for device selection)" - echo "INFO: Extracted port number: $PORT_NUMBER (used for device selection)" + echo "INFO: Extracted slot number: $SLOT_NUMBER" + echo "INFO: Extracted port number: $PORT_NUMBER" elif [[ "$IB_NIC_NAME" =~ ^InfiniBand\.Single-([0-9]+)$ ]]; then PORT_NUMBER="${BASH_REMATCH[1]}" echo "INFO: Successfully parsed single-port format" @@ -113,7 +113,57 @@ for mlx_device in /sys/class/infiniband/mlx5_*; do fi done +# Always verify mlx5 devices are InfiniBand-capable using ibstat +# This prevents Ethernet-only Mellanox NICs from being configured as IB devices +if command -v ibstat >/dev/null 2>&1; then + echo "INFO: Verifying mlx5 devices are InfiniBand-capable using ibstat" + + IB_DEVICES=() + current_ca="" + + while IFS= read -r line; do + if [[ "$line" =~ ^CA\ \'(mlx5_[0-9]+)\' ]]; then + current_ca="${BASH_REMATCH[1]}" + echo "DEBUG: Found mlx5 device: $current_ca" + elif [[ "$line" =~ Link\ layer:\ InfiniBand ]] && [ -n "$current_ca" ]; then + IB_DEVICES+=("$current_ca") + echo "DEBUG: Device $current_ca has InfiniBand link layer - adding to list" + current_ca="" + elif [[ "$line" =~ Link\ layer:\ (.+) ]] && [ -n "$current_ca" ]; then + link_layer="${BASH_REMATCH[1]}" + echo "DEBUG: Device $current_ca has '$link_layer' link layer - skipping" + current_ca="" + elif [[ "$line" =~ ^CA\ \' ]] && [ -n "$current_ca" ]; then + echo "DEBUG: Device $current_ca does not have InfiniBand link 
layer - skipping" + current_ca="" + fi + done < <(ibstat 2>/dev/null) + + FILTERED_COUNT=0 + for device in "${IB_DEVICES[@]}"; do + FILTERED_COUNT=$((FILTERED_COUNT + 1)) + done + if [ "$FILTERED_COUNT" -gt 0 ]; then + if [ "$MLX5_DEVICE_COUNT" -ne "$FILTERED_COUNT" ]; then + echo "INFO: Filtered $MLX5_DEVICE_COUNT mlx5 devices down to $FILTERED_COUNT InfiniBand-capable devices" + fi + MLX5_DEVICE_COUNT="$FILTERED_COUNT" + + if [ "$FILTERED_COUNT" -eq 1 ]; then + echo "INFO: Only one InfiniBand device found - using single-device mode" + SINGLE_IB_DEVICE="${IB_DEVICES[0]}" + fi + else + echo "ERROR: ibstat filtering found no InfiniBand-capable mlx5 devices" + echo "ERROR: All found mlx5 devices are Ethernet-only (RoCE) or not InfiniBand-capable" + echo "ERROR: Please ensure at least one Mellanox device has InfiniBand link layer" + exit 1 + fi +else + echo "WARNING: ibstat command not found. Cannot verify devices are InfiniBand-capable" + echo "WARNING: Proceeding with all mlx5 devices - may include Ethernet-only NICs" +fi # === DEVICE COUNT ANALYSIS AND SELECTION LOGIC === echo "INFO: DEVICE ANALYSIS: Analyzing mlx5 device count for configuration strategy" @@ -122,55 +172,78 @@ if [ "$MLX5_DEVICE_COUNT" -eq 0 ]; then echo "ERROR: DEVICE ANALYSIS: No mlx5 devices found on this system" echo "ERROR: DEVICE ANALYSIS: Please check hardware and driver installation" exit 1 - + elif [ "$MLX5_DEVICE_COUNT" -eq 1 ]; then echo "INFO: SINGLE-DEVICE MODE: Only one mlx5 device available - no slot-based mapping needed" echo "INFO: SINGLE-DEVICE MODE: Using available device (slot number from PXE mapping ignored)" echo "INFO: SINGLE-DEVICE MODE: This ensures robustness for single-device deployments" - - MLX5_DEVICE=$(ls /sys/class/infiniband/ | grep mlx5 | head -1) + + if [ -n "${SINGLE_IB_DEVICE:-}" ]; then + MLX5_DEVICE="$SINGLE_IB_DEVICE" + echo "INFO: SINGLE-DEVICE MODE: Using filtered InfiniBand device: $MLX5_DEVICE" + else + MLX5_DEVICE=$(ls /sys/class/infiniband/ | grep mlx5 
| head -1) + echo "INFO: SINGLE-DEVICE MODE: Using first available mlx5 device: $MLX5_DEVICE" + fi if [ -z "$MLX5_DEVICE" ]; then echo "ERROR: DEVICE ANALYSIS: Failed to detect mlx5 device despite count showing 1" exit 1 fi - + echo "SUCCESS: SINGLE-DEVICE MODE: Selected device '$MLX5_DEVICE' (only available device)" - + elif [ "$MLX5_DEVICE_COUNT" -gt 1 ]; then echo "INFO: MULTI-DEVICE MODE: Found $MLX5_DEVICE_COUNT active mlx5 devices" echo "INFO: MULTI-DEVICE MODE: Checking all devices for port $PORT_NUMBER with GUID validation" - + # Array to store devices that have the specified port with valid GUID MATCHING_DEVICES=() - - for mlx_device in /sys/class/infiniband/mlx5_*; do + + # Use filtered device list if available, otherwise check all devices + search_devices=() + if [ -n "${IB_DEVICES[*]:-}" ]; then + for device in "${IB_DEVICES[@]}"; do + search_devices+=("/sys/class/infiniband/$device") + done + else + for mlx_device in /sys/class/infiniband/mlx5_*; do + [ -e "$mlx_device" ] && search_devices+=("$mlx_device") + done + fi + + for mlx_device in "${search_devices[@]}"; do [ -e "$mlx_device" ] || continue device_name=$(basename "$mlx_device") - + # Check if port exists on this device PORT_PATH="/sys/class/infiniband/${device_name}/ports/${PORT_NUMBER}" if [ ! -d "$PORT_PATH" ]; then echo "DEBUG: Device $device_name - Port $PORT_NUMBER does not exist" continue fi - + # Check if GUID exists for this port GUID_PATH="${PORT_PATH}/gids/0" if [ ! 
-f "$GUID_PATH" ]; then echo "DEBUG: Device $device_name - Port $PORT_NUMBER exists but GUID not found" continue fi - + # Device has both port and GUID MATCHING_DEVICES+=("$device_name") echo "DEBUG: Device $device_name - Port $PORT_NUMBER with GUID found" done - + # Analyze matching results - if [ ${#MATCHING_DEVICES[@]} -eq 0 ]; then + MATCHING_COUNT=0 + for device in "${MATCHING_DEVICES[@]}"; do + MATCHING_COUNT=$((MATCHING_COUNT + 1)) + done + + if [ "$MATCHING_COUNT" -eq 0 ]; then echo "ERROR: MULTI-DEVICE MODE: Port $PORT_NUMBER not found on any device (or no GUID available)" echo "ERROR: MULTI-DEVICE MODE: Available devices and their ports:" - for mlx_device in /sys/class/infiniband/mlx5_*; do + for mlx_device in "${search_devices[@]}"; do [ -e "$mlx_device" ] || continue device_name=$(basename "$mlx_device") echo " - $device_name:" @@ -181,14 +254,14 @@ elif [ "$MLX5_DEVICE_COUNT" -gt 1 ]; then fi done exit 1 - elif [ ${#MATCHING_DEVICES[@]} -gt 1 ]; then + elif [ "$MATCHING_COUNT" -gt 1 ]; then echo "ERROR: MULTI-DEVICE MODE: Port $PORT_NUMBER found on multiple devices" echo "ERROR: MULTI-DEVICE MODE: Matching devices: ${MATCHING_DEVICES[*]}" echo "ERROR: MULTI-DEVICE MODE: Cannot determine which device to use" echo "ERROR: MULTI-DEVICE MODE: Please update PXE mapping to specify device uniquely" exit 1 fi - + # Use the single matching device MLX5_DEVICE="${MATCHING_DEVICES[0]}" echo "SUCCESS: MULTI-DEVICE MODE: Selected device '$MLX5_DEVICE' (only device with port $PORT_NUMBER and valid GUID)" @@ -254,11 +327,11 @@ done for iface in $(ip link show | grep -E "^[0-9]+: ib" | awk -F: '{print $2}'); do echo "DEBUG: Checking interface $iface for GUID match" - + # Get the full address from the interface and extract the last 8 octets full_addr=$(ip link show "$iface" | grep "link/infiniband" | sed 's/.*link\/infiniband //' | sed 's/ brd.*//') echo "DEBUG: Interface $iface full address: $full_addr" - + # Extract the last 8 octets (fields 13-20) and format with 
colons for comparison iface_octets=$(echo "$full_addr" | cut -d: -f13-20 | python3 -c " import sys @@ -268,7 +341,7 @@ if len(parts) >= 8: print(formatted) ") echo "DEBUG: Interface $iface octets: $iface_octets (target: $GUID_OCTETS)" - + if [ "$iface_octets" = "$GUID_OCTETS" ]; then echo "SUCCESS: GUID MATCH FOUND! Interface $iface matches hardware GUID" IB_INTERFACE="$iface" @@ -301,33 +374,33 @@ echo "SUCCESS: INTERFACE SELECTION: Found IB interface $IB_INTERFACE matching ha if command -v nmcli >/dev/null 2>&1; then echo "INFO: IP CONFIGURATION: Using NetworkManager to configure IB interface" echo "DEBUG: IP CONFIGURATION: Target IP $IB_IP/$NETMASK_BITS on interface $IB_INTERFACE" - + echo "DEBUG: IP CONFIGURATION: Removing existing NetworkManager connection for $IB_INTERFACE" nmcli con delete "$IB_INTERFACE" &>/dev/null || true - + echo "DEBUG: IP CONFIGURATION: Creating new NetworkManager connection for $IB_INTERFACE" nmcli con add type infiniband ifname "$IB_INTERFACE" con-name "$IB_INTERFACE" - + echo "DEBUG: IP CONFIGURATION: Setting IP address $IB_IP/$NETMASK_BITS on $IB_INTERFACE" nmcli con modify "$IB_INTERFACE" ipv4.method manual ipv4.addresses "$IB_IP/$NETMASK_BITS" - + echo "DEBUG: IP CONFIGURATION: Bringing up NetworkManager connection for $IB_INTERFACE" nmcli con up "$IB_INTERFACE" - + echo "SUCCESS: IP CONFIGURATION: NetworkManager successfully configured $IB_INTERFACE with IP $IB_IP/$NETMASK_BITS" else echo "INFO: IP CONFIGURATION: Using iproute2 to configure IB interface (NetworkManager not available)" echo "DEBUG: IP CONFIGURATION: Target IP $IB_IP/$NETMASK_BITS on interface $IB_INTERFACE" - + echo "DEBUG: IP CONFIGURATION: Flushing existing IP addresses from $IB_INTERFACE" ip addr flush dev "$IB_INTERFACE" - + echo "DEBUG: IP CONFIGURATION: Adding IP address $IB_IP/$NETMASK_BITS to $IB_INTERFACE" ip addr add "$IB_IP/$NETMASK_BITS" dev "$IB_INTERFACE" - + echo "DEBUG: IP CONFIGURATION: Bringing up interface $IB_INTERFACE" ip link set 
"$IB_INTERFACE" up - + echo "SUCCESS: IP CONFIGURATION: iproute2 successfully configured $IB_INTERFACE with IP $IB_IP/$NETMASK_BITS" fi @@ -336,13 +409,13 @@ echo "SUCCESS: FINAL IP ASSIGNMENT: Successfully assigned $IB_IP/$NETMASK_BITS t # Configure DNS for InfiniBand network if [ -n "$IB_IP" ]; then echo "INFO: DNS CONFIGURATION: Configuring DNS for InfiniBand interface" - + # Add VAST DNS servers (completely safe - handles empty arrays) {% for dns_server in hostvars['localhost']['ib_network_dns'] %} echo "DEBUG: DNS CONFIGURATION: Adding DNS server {{ dns_server }}" echo "nameserver {{ dns_server }}" >> /etc/resolv.conf {% endfor %} - + echo "SUCCESS: DNS CONFIGURATION: DNS configuration completed for IB network" else echo "INFO: DNS CONFIGURATION: No DNS configuration needed (IB IP is empty)" @@ -351,5 +424,4 @@ fi echo "=== IB NETWORK CONFIGURATION COMPLETED SUCCESSFULLY ===" echo "SUMMARY: IB interface $IB_INTERFACE configured with IP $IB_IP/$NETMASK_BITS" echo "SUMMARY: Device used: $MLX5_DEVICE, Port: $PORT_NUMBER" -echo "SUMMARY: Configuration method: $([ "$MLX5_DEVICE_COUNT" -eq 1 ] && echo "Single-device mode (no slot mapping needed)" || echo "Multi-device mode (slot-based mapping)")" - +echo "SUMMARY: Configuration method: $([ "$MLX5_DEVICE_COUNT" -eq 1 ] && echo "Single-device mode (no slot mapping needed)" || echo "Multi-device mode (port-based mapping)")" From 47b6149a7af61a27dd68152c83e7fa3cea4c5c28 Mon Sep 17 00:00:00 2001 From: pullan1 Date: Thu, 4 Jun 2026 14:18:20 +0530 Subject: [PATCH 08/78] Fix for upgrade is failing in localrepo when omnia_config_credential.yml file is decrypted Signed-off-by: pullan1 --- .../local_repo/process_parallel.py | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/common/library/module_utils/local_repo/process_parallel.py b/common/library/module_utils/local_repo/process_parallel.py index 0530943379..63c8560ba2 100644 --- a/common/library/module_utils/local_repo/process_parallel.py
+++ b/common/library/module_utils/local_repo/process_parallel.py @@ -42,17 +42,20 @@ def load_docker_credentials(vault_yml_path, vault_password_file): """ - Decrypts an Ansible Vault YAML file, extracts docker_username and docker_password, - and validates them using Docker Hub API. + Loads docker_username and docker_password from a credentials YAML file, + decrypting it with Ansible Vault only when the file is actually encrypted, + and validates the credentials using the Docker Hub API. Validation Logic: + - If the file is vault-encrypted, decrypts it using ansible-vault view. + - If the file is plain YAML (e.g. during upgrade staging), reads it directly. - Validates credentials via Docker Hub REST API - Returns credentials if authentication succeeds (HTTP 200) - Raises RuntimeError for all authentication failures Args: - vault_yml_path (str): Path to the encrypted Ansible Vault YAML file. - vault_password_file (str): Path to the vault password file. + vault_yml_path (str): Path to the Ansible Vault YAML file (may or may not be encrypted). + vault_password_file (str): Path to the vault password file (used only when encrypted). Returns: tuple: (docker_username, docker_password) or (None, None) if not provided. @@ -63,17 +66,25 @@ def load_docker_credentials(vault_yml_path, vault_password_file): is not installed. """ try: - env = os.environ.copy() - env["ANSIBLE_VAULT_PASSWORD_FILE"] = vault_password_file - - result = subprocess.run( - ["ansible-vault", "view", vault_yml_path], - capture_output=True, - text=True, - check=True, - env=env - ) - data = yaml.safe_load(result.stdout) + # Check if the file is vault-encrypted before attempting decryption. + # If it is plain YAML (e.g. during upgrade where the staging copy was + # never encrypted), read it directly to avoid the + # "input is not vault encrypted data" error. 
+ if is_encrypted(vault_yml_path): + env = os.environ.copy() + env["ANSIBLE_VAULT_PASSWORD_FILE"] = vault_password_file + + result = subprocess.run( + ["ansible-vault", "view", vault_yml_path], + capture_output=True, + text=True, + check=True, + env=env + ) + data = yaml.safe_load(result.stdout) + else: + with open(vault_yml_path, "r", encoding="utf-8") as fh: + data = yaml.safe_load(fh) docker_username = data.get("docker_username") docker_password = data.get("docker_password") From bff9ad22da4e3b3bb2cf0c7a51be22ed578c7c8c Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Thu, 4 Jun 2026 14:25:42 +0530 Subject: [PATCH 09/78] Fixing ansible 2.20 issues in upgrade and rollback flow (#4673) * ansible 2.20 upgrade fix Signed-off-by: Abhishek S A * issue fix Signed-off-by: Abhishek S A * upgrade fix Signed-off-by: Abhishek S A * Update restore_user_registry_credential.yml Signed-off-by: Abhishek S A * Update restore_omnia_config_credentials.yml Signed-off-by: Abhishek S A --------- Signed-off-by: Abhishek S A --- rollback/playbooks/rollback_slurm.yml | 6 +- .../rollback_buildstream/tasks/gitlab.yml | 6 +- .../tasks/load_rollback_status.yml | 2 +- .../tasks/check_slurm_cluster.yml | 2 +- rollback/rollback.yml | 2 +- upgrade/playbooks/upgrade_k8s.yml | 2 +- upgrade/playbooks/upgrade_slurm.yml | 6 +- .../tasks/restore_input_files.yml | 2 +- .../restore_omnia_config_credentials.yml | 2 +- .../restore_user_registry_credential.yml | 2 +- .../tasks/transform_build_stream_config.yml | 4 +- .../tasks/transform_gitlab_config.yml | 4 +- .../transform_high_availability_config.yml | 12 +-- .../tasks/transform_local_repo_config.yml | 34 ++++---- .../tasks/transform_network_spec.yml | 4 +- .../tasks/transform_omnia_config.yml | 8 +- .../tasks/transform_pxe_mapping_file.yml | 2 +- .../tasks/transform_storage_config.yml | 6 +- .../tasks/transform_telemetry_config.yml | 78 +++++++++---------- .../templates/local_repo_config.j2 | 14 ++-- .../templates/network_spec.j2 | 8 +- 
.../templates/omnia_config.j2 | 4 +- .../templates/storage_config.j2 | 4 +- .../templates/telemetry_config.j2 | 6 +- .../prep_local_repo/tasks/create_staging.yml | 2 +- .../prep_local_repo/tasks/sync_local_repo.yml | 2 +- .../tasks/gitlab_config_upgrade.yml | 2 +- upgrade/roles/upgrade_cluster/vars/main.yml | 4 +- .../upgrade_k8s/tasks/execute_single_hop.yml | 6 +- .../tasks/powerscale_prepare_upgrade.yml | 2 +- .../tasks/preflight_checks_pulp.yml | 6 +- .../roles/upgrade_k8s/tasks/step_drain.yml | 2 +- .../tasks/post_upgrade_health_check.yml | 10 +-- .../tasks/check_slurm_cluster.yml | 2 +- .../roles/upgrade_slurm/tasks/nfs_client.yml | 2 +- .../tasks/include_required_input.yml | 8 +- upgrade/upgrade.yml | 7 +- 37 files changed, 140 insertions(+), 135 deletions(-) diff --git a/rollback/playbooks/rollback_slurm.yml b/rollback/playbooks/rollback_slurm.yml index c23c405534..c25e43383c 100644 --- a/rollback/playbooks/rollback_slurm.yml +++ b/rollback/playbooks/rollback_slurm.yml @@ -110,7 +110,7 @@ - name: Set previously successful reboot list ansible.builtin.set_fact: - slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([]) }}" + slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([], true) }}" when: _reboot_state_stat.stat.exists | default(false) - name: Initialize previously rebooted list (no prior state) @@ -201,7 +201,7 @@ - name: End play if not slurm nodes in pxe_mapping ansible.builtin.meta: end_play - when: slurm_host_group_map | default({}) | length == 0 + when: slurm_host_group_map | default({}, true) | length == 0 - name: Get the backup directory ansible.builtin.set_fact: @@ -306,7 +306,7 @@ when: ssh_check is failed - name: Skip reboot — node successfully rebooted in previous run - when: inventory_hostname in (hostvars['localhost']['slurm_previously_rebooted'] | default([])) + when: inventory_hostname in (hostvars['localhost']['slurm_previously_rebooted'] | default([], true)) block: - name: Mark node as 
previously completed ansible.builtin.set_fact: diff --git a/rollback/roles/rollback_buildstream/tasks/gitlab.yml b/rollback/roles/rollback_buildstream/tasks/gitlab.yml index 50af909a29..27a5b45337 100644 --- a/rollback/roles/rollback_buildstream/tasks/gitlab.yml +++ b/rollback/roles/rollback_buildstream/tasks/gitlab.yml @@ -146,7 +146,7 @@ | first).id | string }} when: - _action == 'revert_commits' - - _gl_project_search.json | default([]) | length > 0 + - _gl_project_search.json | default([], true) | length > 0 - name: "GitLab | Restore — Read upgrade commit SHA from metadata" ansible.builtin.set_fact: @@ -180,7 +180,7 @@ - _gl_upgrade_commit_sha | default('') == '' - _gl_latest_commits is defined - _gl_latest_commits.status | default(0) == 200 - - _gl_latest_commits.json | default([]) | selectattr('message', 'search', '\\[omnia-upgrade-2.1-to-2.2\\]') | list | length > 0 + - _gl_latest_commits.json | default([], true) | selectattr('message', 'search', '\\[omnia-upgrade-2.1-to-2.2\\]') | list | length > 0 - name: "GitLab | Restore — Skip if no upgrade commit SHA found" ansible.builtin.debug: @@ -207,7 +207,7 @@ - name: "GitLab | Restore — Analyze if already reverted" ansible.builtin.set_fact: _gl_already_reverted: >- - {{ _gl_revert_search.json | default([]) + {{ _gl_revert_search.json | default([], true) | rejectattr('id', 'equalto', _gl_upgrade_commit_sha) | selectattr('message', 'search', _gl_upgrade_commit_sha) | list | length > 0 }} diff --git a/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml b/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml index cbe19fb7b5..d0291d0102 100644 --- a/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml +++ b/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml @@ -133,7 +133,7 @@ | combine({ 'steps': ( rollback_status.nodes[item].steps - | default({}) + | default({}, true) | combine({ 'crio_restart': ( rollback_status.nodes[item].steps.crio_restart diff --git 
a/rollback/roles/rollback_slurm/tasks/check_slurm_cluster.yml b/rollback/roles/rollback_slurm/tasks/check_slurm_cluster.yml index 6d2f5c6db6..1003099f4f 100644 --- a/rollback/roles/rollback_slurm/tasks/check_slurm_cluster.yml +++ b/rollback/roles/rollback_slurm/tasks/check_slurm_cluster.yml @@ -27,7 +27,7 @@ state: absent when: - slurm_ctld_host is defined - - slurm_ctld_host not in (hostvars['localhost']['slurm_previously_rebooted'] | default([])) + - slurm_ctld_host not in (hostvars['localhost']['slurm_previously_rebooted'] | default([], true)) - name: Check for running jobs on slurm cluster ansible.builtin.shell: diff --git a/rollback/rollback.yml b/rollback/rollback.yml index 20ba43b426..ef7da98d57 100644 --- a/rollback/rollback.yml +++ b/rollback/rollback.yml @@ -336,7 +336,7 @@ # ── Identify tags skipped by BuildStream terminal gate ── - name: Identify BuildStream-skipped components (rollback) ansible.builtin.set_fact: - bs_skipped_tags: "{{ bs_rollback_skipped | default([]) }}" + bs_skipped_tags: "{{ bs_rollback_skipped | default([], true) }}" - name: Resolve requested tags for finalize ansible.builtin.set_fact: diff --git a/upgrade/playbooks/upgrade_k8s.yml b/upgrade/playbooks/upgrade_k8s.yml index 7aa8a962ad..d5cb163dda 100644 --- a/upgrade/playbooks/upgrade_k8s.yml +++ b/upgrade/playbooks/upgrade_k8s.yml @@ -1421,7 +1421,7 @@ multi_hop: current_hop: "{{ upgrade_status.multi_hop.current_hop | default(0) }}" hops: >- - {{ (upgrade_status.multi_hop.hops | default([])) + {{ (upgrade_status.multi_hop.hops | default([], true)) | map('combine', {'status': 'completed', 'completed_at': now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ')}) | list }} diff --git a/upgrade/playbooks/upgrade_slurm.yml b/upgrade/playbooks/upgrade_slurm.yml index 21f6dc0963..05ab2b069a 100644 --- a/upgrade/playbooks/upgrade_slurm.yml +++ b/upgrade/playbooks/upgrade_slurm.yml @@ -111,7 +111,7 @@ - name: Set previously successful reboot list ansible.builtin.set_fact: - slurm_previously_rebooted: 
"{{ _reboot_state.successfully_rebooted | default([]) }}" + slurm_previously_rebooted: "{{ _reboot_state.successfully_rebooted | default([], true) }}" when: _reboot_state_stat.stat.exists | default(false) - name: Initialize previously rebooted list (no prior state) @@ -210,7 +210,7 @@ - name: End play if not slurm nodes in pxe_mapping ansible.builtin.meta: end_play - when: slurm_host_group_map | default({}) | length == 0 + when: slurm_host_group_map | default({}, true) | length == 0 - name: SLURM UPGRADE WARNING ansible.builtin.pause: @@ -310,7 +310,7 @@ when: ssh_check is failed - name: Skip reboot — node successfully rebooted in previous run - when: inventory_hostname in (hostvars['localhost']['slurm_previously_rebooted'] | default([])) + when: inventory_hostname in (hostvars['localhost']['slurm_previously_rebooted'] | default([], true)) block: - name: Mark node as previously completed ansible.builtin.set_fact: diff --git a/upgrade/roles/import_input_parameters/tasks/restore_input_files.yml b/upgrade/roles/import_input_parameters/tasks/restore_input_files.yml index 3dd6d45206..8ece1546e8 100644 --- a/upgrade/roles/import_input_parameters/tasks/restore_input_files.yml +++ b/upgrade/roles/import_input_parameters/tasks/restore_input_files.yml @@ -15,7 +15,7 @@ - name: Validate restore_input_files is defined ansible.builtin.set_fact: - restore_input_files_effective: "{{ restore_input_files | default([]) }}" + restore_input_files_effective: "{{ restore_input_files | default([], true) }}" - name: Restore input files from backup (overwrite target) ansible.builtin.include_tasks: restore_single_input_file.yml diff --git a/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml b/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml index 5bf72a106d..089011e96f 100644 --- a/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml +++ 
b/upgrade/roles/import_input_parameters/tasks/restore_omnia_config_credentials.yml @@ -195,7 +195,7 @@ not backup_omnia_config_credentials_key_stat.stat.exists and (backup_omnia_config_credentials_content.stdout is not defined or '$ANSIBLE_VAULT;' not in backup_omnia_config_credentials_content.stdout) and - "'INFO: Both omnia_config_credentials.yml and .omnia_config_credentials_key' not in (upgrade_warnings | join(' '))" + 'INFO: Both omnia_config_credentials.yml and .omnia_config_credentials_key' not in (upgrade_warnings | join(' ')) ansible.builtin.set_fact: upgrade_warnings: > {{ upgrade_warnings + [msg_omnia_config_credentials_info_missing] }} diff --git a/upgrade/roles/import_input_parameters/tasks/restore_user_registry_credential.yml b/upgrade/roles/import_input_parameters/tasks/restore_user_registry_credential.yml index 16e31a4d63..e5d303a38b 100644 --- a/upgrade/roles/import_input_parameters/tasks/restore_user_registry_credential.yml +++ b/upgrade/roles/import_input_parameters/tasks/restore_user_registry_credential.yml @@ -84,7 +84,7 @@ not backup_local_repo_credentials_key_stat.stat.exists and (backup_user_registry_content.stdout is not defined or '$ANSIBLE_VAULT;' not in backup_user_registry_content.stdout) and - "'INFO: Both user_registry_credential.yml and .local_repo_credentials_key' not in (upgrade_warnings | join(' '))" + 'INFO: Both user_registry_credential.yml and .local_repo_credentials_key' not in (upgrade_warnings | join(' ')) ansible.builtin.set_fact: upgrade_warnings: >- {{ upgrade_warnings + [ diff --git a/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml index 9eca462839..58fdcf1d5e 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml @@ -64,7 +64,7 @@ - name: Validate build_stream_host_ip format if provided 
ansible.builtin.assert: that: - - build_stream_host_ip == "" or build_stream_host_ip | ansible.utils.ipaddr + - build_stream_host_ip == "" or ((build_stream_host_ip | ansible.utils.ipaddr) is truthy) fail_msg: "build_stream_host_ip '{{ build_stream_host_ip }}' is not a valid IP address" success_msg: "build_stream_host_ip is valid" when: build_stream_host_ip != "" @@ -72,7 +72,7 @@ - name: Validate build_stream_aarch64_ip format if provided ansible.builtin.assert: that: - - build_stream_aarch64_ip == "" or build_stream_aarch64_ip | ansible.utils.ipaddr + - build_stream_aarch64_ip == "" or ((build_stream_aarch64_ip | ansible.utils.ipaddr) is truthy) fail_msg: "build_stream_aarch64_ip '{{ build_stream_aarch64_ip }}' is not a valid IP address" success_msg: "build_stream_aarch64_ip is valid" when: build_stream_aarch64_ip != "" diff --git a/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml index 90a04368e1..609f3c0509 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml @@ -62,7 +62,7 @@ - name: Validate gitlab_host IP format if provided ansible.builtin.assert: that: - - gitlab_host == "" or gitlab_host | ansible.utils.ipaddr + - gitlab_host == "" or ((gitlab_host | ansible.utils.ipaddr) is truthy) fail_msg: "gitlab_host '{{ gitlab_host }}' is not a valid IP address" success_msg: "gitlab_host is valid" when: gitlab_host != "" @@ -85,7 +85,7 @@ - name: Validate gitlab_default_branch format ansible.builtin.assert: that: - - gitlab_default_branch | regex_search('^[a-zA-Z0-9/_-]+$') + - gitlab_default_branch | regex_search('^[a-zA-Z0-9/_-]+$') is not none fail_msg: "gitlab_default_branch '{{ gitlab_default_branch }}' contains invalid characters" success_msg: "gitlab_default_branch is valid" diff --git a/upgrade/roles/import_input_parameters/tasks/transform_high_availability_config.yml
b/upgrade/roles/import_input_parameters/tasks/transform_high_availability_config.yml index 192dfef630..b9a276f5b4 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_high_availability_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_high_availability_config.yml @@ -49,7 +49,7 @@ ( [backup_ha_config.service_k8s_cluster_ha] if (backup_ha_config.service_k8s_cluster_ha is mapping) - else (backup_ha_config.service_k8s_cluster_ha | default([])) + else (backup_ha_config.service_k8s_cluster_ha | default([], true)) ) }} @@ -57,7 +57,7 @@ ansible.builtin.set_fact: ha_entries_missing_vip: >- {{ - (ha_service_k8s_cluster_ha | default([])) + (ha_service_k8s_cluster_ha | default([], true)) | select('mapping') | selectattr('virtual_ip_address', 'undefined') | map(attribute='cluster_name') @@ -68,7 +68,7 @@ ansible.builtin.set_fact: ha_entries_empty_vip: >- {{ - (ha_service_k8s_cluster_ha | default([])) + (ha_service_k8s_cluster_ha | default([], true)) | select('mapping') | selectattr('virtual_ip_address', 'defined') | selectattr('virtual_ip_address', 'match', '^\\s*$') @@ -80,9 +80,9 @@ ansible.builtin.fail: msg: "{{ msg_ha_virtual_ip_missing }}" when: - - (ha_service_k8s_cluster_ha | default([]) | length) == 0 - or ((ha_entries_missing_vip | default([]) | length) > 0) - or ((ha_entries_empty_vip | default([]) | length) > 0) + - (ha_service_k8s_cluster_ha | default([], true) | length) == 0 + or ((ha_entries_missing_vip | default([], true) | length) > 0) + or ((ha_entries_empty_vip | default([], true) | length) > 0) - name: Write high_availability_config.yml in Omnia 2.2 format ansible.builtin.template: diff --git a/upgrade/roles/import_input_parameters/tasks/transform_local_repo_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_local_repo_config.yml index d220caf0ad..7abc073a01 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_local_repo_config.yml +++ 
b/upgrade/roles/import_input_parameters/tasks/transform_local_repo_config.yml @@ -52,7 +52,7 @@ else ( ( - (backup_local_repo_config.omnia_registry | default([])) + (backup_local_repo_config.omnia_registry | default([], true)) | select('string') | map('regex_replace', '^(.*)$', '{"host": "\\1", "cert_path": "", "key_path": ""}') | map('from_json') @@ -67,37 +67,37 @@ local_repo_user_repo_url_x86_64: "{{ backup_local_repo_config.user_repo_url_x86_64 | default(backup_local_repo_config.user_repo | - default([])) + default([], true), true) }}" - local_repo_user_repo_url_aarch64: "{{ backup_local_repo_config.user_repo_url_aarch64 | default([]) }}" + local_repo_user_repo_url_aarch64: "{{ backup_local_repo_config.user_repo_url_aarch64 | default([], true) }}" local_repo_rhel_os_url_x86_64: "{{ backup_local_repo_config.rhel_os_url_x86_64 | default(backup_local_repo_config.rhel_os_url | - default([])) + default([], true), true) }}" - local_repo_rhel_os_url_aarch64: "{{ backup_local_repo_config.rhel_os_url_aarch64 | default([]) }}" + local_repo_rhel_os_url_aarch64: "{{ backup_local_repo_config.rhel_os_url_aarch64 | default([], true) }}" local_repo_omnia_repo_url_rhel_x86_64: "{{ backup_local_repo_config.omnia_repo_url_rhel_x86_64 | default(backup_local_repo_config.omnia_repo_url_rhel | - default([])) + default([], true), true) }}" local_repo_omnia_repo_url_rhel_aarch64: "{{ backup_local_repo_config.omnia_repo_url_rhel_aarch64 | default(backup_local_repo_config.omnia_repo_url_rhel | - default([])) + default([], true), true) }}" local_repo_additional_repos_x86_64: "{{ backup_local_repo_config.additional_repos_x86_64 | default(backup_local_repo_config.additional_repos | - default([])) + default([], true), true) }}" - local_repo_additional_repos_aarch64: "{{ backup_local_repo_config.additional_repos_aarch64 | default([]) }}" + local_repo_additional_repos_aarch64: "{{ backup_local_repo_config.additional_repos_aarch64 | default([], true) }}" - name: Strip x86_64_ prefix from 
user_repo_url_x86_64 names ansible.builtin.set_fact: local_repo_user_repo_url_x86_64: >- {%- set result = [] -%} - {%- for repo in (local_repo_user_repo_url_x86_64 | default([])) -%} + {%- for repo in (local_repo_user_repo_url_x86_64 | default([], true)) -%} {%- set clean_name = repo.name | default('') | regex_replace('^x86_64_', '') -%} {%- set _ = result.append(repo | combine({'name': clean_name})) -%} {%- endfor -%} @@ -107,7 +107,7 @@ ansible.builtin.set_fact: local_repo_user_repo_url_aarch64: >- {%- set result = [] -%} - {%- for repo in (local_repo_user_repo_url_aarch64 | default([])) -%} + {%- for repo in (local_repo_user_repo_url_aarch64 | default([], true)) -%} {%- set clean_name = repo.name | default('') | regex_replace('^aarch64_', '') -%} {%- set _ = result.append(repo | combine({'name': clean_name})) -%} {%- endfor -%} @@ -117,7 +117,7 @@ ansible.builtin.set_fact: local_repo_rhel_os_url_x86_64: >- {%- set result = [] -%} - {%- for repo in (local_repo_rhel_os_url_x86_64 | default([])) -%} + {%- for repo in (local_repo_rhel_os_url_x86_64 | default([], true)) -%} {%- set clean_name = repo.name | default('') | regex_replace('^x86_64_', '') -%} {%- set _ = result.append(repo | combine({'name': clean_name})) -%} {%- endfor -%} @@ -127,7 +127,7 @@ ansible.builtin.set_fact: local_repo_rhel_os_url_aarch64: >- {%- set result = [] -%} - {%- for repo in (local_repo_rhel_os_url_aarch64 | default([])) -%} + {%- for repo in (local_repo_rhel_os_url_aarch64 | default([], true)) -%} {%- set clean_name = repo.name | default('') | regex_replace('^aarch64_', '') -%} {%- set _ = result.append(repo | combine({'name': clean_name})) -%} {%- endfor -%} @@ -137,7 +137,7 @@ ansible.builtin.set_fact: local_repo_additional_repos_x86_64: >- {%- set result = [] -%} - {%- for repo in (local_repo_additional_repos_x86_64 | default([])) -%} + {%- for repo in (local_repo_additional_repos_x86_64 | default([], true)) -%} {%- set clean_name = repo.name | default('') | 
regex_replace('^x86_64_', '') -%} {%- set _ = result.append(repo | combine({'name': clean_name})) -%} {%- endfor -%} @@ -147,7 +147,7 @@ ansible.builtin.set_fact: local_repo_additional_repos_aarch64: >- {%- set result = [] -%} - {%- for repo in (local_repo_additional_repos_aarch64 | default([])) -%} + {%- for repo in (local_repo_additional_repos_aarch64 | default([], true)) -%} {%- set clean_name = repo.name | default('') | regex_replace('^aarch64_', '') -%} {%- set _ = result.append(repo | combine({'name': clean_name})) -%} {%- endfor -%} @@ -156,12 +156,12 @@ - name: Fail if omnia_repo_url_rhel_x86_64 is missing ansible.builtin.fail: msg: "{{ msg_omnia_repo_url_rhel_x86_64_missing }}" - when: (local_repo_omnia_repo_url_rhel_x86_64 | default([]) | length) == 0 + when: (local_repo_omnia_repo_url_rhel_x86_64 | default([], true) | length) == 0 - name: Fail if omnia_repo_url_rhel_aarch64 is missing ansible.builtin.fail: msg: "{{ msg_omnia_repo_url_rhel_aarch64_missing }}" - when: (local_repo_omnia_repo_url_rhel_aarch64 | default([]) | length) == 0 + when: (local_repo_omnia_repo_url_rhel_aarch64 | default([], true) | length) == 0 - name: Write local_repo_config.yml in Omnia 2.2 format ansible.builtin.template: diff --git a/upgrade/roles/import_input_parameters/tasks/transform_network_spec.yml b/upgrade/roles/import_input_parameters/tasks/transform_network_spec.yml index 0867e1c405..8b0dc7d46f 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_network_spec.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_network_spec.yml @@ -50,7 +50,7 @@ if (backup_network_spec is mapping and backup_network_spec.admin_network is defined) else ( - (backup_network_spec.Networks | default([]) + (backup_network_spec.Networks | default([], true) | select('mapping') | selectattr('admin_network', 'defined') | map(attribute='admin_network') @@ -65,7 +65,7 @@ if (backup_network_spec is mapping and backup_network_spec.ib_network is defined) else ( - 
(backup_network_spec.Networks | default([]) + (backup_network_spec.Networks | default([], true) | select('mapping') | selectattr('ib_network', 'defined') | map(attribute='ib_network') diff --git a/upgrade/roles/import_input_parameters/tasks/transform_omnia_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_omnia_config.yml index 1fa196cc1e..b495b8b6e9 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_omnia_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_omnia_config.yml @@ -44,8 +44,8 @@ - name: Normalize omnia_config.yml values ansible.builtin.set_fact: - omnia_slurm_cluster_raw: "{{ backup_omnia_config.slurm_cluster | default([]) }}" - omnia_service_k8s_cluster_raw: "{{ backup_omnia_config.service_k8s_cluster | default([]) }}" + omnia_slurm_cluster_raw: "{{ backup_omnia_config.slurm_cluster | default([], true) }}" + omnia_service_k8s_cluster_raw: "{{ backup_omnia_config.service_k8s_cluster | default([], true) }}" - name: Ensure slurm_cluster and service_k8s_cluster are lists ansible.builtin.set_fact: @@ -65,12 +65,12 @@ - name: Fail if slurm_cluster is missing ansible.builtin.fail: msg: "{{ msg_slurm_cluster_missing }}" - when: (omnia_slurm_cluster | default([]) | length) == 0 + when: (omnia_slurm_cluster | default([], true) | length) == 0 - name: Fail if service_k8s_cluster is missing ansible.builtin.fail: msg: "{{ msg_service_k8s_cluster_missing }}" - when: (omnia_service_k8s_cluster | default([]) | length) == 0 + when: (omnia_service_k8s_cluster | default([], true) | length) == 0 - name: Write omnia_config.yml in Omnia 2.2 format ansible.builtin.template: diff --git a/upgrade/roles/import_input_parameters/tasks/transform_pxe_mapping_file.yml b/upgrade/roles/import_input_parameters/tasks/transform_pxe_mapping_file.yml index c23d8b0692..a467e62aa6 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_pxe_mapping_file.yml +++ 
b/upgrade/roles/import_input_parameters/tasks/transform_pxe_mapping_file.yml @@ -47,7 +47,7 @@ - name: Fail if no valid rows found in pxe_mapping_file ansible.builtin.fail: msg: "{{ msg_pxe_mapping_file_empty }}" - when: (pxe_mapping_rows | default([]) | length) == 0 + when: (pxe_mapping_rows | default([], true) | length) == 0 - name: Write pxe_mapping_file in Omnia 2.2 format with IB fields ansible.builtin.template: diff --git a/upgrade/roles/import_input_parameters/tasks/transform_storage_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_storage_config.yml index a9b12b424e..4097f4d472 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_storage_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_storage_config.yml @@ -59,13 +59,13 @@ ansible.builtin.set_fact: slurm_nfs_client_params: "{{ (backup_storage_config.nfs_client_params | selectattr('nfs_name', 'equalto', slurm_nfs_storage_name) | first | default({})) }}" k8s_nfs_client_params: "{{ (backup_storage_config.nfs_client_params | selectattr('nfs_name', 'equalto', k8s_nfs_storage_name) | first | default({})) }}" - storage_nfs_client_params: "{{ backup_storage_config.nfs_client_params | default([]) }}" - storage_powervault_config: "{{ backup_storage_config.powervault_config | default({}) }}" + storage_nfs_client_params: "{{ backup_storage_config.nfs_client_params | default([], true) }}" + storage_powervault_config: "{{ backup_storage_config.powervault_config | default({}, true) }}" - name: Fail if nfs_client_params is missing ansible.builtin.fail: msg: "{{ msg_nfs_client_params_missing }}" - when: (storage_nfs_client_params | default([]) | length) == 0 + when: (storage_nfs_client_params | default([], true) | length) == 0 - name: Fail if any NFS client entry is missing required keys ansible.builtin.fail: diff --git a/upgrade/roles/import_input_parameters/tasks/transform_telemetry_config.yml 
b/upgrade/roles/import_input_parameters/tasks/transform_telemetry_config.yml index c112961faf..d86b476e39 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_telemetry_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_telemetry_config.yml @@ -46,19 +46,19 @@ ansible.builtin.set_fact: backup_telemetry_victoria_config: >- {{ backup_telemetry_config.victoria_metrics_configurations - | default(backup_telemetry_config.victoria_configurations | default({})) }} + | default(backup_telemetry_config.victoria_configurations | default({}, true)) }} backup_telemetry_kafka_config: >- - {{ backup_telemetry_config.kafka_configurations | default({}) }} + {{ backup_telemetry_config.kafka_configurations | default({}, true) }} backup_telemetry_victoria_logs_config: >- - {{ backup_telemetry_config.victoria_logs_configurations | default({}) }} + {{ backup_telemetry_config.victoria_logs_configurations | default({}, true) }} backup_telemetry_powerscale_config: >- - {{ backup_telemetry_config.powerscale_configurations | default({}) }} + {{ backup_telemetry_config.powerscale_configurations | default({}, true) }} backup_telemetry_sources: >- - {{ backup_telemetry_config.telemetry_sources | default({}) }} + {{ backup_telemetry_config.telemetry_sources | default({}, true) }} backup_telemetry_sinks: >- - {{ backup_telemetry_config.telemetry_sinks | default({}) }} + {{ backup_telemetry_config.telemetry_sinks | default({}, true) }} backup_telemetry_ldms_config: >- - {{ backup_telemetry_config.ldms_configurations | default({}) }} + {{ backup_telemetry_config.ldms_configurations | default({}, true) }} - name: Extract iDRAC telemetry support from backup (2.1 or 2.2 format) ansible.builtin.set_fact: @@ -66,7 +66,7 @@ {{ backup_telemetry_config.idrac_telemetry_support | default( - (backup_telemetry_sources.idrac | default({})).metrics_enabled + (backup_telemetry_sources.idrac | default({}, true)).metrics_enabled | default(telemetry_default_idrac_support) ) }} @@ 
-84,7 +84,7 @@ ansible.builtin.set_fact: telemetry_idrac_collection_targets: >- {{ - (backup_telemetry_sources.idrac | default({})).collection_targets + (backup_telemetry_sources.idrac | default({}, true)).collection_targets | default( telemetry_telemetry_collection_type.split(',') | map('trim') @@ -96,47 +96,47 @@ - name: Normalize VictoriaMetrics sink values from backup ansible.builtin.set_fact: telemetry_victoria_persistence_size: >- - {{ (backup_telemetry_sinks.victoria_metrics | default({})).persistence_size + {{ (backup_telemetry_sinks.victoria_metrics | default({}, true)).persistence_size | default(backup_telemetry_victoria_config.persistence_size | default(telemetry_default_victoria_persistence_size)) }} telemetry_victoria_retention_period: >- - {{ (backup_telemetry_sinks.victoria_metrics | default({})).retention_period + {{ (backup_telemetry_sinks.victoria_metrics | default({}, true)).retention_period | default(backup_telemetry_victoria_config.retention_period | default(telemetry_default_victoria_retention_period)) }} telemetry_additional_metric_remote_write_endpoints: >- - {{ (backup_telemetry_sinks.victoria_metrics | default({})).additional_metric_remote_write_endpoints - | default([]) }} + {{ (backup_telemetry_sinks.victoria_metrics | default({}, true)).additional_metric_remote_write_endpoints + | default([], true) }} - name: Normalize VictoriaLogs sink values from backup ansible.builtin.set_fact: telemetry_victoria_logs_storage_size: >- - {{ (backup_telemetry_sinks.victoria_logs | default({})).storage_size + {{ (backup_telemetry_sinks.victoria_logs | default({}, true)).storage_size | default(backup_telemetry_victoria_logs_config.storage_size | default(telemetry_default_victoria_logs_storage_size)) }} telemetry_victoria_logs_retention_period: >- - {{ (backup_telemetry_sinks.victoria_logs | default({})).retention_period + {{ (backup_telemetry_sinks.victoria_logs | default({}, true)).retention_period | 
default(backup_telemetry_victoria_logs_config.retention_period | default(telemetry_default_victoria_logs_retention_period)) }} telemetry_additional_log_write_endpoints: >- - {{ (backup_telemetry_sinks.victoria_logs | default({})).additional_log_write_endpoints - | default([]) }} + {{ (backup_telemetry_sinks.victoria_logs | default({}, true)).additional_log_write_endpoints + | default([], true) }} - name: Normalize Kafka sink values from backup ansible.builtin.set_fact: telemetry_kafka_persistence_size: >- - {{ (backup_telemetry_sinks.kafka | default({})).persistence_size + {{ (backup_telemetry_sinks.kafka | default({}, true)).persistence_size | default(backup_telemetry_kafka_config.persistence_size | default(telemetry_default_kafka_persistence_size)) }} telemetry_kafka_log_retention_hours: >- - {{ (backup_telemetry_sinks.kafka | default({})).log_retention_hours + {{ (backup_telemetry_sinks.kafka | default({}, true)).log_retention_hours | default(backup_telemetry_kafka_config.log_retention_hours | default(telemetry_default_kafka_log_retention_hours)) }} telemetry_kafka_log_retention_bytes: >- - {{ (backup_telemetry_sinks.kafka | default({})).log_retention_bytes + {{ (backup_telemetry_sinks.kafka | default({}, true)).log_retention_bytes | default(backup_telemetry_kafka_config.log_retention_bytes | default(telemetry_default_kafka_log_retention_bytes)) }} telemetry_kafka_log_segment_bytes: >- - {{ (backup_telemetry_sinks.kafka | default({})).log_segment_bytes + {{ (backup_telemetry_sinks.kafka | default({}, true)).log_segment_bytes | default(backup_telemetry_kafka_config.log_segment_bytes | default(telemetry_default_kafka_log_segment_bytes)) }} @@ -144,7 +144,7 @@ ansible.builtin.set_fact: telemetry_kafka_topic_partitions_raw: >- {{ - (backup_telemetry_sinks.kafka | default({})).topic_partitions + (backup_telemetry_sinks.kafka | default({}, true)).topic_partitions | default(backup_telemetry_kafka_config.topic_partitions | 
default(telemetry_default_kafka_topic_partitions)) }} @@ -204,7 +204,7 @@ ansible.builtin.set_fact: ldms_present_in_software_config: >- {{ - (backup_software_config.softwares | default([])) + (backup_software_config.softwares | default([], true)) | selectattr('name', 'defined') | selectattr('name', 'equalto', 'ldms') | list @@ -220,46 +220,46 @@ - name: Normalize LDMS source metrics_enabled from backup ansible.builtin.set_fact: telemetry_ldms_metrics_enabled: >- - {{ (backup_telemetry_sources.ldms | default({})).metrics_enabled + {{ (backup_telemetry_sources.ldms | default({}, true)).metrics_enabled | default(true if ldms_present_in_software_config | bool else telemetry_default_ldms_metrics_enabled) }} - name: Normalize DCGM and PowerScale source values from backup ansible.builtin.set_fact: telemetry_dcgm_support: >- - {{ (backup_telemetry_sources.dcgm | default({})).metrics_enabled + {{ (backup_telemetry_sources.dcgm | default({}, true)).metrics_enabled | default(backup_telemetry_config.dcgm_support | default(telemetry_default_dcgm_support)) }} telemetry_powerscale_metrics_enabled: >- - {{ (backup_telemetry_sources.powerscale | default({})).metrics_enabled + {{ (backup_telemetry_sources.powerscale | default({}, true)).metrics_enabled | default(backup_telemetry_powerscale_config.powerscale_telemetry_support | default(telemetry_default_powerscale_support)) }} telemetry_powerscale_logs_enabled: >- - {{ (backup_telemetry_sources.powerscale | default({})).logs_enabled + {{ (backup_telemetry_sources.powerscale | default({}, true)).logs_enabled | default(backup_telemetry_powerscale_config.powerscale_log_enabled | default(telemetry_default_powerscale_log_enabled)) }} - name: Normalize OME source values from backup (new in 2.2) ansible.builtin.set_fact: telemetry_ome_metrics_enabled: >- - {{ (backup_telemetry_sources.ome | default({})).metrics_enabled + {{ (backup_telemetry_sources.ome | default({}, true)).metrics_enabled | default(telemetry_default_ome_metrics_enabled) }} 
telemetry_ome_logs_enabled: >- - {{ (backup_telemetry_sources.ome | default({})).logs_enabled + {{ (backup_telemetry_sources.ome | default({}, true)).logs_enabled | default(telemetry_default_ome_logs_enabled) }} - name: Normalize UFM source values from backup (new in 2.2) ansible.builtin.set_fact: telemetry_ufm_metrics_enabled: >- - {{ (backup_telemetry_sources.ufm | default({})).metrics_enabled + {{ (backup_telemetry_sources.ufm | default({}, true)).metrics_enabled | default(telemetry_default_ufm_metrics_enabled) }} telemetry_ufm_logs_enabled: >- - {{ (backup_telemetry_sources.ufm | default({})).logs_enabled + {{ (backup_telemetry_sources.ufm | default({}, true)).logs_enabled | default(telemetry_default_ufm_logs_enabled) }} - name: Normalize UFM configuration values from backup (new in 2.2) ansible.builtin.set_fact: backup_telemetry_ufm_config: >- - {{ backup_telemetry_config.ufm_configuration | default({}) }} + {{ backup_telemetry_config.ufm_configuration | default({}, true) }} - name: Extract UFM configuration fields ansible.builtin.set_fact: @@ -288,16 +288,16 @@ - name: Normalize VAST source values from backup (new in 2.2) ansible.builtin.set_fact: telemetry_vast_metrics_enabled: >- - {{ (backup_telemetry_sources.vast | default({})).metrics_enabled + {{ (backup_telemetry_sources.vast | default({}, true)).metrics_enabled | default(telemetry_default_vast_metrics_enabled) }} telemetry_vast_logs_enabled: >- - {{ (backup_telemetry_sources.vast | default({})).logs_enabled + {{ (backup_telemetry_sources.vast | default({}, true)).logs_enabled | default(telemetry_default_vast_logs_enabled) }} - name: Normalize VAST configuration values from backup (new in 2.2) ansible.builtin.set_fact: backup_telemetry_vast_config: >- - {{ backup_telemetry_config.vast_configuration | default({}) }} + {{ backup_telemetry_config.vast_configuration | default({}, true) }} - name: Extract VAST configuration fields ansible.builtin.set_fact: @@ -329,16 +329,16 @@ - name: Normalize bridge 
values from backup (2.2 format or defaults) ansible.builtin.set_fact: telemetry_vector_ldms_metrics_enabled: >- - {{ ((backup_telemetry_config.telemetry_bridges | default({})).vector_ldms | default({})).metrics_enabled + {{ ((backup_telemetry_config.telemetry_bridges | default({}, true)).vector_ldms | default({}, true)).metrics_enabled | default(telemetry_default_vector_ldms_metrics_enabled) }} telemetry_vector_ome_metrics_enabled: >- - {{ ((backup_telemetry_config.telemetry_bridges | default({})).vector_ome | default({})).metrics_enabled + {{ ((backup_telemetry_config.telemetry_bridges | default({}, true)).vector_ome | default({}, true)).metrics_enabled | default(telemetry_default_vector_ome_metrics_enabled) }} telemetry_vector_ome_logs_enabled: >- - {{ ((backup_telemetry_config.telemetry_bridges | default({})).vector_ome | default({})).logs_enabled + {{ ((backup_telemetry_config.telemetry_bridges | default({}, true)).vector_ome | default({}, true)).logs_enabled | default(telemetry_default_vector_ome_logs_enabled) }} telemetry_vector_ome_identifier: >- - {{ ((backup_telemetry_config.telemetry_bridges | default({})).vector_ome | default({})).ome_identifier + {{ ((backup_telemetry_config.telemetry_bridges | default({}, true)).vector_ome | default({}, true)).ome_identifier | default('ome') }} - name: Normalize PowerScale configuration values from backup diff --git a/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 b/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 index 1f4025b432..7daf9b941d 100644 --- a/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 @@ -162,7 +162,7 @@ # user_registry: # - { host: "172.16.107.254:4000", cert_path: "/opt/omnia/domain.crt", key_path: "/opt/omnia/domain.key" } user_registry: -{% set _user_registry = local_repo_user_registry | default([]) %} +{% set _user_registry = local_repo_user_registry | 
default([], true) %} {% if (_user_registry | length) > 0 %} {% for _reg in _user_registry %} - { host: {{ (_reg.host | default('')) | to_json }}, cert_path: {{ (_reg.cert_path | default('')) | to_json }}, key_path: {{ (_reg.key_path | default('')) | to_json }} } @@ -171,14 +171,14 @@ user_registry: # user_repo_url_x86_64: # - { url: "", gpgkey: "", sslcacert: "", sslclientkey: "", sslclientcert: "", name: "slurm_custom" } user_repo_url_x86_64: -{% set _user_repo_url_x86_64 = local_repo_user_repo_url_x86_64 | default([]) %} +{% set _user_repo_url_x86_64 = local_repo_user_repo_url_x86_64 | default([], true) %} {% if (_user_repo_url_x86_64 | length) > 0 %} {% for _repo in _user_repo_url_x86_64 %} - { url: {{ (_repo.url | default('')) | to_json }}, gpgkey: {{ (_repo.gpgkey | default('')) | to_json }}, sslcacert: {{ (_repo.sslcacert | default('')) | to_json }}, sslclientkey: {{ (_repo.sslclientkey | default('')) | to_json }}, sslclientcert: {{ (_repo.sslclientcert | default('')) | to_json }}, name: {{ (_repo.name | default('')) | to_json }} } {% endfor %} {% endif %} user_repo_url_aarch64: -{% set _user_repo_url_aarch64 = local_repo_user_repo_url_aarch64 | default([]) %} +{% set _user_repo_url_aarch64 = local_repo_user_repo_url_aarch64 | default([], true) %} {% if (_user_repo_url_aarch64 | length) > 0 %} {% for _repo in _user_repo_url_aarch64 %} - { url: {{ (_repo.url | default('')) | to_json }}, gpgkey: {{ (_repo.gpgkey | default('')) | to_json }}, sslcacert: {{ (_repo.sslcacert | default('')) | to_json }}, sslclientkey: {{ (_repo.sslclientkey | default('')) | to_json }}, sslclientcert: {{ (_repo.sslclientcert | default('')) | to_json }}, name: {{ (_repo.name | default('')) | to_json }} } @@ -190,14 +190,14 @@ user_repo_url_aarch64: # - { url: "http://BaseOS.com/BaseOS/x86_64/os/", gpgkey: "http://BaseOS.com/BaseOS/x86_64/os/RPM-GPG-KEY", sslcacert: "", sslclientkey: "", sslclientcert: "", name: "baseos"} # - { url: "http://AppStream.com/AppStream/x86_64/os/", gpgkey: 
"http://AppStream.com/AppStream/x86_64/os/RPM-GPG-KEY", sslcacert: "", sslclientkey: "", sslclientcert: "", name: "appstream" } rhel_os_url_x86_64: -{% set _rhel_os_url_x86_64 = local_repo_rhel_os_url_x86_64 | default([]) %} +{% set _rhel_os_url_x86_64 = local_repo_rhel_os_url_x86_64 | default([], true) %} {% if (_rhel_os_url_x86_64 | length) > 0 %} {% for _repo in _rhel_os_url_x86_64 %} - { url: {{ (_repo.url | default('')) | to_json }}, gpgkey: {{ (_repo.gpgkey | default('')) | to_json }}, sslcacert: {{ (_repo.sslcacert | default('')) | to_json }}, sslclientkey: {{ (_repo.sslclientkey | default('')) | to_json }}, sslclientcert: {{ (_repo.sslclientcert | default('')) | to_json }}, name: {{ (_repo.name | default('')) | to_json }} } {% endfor %} {% endif %} rhel_os_url_aarch64: -{% set _rhel_os_url_aarch64 = local_repo_rhel_os_url_aarch64 | default([]) %} +{% set _rhel_os_url_aarch64 = local_repo_rhel_os_url_aarch64 | default([], true) %} {% if (_rhel_os_url_aarch64 | length) > 0 %} {% for _repo in _rhel_os_url_aarch64 %} - { url: {{ (_repo.url | default('')) | to_json }}, gpgkey: {{ (_repo.gpgkey | default('')) | to_json }}, sslcacert: {{ (_repo.sslcacert | default('')) | to_json }}, sslclientkey: {{ (_repo.sslclientkey | default('')) | to_json }}, sslclientcert: {{ (_repo.sslclientcert | default('')) | to_json }}, name: {{ (_repo.name | default('')) | to_json }} } @@ -227,14 +227,14 @@ omnia_repo_url_rhel_aarch64: # - { url: "https://rpm.grafana.com/", gpgkey: "", name: "grafana" } # - { url: "https://repo.example.com/x86_64/", gpgkey: "", name: "custom-repo", sslcacert: "/path/ca.crt", sslclientkey: "/path/client.key", sslclientcert: "/path/client.crt" } additional_repos_x86_64: -{% set _additional_repos_x86_64 = local_repo_additional_repos_x86_64 | default([]) %} +{% set _additional_repos_x86_64 = local_repo_additional_repos_x86_64 | default([], true) %} {% if (_additional_repos_x86_64 | length) > 0 %} {% for _repo in _additional_repos_x86_64 %} - { url: {{ 
(_repo.url | default('')) | to_json }}, gpgkey: {{ (_repo.gpgkey | default('')) | to_json }}, name: {{ (_repo.name | default('')) | to_json }}, sslcacert: {{ (_repo.sslcacert | default('')) | to_json }}, sslclientkey: {{ (_repo.sslclientkey | default('')) | to_json }}, sslclientcert: {{ (_repo.sslclientcert | default('')) | to_json }} } {% endfor %} {% endif %} additional_repos_aarch64: -{% set _additional_repos_aarch64 = local_repo_additional_repos_aarch64 | default([]) %} +{% set _additional_repos_aarch64 = local_repo_additional_repos_aarch64 | default([], true) %} {% if (_additional_repos_aarch64 | length) > 0 %} {% for _repo in _additional_repos_aarch64 %} - { url: {{ (_repo.url | default('')) | to_json }}, gpgkey: {{ (_repo.gpgkey | default('')) | to_json }}, name: {{ (_repo.name | default('')) | to_json }}, sslcacert: {{ (_repo.sslcacert | default('')) | to_json }}, sslclientkey: {{ (_repo.sslclientkey | default('')) | to_json }}, sslclientcert: {{ (_repo.sslclientcert | default('')) | to_json }} } diff --git a/upgrade/roles/import_input_parameters/templates/network_spec.j2 b/upgrade/roles/import_input_parameters/templates/network_spec.j2 index 864fd39014..b46c12f90b 100644 --- a/upgrade/roles/import_input_parameters/templates/network_spec.j2 +++ b/upgrade/roles/import_input_parameters/templates/network_spec.j2 @@ -70,11 +70,11 @@ Networks: primary_oim_admin_ip: "{{ admin_network.primary_oim_admin_ip | default('') }}" primary_oim_bmc_ip: "{{ admin_network.primary_oim_bmc_ip | default('') }}" dynamic_range: "{{ admin_network.dynamic_range | default('') }}" - dns: {{ admin_network.dns | default([]) }} - ntp_servers: {{ admin_network.ntp_servers | default([]) }} - additional_subnets: {{ admin_network.additional_subnets | default([]) }} + dns: {{ admin_network.dns | default([], true) }} + ntp_servers: {{ admin_network.ntp_servers | default([], true) }} + additional_subnets: {{ admin_network.additional_subnets | default([], true) }} - ib_network: subnet: "{{ 
ib_network.subnet | default('') }}" netmask_bits: "{{ ib_network.netmask_bits | default(admin_network_netmask_bits | default(network_default_netmask_bits)) }}" - dns: {{ ib_network.dns | default([]) }} + dns: {{ ib_network.dns | default([], true) }} diff --git a/upgrade/roles/import_input_parameters/templates/omnia_config.j2 b/upgrade/roles/import_input_parameters/templates/omnia_config.j2 index 91ba9622a1..0c86f50913 100644 --- a/upgrade/roles/import_input_parameters/templates/omnia_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/omnia_config.j2 @@ -118,7 +118,7 @@ # Thes files will be written into the slurm_config directory with .conf suffix slurm_cluster: -{% set _slurm_cluster = omnia_slurm_cluster | default([]) %} +{% set _slurm_cluster = omnia_slurm_cluster | default([], true) %} {% if (_slurm_cluster | length) > 0 %} {% for _cluster in _slurm_cluster %} - cluster_name: {{ _cluster.cluster_name | default('') }} @@ -242,7 +242,7 @@ slurm_cluster: service_k8s_cluster: -{% set _service_k8s_cluster = omnia_service_k8s_cluster | default([]) %} +{% set _service_k8s_cluster = omnia_service_k8s_cluster | default([], true) %} {% if (_service_k8s_cluster | length) > 0 %} {% for _cluster in _service_k8s_cluster %} - cluster_name: {{ _cluster.cluster_name | default('') }} diff --git a/upgrade/roles/import_input_parameters/templates/storage_config.j2 b/upgrade/roles/import_input_parameters/templates/storage_config.j2 index 2220ffd560..1a0a0c617d 100644 --- a/upgrade/roles/import_input_parameters/templates/storage_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/storage_config.j2 @@ -189,12 +189,12 @@ mount_params: # # This mounts the whole powervault volume with to /mnt/slurm # # followed by bind creation of dir under /mnt/slurm # # node_key is the key in cloud-init so that its unique per host -{% set pv = storage_powervault_config | default({}) %} +{% set pv = storage_powervault_config | default({}, true) %} {% if pv %} powervault_config: - 
name: powervault_slurm_ctld ip: - {% for _ip in pv.ip | default([]) %} + {% for _ip in pv.ip | default([], true) %} - {{ _ip }} {% endfor %} port: {{ pv.port | default('') }} diff --git a/upgrade/roles/import_input_parameters/templates/telemetry_config.j2 b/upgrade/roles/import_input_parameters/templates/telemetry_config.j2 index 80971b74ff..132bf50872 100644 --- a/upgrade/roles/import_input_parameters/templates/telemetry_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/telemetry_config.j2 @@ -295,7 +295,7 @@ telemetry_sinks: # additional_metric_remote_write_endpoints: # - url: https://external-metrics-server:8480/insert/0/prometheus/api/v1/write # tls_insecure_skip_verify: false - additional_metric_remote_write_endpoints: {{ telemetry_additional_metric_remote_write_endpoints | default([]) | to_json }} + additional_metric_remote_write_endpoints: {{ telemetry_additional_metric_remote_write_endpoints | default([], true) | to_json }} # -------------------------------------------------------------------------- # victoria_logs — Centralized log storage and querying @@ -322,7 +322,7 @@ telemetry_sinks: # additional_log_write_endpoints: # - url: https://external-logs-server:9481/internal/insert # tls_insecure_skip_verify: false - additional_log_write_endpoints: {{ telemetry_additional_log_write_endpoints | default([]) | to_json }} + additional_log_write_endpoints: {{ telemetry_additional_log_write_endpoints | default([], true) | to_json }} # -------------------------------------------------------------------------- # Kafka — Distributed streaming platform @@ -391,7 +391,7 @@ ldms_configurations: {% if telemetry_ldms_sampler_configurations is none %} null {% else %} -{% for _plugin in (telemetry_ldms_sampler_configurations | default([])) %} +{% for _plugin in (telemetry_ldms_sampler_configurations | default([], true)) %} - plugin_name: {{ _plugin.plugin_name | default('') }} config_parameters: {{ _plugin.config_parameters | default('') | to_json }} 
activation_parameters: {{ _plugin.activation_parameters | default('interval=30000000') | to_json }} diff --git a/upgrade/roles/prep_local_repo/tasks/create_staging.yml b/upgrade/roles/prep_local_repo/tasks/create_staging.yml index 8ef5856b26..581452dd9e 100644 --- a/upgrade/roles/prep_local_repo/tasks/create_staging.yml +++ b/upgrade/roles/prep_local_repo/tasks/create_staging.yml @@ -45,7 +45,7 @@ current_software_config: "{{ current_software_config }}" architectures: "{{ upgrade_active_architectures }}" target_omnia_version: "{{ upgrade_target_version }}" - calculated_hop_chains: "{{ calculated_hop_chains | default([]) }}" + calculated_hop_chains: "{{ calculated_hop_chains | default([], true) }}" register: _staging_result - name: "Staging — Display staging summary" diff --git a/upgrade/roles/prep_local_repo/tasks/sync_local_repo.yml b/upgrade/roles/prep_local_repo/tasks/sync_local_repo.yml index 675a33c8e2..eecea9920e 100644 --- a/upgrade/roles/prep_local_repo/tasks/sync_local_repo.yml +++ b/upgrade/roles/prep_local_repo/tasks/sync_local_repo.yml @@ -30,7 +30,7 @@ # Initialise sub_final_repo_urls (normally set by validate_subscription role) - name: "Sync — Initialise subscription repo URLs" ansible.builtin.set_fact: - sub_final_repo_urls: "{{ sub_final_repo_urls | default({}) }}" + sub_final_repo_urls: "{{ sub_final_repo_urls | default({}, true) }}" # Get actual Pulp URL from pulp status command (same as pulp_validation role) - name: "Sync — Get Pulp status" diff --git a/upgrade/roles/upgrade_build_stream/tasks/gitlab_config_upgrade.yml b/upgrade/roles/upgrade_build_stream/tasks/gitlab_config_upgrade.yml index 5a4676b2e1..e7a5dbfb1d 100644 --- a/upgrade/roles/upgrade_build_stream/tasks/gitlab_config_upgrade.yml +++ b/upgrade/roles/upgrade_build_stream/tasks/gitlab_config_upgrade.yml @@ -140,7 +140,7 @@ ACTION REQUIRED: Wait for all pipelines to complete or cancel them before re-running. 
============================================================ - when: _active_pipeline_list | default([]) | length > 0 + when: _active_pipeline_list | default([], true) | length > 0 # ══════════════════════════════════════════════════════════════════ # GL-TAG: Create pre-upgrade tag for rollback safety diff --git a/upgrade/roles/upgrade_cluster/vars/main.yml b/upgrade/roles/upgrade_cluster/vars/main.yml index 6e141b6538..4f287a7dfa 100644 --- a/upgrade/roles/upgrade_cluster/vars/main.yml +++ b/upgrade/roles/upgrade_cluster/vars/main.yml @@ -14,5 +14,5 @@ --- storage_config_path: "/opt/omnia/input/project_default/storage_config.yml" storage_content: "{{ lookup('file', storage_config_path, errors='ignore') | default('') }}" -storage_yaml: "{{ storage_content | from_yaml | default({}) }}" -nfs_params: "{{ storage_yaml.nfs_client_params | default([]) }}" +storage_yaml: "{{ storage_content | from_yaml | default({}, true) }}" +nfs_params: "{{ storage_yaml.nfs_client_params | default([], true) }}" diff --git a/upgrade/roles/upgrade_k8s/tasks/execute_single_hop.yml b/upgrade/roles/upgrade_k8s/tasks/execute_single_hop.yml index 3a0a6c2557..cc95b84133 100644 --- a/upgrade/roles/upgrade_k8s/tasks/execute_single_hop.yml +++ b/upgrade/roles/upgrade_k8s/tasks/execute_single_hop.yml @@ -46,7 +46,7 @@ - name: "Hop — Check if hop already completed" ansible.builtin.set_fact: _hop_completed: >- - {{ (upgrade_status.multi_hop.hops | default([])) + {{ (upgrade_status.multi_hop.hops | default([], true)) | selectattr('to', 'equalto', _current_hop.to_version) | selectattr('status', 'equalto', 'completed') | list | length > 0 }} @@ -147,7 +147,7 @@ multi_hop: current_hop: "{{ _hop_idx }}" hops: >- - {{ (upgrade_status.multi_hop.hops | default([])) + {{ (upgrade_status.multi_hop.hops | default([], true)) | rejectattr('to', 'equalto', _current_hop.to_version) | list + [{ @@ -784,7 +784,7 @@ multi_hop: current_hop: "{{ _hop_idx }}" hops: >- - {{ (upgrade_status.multi_hop.hops | default([])) + 
{{ (upgrade_status.multi_hop.hops | default([], true)) | rejectattr('to', 'equalto', _current_hop.to_version) | list + [{ diff --git a/upgrade/roles/upgrade_k8s/tasks/powerscale_prepare_upgrade.yml b/upgrade/roles/upgrade_k8s/tasks/powerscale_prepare_upgrade.yml index cac3e9165c..14e367b9cf 100644 --- a/upgrade/roles/upgrade_k8s/tasks/powerscale_prepare_upgrade.yml +++ b/upgrade/roles/upgrade_k8s/tasks/powerscale_prepare_upgrade.yml @@ -46,7 +46,7 @@ msg: "{{ msg_powerscale_controller_pods_missing }}" when: > powerscale_controller_pods.rc != 0 or - (powerscale_controllers['items'] | default([])) | length == 0 + (powerscale_controllers['items'] | default([], true)) | length == 0 - name: Check PowerScale node daemonset delegate_to: "{{ kube_vip }}" diff --git a/upgrade/roles/upgrade_k8s/tasks/preflight_checks_pulp.yml b/upgrade/roles/upgrade_k8s/tasks/preflight_checks_pulp.yml index b0d33c4c00..0f1f5184bd 100644 --- a/upgrade/roles/upgrade_k8s/tasks/preflight_checks_pulp.yml +++ b/upgrade/roles/upgrade_k8s/tasks/preflight_checks_pulp.yml @@ -66,13 +66,15 @@ - name: Verify required tags exist for each image ansible.builtin.fail: - msg: "Required image {{ item.item.name }}:{{ item.item.tag }} not found in Pulp registry. Available tags: {{ item.json.tags | default([]) | join(', ') }}" + msg: >- + Required image {{ item.item.name }}:{{ item.item.tag }} not found in Pulp registry. 
+ Available tags: {{ item.json.tags | default([], true) | join(', ') }} loop: "{{ image_checks.results }}" loop_control: label: "{{ item.item.name }}:{{ item.item.tag }}" when: - item.status == 200 - - item.item.tag not in (item.json.tags | default([])) + - item.item.tag not in (item.json.tags | default([], true)) - name: Warn if image check failed ansible.builtin.debug: diff --git a/upgrade/roles/upgrade_k8s/tasks/step_drain.yml b/upgrade/roles/upgrade_k8s/tasks/step_drain.yml index a15514c1d8..daa994ae7d 100644 --- a/upgrade/roles/upgrade_k8s/tasks/step_drain.yml +++ b/upgrade/roles/upgrade_k8s/tasks/step_drain.yml @@ -59,7 +59,7 @@ | combine({ item.namespace ~ '/' ~ item.name: ( - item.matchLabels | default({}) + item.matchLabels | default({}, true) | dictsort | map('join', '=') | join(',') diff --git a/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml b/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml index 28a20e21d1..df8aae5793 100644 --- a/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml +++ b/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml @@ -220,7 +220,7 @@ set -o pipefail ochami smd --cacert {{ ca_cert_path | default('/root_ca/root_ca.crt') }} \ component get 2>&1 | head -20 || echo "ochami_unavailable" - environment: "{{ ci_reload_ochami_env | default({}) }}" + environment: "{{ ci_reload_ochami_env | default({}, true) }}" register: ochami_smd_check changed_when: false failed_when: false @@ -267,7 +267,7 @@ ansible.builtin.shell: | set -o pipefail /usr/bin/ochami bss boot params get -F yaml 2>&1 | grep -c 'cloud-init' || echo "0" - environment: "{{ ci_reload_ochami_env | default({}) }}" + environment: "{{ ci_reload_ochami_env | default({}, true) }}" register: bss_ci_ds_check changed_when: false failed_when: false @@ -303,7 +303,7 @@ ansible.builtin.shell: | set -o pipefail /usr/bin/ochami cloud-init service status 2>&1 - environment: "{{ ci_reload_ochami_env | default({}) }}" + 
environment: "{{ ci_reload_ochami_env | default({}, true) }}" register: ci_http_health changed_when: false failed_when: false @@ -328,7 +328,7 @@ ansible.builtin.shell: | set -o pipefail /usr/bin/ochami cloud-init group get 2>&1 || echo "ci_unavailable" - environment: "{{ ci_reload_ochami_env | default({}) }}" + environment: "{{ ci_reload_ochami_env | default({}, true) }}" register: ci_group_list changed_when: false failed_when: false @@ -354,7 +354,7 @@ ansible.builtin.shell: | set -o pipefail /usr/bin/ochami cloud-init node get 2>&1 || echo "ci_node_unavailable" - environment: "{{ ci_reload_ochami_env | default({}) }}" + environment: "{{ ci_reload_ochami_env | default({}, true) }}" register: ci_node_list changed_when: false failed_when: false diff --git a/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml b/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml index 19f6c9ffd6..f79fab053e 100644 --- a/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml +++ b/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml @@ -27,7 +27,7 @@ state: absent when: - slurm_ctld_host is defined - - slurm_ctld_host not in (hostvars['localhost']['slurm_previously_rebooted'] | default([])) + - slurm_ctld_host not in (hostvars['localhost']['slurm_previously_rebooted'] | default([], true)) - name: Check for running jobs on slurm cluster ansible.builtin.shell: diff --git a/upgrade/roles/upgrade_slurm/tasks/nfs_client.yml b/upgrade/roles/upgrade_slurm/tasks/nfs_client.yml index e3adbf03fd..cb688b9e77 100644 --- a/upgrade/roles/upgrade_slurm/tasks/nfs_client.yml +++ b/upgrade/roles/upgrade_slurm/tasks/nfs_client.yml @@ -15,7 +15,7 @@ ## Mount an entry on the OIM node. 
- name: Resolve mount_params profile if specified ansible.builtin.set_fact: - resolved_mount_profile: "{{ storage_config.mount_params[item.mount_params] | default({}) }}" + resolved_mount_profile: "{{ storage_config.mount_params[item.mount_params] | default({}, true) }}" when: item.mount_params is defined and item.mount_params in storage_config.mount_params - name: Initialize client mount path (new schema) diff --git a/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml b/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml index 9655f55130..ea699200b8 100644 --- a/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml +++ b/upgrade/roles/upgrade_telemetry/tasks/include_required_input.yml @@ -123,10 +123,10 @@ ansible.builtin.set_fact: victoria_in_targets: >- {{ - telemetry_config.telemetry_sources.idrac.collection_targets | default([]) | select('search', 'victoria_metrics') | list | length > 0 - or telemetry_config.telemetry_sources.powerscale.collection_targets | default([]) | select('search', 'victoria_metrics') | list | length > 0 - or telemetry_config.telemetry_sources.vast.collection_targets | default([]) | select('search', 'victoria_metrics') | list | length > 0 - or telemetry_config.telemetry_sources.ufm.collection_targets | default([]) | select('search', 'victoria_metrics') | list | length > 0 + telemetry_config.telemetry_sources.idrac.collection_targets | default([], true) | select('search', 'victoria_metrics') | list | length > 0 + or telemetry_config.telemetry_sources.powerscale.collection_targets | default([], true) | select('search', 'victoria_metrics') | list | length > 0 + or telemetry_config.telemetry_sources.vast.collection_targets | default([], true) | select('search', 'victoria_metrics') | list | length > 0 + or telemetry_config.telemetry_sources.ufm.collection_targets | default([], true) | select('search', 'victoria_metrics') | list | length > 0 }} when: - telemetry_config is defined diff --git 
a/upgrade/upgrade.yml b/upgrade/upgrade.yml index 35398f2ce7..7d15568575 100644 --- a/upgrade/upgrade.yml +++ b/upgrade/upgrade.yml @@ -257,8 +257,11 @@ when: - item in tag_dependencies - tag_dependencies[item] | difference(requested_tags) | length > 0 - - tag_dependencies[item] | reject('in', (manifest.component_status | default({})) | dict2items | selectattr('value', 'in', ['completed', 'skipped']) | - map(attribute='key') | list ) | list | length > 0 + - >- + tag_dependencies[item] | reject('in', + (manifest.component_status | default({}, true)) + | dict2items | selectattr('value', 'in', ['completed', 'skipped']) + | map(attribute='key') | list) | list | length > 0 - name: Report already-upgraded components (will be skipped) ansible.builtin.debug: From 765f0ce206a33d5dcec60d17a0cef141849abcdf Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Thu, 4 Jun 2026 14:45:59 +0530 Subject: [PATCH 10/78] Slurm ansible 2.20 fix Signed-off-by: Abhishek S A --- .../roles/mount_config/tasks/process_single_swap.yml | 4 ++-- .../slurm_config/tasks/validate_path_overrides.yml | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/provision/roles/mount_config/tasks/process_single_swap.yml b/provision/roles/mount_config/tasks/process_single_swap.yml index 3eff9cc8d3..b6cd47e475 100644 --- a/provision/roles/mount_config/tasks/process_single_swap.yml +++ b/provision/roles/mount_config/tasks/process_single_swap.yml @@ -22,8 +22,8 @@ that: - swap_item.filename is defined - swap_item.size is defined - - swap_item.filename | regex_search('^/[a-zA-Z0-9/_.-]+$') - - swap_item.size | regex_search('^(auto|[0-9]+[BKMGT]?)$') + - swap_item.filename | regex_search('^/[a-zA-Z0-9/_.-]+$') is not none + - swap_item.size | regex_search('^(auto|[0-9]+[BKMGT]?)$') is not none fail_msg: "Invalid swap configuration for {{ swap_item.filename | default('unnamed') }}" - name: Determine target functional groups for swap diff --git 
a/provision/roles/slurm_config/tasks/validate_path_overrides.yml b/provision/roles/slurm_config/tasks/validate_path_overrides.yml index c4a1783b02..6685b2a81e 100644 --- a/provision/roles/slurm_config/tasks/validate_path_overrides.yml +++ b/provision/roles/slurm_config/tasks/validate_path_overrides.yml @@ -29,9 +29,9 @@ and slurm_merged_dict.get(item) | list | length > 0) - >- not ((slurm_merged_dict.get(item) is string - and slurm_merged_dict.get(item) | regex_search('^/')) + and slurm_merged_dict.get(item) | regex_search('^/') is not none) or (slurm_merged_dict.get(item) is iterable - and (slurm_merged_dict.get(item) | first) | regex_search('^/'))) + and (slurm_merged_dict.get(item) | first) | regex_search('^/') is not none)) loop: - SlurmctldLogFile - SlurmdLogFile @@ -76,9 +76,9 @@ and slurmdbd_merged_dict.get(item) | list | length > 0) - >- not ((slurmdbd_merged_dict.get(item) is string - and slurmdbd_merged_dict.get(item) | regex_search('^/')) + and slurmdbd_merged_dict.get(item) | regex_search('^/') is not none) or (slurmdbd_merged_dict.get(item) is iterable - and (slurmdbd_merged_dict.get(item) | first) | regex_search('^/'))) + and (slurmdbd_merged_dict.get(item) | first) | regex_search('^/') is not none)) loop: - LogFile - PidFile @@ -100,8 +100,8 @@ and cgroup_merged_dict.get(item) | list | length > 0) - >- not ((cgroup_merged_dict.get(item) is string - and cgroup_merged_dict.get(item) | regex_search('^/')) + and cgroup_merged_dict.get(item) | regex_search('^/') is not none) or (cgroup_merged_dict.get(item) is iterable - and (cgroup_merged_dict.get(item) | first) | regex_search('^/'))) + and (cgroup_merged_dict.get(item) | first) | regex_search('^/') is not none)) loop: - CgroupMountpoint From 8b5bf1814bda67ec1c481301a95ce3dbc5917b63 Mon Sep 17 00:00:00 2001 From: Venu-p1 <236371043+Venu-p1@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:13:13 +0530 Subject: [PATCH 11/78] Simplify ID format to use package name only (version stripped) instead of 
{name}_{version}_{type}. Adding unit tests for id generation function (#4672) * Simplify ID format to use package name only (version stripped) instead of {name}_{version}_{type}. Adding unit tests for id generation function Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> * Adding copyright Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> --------- Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> --- .../catalog/tests/test_generate_catalog_id.py | 84 + build_stream/generate_catalog.py | 50 +- examples/catalog/catalog_rhel.json | 1760 ++++++++--------- .../catalog_rhel_aarch64_with_slurm_only.json | 1122 +++++------ .../catalog_rhel_with_nfs_provisioner.json | 1652 ++++++++-------- examples/catalog/catalog_rhel_x86_64.json | 1228 ++++++------ .../catalog_rhel_x86_64_with_slurm_only.json | 654 +++--- 7 files changed, 3320 insertions(+), 3230 deletions(-) create mode 100644 build_stream/core/catalog/tests/test_generate_catalog_id.py diff --git a/build_stream/core/catalog/tests/test_generate_catalog_id.py b/build_stream/core/catalog/tests/test_generate_catalog_id.py new file mode 100644 index 0000000000..00807b3e41 --- /dev/null +++ b/build_stream/core/catalog/tests/test_generate_catalog_id.py @@ -0,0 +1,84 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import sys +from pathlib import Path + +# Add parent directory to path to import generate_catalog +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) + +from generate_catalog import _generate_human_readable_id + +class TestGenerateHumanReadableId(unittest.TestCase): + def setUp(self): + self.used_ids = set() + + def test_basic_names(self): + # Should remain unchanged + self.assertEqual(_generate_human_readable_id("apptainer", "rpm", None, self.used_ids), "apptainer") + self.assertEqual(_generate_human_readable_id("device-mapper-multipath", "rpm", None, self.used_ids), "device-mapper-multipath") + + def test_version_in_name_exact(self): + # Should strip exact version suffix + self.assertEqual(_generate_human_readable_id("external-snapshotter-v8.4.0", "git", "v8.4.0", self.used_ids), "external-snapshotter") + + def test_version_in_name_v_prefixed(self): + # Should strip if 'v' prefix is in name but not in version + self.assertEqual(_generate_human_readable_id("app-v1.0.0", "rpm", "1.0.0", self.used_ids), "app") + + def test_version_in_name_dots_replaced(self): + # Should strip if dots are replaced by hyphens + self.assertEqual(_generate_human_readable_id("helm-charts-2-16-0", "git", "2.16.0", self.used_ids), "helm-charts") + + def test_pip_module_format(self): + # PyMySQL==1.1.2 -> PyMySQL + self.assertEqual(_generate_human_readable_id("PyMySQL==1.1.2", "pip_module", None, self.used_ids), "PyMySQL") + + def test_regex_fallback_no_version(self): + # Regex should strip the version even without explicit pkg_version + self.assertEqual(_generate_human_readable_id("calico-v3.31.4", "manifest", None, self.used_ids), "calico") + self.assertEqual(_generate_human_readable_id("cert-manager-v1-10-0", "tarball", None, self.used_ids), "cert-manager") + self.assertEqual(_generate_human_readable_id("helm-v3-20-1-amd64", "tarball", None, self.used_ids), "helm-amd64") + self.assertEqual(_generate_human_readable_id("helm-v3-20-1-chart", "tarball", 
None, self.used_ids), "helm-chart") + self.assertEqual(_generate_human_readable_id("helm-v3-20-1-anything-suffixed", "tarball", None, self.used_ids), "helm-anything-suffixed") + self.assertEqual(_generate_human_readable_id("metallb-native-v0-15-3", "manifest", None, self.used_ids), "metallb-native") + self.assertEqual(_generate_human_readable_id("strimzi-kafka-operator-helm-3-chart-0-48-0", "tarball", None, self.used_ids), "strimzi-kafka-operator-helm-3-chart") + self.assertEqual(_generate_human_readable_id("victoria-metrics-operator-0-59-3", "tarball", None, self.used_ids), "victoria-metrics-operator") + self.assertEqual(_generate_human_readable_id("python3-PyMySQL-1.1.2", "rpm", None, self.used_ids), "python3-PyMySQL") + self.assertEqual(_generate_human_readable_id("nfs-subdir-external-provisioner-4-0-18", "tarball", None, self.used_ids), "nfs-subdir-external-provisioner") + + def test_docker_image_without_tag_in_name(self): + # Docker images usually don't have the tag in the name field, just the image path + self.assertEqual(_generate_human_readable_id("docker.io/library/python", "image", "3.12-slim", self.used_ids), "docker.io/library/python") + + def test_collision_handling(self): + # First call gets the base name + id1 = _generate_human_readable_id("calico", "rpm", None, self.used_ids) + self.assertEqual(id1, "calico") + + # Second call with the same base name gets _1 + id2 = _generate_human_readable_id("calico-v1.0.0", "tarball", None, self.used_ids) + self.assertEqual(id2, "calico_1") + + # Third call gets _2 + id3 = _generate_human_readable_id("calico-v2.0.0", "manifest", None, self.used_ids) + self.assertEqual(id3, "calico_2") + + self.assertIn("calico", self.used_ids) + self.assertIn("calico_1", self.used_ids) + self.assertIn("calico_2", self.used_ids) + +if __name__ == "__main__": + unittest.main() diff --git a/build_stream/generate_catalog.py b/build_stream/generate_catalog.py index 712c5b5552..51e7178583 100644 --- a/build_stream/generate_catalog.py 
+++ b/build_stream/generate_catalog.py @@ -177,37 +177,43 @@ def _merge_package_entries(dst, src): def _generate_human_readable_id(pkg_name, pkg_type, pkg_version, used_ids): """Generate a human-readable package ID with collision handling. - Format: {sanitized_name}_{version}_{type} + Format: {name} (the version is stripped from the name) If collision occurs, append counter: {base_id}_{counter} """ # Extract version from package name if present (e.g., PyMySQL==1.1.2) name_for_id = pkg_name - if '==' in pkg_name and pkg_type == 'pip_module': - # For pip modules, name==version format + if '==' in pkg_name: parts = pkg_name.split('==') - if len(parts) == 2: - name_for_id = parts[0] - if not pkg_version: - pkg_version = parts[1] - elif '-' in pkg_name and pkg_type == 'rpm' and not pkg_version: - # Some RPMs have version in name (e.g., python3-PyMySQL-1.1.2) - # Try to extract version if it looks like a version number - parts = pkg_name.rsplit('-', 1) - if len(parts) == 2 and re.match(r'^\d', parts[1]): - name_for_id = parts[0] + name_for_id = parts[0] + if not pkg_version: pkg_version = parts[1] + + # Try exact match removal if version is known + if pkg_version and isinstance(pkg_version, str): + # match at the end with 'v' prefixed + if name_for_id.endswith('v' + pkg_version): + name_for_id = name_for_id[:-(len(pkg_version) + 1)] + # exact match at the end + elif name_for_id.endswith(pkg_version): + name_for_id = name_for_id[:-len(pkg_version)] + # match with dots replaced by hyphens + elif name_for_id.endswith(pkg_version.replace('.', '-')): + name_for_id = name_for_id[:-len(pkg_version)] + + # Use regex to strip version-like suffixes for remaining cases + # Matches: + # 1. -v followed by digits/dots/hyphens (e.g. -v1.2.3, -v2) + # 2. - followed by multi-part digits (e.g. -2-16-0, -1.1.2) + # 3. - followed by single digit at the end (e.g. -3$) + # Preserves trailing non-version suffixes (e.g. 
-amd64, -chart) + version_regex = r'[-_](?:v\d+(?:[-.]\d+)*|\d+(?:[-.]\d+)+|\d+$)(?=[-_]|$)' + name_for_id = re.sub(version_regex, '', name_for_id) - # Sanitize package name: replace special chars with hyphens - sanitized = re.sub(r'[/@:.]', '-', name_for_id) - sanitized = re.sub(r'-+', '-', sanitized).strip('-') - - # Truncate very long names to reasonable length - if len(sanitized) > 50: - sanitized = sanitized[:50] + # Clean up trailing separators + name_for_id = name_for_id.rstrip('-_') # Build base ID - version_part = pkg_version if pkg_version else 'na' - base_id = f"{sanitized}_{version_part}_{pkg_type}" + base_id = name_for_id # Handle collisions if base_id not in used_ids: diff --git a/examples/catalog/catalog_rhel.json b/examples/catalog/catalog_rhel.json index 4950344ae2..4132c82c20 100644 --- a/examples/catalog/catalog_rhel.json +++ b/examples/catalog/catalog_rhel.json @@ -7,257 +7,257 @@ { "Name": "login_compiler_node_aarch64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "login_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - 
"iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "os_x86_64", "FunctionalPackages": [ - "openssl-libs_na_rpm", - "ovis-ldms_na_rpm", - "python3-cython_na_rpm", - "python3-devel_na_rpm" + "openssl-libs", + "ovis-ldms", + "python3-cython", + "python3-devel" ] }, { "Name": "service_kube_control_plane_x86_64", "FunctionalPackages": [ - "PyMySQL_1.1.2_pip_module", - "apptainer_na_rpm", - "calico-v3-31-4_na_manifest", - "cert-manager-v1-10-0_na_tarball", - "cffi_1.17.1_pip_module", - "container-selinux_na_rpm", - "cri-o_1.35.1_rpm", - "cryptography_45.0.7_pip_module", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "docker-io-alpine-kubectl_1.35.1_image", - "docker-io-calico-cni_v3.31.4_image", - "docker-io-calico-kube-controllers_v3.31.4_image", - "docker-io-calico-node_v3.31.4_image", - "docker-io-curlimages-curl_8.17.0_image", - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image", - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image", - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image", - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image", - "docker-io-library-busybox_1.36_image", - "docker-io-library-mysql_9.3.0_image", - "docker-io-library-python_3.12-slim_image", - "docker-io-nginxinc-nginx-unprivileged_1.29_image", - "docker-io-rmohr-activemq_5.15.9_image", - 
"docker-io-timberio-vector_0.54.0-debian_image", - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image", - "docker-io-victoriametrics-operator_v0.68.3_image", - "docker-io-victoriametrics-victoria-logs_v1.50.0_image", - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image", - "docker-io-victoriametrics-vlagent_v1.50.0_image", - "docker-io-victoriametrics-vmagent_v1.128.0_image", - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image", - "firewalld_na_rpm", - "fuse-overlayfs_na_rpm", - "ghcr-io-kube-vip-kube-vip_v0.8.9_image", - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image", - "git_na_rpm", - "helm-charts_container-storage-modules-1.9.2_git", - "helm-v3-20-1-amd64_na_tarball", - "iscsi-initiator-utils_na_rpm", - "karavi-observability_v1.12.0_git", - "kubeadm_1.35.1_rpm", - "kubectl_1.35.1_rpm", - "kubelet_1.35.1_rpm", - "kubernetes_33.1.0_pip_module", - "lsscsi_na_rpm", - "metallb-native-v0-15-3_na_manifest", - "nfs-subdir-external-provisioner-4-0-18_na_tarball", - "omsdk_1.2.518_pip_module", - "podman_na_rpm", - "prettytable_3.14.0_pip_module", - "prometheus_client_0.20.0_pip_module", - "python3-firewall_na_rpm", - "python3_3.12.9_rpm", - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image", - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image", - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image", - "quay-io-jetstack-cert-manager-controller_v1.10.0_image", - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image", - "quay-io-metallb-speaker_v0.15.3_image", - "quay-io-strimzi-kafka-bridge_0.33.1_image", - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image", - "quay-io-strimzi-operator_0.48.0_image", - "registry-k8s-io-coredns-coredns_v1.13.1_image", - "registry-k8s-io-etcd_3.6.6-0_image", - "registry-k8s-io-kube-apiserver_v1.35.1_image", - 
"registry-k8s-io-kube-controller-manager_v1.35.1_image", - "registry-k8s-io-kube-proxy_v1.35.1_image", - "registry-k8s-io-kube-scheduler_v1.35.1_image", - "registry-k8s-io-pause_3.10.1_image", - "sg3_utils_na_rpm", - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball", - "victoria-metrics-operator-0-59-3_na_tarball", - "vim-enhanced_na_rpm" + "PyMySQL", + "apptainer", + "calico", + "cert-manager", + "cffi", + "container-selinux", + "cri-o", + "cryptography", + "device-mapper-multipath", + "doca-ofed", + "docker.io/alpine/kubectl", + "docker.io/calico/cni", + "docker.io/calico/kube-controllers", + "docker.io/calico/node", + "docker.io/curlimages/curl", + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", + "docker.io/dellhpcomniaaisolution/kafkapump", + "docker.io/dellhpcomniaaisolution/ubuntu-ldms", + "docker.io/dellhpcomniaaisolution/victoriapump", + "docker.io/library/busybox", + "docker.io/library/mysql", + "docker.io/library/python", + "docker.io/nginxinc/nginx-unprivileged", + "docker.io/rmohr/activemq", + "docker.io/timberio/vector", + "docker.io/victoriametrics/operator", + "docker.io/victoriametrics/operator_1", + "docker.io/victoriametrics/victoria-logs", + "docker.io/victoriametrics/victoria-metrics", + "docker.io/victoriametrics/vlagent", + "docker.io/victoriametrics/vmagent", + "docker.io/victoriametrics/vminsert", + "docker.io/victoriametrics/vmselect", + "docker.io/victoriametrics/vmstorage", + "firewalld", + "fuse-overlayfs", + "ghcr.io/kube-vip/kube-vip", + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", + "git", + "helm-amd64", + "helm-charts", + "iscsi-initiator-utils", + "karavi-observability", + "kubeadm", + "kubectl", + "kubelet", + "kubernetes", + "lsscsi", + "metallb-native", + "nfs-subdir-external-provisioner", + "omsdk", + "podman", + "prettytable", + "prometheus_client", + "python3", + "python3-firewall", + "quay.io/dell/container-storage-modules/csm-metrics-powerscale", + 
"quay.io/jetstack/cert-manager-acmesolver", + "quay.io/jetstack/cert-manager-cainjector", + "quay.io/jetstack/cert-manager-controller", + "quay.io/jetstack/cert-manager-webhook", + "quay.io/metallb/speaker", + "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/operator", + "registry.k8s.io/coredns/coredns", + "registry.k8s.io/etcd", + "registry.k8s.io/kube-apiserver", + "registry.k8s.io/kube-controller-manager", + "registry.k8s.io/kube-proxy", + "registry.k8s.io/kube-scheduler", + "registry.k8s.io/pause", + "sg3_utils", + "strimzi-kafka-operator-helm-3-chart", + "victoria-metrics-operator", + "vim-enhanced" ] }, { "Name": "service_kube_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "cert-manager-v1-10-0_na_tarball", - "cffi_1.17.1_pip_module", - "container-selinux_na_rpm", - "cri-o_1.35.1_rpm", - "cryptography_45.0.7_pip_module", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "docker-io-alpine-kubectl_1.35.1_image", - "docker-io-curlimages-curl_8.17.0_image", - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image", - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image", - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image", - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image", - "docker-io-library-busybox_1.36_image", - "docker-io-library-mysql_9.3.0_image", - "docker-io-library-python_3.12-slim_image", - "docker-io-nginxinc-nginx-unprivileged_1.29_image", - "docker-io-rmohr-activemq_5.15.9_image", - "docker-io-timberio-vector_0.54.0-debian_image", - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image", - "docker-io-victoriametrics-operator_v0.68.3_image", - "docker-io-victoriametrics-victoria-logs_v1.50.0_image", - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image", - "docker-io-victoriametrics-vlagent_v1.50.0_image", - "docker-io-victoriametrics-vmagent_v1.128.0_image", - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image", - 
"docker-io-victoriametrics-vmselect_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image", - "firewalld_na_rpm", - "fuse-overlayfs_na_rpm", - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image", - "git_na_rpm", - "helm-charts_container-storage-modules-1.9.2_git", - "iscsi-initiator-utils_na_rpm", - "karavi-observability_v1.12.0_git", - "kubeadm_1.35.1_rpm", - "kubelet_1.35.1_rpm", - "kubernetes_33.1.0_pip_module", - "lsscsi_na_rpm", - "omsdk_1.2.518_pip_module", - "podman_na_rpm", - "prometheus_client_0.20.0_pip_module", - "python3-firewall_na_rpm", - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image", - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image", - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image", - "quay-io-jetstack-cert-manager-controller_v1.10.0_image", - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image", - "quay-io-metallb-controller_v0.15.3_image", - "quay-io-metallb-speaker_v0.15.3_image", - "quay-io-strimzi-kafka-bridge_0.33.1_image", - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image", - "quay-io-strimzi-operator_0.48.0_image", - "registry-k8s-io-sig-storage-nfs-subdir-external-pr_v4.0.2_image", - "sg3_utils_na_rpm", - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball", - "victoria-metrics-operator-0-59-3_na_tarball", - "vim-enhanced_na_rpm" + "apptainer", + "cert-manager", + "cffi", + "container-selinux", + "cri-o", + "cryptography", + "device-mapper-multipath", + "doca-ofed", + "docker.io/alpine/kubectl", + "docker.io/curlimages/curl", + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", + "docker.io/dellhpcomniaaisolution/kafkapump", + "docker.io/dellhpcomniaaisolution/ubuntu-ldms", + "docker.io/dellhpcomniaaisolution/victoriapump", + "docker.io/library/busybox", + "docker.io/library/mysql", + "docker.io/library/python", + "docker.io/nginxinc/nginx-unprivileged", + "docker.io/rmohr/activemq", + "docker.io/timberio/vector", + 
"docker.io/victoriametrics/operator", + "docker.io/victoriametrics/operator_1", + "docker.io/victoriametrics/victoria-logs", + "docker.io/victoriametrics/victoria-metrics", + "docker.io/victoriametrics/vlagent", + "docker.io/victoriametrics/vmagent", + "docker.io/victoriametrics/vminsert", + "docker.io/victoriametrics/vmselect", + "docker.io/victoriametrics/vmstorage", + "firewalld", + "fuse-overlayfs", + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", + "git", + "helm-charts", + "iscsi-initiator-utils", + "karavi-observability", + "kubeadm", + "kubelet", + "kubernetes", + "lsscsi", + "omsdk", + "podman", + "prometheus_client", + "python3-firewall", + "quay.io/dell/container-storage-modules/csm-metrics-powerscale", + "quay.io/jetstack/cert-manager-acmesolver", + "quay.io/jetstack/cert-manager-cainjector", + "quay.io/jetstack/cert-manager-controller", + "quay.io/jetstack/cert-manager-webhook", + "quay.io/metallb/controller", + "quay.io/metallb/speaker", + "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/operator", + "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner", + "sg3_utils", + "strimzi-kafka-operator-helm-3-chart", + "victoria-metrics-operator", + "vim-enhanced" ] }, { "Name": "slurm_control_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "mariadb-server_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-PyMySQL_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmctld_na_rpm", - "slurm-slurmdbd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + 
"iscsi-initiator-utils", + "likwid", + "lsscsi", + "mariadb-server", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-PyMySQL", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-slurmctld", + "slurm-slurmdbd" ] }, { "Name": "slurm_node_aarch64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "kernel-devel_na_rpm", - "kernel-headers_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-pam_slurm_na_rpm", - "slurm-slurmd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "kernel-devel", + "kernel-headers", + "likwid", + "lsscsi", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-pam_slurm", + "slurm-slurmd" ] } ], @@ -266,106 +266,106 @@ "Name": "RHEL", "Version": "10.0", "osPackages": [ - "NetworkManager_na_rpm", - "authselect_na_rpm", - "autoconf_na_rpm", - "automake_na_rpm", - "bash-completion_na_rpm", - "bash_na_rpm", - "binutils-devel_na_rpm", - "binutils_na_rpm", - "bzip2_na_rpm", - "chrony_na_rpm", - "cloud-init_na_rpm", - "clustershell_na_rpm", - "cmake_na_rpm", - "coreutils_na_rpm", - "cryptsetup_na_rpm", - "curl_na_rpm", - "device-mapper_na_rpm", - "dmidecode_na_rpm", - "docker-io-dellhpcomniaaisolution-image-build-aarch_1.1_image", - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image", - "dracut-live_na_rpm", - "dracut-network_na_rpm", - "dracut_na_rpm", - "emacs_na_rpm", - "file_na_rpm", - "findutils_na_rpm", - 
"fping_na_rpm", - "gawk_na_rpm", - "gcc-c++_na_rpm", - "gcc-gfortran_na_rpm", - "gcc_na_rpm", - "gdb-gdbserver_na_rpm", - "gdb_na_rpm", - "gedit_na_rpm", - "glibc-langpack-en_na_rpm", - "grep_na_rpm", - "gzip_na_rpm", - "hwloc-libs_na_rpm", - "hwloc_na_rpm", - "iperf3_na_rpm", - "ipmitool_na_rpm", - "iproute_na_rpm", - "iputils_na_rpm", - "kbd_na_rpm", - "kernel-tools_na_rpm", - "kernel_na_rpm", - "kexec-tools_na_rpm", - "libcurl_na_rpm", - "libtool_na_rpm", - "lldb-devel_na_rpm", - "lldb_na_rpm", - "lshw_na_rpm", - "lsof_na_rpm", - "ltrace_na_rpm", - "lvm2_na_rpm", - "make_na_rpm", - "man-db_na_rpm", - "man-pages_na_rpm", - "munge-devel_na_rpm", - "nfs-utils_na_rpm", - "nfs4-acl-tools_na_rpm", - "nm-connection-editor_na_rpm", - "nss-pam-ldapd_na_rpm", - "oddjob-mkhomedir_na_rpm", - "openldap-clients_na_rpm", - "openmpi_5.0.8_tarball", - "openssh-clients_na_rpm", - "openssh-server_na_rpm", - "openssh_na_rpm", - "openssl-devel_na_rpm", - "openssl-libs_na_rpm_1", - "ovis-ldms_na_rpm_1", - "papi-devel_na_rpm", - "papi-libs_na_rpm", - "papi_na_rpm", - "pciutils_na_rpm", - "perf_na_rpm", - "pmix-devel_na_rpm", - "python3-cython_na_rpm_1", - "python3-devel_na_rpm_1", - "rsync_na_rpm", - "rsyslog_na_rpm", - "sed_na_rpm", - "squashfs-tools_na_rpm", - "sssd_na_rpm", - "strace_na_rpm", - "sudo_na_rpm", - "systemd-udev_na_rpm", - "systemd_na_rpm", - "tar_na_rpm", - "tcpdump_na_rpm", - "traceroute_na_rpm", - "ucx_1.19.0_tarball", - "util-linux_na_rpm", - "valgrind-devel_na_rpm", - "valgrind_na_rpm", - "vim-enhanced_na_rpm_1", - "wget_na_rpm", - "which_na_rpm", - "zsh_na_rpm" + "NetworkManager", + "authselect", + "autoconf", + "automake", + "bash", + "bash-completion", + "binutils", + "binutils-devel", + "bzip2", + "chrony", + "cloud-init", + "clustershell", + "cmake", + "coreutils", + "cryptsetup", + "curl", + "device-mapper", + "dmidecode", + "docker.io/dellhpcomniaaisolution/image-build-aarch64", + "docker.io/dellhpcomniaaisolution/image-build-el10", + "dracut", + 
"dracut-live", + "dracut-network", + "emacs", + "file", + "findutils", + "fping", + "gawk", + "gcc", + "gcc-c++", + "gcc-gfortran", + "gdb", + "gdb-gdbserver", + "gedit", + "glibc-langpack-en", + "grep", + "gzip", + "hwloc", + "hwloc-libs", + "iperf3", + "ipmitool", + "iproute", + "iputils", + "kbd", + "kernel", + "kernel-tools", + "kexec-tools", + "libcurl", + "libtool", + "lldb", + "lldb-devel", + "lshw", + "lsof", + "ltrace", + "lvm2", + "make", + "man-db", + "man-pages", + "munge-devel", + "nfs-utils", + "nfs4-acl-tools", + "nm-connection-editor", + "nss-pam-ldapd", + "oddjob-mkhomedir", + "openldap-clients", + "openmpi", + "openssh", + "openssh-clients", + "openssh-server", + "openssl-devel", + "openssl-libs_1", + "ovis-ldms_1", + "papi-devel", + "papi-libs", + "papi_1", + "pciutils", + "perf", + "pmix-devel", + "python3-cython_1", + "python3-devel_1", + "rsync", + "rsyslog", + "sed", + "squashfs-tools", + "sssd", + "strace", + "sudo", + "systemd", + "systemd-udev", + "tar", + "tcpdump", + "traceroute", + "ucx", + "util-linux", + "valgrind", + "valgrind-devel", + "vim-enhanced_1", + "wget", + "which", + "zsh" ] } ], @@ -373,29 +373,29 @@ { "Name": "csi", "InfrastructurePackages": [ - "csi-powerscale-v2-16-0_v2.16.0_git", - "docker-io-dellemc-csm-encryption_v0.6.0_image", - "external-snapshotter-v8-4-0_v8.4.0_git", - "helm-charts-2-16-0_csi-isilon-2.16.0_git", - "quay-io-dell-container-storage-modules-csi-isilon_v2.16.0_image", - "quay-io-dell-container-storage-modules-csi-metadat_v1.13.0_image", - "quay-io-dell-container-storage-modules-csm-authori_v2.4.0_image", - "quay-io-dell-container-storage-modules-dell-csi-re_v1.14.0_image", - "quay-io-dell-container-storage-modules-podmon_v1.15.0_image", - "registry-k8s-io-sig-storage-csi-attacher_v4.10.0_image", - "registry-k8s-io-sig-storage-csi-external-health-mo_v0.16.0_image", - "registry-k8s-io-sig-storage-csi-node-driver-regist_v2.15.0_image", - "registry-k8s-io-sig-storage-csi-provisioner_v6.1.0_image", - 
"registry-k8s-io-sig-storage-csi-resizer_v2.0.0_image", - "registry-k8s-io-sig-storage-csi-snapshotter_v8.4.0_image", - "registry-k8s-io-sig-storage-snapshot-controller_v8.4.0_image" + "csi-powerscale", + "docker.io/dellemc/csm-encryption", + "external-snapshotter", + "helm-charts_1", + "quay.io/dell/container-storage-modules/csi-isilon", + "quay.io/dell/container-storage-modules/csi-metadata-retriever", + "quay.io/dell/container-storage-modules/csm-authorization-sidecar", + "quay.io/dell/container-storage-modules/dell-csi-replicator", + "quay.io/dell/container-storage-modules/podmon", + "registry.k8s.io/sig-storage/csi-attacher", + "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", + "registry.k8s.io/sig-storage/csi-node-driver-registrar", + "registry.k8s.io/sig-storage/csi-provisioner", + "registry.k8s.io/sig-storage/csi-resizer", + "registry.k8s.io/sig-storage/csi-snapshotter", + "registry.k8s.io/sig-storage/snapshot-controller" ] } ], "Drivers": [], "DriverPackages": {}, "FunctionalPackages": { - "PyMySQL_1.1.2_pip_module": { + "PyMySQL": { "Name": "PyMySQL==1.1.2", "SupportedOS": [ { @@ -408,7 +408,7 @@ ], "Type": "pip_module" }, - "apptainer_na_rpm": { + "apptainer": { "Name": "apptainer", "SupportedOS": [ { @@ -423,16 +423,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "calico-v3-31-4_na_manifest": { + "calico": { "Name": "calico-v3.31.4", "SupportedOS": [ { @@ -451,7 +451,7 @@ } ] }, - "cert-manager-v1-10-0_na_tarball": { + "cert-manager": { "Name": "cert-manager-v1.10.0", "SupportedOS": [ { @@ -470,7 +470,7 @@ } ] }, - "cffi_1.17.1_pip_module": { + "cffi": { "Name": "cffi==1.17.1", "SupportedOS": [ { @@ -483,7 +483,7 @@ ], "Type": "pip_module" }, - "container-selinux_na_rpm": { + "container-selinux": { "Name": "container-selinux", "SupportedOS": [ { @@ -502,7 +502,7 @@ } ] }, - 
"cri-o_1.35.1_rpm": { + "cri-o": { "Name": "cri-o-1.35.1", "SupportedOS": [ { @@ -521,7 +521,7 @@ } ] }, - "cryptography_45.0.7_pip_module": { + "cryptography": { "Name": "cryptography==45.0.7", "SupportedOS": [ { @@ -534,7 +534,7 @@ ], "Type": "pip_module" }, - "device-mapper-multipath_na_rpm": { + "device-mapper-multipath": { "Name": "device-mapper-multipath", "SupportedOS": [ { @@ -549,16 +549,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "doca-ofed_na_rpm_repo": { + "doca-ofed": { "Name": "doca-ofed", "SupportedOS": [ { @@ -573,16 +573,16 @@ "Type": "rpm_repo", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "doca" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "doca" } ] }, - "docker-io-alpine-kubectl_1.35.1_image": { + "docker.io/alpine/kubectl": { "Name": "docker.io/alpine/kubectl", "SupportedOS": [ { @@ -597,7 +597,7 @@ "Tag": "1.35.1", "Version": "1.35.1" }, - "docker-io-calico-cni_v3.31.4_image": { + "docker.io/calico/cni": { "Name": "docker.io/calico/cni", "SupportedOS": [ { @@ -612,7 +612,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-calico-kube-controllers_v3.31.4_image": { + "docker.io/calico/kube-controllers": { "Name": "docker.io/calico/kube-controllers", "SupportedOS": [ { @@ -627,7 +627,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-calico-node_v3.31.4_image": { + "docker.io/calico/node": { "Name": "docker.io/calico/node", "SupportedOS": [ { @@ -642,7 +642,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-curlimages-curl_8.17.0_image": { + "docker.io/curlimages/curl": { "Name": "docker.io/curlimages/curl", "SupportedOS": [ { @@ -657,7 +657,7 @@ "Tag": "8.17.0", "Version": "8.17.0" }, - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image": { + 
"docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver": { "Name": "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", "SupportedOS": [ { @@ -672,7 +672,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image": { + "docker.io/dellhpcomniaaisolution/kafkapump": { "Name": "docker.io/dellhpcomniaaisolution/kafkapump", "SupportedOS": [ { @@ -687,7 +687,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image": { + "docker.io/dellhpcomniaaisolution/ubuntu-ldms": { "Name": "docker.io/dellhpcomniaaisolution/ubuntu-ldms", "SupportedOS": [ { @@ -702,7 +702,7 @@ "Tag": "1.1", "Version": "1.1" }, - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image": { + "docker.io/dellhpcomniaaisolution/victoriapump": { "Name": "docker.io/dellhpcomniaaisolution/victoriapump", "SupportedOS": [ { @@ -717,7 +717,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-library-busybox_1.36_image": { + "docker.io/library/busybox": { "Name": "docker.io/library/busybox", "SupportedOS": [ { @@ -732,7 +732,7 @@ "Tag": "1.36", "Version": "1.36" }, - "docker-io-library-mysql_9.3.0_image": { + "docker.io/library/mysql": { "Name": "docker.io/library/mysql", "SupportedOS": [ { @@ -747,7 +747,7 @@ "Tag": "9.3.0", "Version": "9.3.0" }, - "docker-io-library-python_3.12-slim_image": { + "docker.io/library/python": { "Name": "docker.io/library/python", "SupportedOS": [ { @@ -762,7 +762,7 @@ "Tag": "3.12-slim", "Version": "3.12-slim" }, - "docker-io-nginxinc-nginx-unprivileged_1.29_image": { + "docker.io/nginxinc/nginx-unprivileged": { "Name": "docker.io/nginxinc/nginx-unprivileged", "SupportedOS": [ { @@ -777,7 +777,7 @@ "Tag": "1.29", "Version": "1.29" }, - "docker-io-rmohr-activemq_5.15.9_image": { + "docker.io/rmohr/activemq": { "Name": "docker.io/rmohr/activemq", "SupportedOS": [ { @@ -792,7 +792,7 @@ "Tag": "5.15.9", "Version": "5.15.9" }, - "docker-io-timberio-vector_0.54.0-debian_image": { + 
"docker.io/timberio/vector": { "Name": "docker.io/timberio/vector", "SupportedOS": [ { @@ -807,7 +807,7 @@ "Tag": "0.54.0-debian", "Version": "0.54.0-debian" }, - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image": { + "docker.io/victoriametrics/operator": { "Name": "docker.io/victoriametrics/operator", "SupportedOS": [ { @@ -819,10 +819,10 @@ "x86_64" ], "Type": "image", - "Tag": "config-reloader-v0.68.3", - "Version": "config-reloader-v0.68.3" + "Tag": "v0.68.3", + "Version": "v0.68.3" }, - "docker-io-victoriametrics-operator_v0.68.3_image": { + "docker.io/victoriametrics/operator_1": { "Name": "docker.io/victoriametrics/operator", "SupportedOS": [ { @@ -834,10 +834,10 @@ "x86_64" ], "Type": "image", - "Tag": "v0.68.3", - "Version": "v0.68.3" + "Tag": "config-reloader-v0.68.3", + "Version": "config-reloader-v0.68.3" }, - "docker-io-victoriametrics-victoria-logs_v1.50.0_image": { + "docker.io/victoriametrics/victoria-logs": { "Name": "docker.io/victoriametrics/victoria-logs", "SupportedOS": [ { @@ -852,7 +852,7 @@ "Tag": "v1.50.0", "Version": "v1.50.0" }, - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image": { + "docker.io/victoriametrics/victoria-metrics": { "Name": "docker.io/victoriametrics/victoria-metrics", "SupportedOS": [ { @@ -867,7 +867,7 @@ "Tag": "v1.128.0", "Version": "v1.128.0" }, - "docker-io-victoriametrics-vlagent_v1.50.0_image": { + "docker.io/victoriametrics/vlagent": { "Name": "docker.io/victoriametrics/vlagent", "SupportedOS": [ { @@ -882,7 +882,7 @@ "Tag": "v1.50.0", "Version": "v1.50.0" }, - "docker-io-victoriametrics-vmagent_v1.128.0_image": { + "docker.io/victoriametrics/vmagent": { "Name": "docker.io/victoriametrics/vmagent", "SupportedOS": [ { @@ -897,7 +897,7 @@ "Tag": "v1.128.0", "Version": "v1.128.0" }, - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vminsert": { "Name": "docker.io/victoriametrics/vminsert", "SupportedOS": [ { @@ -912,7 +912,7 @@ "Tag": 
"v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vmselect": { "Name": "docker.io/victoriametrics/vmselect", "SupportedOS": [ { @@ -927,7 +927,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vmstorage": { "Name": "docker.io/victoriametrics/vmstorage", "SupportedOS": [ { @@ -942,7 +942,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "firewalld_na_rpm": { + "firewalld": { "Name": "firewalld", "SupportedOS": [ { @@ -957,16 +957,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "fuse-overlayfs_na_rpm": { + "fuse-overlayfs": { "Name": "fuse-overlayfs", "SupportedOS": [ { @@ -985,7 +985,7 @@ } ] }, - "geopm_na_tarball": { + "geopm": { "Name": "geopm", "SupportedOS": [ { @@ -1000,16 +1000,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/geopm/geopm/archive/refs/tags/v3.1.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/geopm/geopm/archive/refs/tags/v3.1.0.tar.gz" } ] }, - "ghcr-io-kube-vip-kube-vip_v0.8.9_image": { + "ghcr.io/kube-vip/kube-vip": { "Name": "ghcr.io/kube-vip/kube-vip", "SupportedOS": [ { @@ -1024,7 +1024,7 @@ "Tag": "v0.8.9", "Version": "v0.8.9" }, - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image": { + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector": { "Name": "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", "SupportedOS": [ { @@ -1039,7 +1039,7 @@ "Tag": "0.143.1", "Version": "0.143.1" }, - "git_na_rpm": { + "git": { "Name": "git", "SupportedOS": [ { @@ -1058,8 +1058,8 @@ 
} ] }, - "helm-charts_container-storage-modules-1.9.2_git": { - "Name": "helm-charts", + "helm-amd64": { + "Name": "helm-v3.20.1-amd64", "SupportedOS": [ { "Name": "RHEL", @@ -1069,17 +1069,16 @@ "Architecture": [ "x86_64" ], - "Type": "git", - "Version": "container-storage-modules-1.9.2", + "Type": "tarball", "Sources": [ { "Architecture": "x86_64", - "Uri": "https://github.com/dell/helm-charts.git" + "Uri": "https://get.helm.sh/helm-v3.20.1-linux-amd64.tar.gz" } ] }, - "helm-v3-20-1-amd64_na_tarball": { - "Name": "helm-v3.20.1-amd64", + "helm-charts": { + "Name": "helm-charts", "SupportedOS": [ { "Name": "RHEL", @@ -1089,15 +1088,16 @@ "Architecture": [ "x86_64" ], - "Type": "tarball", + "Type": "git", + "Version": "container-storage-modules-1.9.2", "Sources": [ { "Architecture": "x86_64", - "Uri": "https://get.helm.sh/helm-v3.20.1-linux-amd64.tar.gz" + "Uri": "https://github.com/dell/helm-charts.git" } ] }, - "imb_na_tarball": { + "imb": { "Name": "imb", "SupportedOS": [ { @@ -1112,16 +1112,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.8.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.8.tar.gz" } ] }, - "iscsi-initiator-utils_na_rpm": { + "iscsi-initiator-utils": { "Name": "iscsi-initiator-utils", "SupportedOS": [ { @@ -1136,16 +1136,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "karavi-observability_v1.12.0_git": { + "karavi-observability": { "Name": "karavi-observability", "SupportedOS": [ { @@ -1165,7 +1165,7 @@ } ] }, - "kernel-devel_na_rpm": { + "kernel-devel": { "Name": "kernel-devel", "SupportedOS": [ { @@ -1180,16 +1180,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": 
"aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "kernel-headers_na_rpm": { + "kernel-headers": { "Name": "kernel-headers", "SupportedOS": [ { @@ -1204,16 +1204,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "kubeadm_1.35.1_rpm": { + "kubeadm": { "Name": "kubeadm-1.35.1", "SupportedOS": [ { @@ -1232,7 +1232,7 @@ } ] }, - "kubectl_1.35.1_rpm": { + "kubectl": { "Name": "kubectl-1.35.1", "SupportedOS": [ { @@ -1251,7 +1251,7 @@ } ] }, - "kubelet_1.35.1_rpm": { + "kubelet": { "Name": "kubelet-1.35.1", "SupportedOS": [ { @@ -1270,7 +1270,7 @@ } ] }, - "kubernetes_33.1.0_pip_module": { + "kubernetes": { "Name": "kubernetes==33.1.0", "SupportedOS": [ { @@ -1283,7 +1283,7 @@ ], "Type": "pip_module" }, - "likwid_na_tarball": { + "likwid": { "Name": "likwid", "SupportedOS": [ { @@ -1298,16 +1298,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/RRZE-HPC/likwid/archive/refs/tags/v5.4.1.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/RRZE-HPC/likwid/archive/refs/tags/v5.4.1.tar.gz" } ] }, - "lsscsi_na_rpm": { + "lsscsi": { "Name": "lsscsi", "SupportedOS": [ { @@ -1322,16 +1322,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "mariadb-server_na_rpm": { + "mariadb-server": { "Name": "mariadb-server", "SupportedOS": [ { @@ -1346,16 +1346,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" 
} ] }, - "metallb-native-v0-15-3_na_manifest": { + "metallb-native": { "Name": "metallb-native-v0.15.3", "SupportedOS": [ { @@ -1374,7 +1374,7 @@ } ] }, - "msr-safe_na_tarball": { + "msr-safe": { "Name": "msr-safe", "SupportedOS": [ { @@ -1393,7 +1393,7 @@ } ] }, - "munge_na_rpm": { + "munge": { "Name": "munge", "SupportedOS": [ { @@ -1408,16 +1408,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "nfs-subdir-external-provisioner-4-0-18_na_tarball": { + "nfs-subdir-external-provisioner": { "Name": "nfs-subdir-external-provisioner-4.0.18", "SupportedOS": [ { @@ -1436,7 +1436,7 @@ } ] }, - "nvcr-io-nvidia-hpc-benchmarks_25.09_image": { + "nvcr.io/nvidia/hpc-benchmarks": { "Name": "nvcr.io/nvidia/hpc-benchmarks", "SupportedOS": [ { @@ -1452,7 +1452,7 @@ "Tag": "25.09", "Version": "25.09" }, - "omsdk_1.2.518_pip_module": { + "omsdk": { "Name": "omsdk==1.2.518", "SupportedOS": [ { @@ -1465,7 +1465,7 @@ ], "Type": "pip_module" }, - "openssl-libs_na_rpm": { + "openssl-libs": { "Name": "openssl-libs", "SupportedOS": [ { @@ -1480,16 +1480,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "osu-micro-benchmarks_na_tarball": { + "osu-micro-benchmarks": { "Name": "osu-micro-benchmarks", "SupportedOS": [ { @@ -1504,16 +1504,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz" } ] }, - "ovis-ldms_na_rpm": { + "ovis-ldms": { "Name": "ovis-ldms", "SupportedOS": [ { @@ -1528,16 
+1528,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "ldms" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "ldms" } ] }, - "papi_na_tarball": { + "papi": { "Name": "papi", "SupportedOS": [ { @@ -1552,16 +1552,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/icl-utk-edu/papi/releases/download/papi-7-2-0-t/papi-7.2.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/icl-utk-edu/papi/releases/download/papi-7-2-0-t/papi-7.2.0.tar.gz" } ] }, - "pmix_na_rpm": { + "pmix": { "Name": "pmix", "SupportedOS": [ { @@ -1576,16 +1576,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "podman_na_rpm": { + "podman": { "Name": "podman", "SupportedOS": [ { @@ -1604,7 +1604,7 @@ } ] }, - "prettytable_3.14.0_pip_module": { + "prettytable": { "Name": "prettytable==3.14.0", "SupportedOS": [ { @@ -1617,7 +1617,7 @@ ], "Type": "pip_module" }, - "prometheus_client_0.20.0_pip_module": { + "prometheus_client": { "Name": "prometheus_client==0.20.0", "SupportedOS": [ { @@ -1630,8 +1630,8 @@ ], "Type": "pip_module" }, - "python3-PyMySQL_na_rpm": { - "Name": "python3-PyMySQL", + "python3": { + "Name": "python3-3.12.9", "SupportedOS": [ { "Name": "RHEL", @@ -1639,23 +1639,18 @@ } ], "Architecture": [ - "aarch64", "x86_64" ], "Type": "rpm", "Sources": [ - { - "Architecture": "aarch64", - "RepoName": "appstream" - }, { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "python3-cython_na_rpm": { - "Name": "python3-cython", + "python3-PyMySQL": { + "Name": "python3-PyMySQL", "SupportedOS": [ { "Name": "RHEL", @@ -1669,17 +1664,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": 
"codeready-builder" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "codeready-builder" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "python3-devel_na_rpm": { - "Name": "python3-devel", + "python3-cython": { + "Name": "python3-cython", "SupportedOS": [ { "Name": "RHEL", @@ -1693,17 +1688,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" + "Architecture": "x86_64", + "RepoName": "codeready-builder" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "codeready-builder" } ] }, - "python3-firewall_na_rpm": { - "Name": "python3-firewall", + "python3-devel": { + "Name": "python3-devel", "SupportedOS": [ { "Name": "RHEL", @@ -1717,17 +1712,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "python3_3.12.9_rpm": { - "Name": "python3-3.12.9", + "python3-firewall": { + "Name": "python3-firewall", "SupportedOS": [ { "Name": "RHEL", @@ -1735,6 +1730,7 @@ } ], "Architecture": [ + "aarch64", "x86_64" ], "Type": "rpm", @@ -1742,10 +1738,14 @@ { "Architecture": "x86_64", "RepoName": "baseos" + }, + { + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image": { + "quay.io/dell/container-storage-modules/csm-metrics-powerscale": { "Name": "quay.io/dell/container-storage-modules/csm-metrics-powerscale", "SupportedOS": [ { @@ -1760,7 +1760,7 @@ "Tag": "v1.11.0", "Version": "v1.11.0" }, - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image": { + "quay.io/jetstack/cert-manager-acmesolver": { "Name": "quay.io/jetstack/cert-manager-acmesolver", "SupportedOS": [ { @@ -1775,7 +1775,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - 
"quay-io-jetstack-cert-manager-cainjector_v1.10.0_image": { + "quay.io/jetstack/cert-manager-cainjector": { "Name": "quay.io/jetstack/cert-manager-cainjector", "SupportedOS": [ { @@ -1790,7 +1790,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-controller_v1.10.0_image": { + "quay.io/jetstack/cert-manager-controller": { "Name": "quay.io/jetstack/cert-manager-controller", "SupportedOS": [ { @@ -1805,7 +1805,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image": { + "quay.io/jetstack/cert-manager-webhook": { "Name": "quay.io/jetstack/cert-manager-webhook", "SupportedOS": [ { @@ -1820,7 +1820,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-metallb-controller_v0.15.3_image": { + "quay.io/metallb/controller": { "Name": "quay.io/metallb/controller", "SupportedOS": [ { @@ -1835,7 +1835,7 @@ "Tag": "v0.15.3", "Version": "v0.15.3" }, - "quay-io-metallb-speaker_v0.15.3_image": { + "quay.io/metallb/speaker": { "Name": "quay.io/metallb/speaker", "SupportedOS": [ { @@ -1850,8 +1850,8 @@ "Tag": "v0.15.3", "Version": "v0.15.3" }, - "quay-io-strimzi-kafka-bridge_0.33.1_image": { - "Name": "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/kafka": { + "Name": "quay.io/strimzi/kafka", "SupportedOS": [ { "Name": "RHEL", @@ -1862,11 +1862,11 @@ "x86_64" ], "Type": "image", - "Tag": "0.33.1", - "Version": "0.33.1" + "Tag": "0.48.0-kafka-4.1.0", + "Version": "0.48.0-kafka-4.1.0" }, - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image": { - "Name": "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge": { + "Name": "quay.io/strimzi/kafka-bridge", "SupportedOS": [ { "Name": "RHEL", @@ -1877,10 +1877,10 @@ "x86_64" ], "Type": "image", - "Tag": "0.48.0-kafka-4.1.0", - "Version": "0.48.0-kafka-4.1.0" + "Tag": "0.33.1", + "Version": "0.33.1" }, - "quay-io-strimzi-operator_0.48.0_image": { + "quay.io/strimzi/operator": { "Name": "quay.io/strimzi/operator", "SupportedOS": [ { @@ -1895,7 +1895,7 @@ "Tag": 
"0.48.0", "Version": "0.48.0" }, - "registry-k8s-io-coredns-coredns_v1.13.1_image": { + "registry.k8s.io/coredns/coredns": { "Name": "registry.k8s.io/coredns/coredns", "SupportedOS": [ { @@ -1910,7 +1910,7 @@ "Tag": "v1.13.1", "Version": "v1.13.1" }, - "registry-k8s-io-etcd_3.6.6-0_image": { + "registry.k8s.io/etcd": { "Name": "registry.k8s.io/etcd", "SupportedOS": [ { @@ -1925,7 +1925,7 @@ "Tag": "3.6.6-0", "Version": "3.6.6-0" }, - "registry-k8s-io-kube-apiserver_v1.35.1_image": { + "registry.k8s.io/kube-apiserver": { "Name": "registry.k8s.io/kube-apiserver", "SupportedOS": [ { @@ -1940,7 +1940,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-controller-manager_v1.35.1_image": { + "registry.k8s.io/kube-controller-manager": { "Name": "registry.k8s.io/kube-controller-manager", "SupportedOS": [ { @@ -1955,7 +1955,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-proxy_v1.35.1_image": { + "registry.k8s.io/kube-proxy": { "Name": "registry.k8s.io/kube-proxy", "SupportedOS": [ { @@ -1970,7 +1970,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-scheduler_v1.35.1_image": { + "registry.k8s.io/kube-scheduler": { "Name": "registry.k8s.io/kube-scheduler", "SupportedOS": [ { @@ -1985,7 +1985,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-pause_3.10.1_image": { + "registry.k8s.io/pause": { "Name": "registry.k8s.io/pause", "SupportedOS": [ { @@ -2000,7 +2000,7 @@ "Tag": "3.10.1", "Version": "3.10.1" }, - "registry-k8s-io-sig-storage-nfs-subdir-external-pr_v4.0.2_image": { + "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner": { "Name": "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner", "SupportedOS": [ { @@ -2015,7 +2015,7 @@ "Tag": "v4.0.2", "Version": "v4.0.2" }, - "sg3_utils_na_rpm": { + "sg3_utils": { "Name": "sg3_utils", "SupportedOS": [ { @@ -2030,16 +2030,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" 
}, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sionlib_na_tarball": { + "sionlib": { "Name": "sionlib", "SupportedOS": [ { @@ -2054,17 +2054,17 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7" } ] }, - "slurm-pam_slurm_na_rpm": { - "Name": "slurm-pam_slurm", + "slurm": { + "Name": "slurm", "SupportedOS": [ { "Name": "RHEL", @@ -2078,17 +2078,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmctld_na_rpm": { - "Name": "slurm-slurmctld", + "slurm-pam_slurm": { + "Name": "slurm-pam_slurm", "SupportedOS": [ { "Name": "RHEL", @@ -2102,17 +2102,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmd_na_rpm": { - "Name": "slurm-slurmd", + "slurm-slurmctld": { + "Name": "slurm-slurmctld", "SupportedOS": [ { "Name": "RHEL", @@ -2126,17 +2126,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmdbd_na_rpm": { - "Name": "slurm-slurmdbd", + "slurm-slurmd": { + "Name": "slurm-slurmd", "SupportedOS": [ { "Name": "RHEL", @@ -2150,17 +2150,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - 
"slurm_na_rpm": { - "Name": "slurm", + "slurm-slurmdbd": { + "Name": "slurm-slurmdbd", "SupportedOS": [ { "Name": "RHEL", @@ -2174,16 +2174,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball": { + "strimzi-kafka-operator-helm-3-chart": { "Name": "strimzi-kafka-operator-helm-3-chart-0.48.0", "SupportedOS": [ { @@ -2202,7 +2202,7 @@ } ] }, - "victoria-metrics-operator-0-59-3_na_tarball": { + "victoria-metrics-operator": { "Name": "victoria-metrics-operator-0.59.3", "SupportedOS": [ { @@ -2221,7 +2221,7 @@ } ] }, - "vim-enhanced_na_rpm": { + "vim-enhanced": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -2236,18 +2236,18 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] } }, "OSPackages": { - "NetworkManager_na_rpm": { + "NetworkManager": { "Name": "NetworkManager", "SupportedOS": [ { @@ -2262,16 +2262,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "authselect_na_rpm": { + "authselect": { "Name": "authselect", "SupportedOS": [ { @@ -2286,16 +2286,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "autoconf_na_rpm": { + "autoconf": { "Name": "autoconf", "SupportedOS": [ { @@ -2310,16 +2310,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } 
] }, - "automake_na_rpm": { + "automake": { "Name": "automake", "SupportedOS": [ { @@ -2334,17 +2334,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "bash-completion_na_rpm": { - "Name": "bash-completion", + "bash": { + "Name": "bash", "SupportedOS": [ { "Name": "RHEL", @@ -2358,17 +2358,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "bash_na_rpm": { - "Name": "bash", + "bash-completion": { + "Name": "bash-completion", "SupportedOS": [ { "Name": "RHEL", @@ -2382,17 +2382,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "binutils-devel_na_rpm": { - "Name": "binutils-devel", + "binutils": { + "Name": "binutils", "SupportedOS": [ { "Name": "RHEL", @@ -2406,17 +2406,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" + "Architecture": "x86_64", + "RepoName": "baseos" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "binutils_na_rpm": { - "Name": "binutils", + "binutils-devel": { + "Name": "binutils-devel", "SupportedOS": [ { "Name": "RHEL", @@ -2430,16 +2430,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "bzip2_na_rpm": { + "bzip2": { "Name": "bzip2", "SupportedOS": [ { @@ -2454,16 +2454,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", 
"RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "chrony_na_rpm": { + "chrony": { "Name": "chrony", "SupportedOS": [ { @@ -2478,16 +2478,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "cloud-init_na_rpm": { + "cloud-init": { "Name": "cloud-init", "SupportedOS": [ { @@ -2502,16 +2502,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "clustershell_na_rpm": { + "clustershell": { "Name": "clustershell", "SupportedOS": [ { @@ -2526,16 +2526,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "cmake_na_rpm": { + "cmake": { "Name": "cmake", "SupportedOS": [ { @@ -2550,16 +2550,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "coreutils_na_rpm": { + "coreutils": { "Name": "coreutils", "SupportedOS": [ { @@ -2574,16 +2574,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "cryptsetup_na_rpm": { + "cryptsetup": { "Name": "cryptsetup", "SupportedOS": [ { @@ -2598,16 +2598,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "curl_na_rpm": { + "curl": { "Name": "curl", "SupportedOS": [ { @@ 
-2622,16 +2622,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "device-mapper_na_rpm": { + "device-mapper": { "Name": "device-mapper", "SupportedOS": [ { @@ -2646,16 +2646,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "dmidecode_na_rpm": { + "dmidecode": { "Name": "dmidecode", "SupportedOS": [ { @@ -2670,16 +2670,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "docker-io-dellhpcomniaaisolution-image-build-aarch_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-aarch64": { "Name": "docker.io/dellhpcomniaaisolution/image-build-aarch64", "SupportedOS": [ { @@ -2694,7 +2694,7 @@ "Tag": "1.1", "Version": "1.1" }, - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-el10": { "Name": "docker.io/dellhpcomniaaisolution/image-build-el10", "SupportedOS": [ { @@ -2709,8 +2709,8 @@ "Tag": "1.1", "Version": "1.1" }, - "dracut-live_na_rpm": { - "Name": "dracut-live", + "dracut": { + "Name": "dracut", "SupportedOS": [ { "Name": "RHEL", @@ -2724,17 +2724,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" + "Architecture": "x86_64", + "RepoName": "baseos" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "dracut-network_na_rpm": { - "Name": "dracut-network", + "dracut-live": { + "Name": "dracut-live", "SupportedOS": [ { "Name": "RHEL", @@ -2748,17 +2748,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": 
"baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "dracut_na_rpm": { - "Name": "dracut", + "dracut-network": { + "Name": "dracut-network", "SupportedOS": [ { "Name": "RHEL", @@ -2772,16 +2772,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "emacs_na_rpm": { + "emacs": { "Name": "emacs", "SupportedOS": [ { @@ -2796,16 +2796,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "file_na_rpm": { + "file": { "Name": "file", "SupportedOS": [ { @@ -2820,16 +2820,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "findutils_na_rpm": { + "findutils": { "Name": "findutils", "SupportedOS": [ { @@ -2844,16 +2844,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "fping_na_rpm": { + "fping": { "Name": "fping", "SupportedOS": [ { @@ -2868,16 +2868,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "gawk_na_rpm": { + "gawk": { "Name": "gawk", "SupportedOS": [ { @@ -2892,17 +2892,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - 
"gcc-c++_na_rpm": { - "Name": "gcc-c++", + "gcc": { + "Name": "gcc", "SupportedOS": [ { "Name": "RHEL", @@ -2916,17 +2916,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gcc-gfortran_na_rpm": { - "Name": "gcc-gfortran", + "gcc-c++": { + "Name": "gcc-c++", "SupportedOS": [ { "Name": "RHEL", @@ -2940,17 +2940,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gcc_na_rpm": { - "Name": "gcc", + "gcc-gfortran": { + "Name": "gcc-gfortran", "SupportedOS": [ { "Name": "RHEL", @@ -2964,17 +2964,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gdb-gdbserver_na_rpm": { - "Name": "gdb-gdbserver", + "gdb": { + "Name": "gdb", "SupportedOS": [ { "Name": "RHEL", @@ -2988,17 +2988,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gdb_na_rpm": { - "Name": "gdb", + "gdb-gdbserver": { + "Name": "gdb-gdbserver", "SupportedOS": [ { "Name": "RHEL", @@ -3012,16 +3012,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gedit_na_rpm": { + "gedit": { "Name": "gedit", "SupportedOS": [ { @@ -3036,16 +3036,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" 
} ] }, - "glibc-langpack-en_na_rpm": { + "glibc-langpack-en": { "Name": "glibc-langpack-en", "SupportedOS": [ { @@ -3060,16 +3060,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "grep_na_rpm": { + "grep": { "Name": "grep", "SupportedOS": [ { @@ -3084,16 +3084,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "gzip_na_rpm": { + "gzip": { "Name": "gzip", "SupportedOS": [ { @@ -3108,17 +3108,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "hwloc-libs_na_rpm": { - "Name": "hwloc-libs", + "hwloc": { + "Name": "hwloc", "SupportedOS": [ { "Name": "RHEL", @@ -3132,17 +3132,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "hwloc_na_rpm": { - "Name": "hwloc", + "hwloc-libs": { + "Name": "hwloc-libs", "SupportedOS": [ { "Name": "RHEL", @@ -3156,16 +3156,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "iperf3_na_rpm": { + "iperf3": { "Name": "iperf3", "SupportedOS": [ { @@ -3180,16 +3180,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "ipmitool_na_rpm": { + "ipmitool": { "Name": "ipmitool", "SupportedOS": [ { @@ -3204,16 +3204,16 @@ "Type": 
"rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "iproute_na_rpm": { + "iproute": { "Name": "iproute", "SupportedOS": [ { @@ -3228,16 +3228,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "iputils_na_rpm": { + "iputils": { "Name": "iputils", "SupportedOS": [ { @@ -3252,16 +3252,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kbd_na_rpm": { + "kbd": { "Name": "kbd", "SupportedOS": [ { @@ -3276,17 +3276,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel-tools_na_rpm": { - "Name": "kernel-tools", + "kernel": { + "Name": "kernel", "SupportedOS": [ { "Name": "RHEL", @@ -3300,17 +3300,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel_na_rpm": { - "Name": "kernel", + "kernel-tools": { + "Name": "kernel-tools", "SupportedOS": [ { "Name": "RHEL", @@ -3324,16 +3324,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kexec-tools_na_rpm": { + "kexec-tools": { "Name": "kexec-tools", "SupportedOS": [ { @@ -3348,16 +3348,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - 
"Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "libcurl_na_rpm": { + "libcurl": { "Name": "libcurl", "SupportedOS": [ { @@ -3372,16 +3372,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "libtool_na_rpm": { + "libtool": { "Name": "libtool", "SupportedOS": [ { @@ -3396,17 +3396,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lldb-devel_na_rpm": { - "Name": "lldb-devel", + "lldb": { + "Name": "lldb", "SupportedOS": [ { "Name": "RHEL", @@ -3420,17 +3420,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lldb_na_rpm": { - "Name": "lldb", + "lldb-devel": { + "Name": "lldb-devel", "SupportedOS": [ { "Name": "RHEL", @@ -3444,16 +3444,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lshw_na_rpm": { + "lshw": { "Name": "lshw", "SupportedOS": [ { @@ -3468,16 +3468,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "lsof_na_rpm": { + "lsof": { "Name": "lsof", "SupportedOS": [ { @@ -3492,16 +3492,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ltrace_na_rpm": { + "ltrace": { "Name": "ltrace", 
"SupportedOS": [ { @@ -3516,16 +3516,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lvm2_na_rpm": { + "lvm2": { "Name": "lvm2", "SupportedOS": [ { @@ -3540,16 +3540,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "make_na_rpm": { + "make": { "Name": "make", "SupportedOS": [ { @@ -3564,16 +3564,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "man-db_na_rpm": { + "man-db": { "Name": "man-db", "SupportedOS": [ { @@ -3588,16 +3588,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "man-pages_na_rpm": { + "man-pages": { "Name": "man-pages", "SupportedOS": [ { @@ -3612,16 +3612,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "munge-devel_na_rpm": { + "munge-devel": { "Name": "munge-devel", "SupportedOS": [ { @@ -3636,16 +3636,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "codeready-builder" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "codeready-builder" } ] }, - "nfs-utils_na_rpm": { + "nfs-utils": { "Name": "nfs-utils", "SupportedOS": [ { @@ -3660,16 +3660,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + 
"Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nfs4-acl-tools_na_rpm": { + "nfs4-acl-tools": { "Name": "nfs4-acl-tools", "SupportedOS": [ { @@ -3684,16 +3684,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nm-connection-editor_na_rpm": { + "nm-connection-editor": { "Name": "nm-connection-editor", "SupportedOS": [ { @@ -3708,16 +3708,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "nss-pam-ldapd_na_rpm": { + "nss-pam-ldapd": { "Name": "nss-pam-ldapd", "SupportedOS": [ { @@ -3732,16 +3732,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "oddjob-mkhomedir_na_rpm": { + "oddjob-mkhomedir": { "Name": "oddjob-mkhomedir", "SupportedOS": [ { @@ -3756,16 +3756,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "openldap-clients_na_rpm": { + "openldap-clients": { "Name": "openldap-clients", "SupportedOS": [ { @@ -3780,16 +3780,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openmpi_5.0.8_tarball": { + "openmpi": { "Name": "openmpi", "SupportedOS": [ { @@ -3805,17 +3805,17 @@ "Version": "5.0.8", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-5.0.8.tar.gz" }, { - "Architecture": "x86_64", + 
"Architecture": "aarch64", "Uri": "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-5.0.8.tar.gz" } ] }, - "openssh-clients_na_rpm": { - "Name": "openssh-clients", + "openssh": { + "Name": "openssh", "SupportedOS": [ { "Name": "RHEL", @@ -3829,17 +3829,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh-server_na_rpm": { - "Name": "openssh-server", + "openssh-clients": { + "Name": "openssh-clients", "SupportedOS": [ { "Name": "RHEL", @@ -3853,17 +3853,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh_na_rpm": { - "Name": "openssh", + "openssh-server": { + "Name": "openssh-server", "SupportedOS": [ { "Name": "RHEL", @@ -3877,16 +3877,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssl-devel_na_rpm": { + "openssl-devel": { "Name": "openssl-devel", "SupportedOS": [ { @@ -3901,16 +3901,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "openssl-libs_na_rpm_1": { + "openssl-libs_1": { "Name": "openssl-libs", "SupportedOS": [ { @@ -3925,16 +3925,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ovis-ldms_na_rpm_1": { + "ovis-ldms_1": { "Name": "ovis-ldms", "SupportedOS": [ { @@ -3949,16 +3949,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "ldms" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "ldms" } ] }, - "papi-devel_na_rpm": { + "papi-devel": { "Name": "papi-devel", "SupportedOS": [ { @@ -3973,16 +3973,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi-libs_na_rpm": { + "papi-libs": { "Name": "papi-libs", "SupportedOS": [ { @@ -3997,16 +3997,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi_na_rpm": { + "papi_1": { "Name": "papi", "SupportedOS": [ { @@ -4021,16 +4021,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "pciutils_na_rpm": { + "pciutils": { "Name": "pciutils", "SupportedOS": [ { @@ -4045,16 +4045,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "perf_na_rpm": { + "perf": { "Name": "perf", "SupportedOS": [ { @@ -4069,16 +4069,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "pmix-devel_na_rpm": { + "pmix-devel": { "Name": "pmix-devel", "SupportedOS": [ { @@ -4093,16 +4093,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "python3-cython_na_rpm_1": { + 
"python3-cython_1": { "Name": "python3-cython", "SupportedOS": [ { @@ -4117,16 +4117,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "codeready-builder" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "codeready-builder" } ] }, - "python3-devel_na_rpm_1": { + "python3-devel_1": { "Name": "python3-devel", "SupportedOS": [ { @@ -4141,16 +4141,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "rsync_na_rpm": { + "rsync": { "Name": "rsync", "SupportedOS": [ { @@ -4165,16 +4165,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "rsyslog_na_rpm": { + "rsyslog": { "Name": "rsyslog", "SupportedOS": [ { @@ -4189,16 +4189,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "sed_na_rpm": { + "sed": { "Name": "sed", "SupportedOS": [ { @@ -4213,16 +4213,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "squashfs-tools_na_rpm": { + "squashfs-tools": { "Name": "squashfs-tools", "SupportedOS": [ { @@ -4237,16 +4237,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sssd_na_rpm": { + "sssd": { "Name": "sssd", "SupportedOS": [ { @@ -4261,16 +4261,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": 
"x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "strace_na_rpm": { + "strace": { "Name": "strace", "SupportedOS": [ { @@ -4285,16 +4285,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sudo_na_rpm": { + "sudo": { "Name": "sudo", "SupportedOS": [ { @@ -4309,17 +4309,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "systemd-udev_na_rpm": { - "Name": "systemd-udev", + "systemd": { + "Name": "systemd", "SupportedOS": [ { "Name": "RHEL", @@ -4333,17 +4333,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "systemd_na_rpm": { - "Name": "systemd", + "systemd-udev": { + "Name": "systemd-udev", "SupportedOS": [ { "Name": "RHEL", @@ -4357,16 +4357,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "tar_na_rpm": { + "tar": { "Name": "tar", "SupportedOS": [ { @@ -4381,16 +4381,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "tcpdump_na_rpm": { + "tcpdump": { "Name": "tcpdump", "SupportedOS": [ { @@ -4405,16 +4405,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - 
"traceroute_na_rpm": { + "traceroute": { "Name": "traceroute", "SupportedOS": [ { @@ -4429,16 +4429,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ucx_1.19.0_tarball": { + "ucx": { "Name": "ucx", "SupportedOS": [ { @@ -4454,16 +4454,16 @@ "Version": "1.19.0", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/openucx/ucx/releases/download/v1.19.0/ucx-1.19.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/openucx/ucx/releases/download/v1.19.0/ucx-1.19.0.tar.gz" } ] }, - "util-linux_na_rpm": { + "util-linux": { "Name": "util-linux", "SupportedOS": [ { @@ -4478,17 +4478,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "valgrind-devel_na_rpm": { - "Name": "valgrind-devel", + "valgrind": { + "Name": "valgrind", "SupportedOS": [ { "Name": "RHEL", @@ -4502,17 +4502,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "valgrind_na_rpm": { - "Name": "valgrind", + "valgrind-devel": { + "Name": "valgrind-devel", "SupportedOS": [ { "Name": "RHEL", @@ -4526,16 +4526,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "vim-enhanced_na_rpm_1": { + "vim-enhanced_1": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -4550,16 +4550,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - 
"Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "wget_na_rpm": { + "wget": { "Name": "wget", "SupportedOS": [ { @@ -4574,16 +4574,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "which_na_rpm": { + "which": { "Name": "which", "SupportedOS": [ { @@ -4598,16 +4598,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "zsh_na_rpm": { + "zsh": { "Name": "zsh", "SupportedOS": [ { @@ -4622,11 +4622,11 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] @@ -4634,7 +4634,7 @@ }, "Miscellaneous": [], "InfrastructurePackages": { - "csi-powerscale-v2-16-0_v2.16.0_git": { + "csi-powerscale": { "Name": "csi-powerscale-v2.16.0", "Type": "git", "Version": "v2.16.0", @@ -4653,7 +4653,7 @@ } ] }, - "docker-io-dellemc-csm-encryption_v0.6.0_image": { + "docker.io/dellemc/csm-encryption": { "Name": "docker.io/dellemc/csm-encryption", "Type": "image", "Version": "v0.6.0", @@ -4667,7 +4667,7 @@ ], "Tag": "v0.6.0" }, - "external-snapshotter-v8-4-0_v8.4.0_git": { + "external-snapshotter": { "Name": "external-snapshotter-v8.4.0", "Type": "git", "Version": "v8.4.0", @@ -4686,7 +4686,7 @@ } ] }, - "helm-charts-2-16-0_csi-isilon-2.16.0_git": { + "helm-charts_1": { "Name": "helm-charts-2.16.0", "Type": "git", "Version": "csi-isilon-2.16.0", @@ -4705,7 +4705,7 @@ } ] }, - "quay-io-dell-container-storage-modules-csi-isilon_v2.16.0_image": { + "quay.io/dell/container-storage-modules/csi-isilon": { "Name": "quay.io/dell/container-storage-modules/csi-isilon", "Type": "image", "Version": "v2.16.0", @@ 
-4719,7 +4719,7 @@ ], "Tag": "v2.16.0" }, - "quay-io-dell-container-storage-modules-csi-metadat_v1.13.0_image": { + "quay.io/dell/container-storage-modules/csi-metadata-retriever": { "Name": "quay.io/dell/container-storage-modules/csi-metadata-retriever", "Type": "image", "Version": "v1.13.0", @@ -4733,7 +4733,7 @@ ], "Tag": "v1.13.0" }, - "quay-io-dell-container-storage-modules-csm-authori_v2.4.0_image": { + "quay.io/dell/container-storage-modules/csm-authorization-sidecar": { "Name": "quay.io/dell/container-storage-modules/csm-authorization-sidecar", "Type": "image", "Version": "v2.4.0", @@ -4747,7 +4747,7 @@ ], "Tag": "v2.4.0" }, - "quay-io-dell-container-storage-modules-dell-csi-re_v1.14.0_image": { + "quay.io/dell/container-storage-modules/dell-csi-replicator": { "Name": "quay.io/dell/container-storage-modules/dell-csi-replicator", "Type": "image", "Version": "v1.14.0", @@ -4761,7 +4761,7 @@ ], "Tag": "v1.14.0" }, - "quay-io-dell-container-storage-modules-podmon_v1.15.0_image": { + "quay.io/dell/container-storage-modules/podmon": { "Name": "quay.io/dell/container-storage-modules/podmon", "Type": "image", "Version": "v1.15.0", @@ -4775,7 +4775,7 @@ ], "Tag": "v1.15.0" }, - "registry-k8s-io-sig-storage-csi-attacher_v4.10.0_image": { + "registry.k8s.io/sig-storage/csi-attacher": { "Name": "registry.k8s.io/sig-storage/csi-attacher", "Type": "image", "Version": "v4.10.0", @@ -4789,7 +4789,7 @@ ], "Tag": "v4.10.0" }, - "registry-k8s-io-sig-storage-csi-external-health-mo_v0.16.0_image": { + "registry.k8s.io/sig-storage/csi-external-health-monitor-controller": { "Name": "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", "Type": "image", "Version": "v0.16.0", @@ -4803,7 +4803,7 @@ ], "Tag": "v0.16.0" }, - "registry-k8s-io-sig-storage-csi-node-driver-regist_v2.15.0_image": { + "registry.k8s.io/sig-storage/csi-node-driver-registrar": { "Name": "registry.k8s.io/sig-storage/csi-node-driver-registrar", "Type": "image", "Version": "v2.15.0", @@ -4817,7 
+4817,7 @@ ], "Tag": "v2.15.0" }, - "registry-k8s-io-sig-storage-csi-provisioner_v6.1.0_image": { + "registry.k8s.io/sig-storage/csi-provisioner": { "Name": "registry.k8s.io/sig-storage/csi-provisioner", "Type": "image", "Version": "v6.1.0", @@ -4831,7 +4831,7 @@ ], "Tag": "v6.1.0" }, - "registry-k8s-io-sig-storage-csi-resizer_v2.0.0_image": { + "registry.k8s.io/sig-storage/csi-resizer": { "Name": "registry.k8s.io/sig-storage/csi-resizer", "Type": "image", "Version": "v2.0.0", @@ -4845,7 +4845,7 @@ ], "Tag": "v2.0.0" }, - "registry-k8s-io-sig-storage-csi-snapshotter_v8.4.0_image": { + "registry.k8s.io/sig-storage/csi-snapshotter": { "Name": "registry.k8s.io/sig-storage/csi-snapshotter", "Type": "image", "Version": "v8.4.0", @@ -4859,7 +4859,7 @@ ], "Tag": "v8.4.0" }, - "registry-k8s-io-sig-storage-snapshot-controller_v8.4.0_image": { + "registry.k8s.io/sig-storage/snapshot-controller": { "Name": "registry.k8s.io/sig-storage/snapshot-controller", "Type": "image", "Version": "v8.4.0", diff --git a/examples/catalog/catalog_rhel_aarch64_with_slurm_only.json b/examples/catalog/catalog_rhel_aarch64_with_slurm_only.json index 908f8a7728..9dc880c004 100644 --- a/examples/catalog/catalog_rhel_aarch64_with_slurm_only.json +++ b/examples/catalog/catalog_rhel_aarch64_with_slurm_only.json @@ -7,103 +7,103 @@ { "Name": "login_compiler_node_aarch64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + 
"lsscsi", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "login_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "slurm_control_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "mariadb-server_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-PyMySQL_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmctld_na_rpm", - "slurm-slurmdbd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "mariadb-server", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + 
"python3-PyMySQL", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-slurmctld", + "slurm-slurmdbd" ] }, { "Name": "slurm_node_aarch64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "kernel-devel_na_rpm", - "kernel-headers_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-pam_slurm_na_rpm", - "slurm-slurmd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "kernel-devel", + "kernel-headers", + "likwid", + "lsscsi", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-pam_slurm", + "slurm-slurmd" ] } ], @@ -112,98 +112,98 @@ "Name": "RHEL", "Version": "10.0", "osPackages": [ - "NetworkManager_na_rpm", - "authselect_na_rpm", - "autoconf_na_rpm", - "automake_na_rpm", - "bash-completion_na_rpm", - "bash_na_rpm", - "binutils-devel_na_rpm", - "binutils_na_rpm", - "bzip2_na_rpm", - "chrony_na_rpm", - "cloud-init_na_rpm", - "clustershell_na_rpm", - "cmake_na_rpm", - "coreutils_na_rpm", - "cryptsetup_na_rpm", - "curl_na_rpm", - "device-mapper_na_rpm", - "dmidecode_na_rpm", - "docker-io-dellhpcomniaaisolution-image-build-aarch_1.1_image", - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image", - "dracut-live_na_rpm", - "dracut-network_na_rpm", - "dracut_na_rpm", - "emacs_na_rpm", - "file_na_rpm", - "findutils_na_rpm", - "fping_na_rpm", - "gawk_na_rpm", - "gcc-c++_na_rpm", - "gcc-gfortran_na_rpm", - "gcc_na_rpm", - "gdb-gdbserver_na_rpm", - "gdb_na_rpm", - "gedit_na_rpm", - "glibc-langpack-en_na_rpm", 
- "grep_na_rpm", - "gzip_na_rpm", - "hwloc-libs_na_rpm", - "hwloc_na_rpm", - "iperf3_na_rpm", - "ipmitool_na_rpm", - "iproute_na_rpm", - "iputils_na_rpm", - "kbd_na_rpm", - "kernel-tools_na_rpm", - "kernel_na_rpm", - "kexec-tools_na_rpm", - "libcurl_na_rpm", - "libtool_na_rpm", - "lldb-devel_na_rpm", - "lldb_na_rpm", - "lshw_na_rpm", - "lsof_na_rpm", - "ltrace_na_rpm", - "lvm2_na_rpm", - "make_na_rpm", - "man-db_na_rpm", - "man-pages_na_rpm", - "nfs-utils_na_rpm", - "nfs4-acl-tools_na_rpm", - "nm-connection-editor_na_rpm", - "nss-pam-ldapd_na_rpm", - "oddjob-mkhomedir_na_rpm", - "openldap-clients_na_rpm", - "openssh-clients_na_rpm", - "openssh-server_na_rpm", - "openssh_na_rpm", - "openssl-devel_na_rpm", - "papi-devel_na_rpm", - "papi-libs_na_rpm", - "papi_na_rpm", - "pciutils_na_rpm", - "perf_na_rpm", - "rsync_na_rpm", - "rsyslog_na_rpm", - "sed_na_rpm", - "squashfs-tools_na_rpm", - "sssd_na_rpm", - "strace_na_rpm", - "sudo_na_rpm", - "systemd-udev_na_rpm", - "systemd_na_rpm", - "tar_na_rpm", - "tcpdump_na_rpm", - "traceroute_na_rpm", - "util-linux_na_rpm", - "valgrind-devel_na_rpm", - "valgrind_na_rpm", - "vim-enhanced_na_rpm", - "wget_na_rpm", - "which_na_rpm", - "zsh_na_rpm" + "NetworkManager", + "authselect", + "autoconf", + "automake", + "bash", + "bash-completion", + "binutils", + "binutils-devel", + "bzip2", + "chrony", + "cloud-init", + "clustershell", + "cmake", + "coreutils", + "cryptsetup", + "curl", + "device-mapper", + "dmidecode", + "docker.io/dellhpcomniaaisolution/image-build-aarch64", + "docker.io/dellhpcomniaaisolution/image-build-el10", + "dracut", + "dracut-live", + "dracut-network", + "emacs", + "file", + "findutils", + "fping", + "gawk", + "gcc", + "gcc-c++", + "gcc-gfortran", + "gdb", + "gdb-gdbserver", + "gedit", + "glibc-langpack-en", + "grep", + "gzip", + "hwloc", + "hwloc-libs", + "iperf3", + "ipmitool", + "iproute", + "iputils", + "kbd", + "kernel", + "kernel-tools", + "kexec-tools", + "libcurl", + "libtool", + "lldb", + "lldb-devel", + 
"lshw", + "lsof", + "ltrace", + "lvm2", + "make", + "man-db", + "man-pages", + "nfs-utils", + "nfs4-acl-tools", + "nm-connection-editor", + "nss-pam-ldapd", + "oddjob-mkhomedir", + "openldap-clients", + "openssh", + "openssh-clients", + "openssh-server", + "openssl-devel", + "papi-devel", + "papi-libs", + "papi_1", + "pciutils", + "perf", + "rsync", + "rsyslog", + "sed", + "squashfs-tools", + "sssd", + "strace", + "sudo", + "systemd", + "systemd-udev", + "tar", + "tcpdump", + "traceroute", + "util-linux", + "valgrind", + "valgrind-devel", + "vim-enhanced", + "wget", + "which", + "zsh" ] } ], @@ -211,7 +211,7 @@ "Drivers": [], "DriverPackages": {}, "FunctionalPackages": { - "apptainer_na_rpm": { + "apptainer": { "Name": "apptainer", "SupportedOS": [ { @@ -226,16 +226,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "device-mapper-multipath_na_rpm": { + "device-mapper-multipath": { "Name": "device-mapper-multipath", "SupportedOS": [ { @@ -250,16 +250,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "doca-ofed_na_rpm_repo": { + "doca-ofed": { "Name": "doca-ofed", "SupportedOS": [ { @@ -274,16 +274,16 @@ "Type": "rpm_repo", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "doca" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "doca" } ] }, - "firewalld_na_rpm": { + "firewalld": { "Name": "firewalld", "SupportedOS": [ { @@ -298,16 +298,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "geopm_na_tarball": { + "geopm": { "Name": "geopm", "SupportedOS": [ 
{ @@ -322,16 +322,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/geopm/geopm/archive/refs/tags/v3.1.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/geopm/geopm/archive/refs/tags/v3.1.0.tar.gz" } ] }, - "imb_na_tarball": { + "imb": { "Name": "imb", "SupportedOS": [ { @@ -346,16 +346,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.8.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.8.tar.gz" } ] }, - "iscsi-initiator-utils_na_rpm": { + "iscsi-initiator-utils": { "Name": "iscsi-initiator-utils", "SupportedOS": [ { @@ -370,16 +370,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel-devel_na_rpm": { + "kernel-devel": { "Name": "kernel-devel", "SupportedOS": [ { @@ -394,16 +394,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "kernel-headers_na_rpm": { + "kernel-headers": { "Name": "kernel-headers", "SupportedOS": [ { @@ -418,16 +418,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "likwid_na_tarball": { + "likwid": { "Name": "likwid", "SupportedOS": [ { @@ -442,16 +442,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/RRZE-HPC/likwid/archive/refs/tags/v5.4.1.tar.gz" }, { - 
"Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/RRZE-HPC/likwid/archive/refs/tags/v5.4.1.tar.gz" } ] }, - "lsscsi_na_rpm": { + "lsscsi": { "Name": "lsscsi", "SupportedOS": [ { @@ -466,16 +466,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "mariadb-server_na_rpm": { + "mariadb-server": { "Name": "mariadb-server", "SupportedOS": [ { @@ -490,16 +490,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "msr-safe_na_tarball": { + "msr-safe": { "Name": "msr-safe", "SupportedOS": [ { @@ -518,7 +518,7 @@ } ] }, - "munge_na_rpm": { + "munge": { "Name": "munge", "SupportedOS": [ { @@ -533,16 +533,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "nvcr-io-nvidia-hpc-benchmarks_25.09_image": { + "nvcr.io/nvidia/hpc-benchmarks": { "Name": "nvcr.io/nvidia/hpc-benchmarks", "SupportedOS": [ { @@ -558,7 +558,7 @@ "Tag": "25.09", "Version": "25.09" }, - "osu-micro-benchmarks_na_tarball": { + "osu-micro-benchmarks": { "Name": "osu-micro-benchmarks", "SupportedOS": [ { @@ -573,16 +573,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz" } ] }, - "papi_na_tarball": { + "papi": { "Name": "papi", "SupportedOS": [ { @@ -597,16 +597,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": 
"aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/icl-utk-edu/papi/releases/download/papi-7-2-0-t/papi-7.2.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/icl-utk-edu/papi/releases/download/papi-7-2-0-t/papi-7.2.0.tar.gz" } ] }, - "pmix_na_rpm": { + "pmix": { "Name": "pmix", "SupportedOS": [ { @@ -621,16 +621,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "python3-PyMySQL_na_rpm": { + "python3-PyMySQL": { "Name": "python3-PyMySQL", "SupportedOS": [ { @@ -645,16 +645,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "python3-firewall_na_rpm": { + "python3-firewall": { "Name": "python3-firewall", "SupportedOS": [ { @@ -669,16 +669,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sg3_utils_na_rpm": { + "sg3_utils": { "Name": "sg3_utils", "SupportedOS": [ { @@ -693,16 +693,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sionlib_na_tarball": { + "sionlib": { "Name": "sionlib", "SupportedOS": [ { @@ -717,17 +717,17 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7" } ] }, - "slurm-pam_slurm_na_rpm": { - "Name": 
"slurm-pam_slurm", + "slurm": { + "Name": "slurm", "SupportedOS": [ { "Name": "RHEL", @@ -741,17 +741,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmctld_na_rpm": { - "Name": "slurm-slurmctld", + "slurm-pam_slurm": { + "Name": "slurm-pam_slurm", "SupportedOS": [ { "Name": "RHEL", @@ -765,17 +765,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmd_na_rpm": { - "Name": "slurm-slurmd", + "slurm-slurmctld": { + "Name": "slurm-slurmctld", "SupportedOS": [ { "Name": "RHEL", @@ -789,17 +789,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmdbd_na_rpm": { - "Name": "slurm-slurmdbd", + "slurm-slurmd": { + "Name": "slurm-slurmd", "SupportedOS": [ { "Name": "RHEL", @@ -813,17 +813,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm_na_rpm": { - "Name": "slurm", + "slurm-slurmdbd": { + "Name": "slurm-slurmdbd", "SupportedOS": [ { "Name": "RHEL", @@ -837,18 +837,18 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] } }, "OSPackages": { - "NetworkManager_na_rpm": { + "NetworkManager": { "Name": "NetworkManager", "SupportedOS": [ { @@ -863,16 +863,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "authselect_na_rpm": { + "authselect": { "Name": "authselect", "SupportedOS": [ { @@ -887,16 +887,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "autoconf_na_rpm": { + "autoconf": { "Name": "autoconf", "SupportedOS": [ { @@ -911,16 +911,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "automake_na_rpm": { + "automake": { "Name": "automake", "SupportedOS": [ { @@ -935,17 +935,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "bash-completion_na_rpm": { - "Name": "bash-completion", + "bash": { + "Name": "bash", "SupportedOS": [ { "Name": "RHEL", @@ -959,17 +959,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "bash_na_rpm": { - "Name": "bash", + "bash-completion": { + "Name": "bash-completion", "SupportedOS": [ { "Name": "RHEL", @@ -983,17 +983,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "binutils-devel_na_rpm": { - "Name": "binutils-devel", + "binutils": { + "Name": "binutils", "SupportedOS": [ { "Name": "RHEL", @@ -1007,17 +1007,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" + "Architecture": 
"x86_64", + "RepoName": "baseos" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "binutils_na_rpm": { - "Name": "binutils", + "binutils-devel": { + "Name": "binutils-devel", "SupportedOS": [ { "Name": "RHEL", @@ -1031,16 +1031,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "bzip2_na_rpm": { + "bzip2": { "Name": "bzip2", "SupportedOS": [ { @@ -1055,16 +1055,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "chrony_na_rpm": { + "chrony": { "Name": "chrony", "SupportedOS": [ { @@ -1079,16 +1079,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "cloud-init_na_rpm": { + "cloud-init": { "Name": "cloud-init", "SupportedOS": [ { @@ -1103,16 +1103,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "clustershell_na_rpm": { + "clustershell": { "Name": "clustershell", "SupportedOS": [ { @@ -1127,16 +1127,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "cmake_na_rpm": { + "cmake": { "Name": "cmake", "SupportedOS": [ { @@ -1151,16 +1151,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": 
"x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "coreutils_na_rpm": { + "coreutils": { "Name": "coreutils", "SupportedOS": [ { @@ -1175,16 +1175,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "cryptsetup_na_rpm": { + "cryptsetup": { "Name": "cryptsetup", "SupportedOS": [ { @@ -1199,16 +1199,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "curl_na_rpm": { + "curl": { "Name": "curl", "SupportedOS": [ { @@ -1223,16 +1223,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "device-mapper_na_rpm": { + "device-mapper": { "Name": "device-mapper", "SupportedOS": [ { @@ -1247,16 +1247,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "dmidecode_na_rpm": { + "dmidecode": { "Name": "dmidecode", "SupportedOS": [ { @@ -1271,16 +1271,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "docker-io-dellhpcomniaaisolution-image-build-aarch_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-aarch64": { "Name": "docker.io/dellhpcomniaaisolution/image-build-aarch64", "SupportedOS": [ { @@ -1295,7 +1295,7 @@ "Tag": "1.1", "Version": "1.1" }, - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-el10": { "Name": 
"docker.io/dellhpcomniaaisolution/image-build-el10", "SupportedOS": [ { @@ -1310,8 +1310,8 @@ "Tag": "1.1", "Version": "1.1" }, - "dracut-live_na_rpm": { - "Name": "dracut-live", + "dracut": { + "Name": "dracut", "SupportedOS": [ { "Name": "RHEL", @@ -1325,17 +1325,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" + "Architecture": "x86_64", + "RepoName": "baseos" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "dracut-network_na_rpm": { - "Name": "dracut-network", + "dracut-live": { + "Name": "dracut-live", "SupportedOS": [ { "Name": "RHEL", @@ -1349,17 +1349,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "dracut_na_rpm": { - "Name": "dracut", + "dracut-network": { + "Name": "dracut-network", "SupportedOS": [ { "Name": "RHEL", @@ -1373,16 +1373,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "emacs_na_rpm": { + "emacs": { "Name": "emacs", "SupportedOS": [ { @@ -1397,16 +1397,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "file_na_rpm": { + "file": { "Name": "file", "SupportedOS": [ { @@ -1421,16 +1421,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "findutils_na_rpm": { + "findutils": { "Name": "findutils", "SupportedOS": [ { @@ -1445,16 +1445,16 @@ "Type": "rpm", 
"Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "fping_na_rpm": { + "fping": { "Name": "fping", "SupportedOS": [ { @@ -1469,16 +1469,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "gawk_na_rpm": { + "gawk": { "Name": "gawk", "SupportedOS": [ { @@ -1493,17 +1493,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "gcc-c++_na_rpm": { - "Name": "gcc-c++", + "gcc": { + "Name": "gcc", "SupportedOS": [ { "Name": "RHEL", @@ -1517,17 +1517,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gcc-gfortran_na_rpm": { - "Name": "gcc-gfortran", + "gcc-c++": { + "Name": "gcc-c++", "SupportedOS": [ { "Name": "RHEL", @@ -1541,17 +1541,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gcc_na_rpm": { - "Name": "gcc", + "gcc-gfortran": { + "Name": "gcc-gfortran", "SupportedOS": [ { "Name": "RHEL", @@ -1565,17 +1565,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gdb-gdbserver_na_rpm": { - "Name": "gdb-gdbserver", + "gdb": { + "Name": "gdb", "SupportedOS": [ { "Name": "RHEL", @@ -1589,17 +1589,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gdb_na_rpm": { - "Name": "gdb", + "gdb-gdbserver": { + "Name": "gdb-gdbserver", "SupportedOS": [ { "Name": "RHEL", @@ -1613,16 +1613,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gedit_na_rpm": { + "gedit": { "Name": "gedit", "SupportedOS": [ { @@ -1637,16 +1637,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "glibc-langpack-en_na_rpm": { + "glibc-langpack-en": { "Name": "glibc-langpack-en", "SupportedOS": [ { @@ -1661,16 +1661,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "grep_na_rpm": { + "grep": { "Name": "grep", "SupportedOS": [ { @@ -1685,16 +1685,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "gzip_na_rpm": { + "gzip": { "Name": "gzip", "SupportedOS": [ { @@ -1709,17 +1709,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "hwloc-libs_na_rpm": { - "Name": "hwloc-libs", + "hwloc": { + "Name": "hwloc", "SupportedOS": [ { "Name": "RHEL", @@ -1733,17 +1733,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", 
"RepoName": "baseos" } ] }, - "hwloc_na_rpm": { - "Name": "hwloc", + "hwloc-libs": { + "Name": "hwloc-libs", "SupportedOS": [ { "Name": "RHEL", @@ -1757,16 +1757,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "iperf3_na_rpm": { + "iperf3": { "Name": "iperf3", "SupportedOS": [ { @@ -1781,16 +1781,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "ipmitool_na_rpm": { + "ipmitool": { "Name": "ipmitool", "SupportedOS": [ { @@ -1805,16 +1805,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "iproute_na_rpm": { + "iproute": { "Name": "iproute", "SupportedOS": [ { @@ -1829,16 +1829,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "iputils_na_rpm": { + "iputils": { "Name": "iputils", "SupportedOS": [ { @@ -1853,16 +1853,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kbd_na_rpm": { + "kbd": { "Name": "kbd", "SupportedOS": [ { @@ -1877,17 +1877,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel-tools_na_rpm": { - "Name": "kernel-tools", + "kernel": { + "Name": "kernel", "SupportedOS": [ { "Name": "RHEL", @@ -1901,17 +1901,17 @@ 
"Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel_na_rpm": { - "Name": "kernel", + "kernel-tools": { + "Name": "kernel-tools", "SupportedOS": [ { "Name": "RHEL", @@ -1925,16 +1925,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kexec-tools_na_rpm": { + "kexec-tools": { "Name": "kexec-tools", "SupportedOS": [ { @@ -1949,16 +1949,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "libcurl_na_rpm": { + "libcurl": { "Name": "libcurl", "SupportedOS": [ { @@ -1973,16 +1973,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "libtool_na_rpm": { + "libtool": { "Name": "libtool", "SupportedOS": [ { @@ -1997,17 +1997,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lldb-devel_na_rpm": { - "Name": "lldb-devel", + "lldb": { + "Name": "lldb", "SupportedOS": [ { "Name": "RHEL", @@ -2021,17 +2021,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lldb_na_rpm": { - "Name": "lldb", + "lldb-devel": { + "Name": "lldb-devel", "SupportedOS": [ { "Name": "RHEL", @@ -2045,16 +2045,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lshw_na_rpm": { + "lshw": { "Name": "lshw", "SupportedOS": [ { @@ -2069,16 +2069,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "lsof_na_rpm": { + "lsof": { "Name": "lsof", "SupportedOS": [ { @@ -2093,16 +2093,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ltrace_na_rpm": { + "ltrace": { "Name": "ltrace", "SupportedOS": [ { @@ -2117,16 +2117,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lvm2_na_rpm": { + "lvm2": { "Name": "lvm2", "SupportedOS": [ { @@ -2141,16 +2141,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "make_na_rpm": { + "make": { "Name": "make", "SupportedOS": [ { @@ -2165,16 +2165,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "man-db_na_rpm": { + "man-db": { "Name": "man-db", "SupportedOS": [ { @@ -2189,16 +2189,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "man-pages_na_rpm": { + "man-pages": { "Name": "man-pages", "SupportedOS": [ { @@ -2213,16 +2213,16 @@ 
"Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nfs-utils_na_rpm": { + "nfs-utils": { "Name": "nfs-utils", "SupportedOS": [ { @@ -2237,16 +2237,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nfs4-acl-tools_na_rpm": { + "nfs4-acl-tools": { "Name": "nfs4-acl-tools", "SupportedOS": [ { @@ -2261,16 +2261,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nm-connection-editor_na_rpm": { + "nm-connection-editor": { "Name": "nm-connection-editor", "SupportedOS": [ { @@ -2285,16 +2285,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "nss-pam-ldapd_na_rpm": { + "nss-pam-ldapd": { "Name": "nss-pam-ldapd", "SupportedOS": [ { @@ -2309,16 +2309,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "oddjob-mkhomedir_na_rpm": { + "oddjob-mkhomedir": { "Name": "oddjob-mkhomedir", "SupportedOS": [ { @@ -2333,16 +2333,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "openldap-clients_na_rpm": { + "openldap-clients": { "Name": "openldap-clients", "SupportedOS": [ { @@ -2357,17 +2357,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh-clients_na_rpm": { - "Name": "openssh-clients", + "openssh": { + "Name": "openssh", "SupportedOS": [ { "Name": "RHEL", @@ -2381,17 +2381,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh-server_na_rpm": { - "Name": "openssh-server", + "openssh-clients": { + "Name": "openssh-clients", "SupportedOS": [ { "Name": "RHEL", @@ -2405,17 +2405,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh_na_rpm": { - "Name": "openssh", + "openssh-server": { + "Name": "openssh-server", "SupportedOS": [ { "Name": "RHEL", @@ -2429,16 +2429,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssl-devel_na_rpm": { + "openssl-devel": { "Name": "openssl-devel", "SupportedOS": [ { @@ -2453,16 +2453,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi-devel_na_rpm": { + "papi-devel": { "Name": "papi-devel", "SupportedOS": [ { @@ -2477,16 +2477,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi-libs_na_rpm": { + "papi-libs": { "Name": "papi-libs", "SupportedOS": [ { @@ -2501,16 +2501,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": 
"aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi_na_rpm": { + "papi_1": { "Name": "papi", "SupportedOS": [ { @@ -2525,16 +2525,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "pciutils_na_rpm": { + "pciutils": { "Name": "pciutils", "SupportedOS": [ { @@ -2549,16 +2549,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "perf_na_rpm": { + "perf": { "Name": "perf", "SupportedOS": [ { @@ -2573,16 +2573,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "rsync_na_rpm": { + "rsync": { "Name": "rsync", "SupportedOS": [ { @@ -2597,16 +2597,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "rsyslog_na_rpm": { + "rsyslog": { "Name": "rsyslog", "SupportedOS": [ { @@ -2621,16 +2621,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "sed_na_rpm": { + "sed": { "Name": "sed", "SupportedOS": [ { @@ -2645,16 +2645,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "squashfs-tools_na_rpm": { + "squashfs-tools": { "Name": 
"squashfs-tools", "SupportedOS": [ { @@ -2669,16 +2669,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sssd_na_rpm": { + "sssd": { "Name": "sssd", "SupportedOS": [ { @@ -2693,16 +2693,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "strace_na_rpm": { + "strace": { "Name": "strace", "SupportedOS": [ { @@ -2717,16 +2717,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sudo_na_rpm": { + "sudo": { "Name": "sudo", "SupportedOS": [ { @@ -2741,17 +2741,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "systemd-udev_na_rpm": { - "Name": "systemd-udev", + "systemd": { + "Name": "systemd", "SupportedOS": [ { "Name": "RHEL", @@ -2765,17 +2765,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "systemd_na_rpm": { - "Name": "systemd", + "systemd-udev": { + "Name": "systemd-udev", "SupportedOS": [ { "Name": "RHEL", @@ -2789,16 +2789,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "tar_na_rpm": { + "tar": { "Name": "tar", "SupportedOS": [ { @@ -2813,16 +2813,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", 
"RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "tcpdump_na_rpm": { + "tcpdump": { "Name": "tcpdump", "SupportedOS": [ { @@ -2837,16 +2837,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "traceroute_na_rpm": { + "traceroute": { "Name": "traceroute", "SupportedOS": [ { @@ -2861,16 +2861,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "util-linux_na_rpm": { + "util-linux": { "Name": "util-linux", "SupportedOS": [ { @@ -2885,17 +2885,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "valgrind-devel_na_rpm": { - "Name": "valgrind-devel", + "valgrind": { + "Name": "valgrind", "SupportedOS": [ { "Name": "RHEL", @@ -2909,17 +2909,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "valgrind_na_rpm": { - "Name": "valgrind", + "valgrind-devel": { + "Name": "valgrind-devel", "SupportedOS": [ { "Name": "RHEL", @@ -2933,16 +2933,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "vim-enhanced_na_rpm": { + "vim-enhanced": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -2957,16 +2957,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", 
+ "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "wget_na_rpm": { + "wget": { "Name": "wget", "SupportedOS": [ { @@ -2981,16 +2981,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "which_na_rpm": { + "which": { "Name": "which", "SupportedOS": [ { @@ -3005,16 +3005,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "zsh_na_rpm": { + "zsh": { "Name": "zsh", "SupportedOS": [ { @@ -3029,11 +3029,11 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] diff --git a/examples/catalog/catalog_rhel_with_nfs_provisioner.json b/examples/catalog/catalog_rhel_with_nfs_provisioner.json index d6c5e467f0..556c97d65e 100644 --- a/examples/catalog/catalog_rhel_with_nfs_provisioner.json +++ b/examples/catalog/catalog_rhel_with_nfs_provisioner.json @@ -7,248 +7,248 @@ { "Name": "login_compiler_node_aarch64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + 
"papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "login_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "service_kube_control_plane_x86_64", "FunctionalPackages": [ - "PyMySQL_1.1.2_pip_module", - "apptainer_na_rpm", - "calico-v3-31-4_na_manifest", - "cert-manager-v1-10-0_na_tarball", - "cffi_1.17.1_pip_module", - "container-selinux_na_rpm", - "cri-o_1.35.1_rpm", - "cryptography_45.0.7_pip_module", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "docker-io-alpine-kubectl_1.35.1_image", - "docker-io-calico-cni_v3.31.4_image", - "docker-io-calico-kube-controllers_v3.31.4_image", - "docker-io-calico-node_v3.31.4_image", - "docker-io-curlimages-curl_8.17.0_image", - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image", - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image", - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image", - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image", - "docker-io-library-busybox_1.36_image", - "docker-io-library-mysql_9.3.0_image", - "docker-io-library-python_3.12-slim_image", - 
"docker-io-nginxinc-nginx-unprivileged_1.29_image", - "docker-io-rmohr-activemq_5.15.9_image", - "docker-io-timberio-vector_0.54.0-debian_image", - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image", - "docker-io-victoriametrics-operator_v0.68.3_image", - "docker-io-victoriametrics-victoria-logs_v1.50.0_image", - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image", - "docker-io-victoriametrics-vlagent_v1.50.0_image", - "docker-io-victoriametrics-vmagent_v1.128.0_image", - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image", - "firewalld_na_rpm", - "fuse-overlayfs_na_rpm", - "ghcr-io-kube-vip-kube-vip_v0.8.9_image", - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image", - "git_na_rpm", - "helm-charts_container-storage-modules-1.9.2_git", - "helm-v3-20-1-amd64_na_tarball", - "iscsi-initiator-utils_na_rpm", - "karavi-observability_v1.12.0_git", - "kubeadm_1.35.1_rpm", - "kubectl_1.35.1_rpm", - "kubelet_1.35.1_rpm", - "kubernetes_33.1.0_pip_module", - "lsscsi_na_rpm", - "metallb-native-v0-15-3_na_manifest", - "nfs-subdir-external-provisioner-4-0-18_na_tarball", - "omsdk_1.2.518_pip_module", - "podman_na_rpm", - "prettytable_3.14.0_pip_module", - "prometheus_client_0.20.0_pip_module", - "python3-firewall_na_rpm", - "python3_3.12.9_rpm", - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image", - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image", - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image", - "quay-io-jetstack-cert-manager-controller_v1.10.0_image", - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image", - "quay-io-metallb-speaker_v0.15.3_image", - "quay-io-strimzi-kafka-bridge_0.33.1_image", - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image", - "quay-io-strimzi-operator_0.48.0_image", - "registry-k8s-io-coredns-coredns_v1.13.1_image", - 
"registry-k8s-io-etcd_3.6.6-0_image", - "registry-k8s-io-kube-apiserver_v1.35.1_image", - "registry-k8s-io-kube-controller-manager_v1.35.1_image", - "registry-k8s-io-kube-proxy_v1.35.1_image", - "registry-k8s-io-kube-scheduler_v1.35.1_image", - "registry-k8s-io-pause_3.10.1_image", - "sg3_utils_na_rpm", - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball", - "victoria-metrics-operator-0-59-3_na_tarball", - "vim-enhanced_na_rpm" + "PyMySQL", + "apptainer", + "calico", + "cert-manager", + "cffi", + "container-selinux", + "cri-o", + "cryptography", + "device-mapper-multipath", + "doca-ofed", + "docker.io/alpine/kubectl", + "docker.io/calico/cni", + "docker.io/calico/kube-controllers", + "docker.io/calico/node", + "docker.io/curlimages/curl", + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", + "docker.io/dellhpcomniaaisolution/kafkapump", + "docker.io/dellhpcomniaaisolution/ubuntu-ldms", + "docker.io/dellhpcomniaaisolution/victoriapump", + "docker.io/library/busybox", + "docker.io/library/mysql", + "docker.io/library/python", + "docker.io/nginxinc/nginx-unprivileged", + "docker.io/rmohr/activemq", + "docker.io/timberio/vector", + "docker.io/victoriametrics/operator", + "docker.io/victoriametrics/operator_1", + "docker.io/victoriametrics/victoria-logs", + "docker.io/victoriametrics/victoria-metrics", + "docker.io/victoriametrics/vlagent", + "docker.io/victoriametrics/vmagent", + "docker.io/victoriametrics/vminsert", + "docker.io/victoriametrics/vmselect", + "docker.io/victoriametrics/vmstorage", + "firewalld", + "fuse-overlayfs", + "ghcr.io/kube-vip/kube-vip", + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", + "git", + "helm-amd64", + "helm-charts", + "iscsi-initiator-utils", + "karavi-observability", + "kubeadm", + "kubectl", + "kubelet", + "kubernetes", + "lsscsi", + "metallb-native", + "nfs-subdir-external-provisioner", + "omsdk", + "podman", + "prettytable", + "prometheus_client", + "python3", + 
"python3-firewall", + "quay.io/dell/container-storage-modules/csm-metrics-powerscale", + "quay.io/jetstack/cert-manager-acmesolver", + "quay.io/jetstack/cert-manager-cainjector", + "quay.io/jetstack/cert-manager-controller", + "quay.io/jetstack/cert-manager-webhook", + "quay.io/metallb/speaker", + "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/operator", + "registry.k8s.io/coredns/coredns", + "registry.k8s.io/etcd", + "registry.k8s.io/kube-apiserver", + "registry.k8s.io/kube-controller-manager", + "registry.k8s.io/kube-proxy", + "registry.k8s.io/kube-scheduler", + "registry.k8s.io/pause", + "sg3_utils", + "strimzi-kafka-operator-helm-3-chart", + "victoria-metrics-operator", + "vim-enhanced" ] }, { "Name": "service_kube_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "cert-manager-v1-10-0_na_tarball", - "cffi_1.17.1_pip_module", - "container-selinux_na_rpm", - "cri-o_1.35.1_rpm", - "cryptography_45.0.7_pip_module", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "docker-io-alpine-kubectl_1.35.1_image", - "docker-io-curlimages-curl_8.17.0_image", - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image", - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image", - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image", - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image", - "docker-io-library-busybox_1.36_image", - "docker-io-library-mysql_9.3.0_image", - "docker-io-library-python_3.12-slim_image", - "docker-io-nginxinc-nginx-unprivileged_1.29_image", - "docker-io-rmohr-activemq_5.15.9_image", - "docker-io-timberio-vector_0.54.0-debian_image", - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image", - "docker-io-victoriametrics-operator_v0.68.3_image", - "docker-io-victoriametrics-victoria-logs_v1.50.0_image", - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image", - "docker-io-victoriametrics-vlagent_v1.50.0_image", - "docker-io-victoriametrics-vmagent_v1.128.0_image", - 
"docker-io-victoriametrics-vminsert_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image", - "firewalld_na_rpm", - "fuse-overlayfs_na_rpm", - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image", - "git_na_rpm", - "helm-charts_container-storage-modules-1.9.2_git", - "iscsi-initiator-utils_na_rpm", - "karavi-observability_v1.12.0_git", - "kubeadm_1.35.1_rpm", - "kubelet_1.35.1_rpm", - "kubernetes_33.1.0_pip_module", - "lsscsi_na_rpm", - "omsdk_1.2.518_pip_module", - "podman_na_rpm", - "prometheus_client_0.20.0_pip_module", - "python3-firewall_na_rpm", - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image", - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image", - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image", - "quay-io-jetstack-cert-manager-controller_v1.10.0_image", - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image", - "quay-io-metallb-controller_v0.15.3_image", - "quay-io-metallb-speaker_v0.15.3_image", - "quay-io-strimzi-kafka-bridge_0.33.1_image", - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image", - "quay-io-strimzi-operator_0.48.0_image", - "registry-k8s-io-sig-storage-nfs-subdir-external-pr_v4.0.2_image", - "sg3_utils_na_rpm", - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball", - "victoria-metrics-operator-0-59-3_na_tarball", - "vim-enhanced_na_rpm" + "apptainer", + "cert-manager", + "cffi", + "container-selinux", + "cri-o", + "cryptography", + "device-mapper-multipath", + "doca-ofed", + "docker.io/alpine/kubectl", + "docker.io/curlimages/curl", + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", + "docker.io/dellhpcomniaaisolution/kafkapump", + "docker.io/dellhpcomniaaisolution/ubuntu-ldms", + "docker.io/dellhpcomniaaisolution/victoriapump", + "docker.io/library/busybox", + "docker.io/library/mysql", + "docker.io/library/python", + "docker.io/nginxinc/nginx-unprivileged", + 
"docker.io/rmohr/activemq", + "docker.io/timberio/vector", + "docker.io/victoriametrics/operator", + "docker.io/victoriametrics/operator_1", + "docker.io/victoriametrics/victoria-logs", + "docker.io/victoriametrics/victoria-metrics", + "docker.io/victoriametrics/vlagent", + "docker.io/victoriametrics/vmagent", + "docker.io/victoriametrics/vminsert", + "docker.io/victoriametrics/vmselect", + "docker.io/victoriametrics/vmstorage", + "firewalld", + "fuse-overlayfs", + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", + "git", + "helm-charts", + "iscsi-initiator-utils", + "karavi-observability", + "kubeadm", + "kubelet", + "kubernetes", + "lsscsi", + "omsdk", + "podman", + "prometheus_client", + "python3-firewall", + "quay.io/dell/container-storage-modules/csm-metrics-powerscale", + "quay.io/jetstack/cert-manager-acmesolver", + "quay.io/jetstack/cert-manager-cainjector", + "quay.io/jetstack/cert-manager-controller", + "quay.io/jetstack/cert-manager-webhook", + "quay.io/metallb/controller", + "quay.io/metallb/speaker", + "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/operator", + "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner", + "sg3_utils", + "strimzi-kafka-operator-helm-3-chart", + "victoria-metrics-operator", + "vim-enhanced" ] }, { "Name": "slurm_control_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "mariadb-server_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-PyMySQL_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmctld_na_rpm", - "slurm-slurmdbd_na_rpm" + "apptainer", + 
"device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "mariadb-server", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-PyMySQL", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-slurmctld", + "slurm-slurmdbd" ] }, { "Name": "slurm_node_aarch64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "kernel-devel_na_rpm", - "kernel-headers_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-pam_slurm_na_rpm", - "slurm-slurmd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "kernel-devel", + "kernel-headers", + "likwid", + "lsscsi", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-pam_slurm", + "slurm-slurmd" ] } ], @@ -257,106 +257,106 @@ "Name": "RHEL", "Version": "10.0", "osPackages": [ - "NetworkManager_na_rpm", - "authselect_na_rpm", - "autoconf_na_rpm", - "automake_na_rpm", - "bash-completion_na_rpm", - "bash_na_rpm", - "binutils-devel_na_rpm", - "binutils_na_rpm", - "bzip2_na_rpm", - "chrony_na_rpm", - "cloud-init_na_rpm", - "clustershell_na_rpm", - "cmake_na_rpm", - "coreutils_na_rpm", - "cryptsetup_na_rpm", - "curl_na_rpm", - "device-mapper_na_rpm", - "dmidecode_na_rpm", - "docker-io-dellhpcomniaaisolution-image-build-aarch_1.1_image", - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image", - "dracut-live_na_rpm", - "dracut-network_na_rpm", - 
"dracut_na_rpm", - "emacs_na_rpm", - "file_na_rpm", - "findutils_na_rpm", - "fping_na_rpm", - "gawk_na_rpm", - "gcc-c++_na_rpm", - "gcc-gfortran_na_rpm", - "gcc_na_rpm", - "gdb-gdbserver_na_rpm", - "gdb_na_rpm", - "gedit_na_rpm", - "glibc-langpack-en_na_rpm", - "grep_na_rpm", - "gzip_na_rpm", - "hwloc-libs_na_rpm", - "hwloc_na_rpm", - "iperf3_na_rpm", - "ipmitool_na_rpm", - "iproute_na_rpm", - "iputils_na_rpm", - "kbd_na_rpm", - "kernel-tools_na_rpm", - "kernel_na_rpm", - "kexec-tools_na_rpm", - "libcurl_na_rpm", - "libtool_na_rpm", - "lldb-devel_na_rpm", - "lldb_na_rpm", - "lshw_na_rpm", - "lsof_na_rpm", - "ltrace_na_rpm", - "lvm2_na_rpm", - "make_na_rpm", - "man-db_na_rpm", - "man-pages_na_rpm", - "munge-devel_na_rpm", - "nfs-utils_na_rpm", - "nfs4-acl-tools_na_rpm", - "nm-connection-editor_na_rpm", - "nss-pam-ldapd_na_rpm", - "oddjob-mkhomedir_na_rpm", - "openldap-clients_na_rpm", - "openmpi_5.0.8_tarball", - "openssh-clients_na_rpm", - "openssh-server_na_rpm", - "openssh_na_rpm", - "openssl-devel_na_rpm", - "openssl-libs_na_rpm", - "ovis-ldms_na_rpm", - "papi-devel_na_rpm", - "papi-libs_na_rpm", - "papi_na_rpm", - "pciutils_na_rpm", - "perf_na_rpm", - "pmix-devel_na_rpm", - "python3-cython_na_rpm", - "python3-devel_na_rpm", - "rsync_na_rpm", - "rsyslog_na_rpm", - "sed_na_rpm", - "squashfs-tools_na_rpm", - "sssd_na_rpm", - "strace_na_rpm", - "sudo_na_rpm", - "systemd-udev_na_rpm", - "systemd_na_rpm", - "tar_na_rpm", - "tcpdump_na_rpm", - "traceroute_na_rpm", - "ucx_1.19.0_tarball", - "util-linux_na_rpm", - "valgrind-devel_na_rpm", - "valgrind_na_rpm", - "vim-enhanced_na_rpm_1", - "wget_na_rpm", - "which_na_rpm", - "zsh_na_rpm" + "NetworkManager", + "authselect", + "autoconf", + "automake", + "bash", + "bash-completion", + "binutils", + "binutils-devel", + "bzip2", + "chrony", + "cloud-init", + "clustershell", + "cmake", + "coreutils", + "cryptsetup", + "curl", + "device-mapper", + "dmidecode", + "docker.io/dellhpcomniaaisolution/image-build-aarch64", + 
"docker.io/dellhpcomniaaisolution/image-build-el10", + "dracut", + "dracut-live", + "dracut-network", + "emacs", + "file", + "findutils", + "fping", + "gawk", + "gcc", + "gcc-c++", + "gcc-gfortran", + "gdb", + "gdb-gdbserver", + "gedit", + "glibc-langpack-en", + "grep", + "gzip", + "hwloc", + "hwloc-libs", + "iperf3", + "ipmitool", + "iproute", + "iputils", + "kbd", + "kernel", + "kernel-tools", + "kexec-tools", + "libcurl", + "libtool", + "lldb", + "lldb-devel", + "lshw", + "lsof", + "ltrace", + "lvm2", + "make", + "man-db", + "man-pages", + "munge-devel", + "nfs-utils", + "nfs4-acl-tools", + "nm-connection-editor", + "nss-pam-ldapd", + "oddjob-mkhomedir", + "openldap-clients", + "openmpi", + "openssh", + "openssh-clients", + "openssh-server", + "openssl-devel", + "openssl-libs", + "ovis-ldms", + "papi-devel", + "papi-libs", + "papi_1", + "pciutils", + "perf", + "pmix-devel", + "python3-cython", + "python3-devel", + "rsync", + "rsyslog", + "sed", + "squashfs-tools", + "sssd", + "strace", + "sudo", + "systemd", + "systemd-udev", + "tar", + "tcpdump", + "traceroute", + "ucx", + "util-linux", + "valgrind", + "valgrind-devel", + "vim-enhanced_1", + "wget", + "which", + "zsh" ] } ], @@ -364,7 +364,7 @@ "Drivers": [], "DriverPackages": {}, "FunctionalPackages": { - "PyMySQL_1.1.2_pip_module": { + "PyMySQL": { "Name": "PyMySQL==1.1.2", "SupportedOS": [ { @@ -377,7 +377,7 @@ ], "Type": "pip_module" }, - "apptainer_na_rpm": { + "apptainer": { "Name": "apptainer", "SupportedOS": [ { @@ -392,16 +392,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "calico-v3-31-4_na_manifest": { + "calico": { "Name": "calico-v3.31.4", "SupportedOS": [ { @@ -420,7 +420,7 @@ } ] }, - "cert-manager-v1-10-0_na_tarball": { + "cert-manager": { "Name": "cert-manager-v1.10.0", "SupportedOS": [ { @@ -439,7 +439,7 @@ } ] }, - "cffi_1.17.1_pip_module": 
{ + "cffi": { "Name": "cffi==1.17.1", "SupportedOS": [ { @@ -452,7 +452,7 @@ ], "Type": "pip_module" }, - "container-selinux_na_rpm": { + "container-selinux": { "Name": "container-selinux", "SupportedOS": [ { @@ -471,7 +471,7 @@ } ] }, - "cri-o_1.35.1_rpm": { + "cri-o": { "Name": "cri-o-1.35.1", "SupportedOS": [ { @@ -490,7 +490,7 @@ } ] }, - "cryptography_45.0.7_pip_module": { + "cryptography": { "Name": "cryptography==45.0.7", "SupportedOS": [ { @@ -503,7 +503,7 @@ ], "Type": "pip_module" }, - "device-mapper-multipath_na_rpm": { + "device-mapper-multipath": { "Name": "device-mapper-multipath", "SupportedOS": [ { @@ -518,16 +518,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "doca-ofed_na_rpm_repo": { + "doca-ofed": { "Name": "doca-ofed", "SupportedOS": [ { @@ -542,16 +542,16 @@ "Type": "rpm_repo", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "doca" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "doca" } ] }, - "docker-io-alpine-kubectl_1.35.1_image": { + "docker.io/alpine/kubectl": { "Name": "docker.io/alpine/kubectl", "SupportedOS": [ { @@ -566,7 +566,7 @@ "Tag": "1.35.1", "Version": "1.35.1" }, - "docker-io-calico-cni_v3.31.4_image": { + "docker.io/calico/cni": { "Name": "docker.io/calico/cni", "SupportedOS": [ { @@ -581,7 +581,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-calico-kube-controllers_v3.31.4_image": { + "docker.io/calico/kube-controllers": { "Name": "docker.io/calico/kube-controllers", "SupportedOS": [ { @@ -596,7 +596,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-calico-node_v3.31.4_image": { + "docker.io/calico/node": { "Name": "docker.io/calico/node", "SupportedOS": [ { @@ -611,7 +611,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-curlimages-curl_8.17.0_image": { + 
"docker.io/curlimages/curl": { "Name": "docker.io/curlimages/curl", "SupportedOS": [ { @@ -626,7 +626,7 @@ "Tag": "8.17.0", "Version": "8.17.0" }, - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image": { + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver": { "Name": "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", "SupportedOS": [ { @@ -641,7 +641,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image": { + "docker.io/dellhpcomniaaisolution/kafkapump": { "Name": "docker.io/dellhpcomniaaisolution/kafkapump", "SupportedOS": [ { @@ -656,7 +656,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image": { + "docker.io/dellhpcomniaaisolution/ubuntu-ldms": { "Name": "docker.io/dellhpcomniaaisolution/ubuntu-ldms", "SupportedOS": [ { @@ -671,7 +671,7 @@ "Tag": "1.1", "Version": "1.1" }, - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image": { + "docker.io/dellhpcomniaaisolution/victoriapump": { "Name": "docker.io/dellhpcomniaaisolution/victoriapump", "SupportedOS": [ { @@ -686,7 +686,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-library-busybox_1.36_image": { + "docker.io/library/busybox": { "Name": "docker.io/library/busybox", "SupportedOS": [ { @@ -701,7 +701,7 @@ "Tag": "1.36", "Version": "1.36" }, - "docker-io-library-mysql_9.3.0_image": { + "docker.io/library/mysql": { "Name": "docker.io/library/mysql", "SupportedOS": [ { @@ -716,7 +716,7 @@ "Tag": "9.3.0", "Version": "9.3.0" }, - "docker-io-library-python_3.12-slim_image": { + "docker.io/library/python": { "Name": "docker.io/library/python", "SupportedOS": [ { @@ -731,7 +731,7 @@ "Tag": "3.12-slim", "Version": "3.12-slim" }, - "docker-io-nginxinc-nginx-unprivileged_1.29_image": { + "docker.io/nginxinc/nginx-unprivileged": { "Name": "docker.io/nginxinc/nginx-unprivileged", "SupportedOS": [ { @@ -746,7 +746,7 @@ "Tag": "1.29", "Version": "1.29" }, - "docker-io-rmohr-activemq_5.15.9_image": { + 
"docker.io/rmohr/activemq": { "Name": "docker.io/rmohr/activemq", "SupportedOS": [ { @@ -761,7 +761,7 @@ "Tag": "5.15.9", "Version": "5.15.9" }, - "docker-io-timberio-vector_0.54.0-debian_image": { + "docker.io/timberio/vector": { "Name": "docker.io/timberio/vector", "SupportedOS": [ { @@ -776,7 +776,7 @@ "Tag": "0.54.0-debian", "Version": "0.54.0-debian" }, - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image": { + "docker.io/victoriametrics/operator": { "Name": "docker.io/victoriametrics/operator", "SupportedOS": [ { @@ -788,10 +788,10 @@ "x86_64" ], "Type": "image", - "Tag": "config-reloader-v0.68.3", - "Version": "config-reloader-v0.68.3" + "Tag": "v0.68.3", + "Version": "v0.68.3" }, - "docker-io-victoriametrics-operator_v0.68.3_image": { + "docker.io/victoriametrics/operator_1": { "Name": "docker.io/victoriametrics/operator", "SupportedOS": [ { @@ -803,10 +803,10 @@ "x86_64" ], "Type": "image", - "Tag": "v0.68.3", - "Version": "v0.68.3" + "Tag": "config-reloader-v0.68.3", + "Version": "config-reloader-v0.68.3" }, - "docker-io-victoriametrics-victoria-logs_v1.50.0_image": { + "docker.io/victoriametrics/victoria-logs": { "Name": "docker.io/victoriametrics/victoria-logs", "SupportedOS": [ { @@ -821,7 +821,7 @@ "Tag": "v1.50.0", "Version": "v1.50.0" }, - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image": { + "docker.io/victoriametrics/victoria-metrics": { "Name": "docker.io/victoriametrics/victoria-metrics", "SupportedOS": [ { @@ -836,7 +836,7 @@ "Tag": "v1.128.0", "Version": "v1.128.0" }, - "docker-io-victoriametrics-vlagent_v1.50.0_image": { + "docker.io/victoriametrics/vlagent": { "Name": "docker.io/victoriametrics/vlagent", "SupportedOS": [ { @@ -851,7 +851,7 @@ "Tag": "v1.50.0", "Version": "v1.50.0" }, - "docker-io-victoriametrics-vmagent_v1.128.0_image": { + "docker.io/victoriametrics/vmagent": { "Name": "docker.io/victoriametrics/vmagent", "SupportedOS": [ { @@ -866,7 +866,7 @@ "Tag": "v1.128.0", "Version": "v1.128.0" }, - 
"docker-io-victoriametrics-vminsert_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vminsert": { "Name": "docker.io/victoriametrics/vminsert", "SupportedOS": [ { @@ -881,7 +881,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vmselect": { "Name": "docker.io/victoriametrics/vmselect", "SupportedOS": [ { @@ -896,7 +896,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vmstorage": { "Name": "docker.io/victoriametrics/vmstorage", "SupportedOS": [ { @@ -911,7 +911,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "firewalld_na_rpm": { + "firewalld": { "Name": "firewalld", "SupportedOS": [ { @@ -926,16 +926,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "fuse-overlayfs_na_rpm": { + "fuse-overlayfs": { "Name": "fuse-overlayfs", "SupportedOS": [ { @@ -954,7 +954,7 @@ } ] }, - "geopm_na_tarball": { + "geopm": { "Name": "geopm", "SupportedOS": [ { @@ -969,16 +969,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/geopm/geopm/archive/refs/tags/v3.1.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/geopm/geopm/archive/refs/tags/v3.1.0.tar.gz" } ] }, - "ghcr-io-kube-vip-kube-vip_v0.8.9_image": { + "ghcr.io/kube-vip/kube-vip": { "Name": "ghcr.io/kube-vip/kube-vip", "SupportedOS": [ { @@ -993,7 +993,7 @@ "Tag": "v0.8.9", "Version": "v0.8.9" }, - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image": { + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector": { "Name": 
"ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", "SupportedOS": [ { @@ -1008,7 +1008,7 @@ "Tag": "0.143.1", "Version": "0.143.1" }, - "git_na_rpm": { + "git": { "Name": "git", "SupportedOS": [ { @@ -1027,8 +1027,8 @@ } ] }, - "helm-charts_container-storage-modules-1.9.2_git": { - "Name": "helm-charts", + "helm-amd64": { + "Name": "helm-v3.20.1-amd64", "SupportedOS": [ { "Name": "RHEL", @@ -1038,17 +1038,16 @@ "Architecture": [ "x86_64" ], - "Type": "git", - "Version": "container-storage-modules-1.9.2", + "Type": "tarball", "Sources": [ { "Architecture": "x86_64", - "Uri": "https://github.com/dell/helm-charts.git" + "Uri": "https://get.helm.sh/helm-v3.20.1-linux-amd64.tar.gz" } ] }, - "helm-v3-20-1-amd64_na_tarball": { - "Name": "helm-v3.20.1-amd64", + "helm-charts": { + "Name": "helm-charts", "SupportedOS": [ { "Name": "RHEL", @@ -1058,15 +1057,16 @@ "Architecture": [ "x86_64" ], - "Type": "tarball", + "Type": "git", + "Version": "container-storage-modules-1.9.2", "Sources": [ { "Architecture": "x86_64", - "Uri": "https://get.helm.sh/helm-v3.20.1-linux-amd64.tar.gz" + "Uri": "https://github.com/dell/helm-charts.git" } ] }, - "imb_na_tarball": { + "imb": { "Name": "imb", "SupportedOS": [ { @@ -1081,16 +1081,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.8.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/intel/mpi-benchmarks/archive/refs/tags/IMB-v2021.8.tar.gz" } ] }, - "iscsi-initiator-utils_na_rpm": { + "iscsi-initiator-utils": { "Name": "iscsi-initiator-utils", "SupportedOS": [ { @@ -1105,16 +1105,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "karavi-observability_v1.12.0_git": { + 
"karavi-observability": { "Name": "karavi-observability", "SupportedOS": [ { @@ -1134,7 +1134,7 @@ } ] }, - "kernel-devel_na_rpm": { + "kernel-devel": { "Name": "kernel-devel", "SupportedOS": [ { @@ -1149,16 +1149,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "kernel-headers_na_rpm": { + "kernel-headers": { "Name": "kernel-headers", "SupportedOS": [ { @@ -1173,16 +1173,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "kubeadm_1.35.1_rpm": { + "kubeadm": { "Name": "kubeadm-1.35.1", "SupportedOS": [ { @@ -1201,7 +1201,7 @@ } ] }, - "kubectl_1.35.1_rpm": { + "kubectl": { "Name": "kubectl-1.35.1", "SupportedOS": [ { @@ -1220,7 +1220,7 @@ } ] }, - "kubelet_1.35.1_rpm": { + "kubelet": { "Name": "kubelet-1.35.1", "SupportedOS": [ { @@ -1239,7 +1239,7 @@ } ] }, - "kubernetes_33.1.0_pip_module": { + "kubernetes": { "Name": "kubernetes==33.1.0", "SupportedOS": [ { @@ -1252,7 +1252,7 @@ ], "Type": "pip_module" }, - "likwid_na_tarball": { + "likwid": { "Name": "likwid", "SupportedOS": [ { @@ -1267,16 +1267,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/RRZE-HPC/likwid/archive/refs/tags/v5.4.1.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/RRZE-HPC/likwid/archive/refs/tags/v5.4.1.tar.gz" } ] }, - "lsscsi_na_rpm": { + "lsscsi": { "Name": "lsscsi", "SupportedOS": [ { @@ -1291,16 +1291,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "mariadb-server_na_rpm": { + "mariadb-server": 
{ "Name": "mariadb-server", "SupportedOS": [ { @@ -1315,16 +1315,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "metallb-native-v0-15-3_na_manifest": { + "metallb-native": { "Name": "metallb-native-v0.15.3", "SupportedOS": [ { @@ -1343,7 +1343,7 @@ } ] }, - "msr-safe_na_tarball": { + "msr-safe": { "Name": "msr-safe", "SupportedOS": [ { @@ -1362,7 +1362,7 @@ } ] }, - "munge_na_rpm": { + "munge": { "Name": "munge", "SupportedOS": [ { @@ -1377,16 +1377,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "nfs-subdir-external-provisioner-4-0-18_na_tarball": { + "nfs-subdir-external-provisioner": { "Name": "nfs-subdir-external-provisioner-4.0.18", "SupportedOS": [ { @@ -1405,7 +1405,7 @@ } ] }, - "nvcr-io-nvidia-hpc-benchmarks_25.09_image": { + "nvcr.io/nvidia/hpc-benchmarks": { "Name": "nvcr.io/nvidia/hpc-benchmarks", "SupportedOS": [ { @@ -1421,7 +1421,7 @@ "Tag": "25.09", "Version": "25.09" }, - "omsdk_1.2.518_pip_module": { + "omsdk": { "Name": "omsdk==1.2.518", "SupportedOS": [ { @@ -1434,7 +1434,7 @@ ], "Type": "pip_module" }, - "osu-micro-benchmarks_na_tarball": { + "osu-micro-benchmarks": { "Name": "osu-micro-benchmarks", "SupportedOS": [ { @@ -1449,16 +1449,16 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.5.tar.gz" } ] }, - "papi_na_tarball": { + "papi": { "Name": "papi", "SupportedOS": [ { @@ -1473,16 +1473,16 @@ "Type": "tarball", "Sources": [ { - 
"Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/icl-utk-edu/papi/releases/download/papi-7-2-0-t/papi-7.2.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/icl-utk-edu/papi/releases/download/papi-7-2-0-t/papi-7.2.0.tar.gz" } ] }, - "pmix_na_rpm": { + "pmix": { "Name": "pmix", "SupportedOS": [ { @@ -1497,16 +1497,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "podman_na_rpm": { + "podman": { "Name": "podman", "SupportedOS": [ { @@ -1525,7 +1525,7 @@ } ] }, - "prettytable_3.14.0_pip_module": { + "prettytable": { "Name": "prettytable==3.14.0", "SupportedOS": [ { @@ -1538,7 +1538,7 @@ ], "Type": "pip_module" }, - "prometheus_client_0.20.0_pip_module": { + "prometheus_client": { "Name": "prometheus_client==0.20.0", "SupportedOS": [ { @@ -1551,8 +1551,8 @@ ], "Type": "pip_module" }, - "python3-PyMySQL_na_rpm": { - "Name": "python3-PyMySQL", + "python3": { + "Name": "python3-3.12.9", "SupportedOS": [ { "Name": "RHEL", @@ -1560,23 +1560,18 @@ } ], "Architecture": [ - "aarch64", "x86_64" ], "Type": "rpm", "Sources": [ - { - "Architecture": "aarch64", - "RepoName": "appstream" - }, { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "python3-firewall_na_rpm": { - "Name": "python3-firewall", + "python3-PyMySQL": { + "Name": "python3-PyMySQL", "SupportedOS": [ { "Name": "RHEL", @@ -1590,17 +1585,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "python3_3.12.9_rpm": { - "Name": "python3-3.12.9", + "python3-firewall": { + "Name": "python3-firewall", "SupportedOS": [ { "Name": "RHEL", @@ -1608,6 
+1603,7 @@ } ], "Architecture": [ + "aarch64", "x86_64" ], "Type": "rpm", @@ -1615,10 +1611,14 @@ { "Architecture": "x86_64", "RepoName": "baseos" + }, + { + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image": { + "quay.io/dell/container-storage-modules/csm-metrics-powerscale": { "Name": "quay.io/dell/container-storage-modules/csm-metrics-powerscale", "SupportedOS": [ { @@ -1633,7 +1633,7 @@ "Tag": "v1.11.0", "Version": "v1.11.0" }, - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image": { + "quay.io/jetstack/cert-manager-acmesolver": { "Name": "quay.io/jetstack/cert-manager-acmesolver", "SupportedOS": [ { @@ -1648,7 +1648,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image": { + "quay.io/jetstack/cert-manager-cainjector": { "Name": "quay.io/jetstack/cert-manager-cainjector", "SupportedOS": [ { @@ -1663,7 +1663,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-controller_v1.10.0_image": { + "quay.io/jetstack/cert-manager-controller": { "Name": "quay.io/jetstack/cert-manager-controller", "SupportedOS": [ { @@ -1678,7 +1678,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image": { + "quay.io/jetstack/cert-manager-webhook": { "Name": "quay.io/jetstack/cert-manager-webhook", "SupportedOS": [ { @@ -1693,7 +1693,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-metallb-controller_v0.15.3_image": { + "quay.io/metallb/controller": { "Name": "quay.io/metallb/controller", "SupportedOS": [ { @@ -1708,7 +1708,7 @@ "Tag": "v0.15.3", "Version": "v0.15.3" }, - "quay-io-metallb-speaker_v0.15.3_image": { + "quay.io/metallb/speaker": { "Name": "quay.io/metallb/speaker", "SupportedOS": [ { @@ -1723,8 +1723,8 @@ "Tag": "v0.15.3", "Version": "v0.15.3" }, - "quay-io-strimzi-kafka-bridge_0.33.1_image": { - "Name": "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/kafka": { 
+ "Name": "quay.io/strimzi/kafka", "SupportedOS": [ { "Name": "RHEL", @@ -1735,11 +1735,11 @@ "x86_64" ], "Type": "image", - "Tag": "0.33.1", - "Version": "0.33.1" + "Tag": "0.48.0-kafka-4.1.0", + "Version": "0.48.0-kafka-4.1.0" }, - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image": { - "Name": "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge": { + "Name": "quay.io/strimzi/kafka-bridge", "SupportedOS": [ { "Name": "RHEL", @@ -1750,10 +1750,10 @@ "x86_64" ], "Type": "image", - "Tag": "0.48.0-kafka-4.1.0", - "Version": "0.48.0-kafka-4.1.0" + "Tag": "0.33.1", + "Version": "0.33.1" }, - "quay-io-strimzi-operator_0.48.0_image": { + "quay.io/strimzi/operator": { "Name": "quay.io/strimzi/operator", "SupportedOS": [ { @@ -1768,7 +1768,7 @@ "Tag": "0.48.0", "Version": "0.48.0" }, - "registry-k8s-io-coredns-coredns_v1.13.1_image": { + "registry.k8s.io/coredns/coredns": { "Name": "registry.k8s.io/coredns/coredns", "SupportedOS": [ { @@ -1783,7 +1783,7 @@ "Tag": "v1.13.1", "Version": "v1.13.1" }, - "registry-k8s-io-etcd_3.6.6-0_image": { + "registry.k8s.io/etcd": { "Name": "registry.k8s.io/etcd", "SupportedOS": [ { @@ -1798,7 +1798,7 @@ "Tag": "3.6.6-0", "Version": "3.6.6-0" }, - "registry-k8s-io-kube-apiserver_v1.35.1_image": { + "registry.k8s.io/kube-apiserver": { "Name": "registry.k8s.io/kube-apiserver", "SupportedOS": [ { @@ -1813,7 +1813,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-controller-manager_v1.35.1_image": { + "registry.k8s.io/kube-controller-manager": { "Name": "registry.k8s.io/kube-controller-manager", "SupportedOS": [ { @@ -1828,7 +1828,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-proxy_v1.35.1_image": { + "registry.k8s.io/kube-proxy": { "Name": "registry.k8s.io/kube-proxy", "SupportedOS": [ { @@ -1843,7 +1843,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-scheduler_v1.35.1_image": { + "registry.k8s.io/kube-scheduler": { "Name": "registry.k8s.io/kube-scheduler", "SupportedOS": 
[ { @@ -1858,7 +1858,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-pause_3.10.1_image": { + "registry.k8s.io/pause": { "Name": "registry.k8s.io/pause", "SupportedOS": [ { @@ -1873,7 +1873,7 @@ "Tag": "3.10.1", "Version": "3.10.1" }, - "registry-k8s-io-sig-storage-nfs-subdir-external-pr_v4.0.2_image": { + "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner": { "Name": "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner", "SupportedOS": [ { @@ -1888,7 +1888,7 @@ "Tag": "v4.0.2", "Version": "v4.0.2" }, - "sg3_utils_na_rpm": { + "sg3_utils": { "Name": "sg3_utils", "SupportedOS": [ { @@ -1903,16 +1903,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sionlib_na_tarball": { + "sionlib": { "Name": "sionlib", "SupportedOS": [ { @@ -1927,17 +1927,17 @@ "Type": "tarball", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7" } ] }, - "slurm-pam_slurm_na_rpm": { - "Name": "slurm-pam_slurm", + "slurm": { + "Name": "slurm", "SupportedOS": [ { "Name": "RHEL", @@ -1951,17 +1951,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmctld_na_rpm": { - "Name": "slurm-slurmctld", + "slurm-pam_slurm": { + "Name": "slurm-pam_slurm", "SupportedOS": [ { "Name": "RHEL", @@ -1975,17 +1975,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, 
- "slurm-slurmd_na_rpm": { - "Name": "slurm-slurmd", + "slurm-slurmctld": { + "Name": "slurm-slurmctld", "SupportedOS": [ { "Name": "RHEL", @@ -1999,17 +1999,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm-slurmdbd_na_rpm": { - "Name": "slurm-slurmdbd", + "slurm-slurmd": { + "Name": "slurm-slurmd", "SupportedOS": [ { "Name": "RHEL", @@ -2023,17 +2023,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "slurm_na_rpm": { - "Name": "slurm", + "slurm-slurmdbd": { + "Name": "slurm-slurmdbd", "SupportedOS": [ { "Name": "RHEL", @@ -2047,16 +2047,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "slurm_custom" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "slurm_custom" } ] }, - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball": { + "strimzi-kafka-operator-helm-3-chart": { "Name": "strimzi-kafka-operator-helm-3-chart-0.48.0", "SupportedOS": [ { @@ -2075,7 +2075,7 @@ } ] }, - "victoria-metrics-operator-0-59-3_na_tarball": { + "victoria-metrics-operator": { "Name": "victoria-metrics-operator-0.59.3", "SupportedOS": [ { @@ -2094,7 +2094,7 @@ } ] }, - "vim-enhanced_na_rpm": { + "vim-enhanced": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -2109,18 +2109,18 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] } }, "OSPackages": { - "NetworkManager_na_rpm": { + "NetworkManager": { "Name": "NetworkManager", "SupportedOS": [ { @@ -2135,16 +2135,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": 
"aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "authselect_na_rpm": { + "authselect": { "Name": "authselect", "SupportedOS": [ { @@ -2159,16 +2159,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "autoconf_na_rpm": { + "autoconf": { "Name": "autoconf", "SupportedOS": [ { @@ -2183,16 +2183,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "automake_na_rpm": { + "automake": { "Name": "automake", "SupportedOS": [ { @@ -2207,17 +2207,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "bash-completion_na_rpm": { - "Name": "bash-completion", + "bash": { + "Name": "bash", "SupportedOS": [ { "Name": "RHEL", @@ -2231,17 +2231,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "bash_na_rpm": { - "Name": "bash", + "bash-completion": { + "Name": "bash-completion", "SupportedOS": [ { "Name": "RHEL", @@ -2255,17 +2255,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "binutils-devel_na_rpm": { - "Name": "binutils-devel", + "binutils": { + "Name": "binutils", "SupportedOS": [ { "Name": "RHEL", @@ -2279,17 +2279,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" 
+ "Architecture": "x86_64", + "RepoName": "baseos" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "binutils_na_rpm": { - "Name": "binutils", + "binutils-devel": { + "Name": "binutils-devel", "SupportedOS": [ { "Name": "RHEL", @@ -2303,16 +2303,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "bzip2_na_rpm": { + "bzip2": { "Name": "bzip2", "SupportedOS": [ { @@ -2327,16 +2327,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "chrony_na_rpm": { + "chrony": { "Name": "chrony", "SupportedOS": [ { @@ -2351,16 +2351,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "cloud-init_na_rpm": { + "cloud-init": { "Name": "cloud-init", "SupportedOS": [ { @@ -2375,16 +2375,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "clustershell_na_rpm": { + "clustershell": { "Name": "clustershell", "SupportedOS": [ { @@ -2399,16 +2399,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "cmake_na_rpm": { + "cmake": { "Name": "cmake", "SupportedOS": [ { @@ -2423,16 +2423,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - 
"Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "coreutils_na_rpm": { + "coreutils": { "Name": "coreutils", "SupportedOS": [ { @@ -2447,16 +2447,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "cryptsetup_na_rpm": { + "cryptsetup": { "Name": "cryptsetup", "SupportedOS": [ { @@ -2471,16 +2471,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "curl_na_rpm": { + "curl": { "Name": "curl", "SupportedOS": [ { @@ -2495,16 +2495,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "device-mapper_na_rpm": { + "device-mapper": { "Name": "device-mapper", "SupportedOS": [ { @@ -2519,16 +2519,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "dmidecode_na_rpm": { + "dmidecode": { "Name": "dmidecode", "SupportedOS": [ { @@ -2543,16 +2543,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "docker-io-dellhpcomniaaisolution-image-build-aarch_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-aarch64": { "Name": "docker.io/dellhpcomniaaisolution/image-build-aarch64", "SupportedOS": [ { @@ -2567,7 +2567,7 @@ "Tag": "1.1", "Version": "1.1" }, - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-el10": { "Name": 
"docker.io/dellhpcomniaaisolution/image-build-el10", "SupportedOS": [ { @@ -2582,8 +2582,8 @@ "Tag": "1.1", "Version": "1.1" }, - "dracut-live_na_rpm": { - "Name": "dracut-live", + "dracut": { + "Name": "dracut", "SupportedOS": [ { "Name": "RHEL", @@ -2597,17 +2597,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "appstream" + "Architecture": "x86_64", + "RepoName": "baseos" }, { - "Architecture": "x86_64", - "RepoName": "appstream" + "Architecture": "aarch64", + "RepoName": "baseos" } ] }, - "dracut-network_na_rpm": { - "Name": "dracut-network", + "dracut-live": { + "Name": "dracut-live", "SupportedOS": [ { "Name": "RHEL", @@ -2621,17 +2621,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", - "RepoName": "baseos" + "Architecture": "x86_64", + "RepoName": "appstream" }, { - "Architecture": "x86_64", - "RepoName": "baseos" + "Architecture": "aarch64", + "RepoName": "appstream" } ] }, - "dracut_na_rpm": { - "Name": "dracut", + "dracut-network": { + "Name": "dracut-network", "SupportedOS": [ { "Name": "RHEL", @@ -2645,16 +2645,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "emacs_na_rpm": { + "emacs": { "Name": "emacs", "SupportedOS": [ { @@ -2669,16 +2669,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "file_na_rpm": { + "file": { "Name": "file", "SupportedOS": [ { @@ -2693,16 +2693,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "findutils_na_rpm": { + "findutils": { "Name": "findutils", "SupportedOS": [ { @@ -2717,16 +2717,16 @@ "Type": "rpm", 
"Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "fping_na_rpm": { + "fping": { "Name": "fping", "SupportedOS": [ { @@ -2741,16 +2741,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "gawk_na_rpm": { + "gawk": { "Name": "gawk", "SupportedOS": [ { @@ -2765,17 +2765,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "gcc-c++_na_rpm": { - "Name": "gcc-c++", + "gcc": { + "Name": "gcc", "SupportedOS": [ { "Name": "RHEL", @@ -2789,17 +2789,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gcc-gfortran_na_rpm": { - "Name": "gcc-gfortran", + "gcc-c++": { + "Name": "gcc-c++", "SupportedOS": [ { "Name": "RHEL", @@ -2813,17 +2813,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gcc_na_rpm": { - "Name": "gcc", + "gcc-gfortran": { + "Name": "gcc-gfortran", "SupportedOS": [ { "Name": "RHEL", @@ -2837,17 +2837,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gdb-gdbserver_na_rpm": { - "Name": "gdb-gdbserver", + "gdb": { + "Name": "gdb", "SupportedOS": [ { "Name": "RHEL", @@ -2861,17 +2861,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gdb_na_rpm": { - "Name": "gdb", + "gdb-gdbserver": { + "Name": "gdb-gdbserver", "SupportedOS": [ { "Name": "RHEL", @@ -2885,16 +2885,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "gedit_na_rpm": { + "gedit": { "Name": "gedit", "SupportedOS": [ { @@ -2909,16 +2909,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "glibc-langpack-en_na_rpm": { + "glibc-langpack-en": { "Name": "glibc-langpack-en", "SupportedOS": [ { @@ -2933,16 +2933,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "grep_na_rpm": { + "grep": { "Name": "grep", "SupportedOS": [ { @@ -2957,16 +2957,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "gzip_na_rpm": { + "gzip": { "Name": "gzip", "SupportedOS": [ { @@ -2981,17 +2981,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "hwloc-libs_na_rpm": { - "Name": "hwloc-libs", + "hwloc": { + "Name": "hwloc", "SupportedOS": [ { "Name": "RHEL", @@ -3005,17 +3005,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", 
"RepoName": "baseos" } ] }, - "hwloc_na_rpm": { - "Name": "hwloc", + "hwloc-libs": { + "Name": "hwloc-libs", "SupportedOS": [ { "Name": "RHEL", @@ -3029,16 +3029,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "iperf3_na_rpm": { + "iperf3": { "Name": "iperf3", "SupportedOS": [ { @@ -3053,16 +3053,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "ipmitool_na_rpm": { + "ipmitool": { "Name": "ipmitool", "SupportedOS": [ { @@ -3077,16 +3077,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "iproute_na_rpm": { + "iproute": { "Name": "iproute", "SupportedOS": [ { @@ -3101,16 +3101,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "iputils_na_rpm": { + "iputils": { "Name": "iputils", "SupportedOS": [ { @@ -3125,16 +3125,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kbd_na_rpm": { + "kbd": { "Name": "kbd", "SupportedOS": [ { @@ -3149,17 +3149,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel-tools_na_rpm": { - "Name": "kernel-tools", + "kernel": { + "Name": "kernel", "SupportedOS": [ { "Name": "RHEL", @@ -3173,17 +3173,17 @@ 
"Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kernel_na_rpm": { - "Name": "kernel", + "kernel-tools": { + "Name": "kernel-tools", "SupportedOS": [ { "Name": "RHEL", @@ -3197,16 +3197,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "kexec-tools_na_rpm": { + "kexec-tools": { "Name": "kexec-tools", "SupportedOS": [ { @@ -3221,16 +3221,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "libcurl_na_rpm": { + "libcurl": { "Name": "libcurl", "SupportedOS": [ { @@ -3245,16 +3245,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "libtool_na_rpm": { + "libtool": { "Name": "libtool", "SupportedOS": [ { @@ -3269,17 +3269,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lldb-devel_na_rpm": { - "Name": "lldb-devel", + "lldb": { + "Name": "lldb", "SupportedOS": [ { "Name": "RHEL", @@ -3293,17 +3293,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lldb_na_rpm": { - "Name": "lldb", + "lldb-devel": { + "Name": "lldb-devel", "SupportedOS": [ { "Name": "RHEL", @@ -3317,16 +3317,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lshw_na_rpm": { + "lshw": { "Name": "lshw", "SupportedOS": [ { @@ -3341,16 +3341,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "lsof_na_rpm": { + "lsof": { "Name": "lsof", "SupportedOS": [ { @@ -3365,16 +3365,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ltrace_na_rpm": { + "ltrace": { "Name": "ltrace", "SupportedOS": [ { @@ -3389,16 +3389,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "lvm2_na_rpm": { + "lvm2": { "Name": "lvm2", "SupportedOS": [ { @@ -3413,16 +3413,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "make_na_rpm": { + "make": { "Name": "make", "SupportedOS": [ { @@ -3437,16 +3437,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "man-db_na_rpm": { + "man-db": { "Name": "man-db", "SupportedOS": [ { @@ -3461,16 +3461,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "man-pages_na_rpm": { + "man-pages": { "Name": "man-pages", "SupportedOS": [ { @@ -3485,16 +3485,16 @@ 
"Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "munge-devel_na_rpm": { + "munge-devel": { "Name": "munge-devel", "SupportedOS": [ { @@ -3509,16 +3509,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "codeready-builder" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "codeready-builder" } ] }, - "nfs-utils_na_rpm": { + "nfs-utils": { "Name": "nfs-utils", "SupportedOS": [ { @@ -3533,16 +3533,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nfs4-acl-tools_na_rpm": { + "nfs4-acl-tools": { "Name": "nfs4-acl-tools", "SupportedOS": [ { @@ -3557,16 +3557,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "nm-connection-editor_na_rpm": { + "nm-connection-editor": { "Name": "nm-connection-editor", "SupportedOS": [ { @@ -3581,16 +3581,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "nss-pam-ldapd_na_rpm": { + "nss-pam-ldapd": { "Name": "nss-pam-ldapd", "SupportedOS": [ { @@ -3605,16 +3605,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "epel" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "epel" } ] }, - "oddjob-mkhomedir_na_rpm": { + "oddjob-mkhomedir": { "Name": "oddjob-mkhomedir", "SupportedOS": [ { @@ -3629,16 +3629,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "openldap-clients_na_rpm": { + "openldap-clients": { "Name": "openldap-clients", "SupportedOS": [ { @@ -3653,16 +3653,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openmpi_5.0.8_tarball": { + "openmpi": { "Name": "openmpi", "SupportedOS": [ { @@ -3678,17 +3678,17 @@ "Version": "5.0.8", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-5.0.8.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-5.0.8.tar.gz" } ] }, - "openssh-clients_na_rpm": { - "Name": "openssh-clients", + "openssh": { + "Name": "openssh", "SupportedOS": [ { "Name": "RHEL", @@ -3702,17 +3702,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh-server_na_rpm": { - "Name": "openssh-server", + "openssh-clients": { + "Name": "openssh-clients", "SupportedOS": [ { "Name": "RHEL", @@ -3726,17 +3726,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssh_na_rpm": { - "Name": "openssh", + "openssh-server": { + "Name": "openssh-server", "SupportedOS": [ { "Name": "RHEL", @@ -3750,16 +3750,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "openssl-devel_na_rpm": { + 
"openssl-devel": { "Name": "openssl-devel", "SupportedOS": [ { @@ -3774,16 +3774,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "openssl-libs_na_rpm": { + "openssl-libs": { "Name": "openssl-libs", "SupportedOS": [ { @@ -3798,16 +3798,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ovis-ldms_na_rpm": { + "ovis-ldms": { "Name": "ovis-ldms", "SupportedOS": [ { @@ -3822,16 +3822,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "ldms" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "ldms" } ] }, - "papi-devel_na_rpm": { + "papi-devel": { "Name": "papi-devel", "SupportedOS": [ { @@ -3846,16 +3846,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi-libs_na_rpm": { + "papi-libs": { "Name": "papi-libs", "SupportedOS": [ { @@ -3870,16 +3870,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "papi_na_rpm": { + "papi_1": { "Name": "papi", "SupportedOS": [ { @@ -3894,16 +3894,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "pciutils_na_rpm": { + "pciutils": { "Name": "pciutils", "SupportedOS": [ { @@ -3918,16 +3918,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": 
"x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "perf_na_rpm": { + "perf": { "Name": "perf", "SupportedOS": [ { @@ -3942,16 +3942,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "pmix-devel_na_rpm": { + "pmix-devel": { "Name": "pmix-devel", "SupportedOS": [ { @@ -3966,16 +3966,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "python3-cython_na_rpm": { + "python3-cython": { "Name": "python3-cython", "SupportedOS": [ { @@ -3990,16 +3990,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "codeready-builder" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "codeready-builder" } ] }, - "python3-devel_na_rpm": { + "python3-devel": { "Name": "python3-devel", "SupportedOS": [ { @@ -4014,16 +4014,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "rsync_na_rpm": { + "rsync": { "Name": "rsync", "SupportedOS": [ { @@ -4038,16 +4038,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "rsyslog_na_rpm": { + "rsyslog": { "Name": "rsyslog", "SupportedOS": [ { @@ -4062,16 +4062,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "sed_na_rpm": { 
+ "sed": { "Name": "sed", "SupportedOS": [ { @@ -4086,16 +4086,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "squashfs-tools_na_rpm": { + "squashfs-tools": { "Name": "squashfs-tools", "SupportedOS": [ { @@ -4110,16 +4110,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sssd_na_rpm": { + "sssd": { "Name": "sssd", "SupportedOS": [ { @@ -4134,16 +4134,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "strace_na_rpm": { + "strace": { "Name": "strace", "SupportedOS": [ { @@ -4158,16 +4158,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "sudo_na_rpm": { + "sudo": { "Name": "sudo", "SupportedOS": [ { @@ -4182,17 +4182,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "systemd-udev_na_rpm": { - "Name": "systemd-udev", + "systemd": { + "Name": "systemd", "SupportedOS": [ { "Name": "RHEL", @@ -4206,17 +4206,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "systemd_na_rpm": { - "Name": "systemd", + "systemd-udev": { + "Name": "systemd-udev", "SupportedOS": [ { "Name": "RHEL", @@ -4230,16 +4230,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": 
"aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "tar_na_rpm": { + "tar": { "Name": "tar", "SupportedOS": [ { @@ -4254,16 +4254,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "tcpdump_na_rpm": { + "tcpdump": { "Name": "tcpdump", "SupportedOS": [ { @@ -4278,16 +4278,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "traceroute_na_rpm": { + "traceroute": { "Name": "traceroute", "SupportedOS": [ { @@ -4302,16 +4302,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "ucx_1.19.0_tarball": { + "ucx": { "Name": "ucx", "SupportedOS": [ { @@ -4327,16 +4327,16 @@ "Version": "1.19.0", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "Uri": "https://github.com/openucx/ucx/releases/download/v1.19.0/ucx-1.19.0.tar.gz" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "Uri": "https://github.com/openucx/ucx/releases/download/v1.19.0/ucx-1.19.0.tar.gz" } ] }, - "util-linux_na_rpm": { + "util-linux": { "Name": "util-linux", "SupportedOS": [ { @@ -4351,17 +4351,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "valgrind-devel_na_rpm": { - "Name": "valgrind-devel", + "valgrind": { + "Name": "valgrind", "SupportedOS": [ { "Name": "RHEL", @@ -4375,17 +4375,17 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + 
"Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "valgrind_na_rpm": { - "Name": "valgrind", + "valgrind-devel": { + "Name": "valgrind-devel", "SupportedOS": [ { "Name": "RHEL", @@ -4399,16 +4399,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "vim-enhanced_na_rpm_1": { + "vim-enhanced_1": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -4423,16 +4423,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "wget_na_rpm": { + "wget": { "Name": "wget", "SupportedOS": [ { @@ -4447,16 +4447,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "appstream" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "appstream" } ] }, - "which_na_rpm": { + "which": { "Name": "which", "SupportedOS": [ { @@ -4471,16 +4471,16 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] }, - "zsh_na_rpm": { + "zsh": { "Name": "zsh", "SupportedOS": [ { @@ -4495,11 +4495,11 @@ "Type": "rpm", "Sources": [ { - "Architecture": "aarch64", + "Architecture": "x86_64", "RepoName": "baseos" }, { - "Architecture": "x86_64", + "Architecture": "aarch64", "RepoName": "baseos" } ] diff --git a/examples/catalog/catalog_rhel_x86_64.json b/examples/catalog/catalog_rhel_x86_64.json index d984115f65..a70b4f94aa 100644 --- a/examples/catalog/catalog_rhel_x86_64.json +++ b/examples/catalog/catalog_rhel_x86_64.json @@ -7,259 +7,259 @@ { "Name": "login_compiler_node_x86_64", 
"FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "login_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "os_x86_64", "FunctionalPackages": [ - "openssl-libs_na_rpm", - "ovis-ldms_na_rpm", - "python3-cython_na_rpm", - "python3-devel_na_rpm" + "openssl-libs", + "ovis-ldms", + "python3-cython", + "python3-devel" ] }, { "Name": 
"service_kube_control_plane_x86_64", "FunctionalPackages": [ - "PyMySQL_1.1.2_pip_module", - "apptainer_na_rpm", - "calico-v3-31-4_na_manifest", - "cert-manager-v1-10-0_na_tarball", - "cffi_1.17.1_pip_module", - "container-selinux_na_rpm", - "cri-o_1.35.1_rpm", - "cryptography_45.0.7_pip_module", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "docker-io-alpine-kubectl_1.35.1_image", - "docker-io-calico-cni_v3.31.4_image", - "docker-io-calico-kube-controllers_v3.31.4_image", - "docker-io-calico-node_v3.31.4_image", - "docker-io-curlimages-curl_8.17.0_image", - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image", - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image", - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image", - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image", - "docker-io-library-busybox_1.36_image", - "docker-io-library-mysql_9.3.0_image", - "docker-io-library-python_3.12-slim_image", - "docker-io-nginxinc-nginx-unprivileged_1.29_image", - "docker-io-rmohr-activemq_5.15.9_image", - "docker-io-timberio-vector_0.54.0-debian_image", - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image", - "docker-io-victoriametrics-operator_v0.68.3_image", - "docker-io-victoriametrics-victoria-logs_v1.50.0_image", - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image", - "docker-io-victoriametrics-vlagent_v1.50.0_image", - "docker-io-victoriametrics-vmagent_v1.128.0_image", - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image", - "firewalld_na_rpm", - "fuse-overlayfs_na_rpm", - "ghcr-io-kube-vip-kube-vip_v0.8.9_image", - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image", - "git_na_rpm", - "helm-charts_container-storage-modules-1.9.2_git", - "helm-v3-20-1-amd64_na_tarball", - "iscsi-initiator-utils_na_rpm", - "karavi-observability_v1.12.0_git", - 
"kubeadm_1.35.1_rpm", - "kubectl_1.35.1_rpm", - "kubelet_1.35.1_rpm", - "kubernetes_33.1.0_pip_module", - "lsscsi_na_rpm", - "metallb-native-v0-15-3_na_manifest", - "nfs-subdir-external-provisioner-4-0-18_na_tarball", - "omsdk_1.2.518_pip_module", - "podman_na_rpm", - "prettytable_3.14.0_pip_module", - "prometheus_client_0.20.0_pip_module", - "python3-firewall_na_rpm", - "python3_3.12.9_rpm", - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image", - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image", - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image", - "quay-io-jetstack-cert-manager-controller_v1.10.0_image", - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image", - "quay-io-metallb-speaker_v0.15.3_image", - "quay-io-strimzi-kafka-bridge_0.33.1_image", - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image", - "quay-io-strimzi-operator_0.48.0_image", - "registry-k8s-io-coredns-coredns_v1.13.1_image", - "registry-k8s-io-etcd_3.6.6-0_image", - "registry-k8s-io-kube-apiserver_v1.35.1_image", - "registry-k8s-io-kube-controller-manager_v1.35.1_image", - "registry-k8s-io-kube-proxy_v1.35.1_image", - "registry-k8s-io-kube-scheduler_v1.35.1_image", - "registry-k8s-io-pause_3.10.1_image", - "sg3_utils_na_rpm", - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball", - "victoria-metrics-operator-0-59-3_na_tarball", - "vim-enhanced_na_rpm" + "PyMySQL", + "apptainer", + "calico", + "cert-manager", + "cffi", + "container-selinux", + "cri-o", + "cryptography", + "device-mapper-multipath", + "doca-ofed", + "docker.io/alpine/kubectl", + "docker.io/calico/cni", + "docker.io/calico/kube-controllers", + "docker.io/calico/node", + "docker.io/curlimages/curl", + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", + "docker.io/dellhpcomniaaisolution/kafkapump", + "docker.io/dellhpcomniaaisolution/ubuntu-ldms", + "docker.io/dellhpcomniaaisolution/victoriapump", + "docker.io/library/busybox", + "docker.io/library/mysql", + "docker.io/library/python", + 
"docker.io/nginxinc/nginx-unprivileged", + "docker.io/rmohr/activemq", + "docker.io/timberio/vector", + "docker.io/victoriametrics/operator", + "docker.io/victoriametrics/operator_1", + "docker.io/victoriametrics/victoria-logs", + "docker.io/victoriametrics/victoria-metrics", + "docker.io/victoriametrics/vlagent", + "docker.io/victoriametrics/vmagent", + "docker.io/victoriametrics/vminsert", + "docker.io/victoriametrics/vmselect", + "docker.io/victoriametrics/vmstorage", + "firewalld", + "fuse-overlayfs", + "ghcr.io/kube-vip/kube-vip", + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", + "git", + "helm-amd64", + "helm-charts", + "iscsi-initiator-utils", + "karavi-observability", + "kubeadm", + "kubectl", + "kubelet", + "kubernetes", + "lsscsi", + "metallb-native", + "nfs-subdir-external-provisioner", + "omsdk", + "podman", + "prettytable", + "prometheus_client", + "python3", + "python3-firewall", + "quay.io/dell/container-storage-modules/csm-metrics-powerscale", + "quay.io/jetstack/cert-manager-acmesolver", + "quay.io/jetstack/cert-manager-cainjector", + "quay.io/jetstack/cert-manager-controller", + "quay.io/jetstack/cert-manager-webhook", + "quay.io/metallb/speaker", + "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/operator", + "registry.k8s.io/coredns/coredns", + "registry.k8s.io/etcd", + "registry.k8s.io/kube-apiserver", + "registry.k8s.io/kube-controller-manager", + "registry.k8s.io/kube-proxy", + "registry.k8s.io/kube-scheduler", + "registry.k8s.io/pause", + "sg3_utils", + "strimzi-kafka-operator-helm-3-chart", + "victoria-metrics-operator", + "vim-enhanced" ] }, { "Name": "service_kube_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "cert-manager-v1-10-0_na_tarball", - "cffi_1.17.1_pip_module", - "container-selinux_na_rpm", - "cri-o_1.35.1_rpm", - "cryptography_45.0.7_pip_module", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - 
"docker-io-alpine-kubectl_1.35.1_image", - "docker-io-curlimages-curl_8.17.0_image", - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image", - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image", - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image", - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image", - "docker-io-library-busybox_1.36_image", - "docker-io-library-mysql_9.3.0_image", - "docker-io-library-python_3.12-slim_image", - "docker-io-nginxinc-nginx-unprivileged_1.29_image", - "docker-io-rmohr-activemq_5.15.9_image", - "docker-io-timberio-vector_0.54.0-debian_image", - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image", - "docker-io-victoriametrics-operator_v0.68.3_image", - "docker-io-victoriametrics-victoria-logs_v1.50.0_image", - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image", - "docker-io-victoriametrics-vlagent_v1.50.0_image", - "docker-io-victoriametrics-vmagent_v1.128.0_image", - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image", - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image", - "firewalld_na_rpm", - "fuse-overlayfs_na_rpm", - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image", - "git_na_rpm", - "helm-charts_container-storage-modules-1.9.2_git", - "iscsi-initiator-utils_na_rpm", - "karavi-observability_v1.12.0_git", - "kubeadm_1.35.1_rpm", - "kubelet_1.35.1_rpm", - "kubernetes_33.1.0_pip_module", - "lsscsi_na_rpm", - "omsdk_1.2.518_pip_module", - "podman_na_rpm", - "prometheus_client_0.20.0_pip_module", - "python3-firewall_na_rpm", - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image", - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image", - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image", - "quay-io-jetstack-cert-manager-controller_v1.10.0_image", - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image", - "quay-io-metallb-controller_v0.15.3_image", - 
"quay-io-metallb-speaker_v0.15.3_image", - "quay-io-strimzi-kafka-bridge_0.33.1_image", - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image", - "quay-io-strimzi-operator_0.48.0_image", - "registry-k8s-io-sig-storage-nfs-subdir-external-pr_v4.0.2_image", - "sg3_utils_na_rpm", - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball", - "victoria-metrics-operator-0-59-3_na_tarball", - "vim-enhanced_na_rpm" + "apptainer", + "cert-manager", + "cffi", + "container-selinux", + "cri-o", + "cryptography", + "device-mapper-multipath", + "doca-ofed", + "docker.io/alpine/kubectl", + "docker.io/curlimages/curl", + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", + "docker.io/dellhpcomniaaisolution/kafkapump", + "docker.io/dellhpcomniaaisolution/ubuntu-ldms", + "docker.io/dellhpcomniaaisolution/victoriapump", + "docker.io/library/busybox", + "docker.io/library/mysql", + "docker.io/library/python", + "docker.io/nginxinc/nginx-unprivileged", + "docker.io/rmohr/activemq", + "docker.io/timberio/vector", + "docker.io/victoriametrics/operator", + "docker.io/victoriametrics/operator_1", + "docker.io/victoriametrics/victoria-logs", + "docker.io/victoriametrics/victoria-metrics", + "docker.io/victoriametrics/vlagent", + "docker.io/victoriametrics/vmagent", + "docker.io/victoriametrics/vminsert", + "docker.io/victoriametrics/vmselect", + "docker.io/victoriametrics/vmstorage", + "firewalld", + "fuse-overlayfs", + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", + "git", + "helm-charts", + "iscsi-initiator-utils", + "karavi-observability", + "kubeadm", + "kubelet", + "kubernetes", + "lsscsi", + "omsdk", + "podman", + "prometheus_client", + "python3-firewall", + "quay.io/dell/container-storage-modules/csm-metrics-powerscale", + "quay.io/jetstack/cert-manager-acmesolver", + "quay.io/jetstack/cert-manager-cainjector", + "quay.io/jetstack/cert-manager-controller", + "quay.io/jetstack/cert-manager-webhook", + "quay.io/metallb/controller", + 
"quay.io/metallb/speaker", + "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/operator", + "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner", + "sg3_utils", + "strimzi-kafka-operator-helm-3-chart", + "victoria-metrics-operator", + "vim-enhanced" ] }, { "Name": "slurm_control_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "mariadb-server_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-PyMySQL_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmctld_na_rpm", - "slurm-slurmdbd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "mariadb-server", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-PyMySQL", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-slurmctld", + "slurm-slurmdbd" ] }, { "Name": "slurm_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "kernel-devel_na_rpm", - "kernel-headers_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-pam_slurm_na_rpm", - "slurm-slurmd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", 
+ "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "kernel-devel", + "kernel-headers", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-pam_slurm", + "slurm-slurmd" ] } ], @@ -268,105 +268,105 @@ "Name": "RHEL", "Version": "10.0", "osPackages": [ - "NetworkManager_na_rpm", - "authselect_na_rpm", - "autoconf_na_rpm", - "automake_na_rpm", - "bash-completion_na_rpm", - "bash_na_rpm", - "binutils-devel_na_rpm", - "binutils_na_rpm", - "bzip2_na_rpm", - "chrony_na_rpm", - "cloud-init_na_rpm", - "clustershell_na_rpm", - "cmake_na_rpm", - "coreutils_na_rpm", - "cryptsetup_na_rpm", - "curl_na_rpm", - "device-mapper_na_rpm", - "dmidecode_na_rpm", - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image", - "dracut-live_na_rpm", - "dracut-network_na_rpm", - "dracut_na_rpm", - "emacs_na_rpm", - "file_na_rpm", - "findutils_na_rpm", - "fping_na_rpm", - "gawk_na_rpm", - "gcc-c++_na_rpm", - "gcc-gfortran_na_rpm", - "gcc_na_rpm", - "gdb-gdbserver_na_rpm", - "gdb_na_rpm", - "gedit_na_rpm", - "glibc-langpack-en_na_rpm", - "grep_na_rpm", - "gzip_na_rpm", - "hwloc-libs_na_rpm", - "hwloc_na_rpm", - "iperf3_na_rpm", - "ipmitool_na_rpm", - "iproute_na_rpm", - "iputils_na_rpm", - "kbd_na_rpm", - "kernel-tools_na_rpm", - "kernel_na_rpm", - "kexec-tools_na_rpm", - "libcurl_na_rpm", - "libtool_na_rpm", - "lldb-devel_na_rpm", - "lldb_na_rpm", - "lshw_na_rpm", - "lsof_na_rpm", - "ltrace_na_rpm", - "lvm2_na_rpm", - "make_na_rpm", - "man-db_na_rpm", - "man-pages_na_rpm", - "munge-devel_na_rpm", - "nfs-utils_na_rpm", - "nfs4-acl-tools_na_rpm", - "nm-connection-editor_na_rpm", - "nss-pam-ldapd_na_rpm", - "oddjob-mkhomedir_na_rpm", - "openldap-clients_na_rpm", - "openmpi_5.0.8_tarball", - "openssh-clients_na_rpm", - "openssh-server_na_rpm", - "openssh_na_rpm", - "openssl-devel_na_rpm", - "openssl-libs_na_rpm_1", - "ovis-ldms_na_rpm_1", - 
"papi-devel_na_rpm", - "papi-libs_na_rpm", - "papi_na_rpm", - "pciutils_na_rpm", - "perf_na_rpm", - "pmix-devel_na_rpm", - "python3-cython_na_rpm_1", - "python3-devel_na_rpm_1", - "rsync_na_rpm", - "rsyslog_na_rpm", - "sed_na_rpm", - "squashfs-tools_na_rpm", - "sssd_na_rpm", - "strace_na_rpm", - "sudo_na_rpm", - "systemd-udev_na_rpm", - "systemd_na_rpm", - "tar_na_rpm", - "tcpdump_na_rpm", - "traceroute_na_rpm", - "ucx_1.19.0_tarball", - "util-linux_na_rpm", - "valgrind-devel_na_rpm", - "valgrind_na_rpm", - "vim-enhanced_na_rpm_1", - "wget_na_rpm", - "which_na_rpm", - "zsh_na_rpm" + "NetworkManager", + "authselect", + "autoconf", + "automake", + "bash", + "bash-completion", + "binutils", + "binutils-devel", + "bzip2", + "chrony", + "cloud-init", + "clustershell", + "cmake", + "coreutils", + "cryptsetup", + "curl", + "device-mapper", + "dmidecode", + "docker.io/dellhpcomniaaisolution/image-build-el10", + "dracut", + "dracut-live", + "dracut-network", + "emacs", + "file", + "findutils", + "fping", + "gawk", + "gcc", + "gcc-c++", + "gcc-gfortran", + "gdb", + "gdb-gdbserver", + "gedit", + "glibc-langpack-en", + "grep", + "gzip", + "hwloc", + "hwloc-libs", + "iperf3", + "ipmitool", + "iproute", + "iputils", + "kbd", + "kernel", + "kernel-tools", + "kexec-tools", + "libcurl", + "libtool", + "lldb", + "lldb-devel", + "lshw", + "lsof", + "ltrace", + "lvm2", + "make", + "man-db", + "man-pages", + "munge-devel", + "nfs-utils", + "nfs4-acl-tools", + "nm-connection-editor", + "nss-pam-ldapd", + "oddjob-mkhomedir", + "openldap-clients", + "openmpi", + "openssh", + "openssh-clients", + "openssh-server", + "openssl-devel", + "openssl-libs_1", + "ovis-ldms_1", + "papi-devel", + "papi-libs", + "papi_1", + "pciutils", + "perf", + "pmix-devel", + "python3-cython_1", + "python3-devel_1", + "rsync", + "rsyslog", + "sed", + "squashfs-tools", + "sssd", + "strace", + "sudo", + "systemd", + "systemd-udev", + "tar", + "tcpdump", + "traceroute", + "ucx", + "util-linux", + "valgrind", + 
"valgrind-devel", + "vim-enhanced_1", + "wget", + "which", + "zsh" ] } ], @@ -374,29 +374,29 @@ { "Name": "csi", "InfrastructurePackages": [ - "csi-powerscale-v2-16-0_v2.16.0_git", - "docker-io-dellemc-csm-encryption_v0.6.0_image", - "external-snapshotter-v8-4-0_v8.4.0_git", - "helm-charts-2-16-0_csi-isilon-2.16.0_git", - "quay-io-dell-container-storage-modules-csi-isilon_v2.16.0_image", - "quay-io-dell-container-storage-modules-csi-metadat_v1.13.0_image", - "quay-io-dell-container-storage-modules-csm-authori_v2.4.0_image", - "quay-io-dell-container-storage-modules-dell-csi-re_v1.14.0_image", - "quay-io-dell-container-storage-modules-podmon_v1.15.0_image", - "registry-k8s-io-sig-storage-csi-attacher_v4.10.0_image", - "registry-k8s-io-sig-storage-csi-external-health-mo_v0.16.0_image", - "registry-k8s-io-sig-storage-csi-node-driver-regist_v2.15.0_image", - "registry-k8s-io-sig-storage-csi-provisioner_v6.1.0_image", - "registry-k8s-io-sig-storage-csi-resizer_v2.0.0_image", - "registry-k8s-io-sig-storage-csi-snapshotter_v8.4.0_image", - "registry-k8s-io-sig-storage-snapshot-controller_v8.4.0_image" + "csi-powerscale", + "docker.io/dellemc/csm-encryption", + "external-snapshotter", + "helm-charts_1", + "quay.io/dell/container-storage-modules/csi-isilon", + "quay.io/dell/container-storage-modules/csi-metadata-retriever", + "quay.io/dell/container-storage-modules/csm-authorization-sidecar", + "quay.io/dell/container-storage-modules/dell-csi-replicator", + "quay.io/dell/container-storage-modules/podmon", + "registry.k8s.io/sig-storage/csi-attacher", + "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", + "registry.k8s.io/sig-storage/csi-node-driver-registrar", + "registry.k8s.io/sig-storage/csi-provisioner", + "registry.k8s.io/sig-storage/csi-resizer", + "registry.k8s.io/sig-storage/csi-snapshotter", + "registry.k8s.io/sig-storage/snapshot-controller" ] } ], "Drivers": [], "DriverPackages": {}, "FunctionalPackages": { - "PyMySQL_1.1.2_pip_module": { + 
"PyMySQL": { "Name": "PyMySQL==1.1.2", "SupportedOS": [ { @@ -409,7 +409,7 @@ ], "Type": "pip_module" }, - "apptainer_na_rpm": { + "apptainer": { "Name": "apptainer", "SupportedOS": [ { @@ -428,7 +428,7 @@ } ] }, - "calico-v3-31-4_na_manifest": { + "calico": { "Name": "calico-v3.31.4", "SupportedOS": [ { @@ -447,7 +447,7 @@ } ] }, - "cert-manager-v1-10-0_na_tarball": { + "cert-manager": { "Name": "cert-manager-v1.10.0", "SupportedOS": [ { @@ -466,7 +466,7 @@ } ] }, - "cffi_1.17.1_pip_module": { + "cffi": { "Name": "cffi==1.17.1", "SupportedOS": [ { @@ -479,7 +479,7 @@ ], "Type": "pip_module" }, - "container-selinux_na_rpm": { + "container-selinux": { "Name": "container-selinux", "SupportedOS": [ { @@ -498,7 +498,7 @@ } ] }, - "cri-o_1.35.1_rpm": { + "cri-o": { "Name": "cri-o-1.35.1", "SupportedOS": [ { @@ -517,7 +517,7 @@ } ] }, - "cryptography_45.0.7_pip_module": { + "cryptography": { "Name": "cryptography==45.0.7", "SupportedOS": [ { @@ -530,7 +530,7 @@ ], "Type": "pip_module" }, - "device-mapper-multipath_na_rpm": { + "device-mapper-multipath": { "Name": "device-mapper-multipath", "SupportedOS": [ { @@ -549,7 +549,7 @@ } ] }, - "doca-ofed_na_rpm_repo": { + "doca-ofed": { "Name": "doca-ofed", "SupportedOS": [ { @@ -568,7 +568,7 @@ } ] }, - "docker-io-alpine-kubectl_1.35.1_image": { + "docker.io/alpine/kubectl": { "Name": "docker.io/alpine/kubectl", "SupportedOS": [ { @@ -583,7 +583,7 @@ "Tag": "1.35.1", "Version": "1.35.1" }, - "docker-io-calico-cni_v3.31.4_image": { + "docker.io/calico/cni": { "Name": "docker.io/calico/cni", "SupportedOS": [ { @@ -598,7 +598,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-calico-kube-controllers_v3.31.4_image": { + "docker.io/calico/kube-controllers": { "Name": "docker.io/calico/kube-controllers", "SupportedOS": [ { @@ -613,7 +613,7 @@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-calico-node_v3.31.4_image": { + "docker.io/calico/node": { "Name": "docker.io/calico/node", "SupportedOS": [ { @@ -628,7 +628,7 
@@ "Tag": "v3.31.4", "Version": "v3.31.4" }, - "docker-io-curlimages-curl_8.17.0_image": { + "docker.io/curlimages/curl": { "Name": "docker.io/curlimages/curl", "SupportedOS": [ { @@ -643,7 +643,7 @@ "Tag": "8.17.0", "Version": "8.17.0" }, - "docker-io-dellhpcomniaaisolution-idrac_telemetry_r_1.3_image": { + "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver": { "Name": "docker.io/dellhpcomniaaisolution/idrac_telemetry_receiver", "SupportedOS": [ { @@ -658,7 +658,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-dellhpcomniaaisolution-kafkapump_1.3_image": { + "docker.io/dellhpcomniaaisolution/kafkapump": { "Name": "docker.io/dellhpcomniaaisolution/kafkapump", "SupportedOS": [ { @@ -673,7 +673,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-dellhpcomniaaisolution-ubuntu-ldms_1.1_image": { + "docker.io/dellhpcomniaaisolution/ubuntu-ldms": { "Name": "docker.io/dellhpcomniaaisolution/ubuntu-ldms", "SupportedOS": [ { @@ -688,7 +688,7 @@ "Tag": "1.1", "Version": "1.1" }, - "docker-io-dellhpcomniaaisolution-victoriapump_1.3_image": { + "docker.io/dellhpcomniaaisolution/victoriapump": { "Name": "docker.io/dellhpcomniaaisolution/victoriapump", "SupportedOS": [ { @@ -703,7 +703,7 @@ "Tag": "1.3", "Version": "1.3" }, - "docker-io-library-busybox_1.36_image": { + "docker.io/library/busybox": { "Name": "docker.io/library/busybox", "SupportedOS": [ { @@ -718,7 +718,7 @@ "Tag": "1.36", "Version": "1.36" }, - "docker-io-library-mysql_9.3.0_image": { + "docker.io/library/mysql": { "Name": "docker.io/library/mysql", "SupportedOS": [ { @@ -733,7 +733,7 @@ "Tag": "9.3.0", "Version": "9.3.0" }, - "docker-io-library-python_3.12-slim_image": { + "docker.io/library/python": { "Name": "docker.io/library/python", "SupportedOS": [ { @@ -748,7 +748,7 @@ "Tag": "3.12-slim", "Version": "3.12-slim" }, - "docker-io-nginxinc-nginx-unprivileged_1.29_image": { + "docker.io/nginxinc/nginx-unprivileged": { "Name": "docker.io/nginxinc/nginx-unprivileged", "SupportedOS": [ { @@ -763,7 
+763,7 @@ "Tag": "1.29", "Version": "1.29" }, - "docker-io-rmohr-activemq_5.15.9_image": { + "docker.io/rmohr/activemq": { "Name": "docker.io/rmohr/activemq", "SupportedOS": [ { @@ -778,7 +778,7 @@ "Tag": "5.15.9", "Version": "5.15.9" }, - "docker-io-timberio-vector_0.54.0-debian_image": { + "docker.io/timberio/vector": { "Name": "docker.io/timberio/vector", "SupportedOS": [ { @@ -793,7 +793,7 @@ "Tag": "0.54.0-debian", "Version": "0.54.0-debian" }, - "docker-io-victoriametrics-operator_config-reloader-v0.68.3_image": { + "docker.io/victoriametrics/operator": { "Name": "docker.io/victoriametrics/operator", "SupportedOS": [ { @@ -805,10 +805,10 @@ "x86_64" ], "Type": "image", - "Tag": "config-reloader-v0.68.3", - "Version": "config-reloader-v0.68.3" + "Tag": "v0.68.3", + "Version": "v0.68.3" }, - "docker-io-victoriametrics-operator_v0.68.3_image": { + "docker.io/victoriametrics/operator_1": { "Name": "docker.io/victoriametrics/operator", "SupportedOS": [ { @@ -820,10 +820,10 @@ "x86_64" ], "Type": "image", - "Tag": "v0.68.3", - "Version": "v0.68.3" + "Tag": "config-reloader-v0.68.3", + "Version": "config-reloader-v0.68.3" }, - "docker-io-victoriametrics-victoria-logs_v1.50.0_image": { + "docker.io/victoriametrics/victoria-logs": { "Name": "docker.io/victoriametrics/victoria-logs", "SupportedOS": [ { @@ -838,7 +838,7 @@ "Tag": "v1.50.0", "Version": "v1.50.0" }, - "docker-io-victoriametrics-victoria-metrics_v1.128.0_image": { + "docker.io/victoriametrics/victoria-metrics": { "Name": "docker.io/victoriametrics/victoria-metrics", "SupportedOS": [ { @@ -853,7 +853,7 @@ "Tag": "v1.128.0", "Version": "v1.128.0" }, - "docker-io-victoriametrics-vlagent_v1.50.0_image": { + "docker.io/victoriametrics/vlagent": { "Name": "docker.io/victoriametrics/vlagent", "SupportedOS": [ { @@ -868,7 +868,7 @@ "Tag": "v1.50.0", "Version": "v1.50.0" }, - "docker-io-victoriametrics-vmagent_v1.128.0_image": { + "docker.io/victoriametrics/vmagent": { "Name": "docker.io/victoriametrics/vmagent", 
"SupportedOS": [ { @@ -883,7 +883,7 @@ "Tag": "v1.128.0", "Version": "v1.128.0" }, - "docker-io-victoriametrics-vminsert_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vminsert": { "Name": "docker.io/victoriametrics/vminsert", "SupportedOS": [ { @@ -898,7 +898,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "docker-io-victoriametrics-vmselect_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vmselect": { "Name": "docker.io/victoriametrics/vmselect", "SupportedOS": [ { @@ -913,7 +913,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "docker-io-victoriametrics-vmstorage_v1.128.0-cluster_image": { + "docker.io/victoriametrics/vmstorage": { "Name": "docker.io/victoriametrics/vmstorage", "SupportedOS": [ { @@ -928,7 +928,7 @@ "Tag": "v1.128.0-cluster", "Version": "v1.128.0-cluster" }, - "firewalld_na_rpm": { + "firewalld": { "Name": "firewalld", "SupportedOS": [ { @@ -947,7 +947,7 @@ } ] }, - "fuse-overlayfs_na_rpm": { + "fuse-overlayfs": { "Name": "fuse-overlayfs", "SupportedOS": [ { @@ -966,7 +966,7 @@ } ] }, - "geopm_na_tarball": { + "geopm": { "Name": "geopm", "SupportedOS": [ { @@ -985,7 +985,7 @@ } ] }, - "ghcr-io-kube-vip-kube-vip_v0.8.9_image": { + "ghcr.io/kube-vip/kube-vip": { "Name": "ghcr.io/kube-vip/kube-vip", "SupportedOS": [ { @@ -1000,7 +1000,7 @@ "Tag": "v0.8.9", "Version": "v0.8.9" }, - "ghcr-io-open-telemetry-opentelemetry-collector-rel_0.143.1_image": { + "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector": { "Name": "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector", "SupportedOS": [ { @@ -1015,7 +1015,7 @@ "Tag": "0.143.1", "Version": "0.143.1" }, - "git_na_rpm": { + "git": { "Name": "git", "SupportedOS": [ { @@ -1034,8 +1034,8 @@ } ] }, - "helm-charts_container-storage-modules-1.9.2_git": { - "Name": "helm-charts", + "helm-amd64": { + "Name": "helm-v3.20.1-amd64", "SupportedOS": [ { "Name": "RHEL", @@ -1045,17 +1045,16 @@ "Architecture": 
[ "x86_64" ], - "Type": "git", - "Version": "container-storage-modules-1.9.2", + "Type": "tarball", "Sources": [ { "Architecture": "x86_64", - "Uri": "https://github.com/dell/helm-charts.git" + "Uri": "https://get.helm.sh/helm-v3.20.1-linux-amd64.tar.gz" } ] }, - "helm-v3-20-1-amd64_na_tarball": { - "Name": "helm-v3.20.1-amd64", + "helm-charts": { + "Name": "helm-charts", "SupportedOS": [ { "Name": "RHEL", @@ -1065,15 +1064,16 @@ "Architecture": [ "x86_64" ], - "Type": "tarball", + "Type": "git", + "Version": "container-storage-modules-1.9.2", "Sources": [ { "Architecture": "x86_64", - "Uri": "https://get.helm.sh/helm-v3.20.1-linux-amd64.tar.gz" + "Uri": "https://github.com/dell/helm-charts.git" } ] }, - "imb_na_tarball": { + "imb": { "Name": "imb", "SupportedOS": [ { @@ -1092,7 +1092,7 @@ } ] }, - "iscsi-initiator-utils_na_rpm": { + "iscsi-initiator-utils": { "Name": "iscsi-initiator-utils", "SupportedOS": [ { @@ -1111,7 +1111,7 @@ } ] }, - "karavi-observability_v1.12.0_git": { + "karavi-observability": { "Name": "karavi-observability", "SupportedOS": [ { @@ -1131,7 +1131,7 @@ } ] }, - "kernel-devel_na_rpm": { + "kernel-devel": { "Name": "kernel-devel", "SupportedOS": [ { @@ -1150,7 +1150,7 @@ } ] }, - "kernel-headers_na_rpm": { + "kernel-headers": { "Name": "kernel-headers", "SupportedOS": [ { @@ -1169,7 +1169,7 @@ } ] }, - "kubeadm_1.35.1_rpm": { + "kubeadm": { "Name": "kubeadm-1.35.1", "SupportedOS": [ { @@ -1188,7 +1188,7 @@ } ] }, - "kubectl_1.35.1_rpm": { + "kubectl": { "Name": "kubectl-1.35.1", "SupportedOS": [ { @@ -1207,7 +1207,7 @@ } ] }, - "kubelet_1.35.1_rpm": { + "kubelet": { "Name": "kubelet-1.35.1", "SupportedOS": [ { @@ -1226,7 +1226,7 @@ } ] }, - "kubernetes_33.1.0_pip_module": { + "kubernetes": { "Name": "kubernetes==33.1.0", "SupportedOS": [ { @@ -1239,7 +1239,7 @@ ], "Type": "pip_module" }, - "likwid_na_tarball": { + "likwid": { "Name": "likwid", "SupportedOS": [ { @@ -1258,7 +1258,7 @@ } ] }, - "lsscsi_na_rpm": { + "lsscsi": { "Name": 
"lsscsi", "SupportedOS": [ { @@ -1277,7 +1277,7 @@ } ] }, - "mariadb-server_na_rpm": { + "mariadb-server": { "Name": "mariadb-server", "SupportedOS": [ { @@ -1296,7 +1296,7 @@ } ] }, - "metallb-native-v0-15-3_na_manifest": { + "metallb-native": { "Name": "metallb-native-v0.15.3", "SupportedOS": [ { @@ -1315,7 +1315,7 @@ } ] }, - "msr-safe_na_tarball": { + "msr-safe": { "Name": "msr-safe", "SupportedOS": [ { @@ -1334,7 +1334,7 @@ } ] }, - "munge_na_rpm": { + "munge": { "Name": "munge", "SupportedOS": [ { @@ -1353,7 +1353,7 @@ } ] }, - "nfs-subdir-external-provisioner-4-0-18_na_tarball": { + "nfs-subdir-external-provisioner": { "Name": "nfs-subdir-external-provisioner-4.0.18", "SupportedOS": [ { @@ -1372,7 +1372,7 @@ } ] }, - "nvcr-io-nvidia-hpc-benchmarks_25.09_image": { + "nvcr.io/nvidia/hpc-benchmarks": { "Name": "nvcr.io/nvidia/hpc-benchmarks", "SupportedOS": [ { @@ -1387,7 +1387,7 @@ "Tag": "25.09", "Version": "25.09" }, - "omsdk_1.2.518_pip_module": { + "omsdk": { "Name": "omsdk==1.2.518", "SupportedOS": [ { @@ -1400,7 +1400,7 @@ ], "Type": "pip_module" }, - "openssl-libs_na_rpm": { + "openssl-libs": { "Name": "openssl-libs", "SupportedOS": [ { @@ -1419,7 +1419,7 @@ } ] }, - "osu-micro-benchmarks_na_tarball": { + "osu-micro-benchmarks": { "Name": "osu-micro-benchmarks", "SupportedOS": [ { @@ -1438,7 +1438,7 @@ } ] }, - "ovis-ldms_na_rpm": { + "ovis-ldms": { "Name": "ovis-ldms", "SupportedOS": [ { @@ -1457,7 +1457,7 @@ } ] }, - "papi_na_tarball": { + "papi": { "Name": "papi", "SupportedOS": [ { @@ -1476,7 +1476,7 @@ } ] }, - "pmix_na_rpm": { + "pmix": { "Name": "pmix", "SupportedOS": [ { @@ -1495,7 +1495,7 @@ } ] }, - "podman_na_rpm": { + "podman": { "Name": "podman", "SupportedOS": [ { @@ -1514,7 +1514,7 @@ } ] }, - "prettytable_3.14.0_pip_module": { + "prettytable": { "Name": "prettytable==3.14.0", "SupportedOS": [ { @@ -1527,7 +1527,7 @@ ], "Type": "pip_module" }, - "prometheus_client_0.20.0_pip_module": { + "prometheus_client": { "Name": 
"prometheus_client==0.20.0", "SupportedOS": [ { @@ -1540,8 +1540,8 @@ ], "Type": "pip_module" }, - "python3-PyMySQL_na_rpm": { - "Name": "python3-PyMySQL", + "python3": { + "Name": "python3-3.12.9", "SupportedOS": [ { "Name": "RHEL", @@ -1555,12 +1555,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "python3-cython_na_rpm": { - "Name": "python3-cython", + "python3-PyMySQL": { + "Name": "python3-PyMySQL", "SupportedOS": [ { "Name": "RHEL", @@ -1574,12 +1574,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "codeready-builder" + "RepoName": "appstream" } ] }, - "python3-devel_na_rpm": { - "Name": "python3-devel", + "python3-cython": { + "Name": "python3-cython", "SupportedOS": [ { "Name": "RHEL", @@ -1593,12 +1593,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "codeready-builder" } ] }, - "python3-firewall_na_rpm": { - "Name": "python3-firewall", + "python3-devel": { + "Name": "python3-devel", "SupportedOS": [ { "Name": "RHEL", @@ -1612,12 +1612,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "baseos" + "RepoName": "appstream" } ] }, - "python3_3.12.9_rpm": { - "Name": "python3-3.12.9", + "python3-firewall": { + "Name": "python3-firewall", "SupportedOS": [ { "Name": "RHEL", @@ -1635,7 +1635,7 @@ } ] }, - "quay-io-dell-container-storage-modules-csm-metrics_v1.11.0_image": { + "quay.io/dell/container-storage-modules/csm-metrics-powerscale": { "Name": "quay.io/dell/container-storage-modules/csm-metrics-powerscale", "SupportedOS": [ { @@ -1650,7 +1650,7 @@ "Tag": "v1.11.0", "Version": "v1.11.0" }, - "quay-io-jetstack-cert-manager-acmesolver_v1.10.0_image": { + "quay.io/jetstack/cert-manager-acmesolver": { "Name": "quay.io/jetstack/cert-manager-acmesolver", "SupportedOS": [ { @@ -1665,7 +1665,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-cainjector_v1.10.0_image": { + "quay.io/jetstack/cert-manager-cainjector": { "Name": 
"quay.io/jetstack/cert-manager-cainjector", "SupportedOS": [ { @@ -1680,7 +1680,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-controller_v1.10.0_image": { + "quay.io/jetstack/cert-manager-controller": { "Name": "quay.io/jetstack/cert-manager-controller", "SupportedOS": [ { @@ -1695,7 +1695,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-jetstack-cert-manager-webhook_v1.10.0_image": { + "quay.io/jetstack/cert-manager-webhook": { "Name": "quay.io/jetstack/cert-manager-webhook", "SupportedOS": [ { @@ -1710,7 +1710,7 @@ "Tag": "v1.10.0", "Version": "v1.10.0" }, - "quay-io-metallb-controller_v0.15.3_image": { + "quay.io/metallb/controller": { "Name": "quay.io/metallb/controller", "SupportedOS": [ { @@ -1725,7 +1725,7 @@ "Tag": "v0.15.3", "Version": "v0.15.3" }, - "quay-io-metallb-speaker_v0.15.3_image": { + "quay.io/metallb/speaker": { "Name": "quay.io/metallb/speaker", "SupportedOS": [ { @@ -1740,8 +1740,8 @@ "Tag": "v0.15.3", "Version": "v0.15.3" }, - "quay-io-strimzi-kafka-bridge_0.33.1_image": { - "Name": "quay.io/strimzi/kafka-bridge", + "quay.io/strimzi/kafka": { + "Name": "quay.io/strimzi/kafka", "SupportedOS": [ { "Name": "RHEL", @@ -1752,11 +1752,11 @@ "x86_64" ], "Type": "image", - "Tag": "0.33.1", - "Version": "0.33.1" + "Tag": "0.48.0-kafka-4.1.0", + "Version": "0.48.0-kafka-4.1.0" }, - "quay-io-strimzi-kafka_0.48.0-kafka-4.1.0_image": { - "Name": "quay.io/strimzi/kafka", + "quay.io/strimzi/kafka-bridge": { + "Name": "quay.io/strimzi/kafka-bridge", "SupportedOS": [ { "Name": "RHEL", @@ -1767,10 +1767,10 @@ "x86_64" ], "Type": "image", - "Tag": "0.48.0-kafka-4.1.0", - "Version": "0.48.0-kafka-4.1.0" + "Tag": "0.33.1", + "Version": "0.33.1" }, - "quay-io-strimzi-operator_0.48.0_image": { + "quay.io/strimzi/operator": { "Name": "quay.io/strimzi/operator", "SupportedOS": [ { @@ -1785,7 +1785,7 @@ "Tag": "0.48.0", "Version": "0.48.0" }, - "registry-k8s-io-coredns-coredns_v1.13.1_image": { + 
"registry.k8s.io/coredns/coredns": { "Name": "registry.k8s.io/coredns/coredns", "SupportedOS": [ { @@ -1800,7 +1800,7 @@ "Tag": "v1.13.1", "Version": "v1.13.1" }, - "registry-k8s-io-etcd_3.6.6-0_image": { + "registry.k8s.io/etcd": { "Name": "registry.k8s.io/etcd", "SupportedOS": [ { @@ -1815,7 +1815,7 @@ "Tag": "3.6.6-0", "Version": "3.6.6-0" }, - "registry-k8s-io-kube-apiserver_v1.35.1_image": { + "registry.k8s.io/kube-apiserver": { "Name": "registry.k8s.io/kube-apiserver", "SupportedOS": [ { @@ -1830,7 +1830,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-controller-manager_v1.35.1_image": { + "registry.k8s.io/kube-controller-manager": { "Name": "registry.k8s.io/kube-controller-manager", "SupportedOS": [ { @@ -1845,7 +1845,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-proxy_v1.35.1_image": { + "registry.k8s.io/kube-proxy": { "Name": "registry.k8s.io/kube-proxy", "SupportedOS": [ { @@ -1860,7 +1860,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-kube-scheduler_v1.35.1_image": { + "registry.k8s.io/kube-scheduler": { "Name": "registry.k8s.io/kube-scheduler", "SupportedOS": [ { @@ -1875,7 +1875,7 @@ "Tag": "v1.35.1", "Version": "v1.35.1" }, - "registry-k8s-io-pause_3.10.1_image": { + "registry.k8s.io/pause": { "Name": "registry.k8s.io/pause", "SupportedOS": [ { @@ -1890,7 +1890,7 @@ "Tag": "3.10.1", "Version": "3.10.1" }, - "registry-k8s-io-sig-storage-nfs-subdir-external-pr_v4.0.2_image": { + "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner": { "Name": "registry.k8s.io/sig-storage/nfs-subdir-external-provisioner", "SupportedOS": [ { @@ -1905,7 +1905,7 @@ "Tag": "v4.0.2", "Version": "v4.0.2" }, - "sg3_utils_na_rpm": { + "sg3_utils": { "Name": "sg3_utils", "SupportedOS": [ { @@ -1924,7 +1924,7 @@ } ] }, - "sionlib_na_tarball": { + "sionlib": { "Name": "sionlib", "SupportedOS": [ { @@ -1943,8 +1943,8 @@ } ] }, - "slurm-pam_slurm_na_rpm": { - "Name": "slurm-pam_slurm", + "slurm": { + "Name": 
"slurm", "SupportedOS": [ { "Name": "RHEL", @@ -1962,8 +1962,8 @@ } ] }, - "slurm-slurmctld_na_rpm": { - "Name": "slurm-slurmctld", + "slurm-pam_slurm": { + "Name": "slurm-pam_slurm", "SupportedOS": [ { "Name": "RHEL", @@ -1981,8 +1981,8 @@ } ] }, - "slurm-slurmd_na_rpm": { - "Name": "slurm-slurmd", + "slurm-slurmctld": { + "Name": "slurm-slurmctld", "SupportedOS": [ { "Name": "RHEL", @@ -2000,8 +2000,8 @@ } ] }, - "slurm-slurmdbd_na_rpm": { - "Name": "slurm-slurmdbd", + "slurm-slurmd": { + "Name": "slurm-slurmd", "SupportedOS": [ { "Name": "RHEL", @@ -2019,8 +2019,8 @@ } ] }, - "slurm_na_rpm": { - "Name": "slurm", + "slurm-slurmdbd": { + "Name": "slurm-slurmdbd", "SupportedOS": [ { "Name": "RHEL", @@ -2038,7 +2038,7 @@ } ] }, - "strimzi-kafka-operator-helm-3-chart-0-48-0_na_tarball": { + "strimzi-kafka-operator-helm-3-chart": { "Name": "strimzi-kafka-operator-helm-3-chart-0.48.0", "SupportedOS": [ { @@ -2057,7 +2057,7 @@ } ] }, - "victoria-metrics-operator-0-59-3_na_tarball": { + "victoria-metrics-operator": { "Name": "victoria-metrics-operator-0.59.3", "SupportedOS": [ { @@ -2076,7 +2076,7 @@ } ] }, - "vim-enhanced_na_rpm": { + "vim-enhanced": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -2097,7 +2097,7 @@ } }, "OSPackages": { - "NetworkManager_na_rpm": { + "NetworkManager": { "Name": "NetworkManager", "SupportedOS": [ { @@ -2116,7 +2116,7 @@ } ] }, - "authselect_na_rpm": { + "authselect": { "Name": "authselect", "SupportedOS": [ { @@ -2135,7 +2135,7 @@ } ] }, - "autoconf_na_rpm": { + "autoconf": { "Name": "autoconf", "SupportedOS": [ { @@ -2154,7 +2154,7 @@ } ] }, - "automake_na_rpm": { + "automake": { "Name": "automake", "SupportedOS": [ { @@ -2173,8 +2173,8 @@ } ] }, - "bash-completion_na_rpm": { - "Name": "bash-completion", + "bash": { + "Name": "bash", "SupportedOS": [ { "Name": "RHEL", @@ -2192,8 +2192,8 @@ } ] }, - "bash_na_rpm": { - "Name": "bash", + "bash-completion": { + "Name": "bash-completion", "SupportedOS": [ { "Name": "RHEL", @@ -2211,8 +2211,8 
@@ } ] }, - "binutils-devel_na_rpm": { - "Name": "binutils-devel", + "binutils": { + "Name": "binutils", "SupportedOS": [ { "Name": "RHEL", @@ -2226,12 +2226,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "binutils_na_rpm": { - "Name": "binutils", + "binutils-devel": { + "Name": "binutils-devel", "SupportedOS": [ { "Name": "RHEL", @@ -2245,11 +2245,11 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "baseos" + "RepoName": "appstream" } ] }, - "bzip2_na_rpm": { + "bzip2": { "Name": "bzip2", "SupportedOS": [ { @@ -2268,7 +2268,7 @@ } ] }, - "chrony_na_rpm": { + "chrony": { "Name": "chrony", "SupportedOS": [ { @@ -2287,7 +2287,7 @@ } ] }, - "cloud-init_na_rpm": { + "cloud-init": { "Name": "cloud-init", "SupportedOS": [ { @@ -2306,7 +2306,7 @@ } ] }, - "clustershell_na_rpm": { + "clustershell": { "Name": "clustershell", "SupportedOS": [ { @@ -2325,7 +2325,7 @@ } ] }, - "cmake_na_rpm": { + "cmake": { "Name": "cmake", "SupportedOS": [ { @@ -2344,7 +2344,7 @@ } ] }, - "coreutils_na_rpm": { + "coreutils": { "Name": "coreutils", "SupportedOS": [ { @@ -2363,7 +2363,7 @@ } ] }, - "cryptsetup_na_rpm": { + "cryptsetup": { "Name": "cryptsetup", "SupportedOS": [ { @@ -2382,7 +2382,7 @@ } ] }, - "curl_na_rpm": { + "curl": { "Name": "curl", "SupportedOS": [ { @@ -2401,7 +2401,7 @@ } ] }, - "device-mapper_na_rpm": { + "device-mapper": { "Name": "device-mapper", "SupportedOS": [ { @@ -2420,7 +2420,7 @@ } ] }, - "dmidecode_na_rpm": { + "dmidecode": { "Name": "dmidecode", "SupportedOS": [ { @@ -2439,7 +2439,7 @@ } ] }, - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-el10": { "Name": "docker.io/dellhpcomniaaisolution/image-build-el10", "SupportedOS": [ { @@ -2454,8 +2454,8 @@ "Tag": "1.1", "Version": "1.1" }, - "dracut-live_na_rpm": { - "Name": "dracut-live", + "dracut": { + "Name": "dracut", "SupportedOS": [ { "Name": "RHEL", @@ -2469,12 +2469,12 @@ 
"Sources": [ { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "dracut-network_na_rpm": { - "Name": "dracut-network", + "dracut-live": { + "Name": "dracut-live", "SupportedOS": [ { "Name": "RHEL", @@ -2488,12 +2488,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "baseos" + "RepoName": "appstream" } ] }, - "dracut_na_rpm": { - "Name": "dracut", + "dracut-network": { + "Name": "dracut-network", "SupportedOS": [ { "Name": "RHEL", @@ -2511,7 +2511,7 @@ } ] }, - "emacs_na_rpm": { + "emacs": { "Name": "emacs", "SupportedOS": [ { @@ -2530,7 +2530,7 @@ } ] }, - "file_na_rpm": { + "file": { "Name": "file", "SupportedOS": [ { @@ -2549,7 +2549,7 @@ } ] }, - "findutils_na_rpm": { + "findutils": { "Name": "findutils", "SupportedOS": [ { @@ -2568,7 +2568,7 @@ } ] }, - "fping_na_rpm": { + "fping": { "Name": "fping", "SupportedOS": [ { @@ -2587,7 +2587,7 @@ } ] }, - "gawk_na_rpm": { + "gawk": { "Name": "gawk", "SupportedOS": [ { @@ -2606,8 +2606,8 @@ } ] }, - "gcc-c++_na_rpm": { - "Name": "gcc-c++", + "gcc": { + "Name": "gcc", "SupportedOS": [ { "Name": "RHEL", @@ -2625,8 +2625,8 @@ } ] }, - "gcc-gfortran_na_rpm": { - "Name": "gcc-gfortran", + "gcc-c++": { + "Name": "gcc-c++", "SupportedOS": [ { "Name": "RHEL", @@ -2644,8 +2644,8 @@ } ] }, - "gcc_na_rpm": { - "Name": "gcc", + "gcc-gfortran": { + "Name": "gcc-gfortran", "SupportedOS": [ { "Name": "RHEL", @@ -2663,8 +2663,8 @@ } ] }, - "gdb-gdbserver_na_rpm": { - "Name": "gdb-gdbserver", + "gdb": { + "Name": "gdb", "SupportedOS": [ { "Name": "RHEL", @@ -2682,8 +2682,8 @@ } ] }, - "gdb_na_rpm": { - "Name": "gdb", + "gdb-gdbserver": { + "Name": "gdb-gdbserver", "SupportedOS": [ { "Name": "RHEL", @@ -2701,7 +2701,7 @@ } ] }, - "gedit_na_rpm": { + "gedit": { "Name": "gedit", "SupportedOS": [ { @@ -2720,7 +2720,7 @@ } ] }, - "glibc-langpack-en_na_rpm": { + "glibc-langpack-en": { "Name": "glibc-langpack-en", "SupportedOS": [ { @@ -2739,7 +2739,7 @@ } ] }, - "grep_na_rpm": { + "grep": { 
"Name": "grep", "SupportedOS": [ { @@ -2758,7 +2758,7 @@ } ] }, - "gzip_na_rpm": { + "gzip": { "Name": "gzip", "SupportedOS": [ { @@ -2777,8 +2777,8 @@ } ] }, - "hwloc-libs_na_rpm": { - "Name": "hwloc-libs", + "hwloc": { + "Name": "hwloc", "SupportedOS": [ { "Name": "RHEL", @@ -2796,8 +2796,8 @@ } ] }, - "hwloc_na_rpm": { - "Name": "hwloc", + "hwloc-libs": { + "Name": "hwloc-libs", "SupportedOS": [ { "Name": "RHEL", @@ -2815,7 +2815,7 @@ } ] }, - "iperf3_na_rpm": { + "iperf3": { "Name": "iperf3", "SupportedOS": [ { @@ -2834,7 +2834,7 @@ } ] }, - "ipmitool_na_rpm": { + "ipmitool": { "Name": "ipmitool", "SupportedOS": [ { @@ -2853,7 +2853,7 @@ } ] }, - "iproute_na_rpm": { + "iproute": { "Name": "iproute", "SupportedOS": [ { @@ -2872,7 +2872,7 @@ } ] }, - "iputils_na_rpm": { + "iputils": { "Name": "iputils", "SupportedOS": [ { @@ -2891,7 +2891,7 @@ } ] }, - "kbd_na_rpm": { + "kbd": { "Name": "kbd", "SupportedOS": [ { @@ -2910,8 +2910,8 @@ } ] }, - "kernel-tools_na_rpm": { - "Name": "kernel-tools", + "kernel": { + "Name": "kernel", "SupportedOS": [ { "Name": "RHEL", @@ -2929,8 +2929,8 @@ } ] }, - "kernel_na_rpm": { - "Name": "kernel", + "kernel-tools": { + "Name": "kernel-tools", "SupportedOS": [ { "Name": "RHEL", @@ -2948,7 +2948,7 @@ } ] }, - "kexec-tools_na_rpm": { + "kexec-tools": { "Name": "kexec-tools", "SupportedOS": [ { @@ -2967,7 +2967,7 @@ } ] }, - "libcurl_na_rpm": { + "libcurl": { "Name": "libcurl", "SupportedOS": [ { @@ -2986,7 +2986,7 @@ } ] }, - "libtool_na_rpm": { + "libtool": { "Name": "libtool", "SupportedOS": [ { @@ -3005,8 +3005,8 @@ } ] }, - "lldb-devel_na_rpm": { - "Name": "lldb-devel", + "lldb": { + "Name": "lldb", "SupportedOS": [ { "Name": "RHEL", @@ -3024,8 +3024,8 @@ } ] }, - "lldb_na_rpm": { - "Name": "lldb", + "lldb-devel": { + "Name": "lldb-devel", "SupportedOS": [ { "Name": "RHEL", @@ -3043,7 +3043,7 @@ } ] }, - "lshw_na_rpm": { + "lshw": { "Name": "lshw", "SupportedOS": [ { @@ -3062,7 +3062,7 @@ } ] }, - "lsof_na_rpm": { + "lsof": { 
"Name": "lsof", "SupportedOS": [ { @@ -3081,7 +3081,7 @@ } ] }, - "ltrace_na_rpm": { + "ltrace": { "Name": "ltrace", "SupportedOS": [ { @@ -3100,7 +3100,7 @@ } ] }, - "lvm2_na_rpm": { + "lvm2": { "Name": "lvm2", "SupportedOS": [ { @@ -3119,7 +3119,7 @@ } ] }, - "make_na_rpm": { + "make": { "Name": "make", "SupportedOS": [ { @@ -3138,7 +3138,7 @@ } ] }, - "man-db_na_rpm": { + "man-db": { "Name": "man-db", "SupportedOS": [ { @@ -3157,7 +3157,7 @@ } ] }, - "man-pages_na_rpm": { + "man-pages": { "Name": "man-pages", "SupportedOS": [ { @@ -3176,7 +3176,7 @@ } ] }, - "munge-devel_na_rpm": { + "munge-devel": { "Name": "munge-devel", "SupportedOS": [ { @@ -3195,7 +3195,7 @@ } ] }, - "nfs-utils_na_rpm": { + "nfs-utils": { "Name": "nfs-utils", "SupportedOS": [ { @@ -3214,7 +3214,7 @@ } ] }, - "nfs4-acl-tools_na_rpm": { + "nfs4-acl-tools": { "Name": "nfs4-acl-tools", "SupportedOS": [ { @@ -3233,7 +3233,7 @@ } ] }, - "nm-connection-editor_na_rpm": { + "nm-connection-editor": { "Name": "nm-connection-editor", "SupportedOS": [ { @@ -3252,7 +3252,7 @@ } ] }, - "nss-pam-ldapd_na_rpm": { + "nss-pam-ldapd": { "Name": "nss-pam-ldapd", "SupportedOS": [ { @@ -3271,7 +3271,7 @@ } ] }, - "oddjob-mkhomedir_na_rpm": { + "oddjob-mkhomedir": { "Name": "oddjob-mkhomedir", "SupportedOS": [ { @@ -3290,7 +3290,7 @@ } ] }, - "openldap-clients_na_rpm": { + "openldap-clients": { "Name": "openldap-clients", "SupportedOS": [ { @@ -3309,7 +3309,7 @@ } ] }, - "openmpi_5.0.8_tarball": { + "openmpi": { "Name": "openmpi", "SupportedOS": [ { @@ -3329,8 +3329,8 @@ } ] }, - "openssh-clients_na_rpm": { - "Name": "openssh-clients", + "openssh": { + "Name": "openssh", "SupportedOS": [ { "Name": "RHEL", @@ -3348,8 +3348,8 @@ } ] }, - "openssh-server_na_rpm": { - "Name": "openssh-server", + "openssh-clients": { + "Name": "openssh-clients", "SupportedOS": [ { "Name": "RHEL", @@ -3367,8 +3367,8 @@ } ] }, - "openssh_na_rpm": { - "Name": "openssh", + "openssh-server": { + "Name": "openssh-server", "SupportedOS": [ { 
"Name": "RHEL", @@ -3386,7 +3386,7 @@ } ] }, - "openssl-devel_na_rpm": { + "openssl-devel": { "Name": "openssl-devel", "SupportedOS": [ { @@ -3405,7 +3405,7 @@ } ] }, - "openssl-libs_na_rpm_1": { + "openssl-libs_1": { "Name": "openssl-libs", "SupportedOS": [ { @@ -3424,7 +3424,7 @@ } ] }, - "ovis-ldms_na_rpm_1": { + "ovis-ldms_1": { "Name": "ovis-ldms", "SupportedOS": [ { @@ -3443,7 +3443,7 @@ } ] }, - "papi-devel_na_rpm": { + "papi-devel": { "Name": "papi-devel", "SupportedOS": [ { @@ -3462,7 +3462,7 @@ } ] }, - "papi-libs_na_rpm": { + "papi-libs": { "Name": "papi-libs", "SupportedOS": [ { @@ -3481,7 +3481,7 @@ } ] }, - "papi_na_rpm": { + "papi_1": { "Name": "papi", "SupportedOS": [ { @@ -3500,7 +3500,7 @@ } ] }, - "pciutils_na_rpm": { + "pciutils": { "Name": "pciutils", "SupportedOS": [ { @@ -3519,7 +3519,7 @@ } ] }, - "perf_na_rpm": { + "perf": { "Name": "perf", "SupportedOS": [ { @@ -3538,7 +3538,7 @@ } ] }, - "pmix-devel_na_rpm": { + "pmix-devel": { "Name": "pmix-devel", "SupportedOS": [ { @@ -3557,7 +3557,7 @@ } ] }, - "python3-cython_na_rpm_1": { + "python3-cython_1": { "Name": "python3-cython", "SupportedOS": [ { @@ -3576,7 +3576,7 @@ } ] }, - "python3-devel_na_rpm_1": { + "python3-devel_1": { "Name": "python3-devel", "SupportedOS": [ { @@ -3595,7 +3595,7 @@ } ] }, - "rsync_na_rpm": { + "rsync": { "Name": "rsync", "SupportedOS": [ { @@ -3614,7 +3614,7 @@ } ] }, - "rsyslog_na_rpm": { + "rsyslog": { "Name": "rsyslog", "SupportedOS": [ { @@ -3633,7 +3633,7 @@ } ] }, - "sed_na_rpm": { + "sed": { "Name": "sed", "SupportedOS": [ { @@ -3652,7 +3652,7 @@ } ] }, - "squashfs-tools_na_rpm": { + "squashfs-tools": { "Name": "squashfs-tools", "SupportedOS": [ { @@ -3671,7 +3671,7 @@ } ] }, - "sssd_na_rpm": { + "sssd": { "Name": "sssd", "SupportedOS": [ { @@ -3690,7 +3690,7 @@ } ] }, - "strace_na_rpm": { + "strace": { "Name": "strace", "SupportedOS": [ { @@ -3709,7 +3709,7 @@ } ] }, - "sudo_na_rpm": { + "sudo": { "Name": "sudo", "SupportedOS": [ { @@ -3728,8 +3728,8 @@ } 
] }, - "systemd-udev_na_rpm": { - "Name": "systemd-udev", + "systemd": { + "Name": "systemd", "SupportedOS": [ { "Name": "RHEL", @@ -3747,8 +3747,8 @@ } ] }, - "systemd_na_rpm": { - "Name": "systemd", + "systemd-udev": { + "Name": "systemd-udev", "SupportedOS": [ { "Name": "RHEL", @@ -3766,7 +3766,7 @@ } ] }, - "tar_na_rpm": { + "tar": { "Name": "tar", "SupportedOS": [ { @@ -3785,7 +3785,7 @@ } ] }, - "tcpdump_na_rpm": { + "tcpdump": { "Name": "tcpdump", "SupportedOS": [ { @@ -3804,7 +3804,7 @@ } ] }, - "traceroute_na_rpm": { + "traceroute": { "Name": "traceroute", "SupportedOS": [ { @@ -3823,7 +3823,7 @@ } ] }, - "ucx_1.19.0_tarball": { + "ucx": { "Name": "ucx", "SupportedOS": [ { @@ -3843,7 +3843,7 @@ } ] }, - "util-linux_na_rpm": { + "util-linux": { "Name": "util-linux", "SupportedOS": [ { @@ -3862,8 +3862,8 @@ } ] }, - "valgrind-devel_na_rpm": { - "Name": "valgrind-devel", + "valgrind": { + "Name": "valgrind", "SupportedOS": [ { "Name": "RHEL", @@ -3881,8 +3881,8 @@ } ] }, - "valgrind_na_rpm": { - "Name": "valgrind", + "valgrind-devel": { + "Name": "valgrind-devel", "SupportedOS": [ { "Name": "RHEL", @@ -3900,7 +3900,7 @@ } ] }, - "vim-enhanced_na_rpm_1": { + "vim-enhanced_1": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -3919,7 +3919,7 @@ } ] }, - "wget_na_rpm": { + "wget": { "Name": "wget", "SupportedOS": [ { @@ -3938,7 +3938,7 @@ } ] }, - "which_na_rpm": { + "which": { "Name": "which", "SupportedOS": [ { @@ -3957,7 +3957,7 @@ } ] }, - "zsh_na_rpm": { + "zsh": { "Name": "zsh", "SupportedOS": [ { @@ -3979,7 +3979,7 @@ }, "Miscellaneous": [], "InfrastructurePackages": { - "csi-powerscale-v2-16-0_v2.16.0_git": { + "csi-powerscale": { "Name": "csi-powerscale-v2.16.0", "Type": "git", "Version": "v2.16.0", @@ -3998,7 +3998,7 @@ } ] }, - "docker-io-dellemc-csm-encryption_v0.6.0_image": { + "docker.io/dellemc/csm-encryption": { "Name": "docker.io/dellemc/csm-encryption", "Type": "image", "Version": "v0.6.0", @@ -4012,7 +4012,7 @@ ], "Tag": "v0.6.0" }, - 
"external-snapshotter-v8-4-0_v8.4.0_git": { + "external-snapshotter": { "Name": "external-snapshotter-v8.4.0", "Type": "git", "Version": "v8.4.0", @@ -4031,7 +4031,7 @@ } ] }, - "helm-charts-2-16-0_csi-isilon-2.16.0_git": { + "helm-charts_1": { "Name": "helm-charts-2.16.0", "Type": "git", "Version": "csi-isilon-2.16.0", @@ -4050,7 +4050,7 @@ } ] }, - "quay-io-dell-container-storage-modules-csi-isilon_v2.16.0_image": { + "quay.io/dell/container-storage-modules/csi-isilon": { "Name": "quay.io/dell/container-storage-modules/csi-isilon", "Type": "image", "Version": "v2.16.0", @@ -4064,7 +4064,7 @@ ], "Tag": "v2.16.0" }, - "quay-io-dell-container-storage-modules-csi-metadat_v1.13.0_image": { + "quay.io/dell/container-storage-modules/csi-metadata-retriever": { "Name": "quay.io/dell/container-storage-modules/csi-metadata-retriever", "Type": "image", "Version": "v1.13.0", @@ -4078,7 +4078,7 @@ ], "Tag": "v1.13.0" }, - "quay-io-dell-container-storage-modules-csm-authori_v2.4.0_image": { + "quay.io/dell/container-storage-modules/csm-authorization-sidecar": { "Name": "quay.io/dell/container-storage-modules/csm-authorization-sidecar", "Type": "image", "Version": "v2.4.0", @@ -4092,7 +4092,7 @@ ], "Tag": "v2.4.0" }, - "quay-io-dell-container-storage-modules-dell-csi-re_v1.14.0_image": { + "quay.io/dell/container-storage-modules/dell-csi-replicator": { "Name": "quay.io/dell/container-storage-modules/dell-csi-replicator", "Type": "image", "Version": "v1.14.0", @@ -4106,7 +4106,7 @@ ], "Tag": "v1.14.0" }, - "quay-io-dell-container-storage-modules-podmon_v1.15.0_image": { + "quay.io/dell/container-storage-modules/podmon": { "Name": "quay.io/dell/container-storage-modules/podmon", "Type": "image", "Version": "v1.15.0", @@ -4120,7 +4120,7 @@ ], "Tag": "v1.15.0" }, - "registry-k8s-io-sig-storage-csi-attacher_v4.10.0_image": { + "registry.k8s.io/sig-storage/csi-attacher": { "Name": "registry.k8s.io/sig-storage/csi-attacher", "Type": "image", "Version": "v4.10.0", @@ -4134,7 +4134,7 @@ 
], "Tag": "v4.10.0" }, - "registry-k8s-io-sig-storage-csi-external-health-mo_v0.16.0_image": { + "registry.k8s.io/sig-storage/csi-external-health-monitor-controller": { "Name": "registry.k8s.io/sig-storage/csi-external-health-monitor-controller", "Type": "image", "Version": "v0.16.0", @@ -4148,7 +4148,7 @@ ], "Tag": "v0.16.0" }, - "registry-k8s-io-sig-storage-csi-node-driver-regist_v2.15.0_image": { + "registry.k8s.io/sig-storage/csi-node-driver-registrar": { "Name": "registry.k8s.io/sig-storage/csi-node-driver-registrar", "Type": "image", "Version": "v2.15.0", @@ -4162,7 +4162,7 @@ ], "Tag": "v2.15.0" }, - "registry-k8s-io-sig-storage-csi-provisioner_v6.1.0_image": { + "registry.k8s.io/sig-storage/csi-provisioner": { "Name": "registry.k8s.io/sig-storage/csi-provisioner", "Type": "image", "Version": "v6.1.0", @@ -4176,7 +4176,7 @@ ], "Tag": "v6.1.0" }, - "registry-k8s-io-sig-storage-csi-resizer_v2.0.0_image": { + "registry.k8s.io/sig-storage/csi-resizer": { "Name": "registry.k8s.io/sig-storage/csi-resizer", "Type": "image", "Version": "v2.0.0", @@ -4190,7 +4190,7 @@ ], "Tag": "v2.0.0" }, - "registry-k8s-io-sig-storage-csi-snapshotter_v8.4.0_image": { + "registry.k8s.io/sig-storage/csi-snapshotter": { "Name": "registry.k8s.io/sig-storage/csi-snapshotter", "Type": "image", "Version": "v8.4.0", @@ -4204,7 +4204,7 @@ ], "Tag": "v8.4.0" }, - "registry-k8s-io-sig-storage-snapshot-controller_v8.4.0_image": { + "registry.k8s.io/sig-storage/snapshot-controller": { "Name": "registry.k8s.io/sig-storage/snapshot-controller", "Type": "image", "Version": "v8.4.0", diff --git a/examples/catalog/catalog_rhel_x86_64_with_slurm_only.json b/examples/catalog/catalog_rhel_x86_64_with_slurm_only.json index 69f73baa15..7c8d819d0e 100644 --- a/examples/catalog/catalog_rhel_x86_64_with_slurm_only.json +++ b/examples/catalog/catalog_rhel_x86_64_with_slurm_only.json @@ -7,105 +7,105 @@ { "Name": "login_compiler_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - 
"device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "login_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmd_na_rpm", - "slurm_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm", + "slurm-slurmd" ] }, { "Name": "slurm_control_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "likwid_na_tarball", - 
"lsscsi_na_rpm", - "mariadb-server_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-PyMySQL_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-slurmctld_na_rpm", - "slurm-slurmdbd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "likwid", + "lsscsi", + "mariadb-server", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-PyMySQL", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-slurmctld", + "slurm-slurmdbd" ] }, { "Name": "slurm_node_x86_64", "FunctionalPackages": [ - "apptainer_na_rpm", - "device-mapper-multipath_na_rpm", - "doca-ofed_na_rpm_repo", - "firewalld_na_rpm", - "geopm_na_tarball", - "imb_na_tarball", - "iscsi-initiator-utils_na_rpm", - "kernel-devel_na_rpm", - "kernel-headers_na_rpm", - "likwid_na_tarball", - "lsscsi_na_rpm", - "msr-safe_na_tarball", - "munge_na_rpm", - "nvcr-io-nvidia-hpc-benchmarks_25.09_image", - "osu-micro-benchmarks_na_tarball", - "papi_na_tarball", - "pmix_na_rpm", - "python3-firewall_na_rpm", - "sg3_utils_na_rpm", - "sionlib_na_tarball", - "slurm-pam_slurm_na_rpm", - "slurm-slurmd_na_rpm" + "apptainer", + "device-mapper-multipath", + "doca-ofed", + "firewalld", + "geopm", + "imb", + "iscsi-initiator-utils", + "kernel-devel", + "kernel-headers", + "likwid", + "lsscsi", + "msr-safe", + "munge", + "nvcr.io/nvidia/hpc-benchmarks", + "osu-micro-benchmarks", + "papi", + "pmix", + "python3-firewall", + "sg3_utils", + "sionlib", + "slurm-pam_slurm", + "slurm-slurmd" ] } ], @@ -114,97 +114,97 @@ "Name": "RHEL", "Version": "10.0", "osPackages": [ - "NetworkManager_na_rpm", - "authselect_na_rpm", - "autoconf_na_rpm", - "automake_na_rpm", - "bash-completion_na_rpm", - "bash_na_rpm", - "binutils-devel_na_rpm", - 
"binutils_na_rpm", - "bzip2_na_rpm", - "chrony_na_rpm", - "cloud-init_na_rpm", - "clustershell_na_rpm", - "cmake_na_rpm", - "coreutils_na_rpm", - "cryptsetup_na_rpm", - "curl_na_rpm", - "device-mapper_na_rpm", - "dmidecode_na_rpm", - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image", - "dracut-live_na_rpm", - "dracut-network_na_rpm", - "dracut_na_rpm", - "emacs_na_rpm", - "file_na_rpm", - "findutils_na_rpm", - "fping_na_rpm", - "gawk_na_rpm", - "gcc-c++_na_rpm", - "gcc-gfortran_na_rpm", - "gcc_na_rpm", - "gdb-gdbserver_na_rpm", - "gdb_na_rpm", - "gedit_na_rpm", - "glibc-langpack-en_na_rpm", - "grep_na_rpm", - "gzip_na_rpm", - "hwloc-libs_na_rpm", - "hwloc_na_rpm", - "iperf3_na_rpm", - "ipmitool_na_rpm", - "iproute_na_rpm", - "iputils_na_rpm", - "kbd_na_rpm", - "kernel-tools_na_rpm", - "kernel_na_rpm", - "kexec-tools_na_rpm", - "libcurl_na_rpm", - "libtool_na_rpm", - "lldb-devel_na_rpm", - "lldb_na_rpm", - "lshw_na_rpm", - "lsof_na_rpm", - "ltrace_na_rpm", - "lvm2_na_rpm", - "make_na_rpm", - "man-db_na_rpm", - "man-pages_na_rpm", - "nfs-utils_na_rpm", - "nfs4-acl-tools_na_rpm", - "nm-connection-editor_na_rpm", - "nss-pam-ldapd_na_rpm", - "oddjob-mkhomedir_na_rpm", - "openldap-clients_na_rpm", - "openssh-clients_na_rpm", - "openssh-server_na_rpm", - "openssh_na_rpm", - "openssl-devel_na_rpm", - "papi-devel_na_rpm", - "papi-libs_na_rpm", - "papi_na_rpm", - "pciutils_na_rpm", - "perf_na_rpm", - "rsync_na_rpm", - "rsyslog_na_rpm", - "sed_na_rpm", - "squashfs-tools_na_rpm", - "sssd_na_rpm", - "strace_na_rpm", - "sudo_na_rpm", - "systemd-udev_na_rpm", - "systemd_na_rpm", - "tar_na_rpm", - "tcpdump_na_rpm", - "traceroute_na_rpm", - "util-linux_na_rpm", - "valgrind-devel_na_rpm", - "valgrind_na_rpm", - "vim-enhanced_na_rpm", - "wget_na_rpm", - "which_na_rpm", - "zsh_na_rpm" + "NetworkManager", + "authselect", + "autoconf", + "automake", + "bash", + "bash-completion", + "binutils", + "binutils-devel", + "bzip2", + "chrony", + "cloud-init", + "clustershell", + 
"cmake", + "coreutils", + "cryptsetup", + "curl", + "device-mapper", + "dmidecode", + "docker.io/dellhpcomniaaisolution/image-build-el10", + "dracut", + "dracut-live", + "dracut-network", + "emacs", + "file", + "findutils", + "fping", + "gawk", + "gcc", + "gcc-c++", + "gcc-gfortran", + "gdb", + "gdb-gdbserver", + "gedit", + "glibc-langpack-en", + "grep", + "gzip", + "hwloc", + "hwloc-libs", + "iperf3", + "ipmitool", + "iproute", + "iputils", + "kbd", + "kernel", + "kernel-tools", + "kexec-tools", + "libcurl", + "libtool", + "lldb", + "lldb-devel", + "lshw", + "lsof", + "ltrace", + "lvm2", + "make", + "man-db", + "man-pages", + "nfs-utils", + "nfs4-acl-tools", + "nm-connection-editor", + "nss-pam-ldapd", + "oddjob-mkhomedir", + "openldap-clients", + "openssh", + "openssh-clients", + "openssh-server", + "openssl-devel", + "papi-devel", + "papi-libs", + "papi_1", + "pciutils", + "perf", + "rsync", + "rsyslog", + "sed", + "squashfs-tools", + "sssd", + "strace", + "sudo", + "systemd", + "systemd-udev", + "tar", + "tcpdump", + "traceroute", + "util-linux", + "valgrind", + "valgrind-devel", + "vim-enhanced", + "wget", + "which", + "zsh" ] } ], @@ -212,7 +212,7 @@ "Drivers": [], "DriverPackages": {}, "FunctionalPackages": { - "apptainer_na_rpm": { + "apptainer": { "Name": "apptainer", "SupportedOS": [ { @@ -231,7 +231,7 @@ } ] }, - "device-mapper-multipath_na_rpm": { + "device-mapper-multipath": { "Name": "device-mapper-multipath", "SupportedOS": [ { @@ -250,7 +250,7 @@ } ] }, - "doca-ofed_na_rpm_repo": { + "doca-ofed": { "Name": "doca-ofed", "SupportedOS": [ { @@ -269,7 +269,7 @@ } ] }, - "firewalld_na_rpm": { + "firewalld": { "Name": "firewalld", "SupportedOS": [ { @@ -288,7 +288,7 @@ } ] }, - "geopm_na_tarball": { + "geopm": { "Name": "geopm", "SupportedOS": [ { @@ -307,7 +307,7 @@ } ] }, - "imb_na_tarball": { + "imb": { "Name": "imb", "SupportedOS": [ { @@ -326,7 +326,7 @@ } ] }, - "iscsi-initiator-utils_na_rpm": { + "iscsi-initiator-utils": { "Name": 
"iscsi-initiator-utils", "SupportedOS": [ { @@ -345,7 +345,7 @@ } ] }, - "kernel-devel_na_rpm": { + "kernel-devel": { "Name": "kernel-devel", "SupportedOS": [ { @@ -364,7 +364,7 @@ } ] }, - "kernel-headers_na_rpm": { + "kernel-headers": { "Name": "kernel-headers", "SupportedOS": [ { @@ -383,7 +383,7 @@ } ] }, - "likwid_na_tarball": { + "likwid": { "Name": "likwid", "SupportedOS": [ { @@ -402,7 +402,7 @@ } ] }, - "lsscsi_na_rpm": { + "lsscsi": { "Name": "lsscsi", "SupportedOS": [ { @@ -421,7 +421,7 @@ } ] }, - "mariadb-server_na_rpm": { + "mariadb-server": { "Name": "mariadb-server", "SupportedOS": [ { @@ -440,7 +440,7 @@ } ] }, - "msr-safe_na_tarball": { + "msr-safe": { "Name": "msr-safe", "SupportedOS": [ { @@ -459,7 +459,7 @@ } ] }, - "munge_na_rpm": { + "munge": { "Name": "munge", "SupportedOS": [ { @@ -478,7 +478,7 @@ } ] }, - "nvcr-io-nvidia-hpc-benchmarks_25.09_image": { + "nvcr.io/nvidia/hpc-benchmarks": { "Name": "nvcr.io/nvidia/hpc-benchmarks", "SupportedOS": [ { @@ -493,7 +493,7 @@ "Tag": "25.09", "Version": "25.09" }, - "osu-micro-benchmarks_na_tarball": { + "osu-micro-benchmarks": { "Name": "osu-micro-benchmarks", "SupportedOS": [ { @@ -512,7 +512,7 @@ } ] }, - "papi_na_tarball": { + "papi": { "Name": "papi", "SupportedOS": [ { @@ -531,7 +531,7 @@ } ] }, - "pmix_na_rpm": { + "pmix": { "Name": "pmix", "SupportedOS": [ { @@ -550,7 +550,7 @@ } ] }, - "python3-PyMySQL_na_rpm": { + "python3-PyMySQL": { "Name": "python3-PyMySQL", "SupportedOS": [ { @@ -569,7 +569,7 @@ } ] }, - "python3-firewall_na_rpm": { + "python3-firewall": { "Name": "python3-firewall", "SupportedOS": [ { @@ -588,7 +588,7 @@ } ] }, - "sg3_utils_na_rpm": { + "sg3_utils": { "Name": "sg3_utils", "SupportedOS": [ { @@ -607,7 +607,7 @@ } ] }, - "sionlib_na_tarball": { + "sionlib": { "Name": "sionlib", "SupportedOS": [ { @@ -626,8 +626,8 @@ } ] }, - "slurm-pam_slurm_na_rpm": { - "Name": "slurm-pam_slurm", + "slurm": { + "Name": "slurm", "SupportedOS": [ { "Name": "RHEL", @@ -645,8 +645,8 @@ } ] 
}, - "slurm-slurmctld_na_rpm": { - "Name": "slurm-slurmctld", + "slurm-pam_slurm": { + "Name": "slurm-pam_slurm", "SupportedOS": [ { "Name": "RHEL", @@ -664,8 +664,8 @@ } ] }, - "slurm-slurmd_na_rpm": { - "Name": "slurm-slurmd", + "slurm-slurmctld": { + "Name": "slurm-slurmctld", "SupportedOS": [ { "Name": "RHEL", @@ -683,8 +683,8 @@ } ] }, - "slurm-slurmdbd_na_rpm": { - "Name": "slurm-slurmdbd", + "slurm-slurmd": { + "Name": "slurm-slurmd", "SupportedOS": [ { "Name": "RHEL", @@ -702,8 +702,8 @@ } ] }, - "slurm_na_rpm": { - "Name": "slurm", + "slurm-slurmdbd": { + "Name": "slurm-slurmdbd", "SupportedOS": [ { "Name": "RHEL", @@ -723,7 +723,7 @@ } }, "OSPackages": { - "NetworkManager_na_rpm": { + "NetworkManager": { "Name": "NetworkManager", "SupportedOS": [ { @@ -742,7 +742,7 @@ } ] }, - "authselect_na_rpm": { + "authselect": { "Name": "authselect", "SupportedOS": [ { @@ -761,7 +761,7 @@ } ] }, - "autoconf_na_rpm": { + "autoconf": { "Name": "autoconf", "SupportedOS": [ { @@ -780,7 +780,7 @@ } ] }, - "automake_na_rpm": { + "automake": { "Name": "automake", "SupportedOS": [ { @@ -799,8 +799,8 @@ } ] }, - "bash-completion_na_rpm": { - "Name": "bash-completion", + "bash": { + "Name": "bash", "SupportedOS": [ { "Name": "RHEL", @@ -818,8 +818,8 @@ } ] }, - "bash_na_rpm": { - "Name": "bash", + "bash-completion": { + "Name": "bash-completion", "SupportedOS": [ { "Name": "RHEL", @@ -837,8 +837,8 @@ } ] }, - "binutils-devel_na_rpm": { - "Name": "binutils-devel", + "binutils": { + "Name": "binutils", "SupportedOS": [ { "Name": "RHEL", @@ -852,12 +852,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "binutils_na_rpm": { - "Name": "binutils", + "binutils-devel": { + "Name": "binutils-devel", "SupportedOS": [ { "Name": "RHEL", @@ -871,11 +871,11 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "baseos" + "RepoName": "appstream" } ] }, - "bzip2_na_rpm": { + "bzip2": { "Name": "bzip2", "SupportedOS": [ { @@ -894,7 
+894,7 @@ } ] }, - "chrony_na_rpm": { + "chrony": { "Name": "chrony", "SupportedOS": [ { @@ -913,7 +913,7 @@ } ] }, - "cloud-init_na_rpm": { + "cloud-init": { "Name": "cloud-init", "SupportedOS": [ { @@ -932,7 +932,7 @@ } ] }, - "clustershell_na_rpm": { + "clustershell": { "Name": "clustershell", "SupportedOS": [ { @@ -951,7 +951,7 @@ } ] }, - "cmake_na_rpm": { + "cmake": { "Name": "cmake", "SupportedOS": [ { @@ -970,7 +970,7 @@ } ] }, - "coreutils_na_rpm": { + "coreutils": { "Name": "coreutils", "SupportedOS": [ { @@ -989,7 +989,7 @@ } ] }, - "cryptsetup_na_rpm": { + "cryptsetup": { "Name": "cryptsetup", "SupportedOS": [ { @@ -1008,7 +1008,7 @@ } ] }, - "curl_na_rpm": { + "curl": { "Name": "curl", "SupportedOS": [ { @@ -1027,7 +1027,7 @@ } ] }, - "device-mapper_na_rpm": { + "device-mapper": { "Name": "device-mapper", "SupportedOS": [ { @@ -1046,7 +1046,7 @@ } ] }, - "dmidecode_na_rpm": { + "dmidecode": { "Name": "dmidecode", "SupportedOS": [ { @@ -1065,7 +1065,7 @@ } ] }, - "docker-io-dellhpcomniaaisolution-image-build-el10_1.1_image": { + "docker.io/dellhpcomniaaisolution/image-build-el10": { "Name": "docker.io/dellhpcomniaaisolution/image-build-el10", "SupportedOS": [ { @@ -1080,8 +1080,8 @@ "Tag": "1.1", "Version": "1.1" }, - "dracut-live_na_rpm": { - "Name": "dracut-live", + "dracut": { + "Name": "dracut", "SupportedOS": [ { "Name": "RHEL", @@ -1095,12 +1095,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "appstream" + "RepoName": "baseos" } ] }, - "dracut-network_na_rpm": { - "Name": "dracut-network", + "dracut-live": { + "Name": "dracut-live", "SupportedOS": [ { "Name": "RHEL", @@ -1114,12 +1114,12 @@ "Sources": [ { "Architecture": "x86_64", - "RepoName": "baseos" + "RepoName": "appstream" } ] }, - "dracut_na_rpm": { - "Name": "dracut", + "dracut-network": { + "Name": "dracut-network", "SupportedOS": [ { "Name": "RHEL", @@ -1137,7 +1137,7 @@ } ] }, - "emacs_na_rpm": { + "emacs": { "Name": "emacs", "SupportedOS": [ { @@ -1156,7 +1156,7 @@ } ] }, 
- "file_na_rpm": { + "file": { "Name": "file", "SupportedOS": [ { @@ -1175,7 +1175,7 @@ } ] }, - "findutils_na_rpm": { + "findutils": { "Name": "findutils", "SupportedOS": [ { @@ -1194,7 +1194,7 @@ } ] }, - "fping_na_rpm": { + "fping": { "Name": "fping", "SupportedOS": [ { @@ -1213,7 +1213,7 @@ } ] }, - "gawk_na_rpm": { + "gawk": { "Name": "gawk", "SupportedOS": [ { @@ -1232,8 +1232,8 @@ } ] }, - "gcc-c++_na_rpm": { - "Name": "gcc-c++", + "gcc": { + "Name": "gcc", "SupportedOS": [ { "Name": "RHEL", @@ -1251,8 +1251,8 @@ } ] }, - "gcc-gfortran_na_rpm": { - "Name": "gcc-gfortran", + "gcc-c++": { + "Name": "gcc-c++", "SupportedOS": [ { "Name": "RHEL", @@ -1270,8 +1270,8 @@ } ] }, - "gcc_na_rpm": { - "Name": "gcc", + "gcc-gfortran": { + "Name": "gcc-gfortran", "SupportedOS": [ { "Name": "RHEL", @@ -1289,8 +1289,8 @@ } ] }, - "gdb-gdbserver_na_rpm": { - "Name": "gdb-gdbserver", + "gdb": { + "Name": "gdb", "SupportedOS": [ { "Name": "RHEL", @@ -1308,8 +1308,8 @@ } ] }, - "gdb_na_rpm": { - "Name": "gdb", + "gdb-gdbserver": { + "Name": "gdb-gdbserver", "SupportedOS": [ { "Name": "RHEL", @@ -1327,7 +1327,7 @@ } ] }, - "gedit_na_rpm": { + "gedit": { "Name": "gedit", "SupportedOS": [ { @@ -1346,7 +1346,7 @@ } ] }, - "glibc-langpack-en_na_rpm": { + "glibc-langpack-en": { "Name": "glibc-langpack-en", "SupportedOS": [ { @@ -1365,7 +1365,7 @@ } ] }, - "grep_na_rpm": { + "grep": { "Name": "grep", "SupportedOS": [ { @@ -1384,7 +1384,7 @@ } ] }, - "gzip_na_rpm": { + "gzip": { "Name": "gzip", "SupportedOS": [ { @@ -1403,8 +1403,8 @@ } ] }, - "hwloc-libs_na_rpm": { - "Name": "hwloc-libs", + "hwloc": { + "Name": "hwloc", "SupportedOS": [ { "Name": "RHEL", @@ -1422,8 +1422,8 @@ } ] }, - "hwloc_na_rpm": { - "Name": "hwloc", + "hwloc-libs": { + "Name": "hwloc-libs", "SupportedOS": [ { "Name": "RHEL", @@ -1441,7 +1441,7 @@ } ] }, - "iperf3_na_rpm": { + "iperf3": { "Name": "iperf3", "SupportedOS": [ { @@ -1460,7 +1460,7 @@ } ] }, - "ipmitool_na_rpm": { + "ipmitool": { "Name": "ipmitool", 
"SupportedOS": [ { @@ -1479,7 +1479,7 @@ } ] }, - "iproute_na_rpm": { + "iproute": { "Name": "iproute", "SupportedOS": [ { @@ -1498,7 +1498,7 @@ } ] }, - "iputils_na_rpm": { + "iputils": { "Name": "iputils", "SupportedOS": [ { @@ -1517,7 +1517,7 @@ } ] }, - "kbd_na_rpm": { + "kbd": { "Name": "kbd", "SupportedOS": [ { @@ -1536,8 +1536,8 @@ } ] }, - "kernel-tools_na_rpm": { - "Name": "kernel-tools", + "kernel": { + "Name": "kernel", "SupportedOS": [ { "Name": "RHEL", @@ -1555,8 +1555,8 @@ } ] }, - "kernel_na_rpm": { - "Name": "kernel", + "kernel-tools": { + "Name": "kernel-tools", "SupportedOS": [ { "Name": "RHEL", @@ -1574,7 +1574,7 @@ } ] }, - "kexec-tools_na_rpm": { + "kexec-tools": { "Name": "kexec-tools", "SupportedOS": [ { @@ -1593,7 +1593,7 @@ } ] }, - "libcurl_na_rpm": { + "libcurl": { "Name": "libcurl", "SupportedOS": [ { @@ -1612,7 +1612,7 @@ } ] }, - "libtool_na_rpm": { + "libtool": { "Name": "libtool", "SupportedOS": [ { @@ -1631,8 +1631,8 @@ } ] }, - "lldb-devel_na_rpm": { - "Name": "lldb-devel", + "lldb": { + "Name": "lldb", "SupportedOS": [ { "Name": "RHEL", @@ -1650,8 +1650,8 @@ } ] }, - "lldb_na_rpm": { - "Name": "lldb", + "lldb-devel": { + "Name": "lldb-devel", "SupportedOS": [ { "Name": "RHEL", @@ -1669,7 +1669,7 @@ } ] }, - "lshw_na_rpm": { + "lshw": { "Name": "lshw", "SupportedOS": [ { @@ -1688,7 +1688,7 @@ } ] }, - "lsof_na_rpm": { + "lsof": { "Name": "lsof", "SupportedOS": [ { @@ -1707,7 +1707,7 @@ } ] }, - "ltrace_na_rpm": { + "ltrace": { "Name": "ltrace", "SupportedOS": [ { @@ -1726,7 +1726,7 @@ } ] }, - "lvm2_na_rpm": { + "lvm2": { "Name": "lvm2", "SupportedOS": [ { @@ -1745,7 +1745,7 @@ } ] }, - "make_na_rpm": { + "make": { "Name": "make", "SupportedOS": [ { @@ -1764,7 +1764,7 @@ } ] }, - "man-db_na_rpm": { + "man-db": { "Name": "man-db", "SupportedOS": [ { @@ -1783,7 +1783,7 @@ } ] }, - "man-pages_na_rpm": { + "man-pages": { "Name": "man-pages", "SupportedOS": [ { @@ -1802,7 +1802,7 @@ } ] }, - "nfs-utils_na_rpm": { + "nfs-utils": { 
"Name": "nfs-utils", "SupportedOS": [ { @@ -1821,7 +1821,7 @@ } ] }, - "nfs4-acl-tools_na_rpm": { + "nfs4-acl-tools": { "Name": "nfs4-acl-tools", "SupportedOS": [ { @@ -1840,7 +1840,7 @@ } ] }, - "nm-connection-editor_na_rpm": { + "nm-connection-editor": { "Name": "nm-connection-editor", "SupportedOS": [ { @@ -1859,7 +1859,7 @@ } ] }, - "nss-pam-ldapd_na_rpm": { + "nss-pam-ldapd": { "Name": "nss-pam-ldapd", "SupportedOS": [ { @@ -1878,7 +1878,7 @@ } ] }, - "oddjob-mkhomedir_na_rpm": { + "oddjob-mkhomedir": { "Name": "oddjob-mkhomedir", "SupportedOS": [ { @@ -1897,7 +1897,7 @@ } ] }, - "openldap-clients_na_rpm": { + "openldap-clients": { "Name": "openldap-clients", "SupportedOS": [ { @@ -1916,8 +1916,8 @@ } ] }, - "openssh-clients_na_rpm": { - "Name": "openssh-clients", + "openssh": { + "Name": "openssh", "SupportedOS": [ { "Name": "RHEL", @@ -1935,8 +1935,8 @@ } ] }, - "openssh-server_na_rpm": { - "Name": "openssh-server", + "openssh-clients": { + "Name": "openssh-clients", "SupportedOS": [ { "Name": "RHEL", @@ -1954,8 +1954,8 @@ } ] }, - "openssh_na_rpm": { - "Name": "openssh", + "openssh-server": { + "Name": "openssh-server", "SupportedOS": [ { "Name": "RHEL", @@ -1973,7 +1973,7 @@ } ] }, - "openssl-devel_na_rpm": { + "openssl-devel": { "Name": "openssl-devel", "SupportedOS": [ { @@ -1992,7 +1992,7 @@ } ] }, - "papi-devel_na_rpm": { + "papi-devel": { "Name": "papi-devel", "SupportedOS": [ { @@ -2011,7 +2011,7 @@ } ] }, - "papi-libs_na_rpm": { + "papi-libs": { "Name": "papi-libs", "SupportedOS": [ { @@ -2030,7 +2030,7 @@ } ] }, - "papi_na_rpm": { + "papi_1": { "Name": "papi", "SupportedOS": [ { @@ -2049,7 +2049,7 @@ } ] }, - "pciutils_na_rpm": { + "pciutils": { "Name": "pciutils", "SupportedOS": [ { @@ -2068,7 +2068,7 @@ } ] }, - "perf_na_rpm": { + "perf": { "Name": "perf", "SupportedOS": [ { @@ -2087,7 +2087,7 @@ } ] }, - "rsync_na_rpm": { + "rsync": { "Name": "rsync", "SupportedOS": [ { @@ -2106,7 +2106,7 @@ } ] }, - "rsyslog_na_rpm": { + "rsyslog": { "Name": 
"rsyslog", "SupportedOS": [ { @@ -2125,7 +2125,7 @@ } ] }, - "sed_na_rpm": { + "sed": { "Name": "sed", "SupportedOS": [ { @@ -2144,7 +2144,7 @@ } ] }, - "squashfs-tools_na_rpm": { + "squashfs-tools": { "Name": "squashfs-tools", "SupportedOS": [ { @@ -2163,7 +2163,7 @@ } ] }, - "sssd_na_rpm": { + "sssd": { "Name": "sssd", "SupportedOS": [ { @@ -2182,7 +2182,7 @@ } ] }, - "strace_na_rpm": { + "strace": { "Name": "strace", "SupportedOS": [ { @@ -2201,7 +2201,7 @@ } ] }, - "sudo_na_rpm": { + "sudo": { "Name": "sudo", "SupportedOS": [ { @@ -2220,8 +2220,8 @@ } ] }, - "systemd-udev_na_rpm": { - "Name": "systemd-udev", + "systemd": { + "Name": "systemd", "SupportedOS": [ { "Name": "RHEL", @@ -2239,8 +2239,8 @@ } ] }, - "systemd_na_rpm": { - "Name": "systemd", + "systemd-udev": { + "Name": "systemd-udev", "SupportedOS": [ { "Name": "RHEL", @@ -2258,7 +2258,7 @@ } ] }, - "tar_na_rpm": { + "tar": { "Name": "tar", "SupportedOS": [ { @@ -2277,7 +2277,7 @@ } ] }, - "tcpdump_na_rpm": { + "tcpdump": { "Name": "tcpdump", "SupportedOS": [ { @@ -2296,7 +2296,7 @@ } ] }, - "traceroute_na_rpm": { + "traceroute": { "Name": "traceroute", "SupportedOS": [ { @@ -2315,7 +2315,7 @@ } ] }, - "util-linux_na_rpm": { + "util-linux": { "Name": "util-linux", "SupportedOS": [ { @@ -2334,8 +2334,8 @@ } ] }, - "valgrind-devel_na_rpm": { - "Name": "valgrind-devel", + "valgrind": { + "Name": "valgrind", "SupportedOS": [ { "Name": "RHEL", @@ -2353,8 +2353,8 @@ } ] }, - "valgrind_na_rpm": { - "Name": "valgrind", + "valgrind-devel": { + "Name": "valgrind-devel", "SupportedOS": [ { "Name": "RHEL", @@ -2372,7 +2372,7 @@ } ] }, - "vim-enhanced_na_rpm": { + "vim-enhanced": { "Name": "vim-enhanced", "SupportedOS": [ { @@ -2391,7 +2391,7 @@ } ] }, - "wget_na_rpm": { + "wget": { "Name": "wget", "SupportedOS": [ { @@ -2410,7 +2410,7 @@ } ] }, - "which_na_rpm": { + "which": { "Name": "which", "SupportedOS": [ { @@ -2429,7 +2429,7 @@ } ] }, - "zsh_na_rpm": { + "zsh": { "Name": "zsh", "SupportedOS": [ { From 
4b3405bd5ffdcc54d9b435a4a44fde49645ac6aa Mon Sep 17 00:00:00 2001 From: Rajeshkumar-s2 Date: Thu, 4 Jun 2026 15:32:38 +0530 Subject: [PATCH 12/78] Fix the incorrect pipeline status in Gitlab (#4676) * Upgrade gitlab config and Pipeline as part of BuildStream 2.1 to 2.2 Signed-off-by: Rajeshkumar S * Update the upgrade and upgrade_build_stream playbooks Signed-off-by: Rajeshkumar S * Fixing lint issues and review comments Signed-off-by: Rajeshkumar S * Modularize the gitlab tasks under upgrade_build_stream.yml playbook Signed-off-by: Rajeshkumar S * Move relevant tasks Signed-off-by: Rajeshkumar S * Fix integration issues during upgrade Signed-off-by: Rajeshkumar S * Fix lint issues Signed-off-by: Rajeshkumar S * BuildStream Feature Bug fixes and Upgrade Stabilisation Signed-off-by: Rajeshkumar S * Fix lint issues and review comments Signed-off-by: Rajeshkumar S * Removed unused methods Signed-off-by: Rajeshkumar S * Refactor Gitlab configuration in Upgrade Signed-off-by: Rajeshkumar S * Fix upgrade validation, summary paths, lint issues, and validate stage log_file_path - Fix validation: only abort if enable_build_stream was true in 2.1 backup AND false in current config - Fix summary: resolve backup_dir to actual path using source_version instead of Jinja template - Fix 3 yaml line-length lint failures (gitlab_ci_file.yml, gitlab_example_catalog.yml, gitlab_input_file.yml) - Fix missing log_file_path for validate stage: add NFS log file creation in playbook watcher execute_molecule() to match other stages Signed-off-by: Rajeshkumar S * Remove buildstream directories as part of cleanup Signed-off-by: Rajeshkumar S * Fix review comments Signed-off-by: Rajeshkumar S * Address review comments Signed-off-by: Rajeshkumar S * Fix documentation for discover.yml Signed-off-by: Rajeshkumar S * Fix checkmarx issues Signed-off-by: Rajeshkumar S * Fix checkmarx issues2 Signed-off-by: Rajeshkumar S * Fic checkmarx issues3 Signed-off-by: Rajeshkumar S * Fix the checkmarx issues4 
Signed-off-by: Rajeshkumar S * Fix checkmarx issues5 Signed-off-by: Rajeshkumar S * Fix the order of trigger files followed by pipeline to avoid incorrect pipelines execution Signed-off-by: Rajeshkumar S * Add ci skip tags Signed-off-by: Rajeshkumar S * Better logging visibility Signed-off-by: Rajeshkumar S * Update the Image_Groups table Signed-off-by: Rajeshkumar S * Catalog update Signed-off-by: Rajeshkumar S * Replace the hardcoded postgres user Signed-off-by: Rajeshkumar S * Fix the incorrect pipeline status in Gitlab Signed-off-by: Rajeshkumar S --------- Signed-off-by: Rajeshkumar S --- gitlab/roles/hosted_gitlab/files/.gitlab-ci.yml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/gitlab/roles/hosted_gitlab/files/.gitlab-ci.yml b/gitlab/roles/hosted_gitlab/files/.gitlab-ci.yml index a907c3c187..6c484a5523 100644 --- a/gitlab/roles/hosted_gitlab/files/.gitlab-ci.yml +++ b/gitlab/roles/hosted_gitlab/files/.gitlab-ci.yml @@ -24,13 +24,9 @@ # Click exactly one to run that pipeline independently. # API trigger : Set PIPELINE_TYPE to "build", "deploy", or "cleanup". # -# Constraint: no two child pipelines run in the same parent execution. -# Serial execution: all trigger jobs share a single resource_group (pipeline) -# so that only ONE pipeline of any type (build, deploy, or cleanup) runs at a -# time. If a new commit triggers a pipeline while a previous one is still -# running, the new pipeline will wait (queued) until the running one completes. -# This prevents status mismatches, race conditions, and configuration -# corruption from concurrent access to the shared backend. +# Note: Multiple pipelines can now run concurrently as resource_group has been removed. +# This allows parallel execution of build, deploy, and cleanup pipelines. +# Be aware of potential race conditions when accessing shared resources concurrently. 
# --------------------------------------------------------------------------- workflow: @@ -49,7 +45,6 @@ stages: # --------------------------------------------------------------------------- build_pipeline: stage: pipeline_selection - resource_group: pipeline trigger: include: .gitlab-ci-build.yml strategy: depend @@ -78,7 +73,6 @@ build_pipeline: # --------------------------------------------------------------------------- deploy_pipeline: stage: pipeline_selection - resource_group: pipeline trigger: include: .gitlab-ci-deploy.yml strategy: depend @@ -107,7 +101,6 @@ deploy_pipeline: # --------------------------------------------------------------------------- cleanup_pipeline: stage: pipeline_selection - resource_group: pipeline trigger: include: .gitlab-ci-cleanup.yml strategy: depend From 629e9de402e5fd712cfea6e700145fad98028faa Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Thu, 4 Jun 2026 15:53:53 +0530 Subject: [PATCH 13/78] ansible 2.20 fix Signed-off-by: Abhishek S A --- .../tasks/transform_build_stream_config.yml | 4 ++-- .../import_input_parameters/tasks/transform_gitlab_config.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml index 58fdcf1d5e..5e7592ae01 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_build_stream_config.yml @@ -64,7 +64,7 @@ - name: Validate build_stream_host_ip format if provided ansible.builtin.assert: that: - - build_stream_host_ip == "" or (build_stream_host_ip | ansible.utils.ipaddr | bool) + - build_stream_host_ip == "" or (build_stream_host_ip | ansible.utils.ipaddr != false) fail_msg: "build_stream_host_ip '{{ build_stream_host_ip }}' is not a valid IP address" success_msg: "build_stream_host_ip is valid" when: build_stream_host_ip != "" @@ 
-72,7 +72,7 @@ - name: Validate build_stream_aarch64_ip format if provided ansible.builtin.assert: that: - - build_stream_aarch64_ip == "" or (build_stream_aarch64_ip | ansible.utils.ipaddr | bool) + - build_stream_aarch64_ip == "" or (build_stream_aarch64_ip | ansible.utils.ipaddr != false) fail_msg: "build_stream_aarch64_ip '{{ build_stream_aarch64_ip }}' is not a valid IP address" success_msg: "build_stream_aarch64_ip is valid" when: build_stream_aarch64_ip != "" diff --git a/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml index 609f3c0509..f6c6e6fe29 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_gitlab_config.yml @@ -62,7 +62,7 @@ - name: Validate gitlab_host IP format if provided ansible.builtin.assert: that: - - gitlab_host == "" or (gitlab_host | ansible.utils.ipaddr | bool) + - gitlab_host == "" or (gitlab_host | ansible.utils.ipaddr != false) fail_msg: "gitlab_host '{{ gitlab_host }}' is not a valid IP address" success_msg: "gitlab_host is valid" when: gitlab_host != "" From e544179630abfa2bdaee426464386ca8ee668fe0 Mon Sep 17 00:00:00 2001 From: Mithilesh Reddy Date: Thu, 4 Jun 2026 18:34:22 +0530 Subject: [PATCH 14/78] omnia.sh ssh fix and openchami upgrade optimization (#4679) Signed-off-by: mithileshreddy04 --- omnia.sh | 35 ++++++++++ .../tasks/renew_certificates.yml | 6 +- .../tasks/restart_after_db_restore.yml | 67 ------------------- .../roles/upgrade_openchami/tasks/main.yml | 6 -- .../tasks/renew_certificates.yml | 6 +- 5 files changed, 45 insertions(+), 75 deletions(-) delete mode 100644 rollback/roles/rollback_openchami/tasks/restart_after_db_restore.yml diff --git a/omnia.sh b/omnia.sh index 9bc5856605..aa42a62656 100755 --- a/omnia.sh +++ b/omnia.sh @@ -1321,6 +1321,35 @@ validate_nfs_server() { fi } +# Wait for the SSH daemon inside omnia_core to 
start accepting connections +# on port 2222. After a container swap (upgrade / rollback) the container +# process is "Up" but sshd may still be initialising. Without this wait, +# ssh-keyscan and the subsequent `ssh omnia_core` fail with +# "Connection refused". +wait_for_ssh_ready() { + local ssh_port=2222 + local max_wait=30 + local waited=0 + + echo -n "[INFO] Waiting for SSH daemon inside omnia_core to be ready" + while [ $waited -lt $max_wait ]; do + # Use ssh-keyscan as a lightweight probe — it exits 0 when it + # receives at least one host key line. + if ssh-keyscan -p "$ssh_port" localhost 2>/dev/null | grep -q .; then + echo " ready (${waited}s)" + return 0 + fi + sleep 1 + waited=$((waited + 1)) + echo -n "." + done + + echo "" + echo "[WARN] SSH daemon did not become ready within ${max_wait}s." + echo "[WARN] You can connect manually later with: ssh omnia_core" + return 1 +} + init_ssh_config() { local ssh_port=2222 @@ -2280,6 +2309,8 @@ upgrade_omnia_core() { echo "" show_post_upgrade_instructions "$TARGET_OMNIA_VERSION" + # Wait for sshd inside the new container before configuring keys / connecting + wait_for_ssh_ready # Initialize SSH config and start container session init_ssh_config remove_container_omnia_sh @@ -2648,6 +2679,8 @@ rollback_omnia_core() { # Fetch config from restored metadata (populates omnia_path, domain_name, etc.) fetch_config + # Wait for sshd inside the restarted container before configuring keys / connecting + wait_for_ssh_ready # Initialize SSH config and start container session init_ssh_config remove_container_omnia_sh @@ -2793,6 +2826,8 @@ rollback_omnia_core() { # Fetch config from restored metadata (populates omnia_path, domain_name, etc.) 
fetch_config + # Wait for sshd inside the restored container before configuring keys / connecting + wait_for_ssh_ready # Initialize SSH config and start container session init_ssh_config remove_container_omnia_sh diff --git a/rollback/roles/rollback_openchami/tasks/renew_certificates.yml b/rollback/roles/rollback_openchami/tasks/renew_certificates.yml index 71dc704047..884c0da2b9 100644 --- a/rollback/roles/rollback_openchami/tasks/renew_certificates.yml +++ b/rollback/roles/rollback_openchami/tasks/renew_certificates.yml @@ -36,7 +36,11 @@ - name: Renew OpenCHAMI certificates ansible.builtin.command: openchami-certificate-update update {{ rollback_cluster_fqdn }} become: true - changed_when: true + register: cert_renew_result + changed_when: cert_renew_result.rc == 0 + retries: "{{ max_retries }}" + delay: "{{ max_delay }}" + until: cert_renew_result.rc == 0 delegate_to: oim delegate_facts: true connection: ssh diff --git a/rollback/roles/rollback_openchami/tasks/restart_after_db_restore.yml b/rollback/roles/rollback_openchami/tasks/restart_after_db_restore.yml deleted file mode 100644 index 4df53e0f49..0000000000 --- a/rollback/roles/rollback_openchami/tasks/restart_after_db_restore.yml +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- - -# ============================================================================ -# restart_after_db_restore.yml — Restart Services After Database Restore -# ============================================================================ -# After the pg_dump restore (which DROPs and recreates schema objects), -# SMD and BSS need to be restarted so they reconnect to the database with -# the restored v2.1 schema. Without this restart, SMD may hold stale -# connections or cached schema state from the v2.2 database. -# -# Idempotency: systemd restart is idempotent. -# ============================================================================ - -- name: Restart services after database restore - block: - - name: Restart openchami.target to reconnect services to restored database - ansible.builtin.systemd: - name: openchami.target - state: restarted - daemon_reload: true - delegate_to: oim - delegate_facts: true - connection: ssh - - - name: Wait for services to stabilize after restart - ansible.builtin.pause: - seconds: "{{ wait_time }}" - - # ── Recover any failed services ───────────────────────────────────── - - name: Reset any failed services after database restore restart - ansible.builtin.command: systemctl reset-failed - changed_when: true - failed_when: false - delegate_to: oim - delegate_facts: true - connection: ssh - - - name: Restart openchami.target to recover any failed services - ansible.builtin.systemd: - name: openchami.target - state: restarted - delegate_to: oim - delegate_facts: true - connection: ssh - failed_when: false - - - name: Wait for recovered services to stabilize - ansible.builtin.pause: - seconds: "{{ wait_time }}" - - - name: Display restart status - ansible.builtin.debug: - verbosity: 1 - msg: "Services restarted after database restore. SMD/BSS will re-initialize with v2.1 schema." 
diff --git a/upgrade/roles/upgrade_openchami/tasks/main.yml b/upgrade/roles/upgrade_openchami/tasks/main.yml index d2106ad5ea..be5da8350c 100644 --- a/upgrade/roles/upgrade_openchami/tasks/main.yml +++ b/upgrade/roles/upgrade_openchami/tasks/main.yml @@ -43,12 +43,6 @@ - name: Renew OpenCHAMI certificates ansible.builtin.include_tasks: renew_certificates.yml - # Re-resolve HAProxy-based endpoints after cert renewal restart. - # HAProxy port 8443 is stable, but re-reading the ochami config - # ensures we pick up any config changes from cert renewal. - - name: Resolve SMD/BSS endpoints after certificate renewal - ansible.builtin.include_tasks: resolve_admin_ip.yml - - name: Reload cloud-init data into cloud-init-server ansible.builtin.include_tasks: reload_cloud_init_data.yml diff --git a/upgrade/roles/upgrade_openchami/tasks/renew_certificates.yml b/upgrade/roles/upgrade_openchami/tasks/renew_certificates.yml index f3fbebb22a..0cff2011b0 100644 --- a/upgrade/roles/upgrade_openchami/tasks/renew_certificates.yml +++ b/upgrade/roles/upgrade_openchami/tasks/renew_certificates.yml @@ -48,7 +48,11 @@ - name: Renew OpenCHAMI certificates ansible.builtin.command: openchami-certificate-update update {{ upgrade_cluster_fqdn }} become: true - changed_when: true + register: cert_renew_result + changed_when: cert_renew_result.rc == 0 + retries: "{{ max_retries }}" + delay: "{{ max_delay }}" + until: cert_renew_result.rc == 0 delegate_to: oim delegate_facts: true connection: ssh From 99a070e19e370a2e63a0f95dbd66e0e6c3441e72 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Thu, 4 Jun 2026 19:36:44 +0530 Subject: [PATCH 15/78] Oim_metadata stale variable from provision, hence renaming Signed-off-by: Jagadeesh N V --- rollback/roles/rollback_slurm/tasks/slurm_backup.yml | 9 ++------- upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/rollback/roles/rollback_slurm/tasks/slurm_backup.yml 
b/rollback/roles/rollback_slurm/tasks/slurm_backup.yml index 76dd522833..df9e96ed7f 100644 --- a/rollback/roles/rollback_slurm/tasks/slurm_backup.yml +++ b/rollback/roles/rollback_slurm/tasks/slurm_backup.yml @@ -44,11 +44,11 @@ - name: Include oim_metadata.yml ansible.builtin.include_vars: file: "{{ oim_metadata_path }}" - name: oim_metadata + name: _slurm_oim_metadata - name: Set oim_shared_path fact ansible.builtin.set_fact: - oim_shared_path: "{{ oim_metadata.oim_shared_path | trim }}" + oim_shared_path: "{{ _slurm_oim_metadata.oim_shared_path | trim }}" slurm_nfs_mounted_path: "{{ slurm_nfs[0].client_share_path }}/slurm" - name: Read pxe_mapping_file from backup directory @@ -75,11 +75,6 @@ ) }} -- name: Debug backup_functional_group_hosts - ansible.builtin.debug: - var: backup_functional_group_hosts - # debugger: always - - name: Check for slurm_control_node_x86_64 host dir in nfs ansible.builtin.stat: path: "{{ slurm_nfs_mounted_path }}/{{ backup_functional_group_hosts['slurm_control_node_x86_64'] | first }}" diff --git a/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml b/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml index 5fd7141039..27bb58d9c8 100644 --- a/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml +++ b/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml @@ -49,11 +49,11 @@ - name: Include oim_metadata.yml ansible.builtin.include_vars: file: "{{ oim_metadata_path }}" - name: oim_metadata + name: _slurm_oim_metadata - name: Set oim_shared_path fact ansible.builtin.set_fact: - oim_shared_path: "{{ oim_metadata.oim_shared_path | trim }}" + oim_shared_path: "{{ _slurm_oim_metadata.oim_shared_path | trim }}" slurm_nfs_mounted_path: "{{ slurm_nfs[0].mount_point }}/slurm" - name: Read pxe_mapping_file from backup directory @@ -80,11 +80,6 @@ ) }} -- name: Debug backup_functional_group_hosts - ansible.builtin.debug: - var: backup_functional_group_hosts - # debugger: always - - name: Check for slurm_control_node_x86_64 host dir in nfs 
ansible.builtin.stat: path: "{{ slurm_nfs_mounted_path }}/{{ backup_functional_group_hosts['slurm_control_node_x86_64'] | first }}" From d66d4fb4194e598c1e47c844785b16af31bd3bdb Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Thu, 4 Jun 2026 20:03:08 +0530 Subject: [PATCH 16/78] Oim cleanup slurm entry gate Signed-off-by: Jagadeesh N V --- .../oim_cleanup/oim_container_cleanup/tasks/main.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/utils/roles/oim_cleanup/oim_container_cleanup/tasks/main.yml b/utils/roles/oim_cleanup/oim_container_cleanup/tasks/main.yml index 22a8ced6a1..a5b3f52a83 100644 --- a/utils/roles/oim_cleanup/oim_container_cleanup/tasks/main.yml +++ b/utils/roles/oim_cleanup/oim_container_cleanup/tasks/main.yml @@ -22,6 +22,15 @@ ansible.builtin.include_vars: "{{ hostvars['localhost']['input_project_dir'] }}/storage_config.yml" tags: always +- name: Load software_config.json as software_config + ansible.builtin.include_vars: + file: "{{ hostvars['localhost']['input_project_dir'] }}/software_config.json" + name: cleanup_software_config + +- name: Check if slurm support is enabled + ansible.builtin.set_fact: + slurm_support: "{{ (cleanup_software_config.softwares | selectattr('name', 'in', ['slurm_custom']) | list | length) > 0 }}" + - name: Ensure firewalld is installed and running ansible.builtin.dnf: name: firewalld @@ -58,7 +67,8 @@ tags: postgres - name: Cleanup slurm configuration - ansible.builtin.import_tasks: cleanup_slurm.yml + ansible.builtin.include_tasks: cleanup_slurm.yml + when: slurm_support | default(false) | bool - name: Cleanup k8s configuration ansible.builtin.import_tasks: cleanup_k8s.yml From 9f6c844f1ab364c0bbc8e86eeb5e68c711afaed5 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Thu, 4 Jun 2026 22:05:56 +0530 Subject: [PATCH 17/78] Kubernetes Upgrade/Rollback Improvements and Kubelet Configuration Consistency (#4681) * added banner 
changes and validation of pxemapping file for software presence and cluster initialized file in cloud-init-log Signed-off-by: Katakam-Rakesh * lint fix Signed-off-by: Katakam-Rakesh * Add kubelet feature gate in cloud-init templates Signed-off-by: Katakam-Rakesh * Fix worker kubelet feature gate update and post-validation retry logic Signed-off-by: Katakam-Rakesh * Add archive upgrade artifacts to rollback workflow Signed-off-by: Katakam-Rakesh * Add kubelet restart on subsequent boots in cloud-init Signed-off-by: Katakam-Rakesh * Add kubelet feature gate removal stage to rollback Signed-off-by: Katakam-Rakesh * Add telemetry directory to rollback archive Signed-off-by: Katakam-Rakesh * Add kubelet restart to clear stale pod state after rollback Signed-off-by: Katakam-Rakesh * Move rollback summary banner to separate play for display order Signed-off-by: Katakam-Rakesh * Add retry logic to etcd health check during upgrade Signed-off-by: Katakam-Rakesh * Improve upgrade failure message with recommended steps Signed-off-by: Katakam-Rakesh * Add systemd daemon reload after removing kubelet feature gates Signed-off-by: Katakam-Rakesh * Add rollback log patterns to archive cleanup Signed-off-by: Katakam-Rakesh * Add kubelet config.yaml updates during upgrade to match cloud-init Signed-off-by: Katakam-Rakesh * Add kubelet config updates during rollback and fix upgrade to include first CP - Add rollback Stage 4c to update kubelet config.yaml on all nodes - Fix upgrade to include first CP in node status/sync settings - Match cloud-init behavior for all control planes Signed-off-by: Katakam-Rakesh * Fix ansible-lint violations Signed-off-by: Katakam-Rakesh --------- Signed-off-by: Katakam-Rakesh --- ...ce_kube_control_plane_first_x86_64.yaml.j2 | 12 + ...-service_kube_control_plane_x86_64.yaml.j2 | 11 + .../ci-group-service_kube_node_x86_64.yaml.j2 | 14 +- .../playbooks/archive_upgrade_artifacts.yml | 329 ++++++++++++++++++ rollback/roles/rollback_k8s/tasks/main.yml | 10 + 
.../tasks/remove_kubelet_feature_gates.yml | 79 +++++ .../tasks/restart_network_pods.yml | 19 +- .../tasks/update_kubelet_config.yml | 152 ++++++++ rollback/rollback.yml | 93 +++-- upgrade/playbooks/upgrade_k8s.yml | 162 ++++++++- upgrade/playbooks/upgrade_worker_first.yml | 24 +- upgrade/playbooks/upgrade_workers.yml | 24 +- upgrade/roles/upgrade_k8s/tasks/main.yml | 11 +- .../upgrade_k8s/tasks/post_validation.yml | 14 +- .../tasks/step_etcd_health_check.yml | 7 +- .../tasks/step_kubelet_restart.yml | 16 +- .../upgrade_k8s/tasks/step_upgrade_apply.yml | 3 + .../upgrade_k8s/tasks/step_upgrade_node.yml | 3 + .../tasks/update_kcm_arguments.yml | 146 ++++++++ .../tasks/update_kubelet_config.yml | 116 ++++++ .../tasks/update_kubelet_feature_gates.yml | 72 ++++ 21 files changed, 1217 insertions(+), 100 deletions(-) create mode 100644 rollback/playbooks/archive_upgrade_artifacts.yml create mode 100644 rollback/roles/rollback_k8s/tasks/remove_kubelet_feature_gates.yml create mode 100644 rollback/roles/rollback_k8s/tasks/update_kubelet_config.yml create mode 100644 upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml create mode 100644 upgrade/roles/upgrade_k8s/tasks/update_kubelet_config.yml create mode 100644 upgrade/roles/upgrade_k8s/tasks/update_kubelet_feature_gates.yml diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 index d86341fbd2..811c2d4e1f 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 @@ -620,6 +620,11 @@ echo "Updating the kubelet arguments." 
sed -i 's/^shutdownGracePeriod:.*/shutdownGracePeriod: 30s/' /var/lib/kubelet/config.yaml sed -i 's/^shutdownGracePeriodCriticalPods:.*/shutdownGracePeriodCriticalPods: 10s/' /var/lib/kubelet/config.yaml + + # Enable old Kubernetes behavior: reset pod status on kubelet restart + echo "Adding kubelet feature gate: ChangeContainerStatusOnKubeletRestart=true" + echo 'KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true"' > /etc/sysconfig/kubelet + systemctl daemon-reload systemctl restart kubelet @@ -869,6 +874,13 @@ PYEOF # SUBSEQUENT BOOT - SKIP INIT echo "service_kube_control_plane_first_x86_64 is already part of cluster." echo "Cluster already initialized. Performing node reboot procedures." + + # Reapply kubelet feature gate (ephemeral overlay filesystem) + echo "Reapplying kubelet feature gate: ChangeContainerStatusOnKubeletRestart=true" + echo 'KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true"' > /etc/sysconfig/kubelet + systemctl daemon-reload + systemctl restart kubelet + # CRI and kubelet already enabled above # You can log health status etc if you wish: {% if etcd_on_local_disk | default(false) %} diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 index 37ef9f69d2..782c7970b9 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 @@ -528,6 +528,10 @@ grep -q "^nodeStatusReportFrequency:" $CONFIG_FILE || echo "nodeStatusReportFrequency: 60s" >> $CONFIG_FILE grep -q "^syncFrequency:" $CONFIG_FILE || echo "syncFrequency: 60s" >> $CONFIG_FILE + # Enable old Kubernetes behavior: reset pod status on kubelet restart + echo "Adding kubelet feature gate: 
ChangeContainerStatusOnKubeletRestart=true" + echo 'KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true"' > /etc/sysconfig/kubelet + systemctl daemon-reload systemctl restart kubelet @@ -541,6 +545,13 @@ # SUBSEQUENT BOOT - SKIP JOIN echo "service_kube_control_plane_x86_64 is already part of cluster." echo "Cluster already initialized. Performing node reboot procedures." + + # Reapply kubelet feature gate (ephemeral overlay filesystem) + echo "Reapplying kubelet feature gate: ChangeContainerStatusOnKubeletRestart=true" + echo 'KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true"' > /etc/sysconfig/kubelet + systemctl daemon-reload + systemctl restart kubelet + # CRI and kubelet already enabled above # You can log health status etc if you wish: {% if etcd_on_local_disk | default(false) %} diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 index 843be05a37..705b33f951 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_node_x86_64.yaml.j2 @@ -335,7 +335,12 @@ grep -q "^nodeStatusReportFrequency:" $CONFIG_FILE || echo "nodeStatusReportFrequency: 10s" >> $CONFIG_FILE grep -q "^syncFrequency:" $CONFIG_FILE || echo "syncFrequency: 10s" >> $CONFIG_FILE - # Restart kubelet to apply changes + # Enable old Kubernetes behavior: reset pod status on kubelet restart + echo "Adding kubelet feature gate: ChangeContainerStatusOnKubeletRestart=true" + echo 'KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true"' > /etc/sysconfig/kubelet + + # Reload systemd and restart kubelet to apply changes + systemctl daemon-reload systemctl restart kubelet # Mark initialization complete so all of above is skipped on reboot! 
touch "$MARKER" @@ -345,6 +350,13 @@ # SUBSEQUENT BOOT - SKIP JOIN echo "service_kube_node_x86_64 is already part of cluster." echo "Cluster already initialized. Performing node reboot procedures." + + # Reapply kubelet feature gate (ephemeral overlay filesystem) + echo "Reapplying kubelet feature gate: ChangeContainerStatusOnKubeletRestart=true" + echo 'KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true"' > /etc/sysconfig/kubelet + systemctl daemon-reload + systemctl restart kubelet + # CRI and kubelet already enabled above systemctl status kubelet echo "Cloud-Init finished successfully after the reboot." diff --git a/rollback/playbooks/archive_upgrade_artifacts.yml b/rollback/playbooks/archive_upgrade_artifacts.yml new file mode 100644 index 0000000000..d5a6d5916f --- /dev/null +++ b/rollback/playbooks/archive_upgrade_artifacts.yml @@ -0,0 +1,329 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# ============================================================================ +# archive_upgrade_artifacts.yml — Post-Rollback NFS Artifact Archival +# ============================================================================ +# +# Called after a successful rollback to archive all upgrade/rollback artifacts +# into a timestamped directory on the NFS share. 
This ensures: +# - A clean slate for the next upgrade run +# - All evidence preserved for root cause analysis +# +# Artifacts archived: +# - upgrade_status.yml, rollback_status.yml (per-node step tracking) +# - backup/ directory (etcd snapshot, K8s configs, addon backups) +# - telemetry/ directory (telemetry backups) +# - Upgrade execution logs from OIM container /tmp/ +# +# Artifacts cleaned (not archived): +# - Lock files on NFS +# - Repo files on all K8s nodes (/etc/yum.repos.d/omnia-upgrade.repo) +# - Temp files from OIM container /tmp/ +# +# Prerequisites: +# - k8s-telemetry rollback must be completed +# - rollback_manifest must be available in hostvars +# ============================================================================ + +# ────────────────────────────────────────────────────────────────────── +# Play 1: Resolve NFS path and stage OIM container logs for transfer +# ────────────────────────────────────────────────────────────────────── +- name: "Archive — Resolve NFS path and stage OIM logs" + hosts: localhost + connection: local + gather_facts: false + vars: + rollback_manifest_path: /opt/omnia/.data/rollback_manifest.yml + input_project_dir: "{{ hostvars['localhost']['input_project_dir'] | default('/opt/omnia/input/project_default') }}" + nfs_storage_name: "nfs_k8s" + tasks: + - name: Read rollback_manifest.yml + ansible.builtin.slurp: + src: "{{ rollback_manifest_path }}" + register: _archive_raw_manifest + failed_when: false + + - name: Parse rollback manifest + ansible.builtin.set_fact: + _archive_rollback_manifest: "{{ _archive_raw_manifest.content | b64decode | from_yaml }}" + when: _archive_raw_manifest.content is defined + + # ── Check if k8s-telemetry was completed ── + - name: Check k8s-telemetry rollback status + ansible.builtin.set_fact: + _k8s_rollback_completed: >- + {{ (_archive_rollback_manifest is defined) and + (_archive_rollback_manifest.component_status is defined) and + 
(_archive_rollback_manifest.component_status['k8s-telemetry'] | default('pending') == 'completed') }} + + - name: Skip archival if k8s-telemetry was not rolled back + when: not (_k8s_rollback_completed | bool) + block: + - name: Set skip flag + ansible.builtin.set_fact: + _archive_skip: true + - name: End play (k8s-telemetry not completed) + ansible.builtin.meta: end_play + + # ── Resolve NFS mount path ── + - name: Load storage_config.yml for NFS path resolution + ansible.builtin.include_vars: + file: "{{ input_project_dir }}/storage_config.yml" + name: _archive_storage_config + + - name: Load high_availability_config.yml for kube_vip + ansible.builtin.include_vars: + file: "{{ input_project_dir }}/high_availability_config.yml" + name: _archive_ha_config + + - name: Set NFS mount path and kube_vip + ansible.builtin.set_fact: + _archive_mount_path: >- + {{ (_archive_storage_config.mounts + | selectattr('name', 'equalto', nfs_storage_name) + | first).mount_point }} + _archive_kube_vip: "{{ _archive_ha_config.service_k8s_cluster_ha[0].virtual_ip_address }}" + _archive_timestamp: "{{ lookup('pipe', 'date +%Y%m%d_%H%M%S') }}" + _archive_rollback_id: "{{ _archive_rollback_manifest.rollback_id | default('unknown') }}" + + - name: Build archive directory path + ansible.builtin.set_fact: + _nfs_archive_dir: >- + {{ _archive_mount_path }}/upgrade/archive/{{ _archive_rollback_id }}_{{ _archive_timestamp }} + + # ── Add kube_vip to inventory for delegation ── + - name: Add kube_vip to inventory + ansible.builtin.add_host: + name: "{{ _archive_kube_vip }}" + ansible_host: "{{ _archive_kube_vip }}" + ansible_connection: ssh + ansible_user: root + groups: archive_target + + # ── Stage OIM logs into a temp directory for transfer ── + - name: Create temp staging directory + ansible.builtin.file: + path: /tmp/_archive_staging/logs + state: directory + mode: '0755' + + - name: Find upgrade log files in /tmp + ansible.builtin.find: + paths: /tmp + patterns: + - "upgrade_*.log" + - 
"reboot_*.log" + - "update_bss_*.log" + - "rollback_bss_*.log" + - "rollback_*.log" + - "k8s_upgrade_inventory.ini" + - "k8s_vars.yml" + register: _tmp_log_files + + - name: Copy log files to staging + ansible.builtin.copy: + src: "{{ item.path }}" + dest: "/tmp/_archive_staging/logs/{{ item.path | basename }}" + remote_src: true + mode: '0644' + loop: "{{ _tmp_log_files.files }}" + loop_control: + label: "{{ item.path | basename }}" + +# ────────────────────────────────────────────────────────────────────── +# Play 2: Archive artifacts on NFS (runs on cluster node via kube_vip) +# ────────────────────────────────────────────────────────────────────── +- name: "Archive — Move upgrade artifacts to NFS archive" + hosts: archive_target + gather_facts: false + vars: + archive_mount_path: "{{ hostvars['localhost']['_archive_mount_path'] | default('') }}" + nfs_archive_dir: "{{ hostvars['localhost']['_nfs_archive_dir'] | default('') }}" + tasks: + - name: Skip if archival was not triggered + ansible.builtin.meta: end_play + when: hostvars['localhost']['_archive_skip'] | default(false) | bool + + - name: Verify SSH connectivity to cluster node + ansible.builtin.wait_for_connection: + delay: 2 + timeout: 30 + + # ── Create archive directory structure ── + - name: Create archive directory on NFS + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - "{{ nfs_archive_dir }}" + - "{{ nfs_archive_dir }}/logs" + + # ── Move NFS artifacts into archive ── + - name: Check for upgrade_status.yml on NFS + ansible.builtin.stat: + path: "{{ archive_mount_path }}/upgrade/upgrade_status.yml" + register: _nfs_upgrade_status + + - name: Archive upgrade_status.yml + ansible.builtin.command: + cmd: "mv {{ archive_mount_path }}/upgrade/upgrade_status.yml {{ nfs_archive_dir }}/upgrade_status.yml" + when: _nfs_upgrade_status.stat.exists + changed_when: true + + - name: Check for rollback_status.yml on NFS + ansible.builtin.stat: + path: "{{ archive_mount_path 
}}/upgrade/rollback_status.yml" + register: _nfs_rollback_status + + - name: Archive rollback_status.yml + ansible.builtin.command: + cmd: "mv {{ archive_mount_path }}/upgrade/rollback_status.yml {{ nfs_archive_dir }}/rollback_status.yml" + when: _nfs_rollback_status.stat.exists + changed_when: true + + - name: Check for backup directory on NFS + ansible.builtin.stat: + path: "{{ archive_mount_path }}/upgrade/backup" + register: _nfs_backup_dir + + - name: Archive backup directory + ansible.builtin.command: + cmd: "mv {{ archive_mount_path }}/upgrade/backup {{ nfs_archive_dir }}/backup" + when: _nfs_backup_dir.stat.exists + changed_when: true + + - name: Check for telemetry directory on NFS + ansible.builtin.stat: + path: "{{ archive_mount_path }}/upgrade/telemetry" + register: _nfs_telemetry_dir + + - name: Archive telemetry directory + ansible.builtin.command: + cmd: "mv {{ archive_mount_path }}/upgrade/telemetry {{ nfs_archive_dir }}/telemetry" + when: _nfs_telemetry_dir.stat.exists + changed_when: true + + # ── Remove stale lock files from NFS ── + - name: Remove lock files from NFS + ansible.builtin.file: + path: "{{ archive_mount_path }}/upgrade/{{ item }}" + state: absent + loop: + - upgrade.lock + - rollback.lock + failed_when: false + + # ── Copy staged logs from OIM container to NFS archive ── + - name: Find staged log files + ansible.builtin.find: + paths: /tmp/_archive_staging/logs + register: _staged_logs + delegate_to: localhost + + - name: Copy staged logs to NFS archive + ansible.builtin.copy: + src: "{{ item.path }}" + dest: "{{ nfs_archive_dir }}/logs/{{ item.path | basename }}" + mode: '0644' + loop: "{{ _staged_logs.files }}" + loop_control: + label: "{{ item.path | basename }}" + when: _staged_logs.files | length > 0 + +# ────────────────────────────────────────────────────────────────────── +# Play 3: Remove repo file from all K8s nodes +# Uses k8s_nodes group added to inventory by rollback_k8s role. 
+# If k8s-telemetry was skipped (e.g., Slurm-only cluster), k8s_nodes +# group will be empty and this play runs on zero hosts (no-op). +# ────────────────────────────────────────────────────────────────────── +- name: "Archive — Remove repo file from all K8s nodes" + hosts: k8s_nodes + gather_facts: false + ignore_unreachable: true + tasks: + - name: Skip if archival was not triggered + ansible.builtin.meta: end_play + when: hostvars['localhost']['_archive_skip'] | default(false) | bool + + - name: Remove omnia-upgrade.repo from K8s nodes + ansible.builtin.file: + path: /etc/yum.repos.d/omnia-upgrade.repo + state: absent + failed_when: false + +# ────────────────────────────────────────────────────────────────────── +# Play 4: Clean up OIM container temp files +# ────────────────────────────────────────────────────────────────────── +- name: "Archive — Clean up OIM temp files" + hosts: localhost + connection: local + gather_facts: false + tasks: + - name: Skip if archival was not triggered + ansible.builtin.meta: end_play + when: _archive_skip | default(false) | bool + + - name: Remove staged files + ansible.builtin.file: + path: /tmp/_archive_staging + state: absent + + - name: Remove upgrade log files from /tmp + ansible.builtin.file: + path: "{{ item.path }}" + state: absent + loop: "{{ _tmp_log_files.files | default([]) }}" + loop_control: + label: "{{ item.path | basename }}" + + - name: Remove remaining temp files + ansible.builtin.file: + path: "{{ item }}" + state: absent + loop: + - /tmp/upgrade_status.tmp + - /tmp/upgrade_status.tmp.json + - /tmp/upgrade_status_temp.yml + - /tmp/rollback_status.tmp.json + - /tmp/rollback_status_temp.yml + - /tmp/k8s_upgrade_inventory.ini + - /tmp/k8s_vars.yml + failed_when: false + + - name: Display NFS archive summary + ansible.builtin.debug: + msg: + - "═══════════════════════════════════════════════════════════" + - " UPGRADE ARTIFACTS ARCHIVED ON NFS" + - "═══════════════════════════════════════════════════════════" + 
- "" + - " Archive location:" + - " {{ hostvars['localhost']['_nfs_archive_dir'] }}" + - "" + - " Contents:" + - " - upgrade_status.yml (per-node upgrade step tracking)" + - " - rollback_status.yml (per-node rollback step tracking)" + - " - backup/ (etcd snapshot, K8s configs, addon backups)" + - " - telemetry/ (telemetry backups)" + - " - logs/ (upgrade execution logs from OIM)" + - "" + - " Cleaned:" + - " - Lock files on NFS" + - " - omnia-upgrade.repo from all K8s nodes" + - " - Temp files from OIM /tmp/" + - "" + - " Next upgrade will start with a clean slate." + - "═══════════════════════════════════════════════════════════" diff --git a/rollback/roles/rollback_k8s/tasks/main.yml b/rollback/roles/rollback_k8s/tasks/main.yml index 821137d122..a5bdae2e88 100644 --- a/rollback/roles/rollback_k8s/tasks/main.yml +++ b/rollback/roles/rollback_k8s/tasks/main.yml @@ -28,6 +28,8 @@ # 2b. Clean up stale MetalLB IPs on all nodes # 3. Restore etcd snapshot on all CPs # 4. Restore /etc/kubernetes/ configs on all CPs +# 4b. Remove kubelet feature gates (for rollback to older versions) +# 4c. Update kubelet config.yaml parameters (match cloud-init settings) # 5. Downgrade packages on all CPs + start kubelet # 6. Fix kube-vip split-brain # 7. 
Downgrade packages on workers + start kubelet @@ -168,6 +170,14 @@ - name: "Stage 4 — Restore K8s configs" ansible.builtin.include_tasks: restore_k8s_configs.yml + # ── Stage 4b: Remove kubelet feature gates ──────────────────── + - name: "Stage 4b — Remove kubelet feature gates" + ansible.builtin.include_tasks: remove_kubelet_feature_gates.yml + + # ── Stage 4c: Update kubelet config.yaml parameters ───────── + - name: "Stage 4c — Update kubelet config parameters" + ansible.builtin.include_tasks: update_kubelet_config.yml + # ── Stage 5: Downgrade packages on CPs and start kubelet ────── - name: "Stage 5 — Downgrade and start control planes" ansible.builtin.include_tasks: downgrade_cp_packages.yml diff --git a/rollback/roles/rollback_k8s/tasks/remove_kubelet_feature_gates.yml b/rollback/roles/rollback_k8s/tasks/remove_kubelet_feature_gates.yml new file mode 100644 index 0000000000..bd1d480844 --- /dev/null +++ b/rollback/roles/rollback_k8s/tasks/remove_kubelet_feature_gates.yml @@ -0,0 +1,79 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# ── Remove kubelet feature gates added during upgrade ─────────────── +# Removes ChangeContainerStatusOnKubeletRestart from: +# 1. /etc/sysconfig/kubelet (command-line argument) +# This is necessary when rolling back from 1.35.1 to 1.34.1 because +# the feature gate is not recognized in 1.34.1. 
+
+- name: Skip remove_feature_gates if already completed
+  when: (rollback_status.stages.remove_feature_gates.status | default('pending')) == 'completed'
+  ansible.builtin.debug:
+    msg: "remove_feature_gates already completed — skipping."
+
+- name: Remove kubelet feature gates
+  when: not ((rollback_status.stages.remove_feature_gates.status | default('pending')) == 'completed')
+  block:
+    # Hand an in_progress marker for this stage to update_status.yml before touching any node.
+    - name: Mark remove_feature_gates in_progress
+      vars:
+        status_update:
+          stages:
+            remove_feature_gates:
+              status: in_progress
+              timestamp: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}"
+      ansible.builtin.include_tasks: update_status.yml
+
+    # Drop every line that mentions the gate; backup: true asks lineinfile to keep a copy of the file.
+    - name: Remove feature gate from /etc/sysconfig/kubelet on all nodes
+      loop: "{{ all_rollback_nodes }}"
+      loop_control:
+        label: "{{ item }}"
+      delegate_to: "{{ item }}"
+      ansible.builtin.lineinfile:
+        path: /etc/sysconfig/kubelet
+        regexp: '.*ChangeContainerStatusOnKubeletRestart.*'
+        state: absent
+        backup: true
+      register: _feature_gate_removal
+
+    - name: Report feature gate removal from /etc/sysconfig/kubelet
+      loop: "{{ _feature_gate_removal.results }}"
+      loop_control:
+        label: "{{ item.item }}"
+      when: item is changed  # noqa: no-handler
+      ansible.builtin.debug:
+        msg: "Removed ChangeContainerStatusOnKubeletRestart from /etc/sysconfig/kubelet on {{ item.item }}"
+
+    - name: Reload systemd daemon on all nodes
+      loop: "{{ all_rollback_nodes }}"
+      loop_control:
+        label: "{{ item }}"
+      delegate_to: "{{ item }}"
+      ansible.builtin.systemd_service:
+        daemon_reload: true
+
+    - name: Mark remove_feature_gates completed
+      vars:
+        status_update:
+          stages:
+            remove_feature_gates:
+              status: completed
+              timestamp: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}"
+      ansible.builtin.include_tasks: update_status.yml
+
+    - name: Display remove_feature_gates completion
+      ansible.builtin.debug:
+        msg: "Kubelet feature gates removed on all {{ all_rollback_nodes | length }} nodes."
diff --git a/rollback/roles/rollback_k8s/tasks/restart_network_pods.yml b/rollback/roles/rollback_k8s/tasks/restart_network_pods.yml index a435534543..5073a9da2d 100644 --- a/rollback/roles/rollback_k8s/tasks/restart_network_pods.yml +++ b/rollback/roles/rollback_k8s/tasks/restart_network_pods.yml @@ -91,10 +91,27 @@ changed_when: true failed_when: false + # ── Restart kubelet on all nodes to clear stale pod state ────── + # After force-deleting calico-node pods, the kubelet may retain + # stale references to the old pods (projected volume mounts, + # service account tokens). This prevents new replacement pods + # from starting — they stay Pending indefinitely. Restarting + # kubelet clears the stale pod cache and allows the DaemonSet + # controller's new pods to be started cleanly. + - name: Restart kubelet on all nodes to clear stale pod state + ansible.builtin.systemd_service: + name: kubelet + state: restarted + delegate_to: "{{ item }}" + changed_when: true + loop: "{{ all_rollback_nodes }}" + loop_control: + label: "{{ item }}" + # ── Brief wait for DaemonSet controller to recreate pods ─────── - name: Wait for DaemonSet controller to schedule new pods ansible.builtin.pause: - seconds: 15 + seconds: 30 # ── Delete any non-Running kube-system pods to unblock the poll ── # This runs BEFORE the strict poll so stuck/Pending pods from diff --git a/rollback/roles/rollback_k8s/tasks/update_kubelet_config.yml b/rollback/roles/rollback_k8s/tasks/update_kubelet_config.yml new file mode 100644 index 0000000000..f26d57b916 --- /dev/null +++ b/rollback/roles/rollback_k8s/tasks/update_kubelet_config.yml @@ -0,0 +1,152 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# ============================================================================ +# update_kubelet_config.yml — Update kubelet config.yaml after rollback +# ============================================================================ +# Applies kubelet configuration parameters to /var/lib/kubelet/config.yaml +# on all nodes after rollback to match cloud-init provisioned settings. +# +# /var/lib/kubelet/config.yaml is NOT backed up during upgrade, so after +# rollback it may be missing these performance tunings. This task ensures +# consistency between provisioned and rolled-back nodes. +# +# Parameters applied: +# All nodes: +# - shutdownGracePeriod: 30s +# - shutdownGracePeriodCriticalPods: 10s +# All control planes (first + additional): +# - nodeStatusUpdateFrequency: 10s +# - nodeStatusReportFrequency: 60s +# - syncFrequency: 60s +# Workers: +# - nodeStatusUpdateFrequency: 5s +# - nodeStatusReportFrequency: 10s +# - syncFrequency: 10s +# ============================================================================ + +- name: Skip update_kubelet_config if already completed + ansible.builtin.debug: + msg: "update_kubelet_config already completed — skipping." 
+ when: (rollback_status.stages.update_kubelet_config.status | default('pending')) == 'completed' + +- name: Update kubelet config on all nodes + when: (rollback_status.stages.update_kubelet_config.status | default('pending')) != 'completed' + block: + - name: Mark update_kubelet_config in_progress + ansible.builtin.include_tasks: + file: update_status.yml + vars: + status_update: + stages: + update_kubelet_config: + status: in_progress + timestamp: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}" + + # ── Shutdown grace period settings (all nodes) ────────────────── + - name: Update shutdownGracePeriod on all nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^shutdownGracePeriod:' + line: 'shutdownGracePeriod: 30s' + delegate_to: "{{ item }}" + loop: "{{ all_rollback_nodes }}" + loop_control: + label: "{{ item }}" + + - name: Update shutdownGracePeriodCriticalPods on all nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^shutdownGracePeriodCriticalPods:' + line: 'shutdownGracePeriodCriticalPods: 10s' + delegate_to: "{{ item }}" + loop: "{{ all_rollback_nodes }}" + loop_control: + label: "{{ item }}" + + # ── All control plane settings (first + additional) ───────────── + - name: Set nodeStatusUpdateFrequency on all control planes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusUpdateFrequency:' + line: 'nodeStatusUpdateFrequency: 10s' + delegate_to: "{{ item }}" + loop: "{{ all_cp_nodes }}" + loop_control: + label: "{{ item }}" + + - name: Set nodeStatusReportFrequency on all control planes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusReportFrequency:' + line: 'nodeStatusReportFrequency: 60s' + delegate_to: "{{ item }}" + loop: "{{ all_cp_nodes }}" + loop_control: + label: "{{ item }}" + + - name: Set syncFrequency on all control planes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + 
regexp: '^syncFrequency:' + line: 'syncFrequency: 60s' + delegate_to: "{{ item }}" + loop: "{{ all_cp_nodes }}" + loop_control: + label: "{{ item }}" + + # ── Worker settings ───────────────────────────────────────────── + - name: Set nodeStatusUpdateFrequency on workers + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusUpdateFrequency:' + line: 'nodeStatusUpdateFrequency: 5s' + delegate_to: "{{ item }}" + loop: "{{ groups_worker }}" + loop_control: + label: "{{ item }}" + + - name: Set nodeStatusReportFrequency on workers + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusReportFrequency:' + line: 'nodeStatusReportFrequency: 10s' + delegate_to: "{{ item }}" + loop: "{{ groups_worker }}" + loop_control: + label: "{{ item }}" + + - name: Set syncFrequency on workers + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^syncFrequency:' + line: 'syncFrequency: 10s' + delegate_to: "{{ item }}" + loop: "{{ groups_worker }}" + loop_control: + label: "{{ item }}" + + - name: Mark update_kubelet_config completed + ansible.builtin.include_tasks: + file: update_status.yml + vars: + status_update: + stages: + update_kubelet_config: + status: completed + timestamp: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}" + + - name: Display update_kubelet_config completion + ansible.builtin.debug: + msg: "Kubelet config.yaml updated on all {{ all_rollback_nodes | length }} nodes." diff --git a/rollback/rollback.yml b/rollback/rollback.yml index ef7da98d57..e280609fcd 100644 --- a/rollback/rollback.yml +++ b/rollback/rollback.yml @@ -426,33 +426,74 @@ state: absent # ───────────────────────────────────────────────────────────────────────── - # FIX 3: Read from cleaned_component_status (real-time accurate values) - # instead of rollback_manifest.component_status (stale/corrupt). 
- - name: Display rollback summary + # Save rollback summary facts for the final banner play + - name: Save rollback summary facts + ansible.builtin.set_fact: + _rollback_new_status: "{{ new_status }}" + _rollback_id: "{{ rollback_manifest.rollback_id }}" + _rollback_source_version: "{{ rollback_manifest.source_version }}" + _rollback_target_version: "{{ rollback_manifest.target_version }}" + _rollback_component_status: "{{ cleaned_component_status }}" + +# ────────────────────────────────────────────────────────────────────── +# Archive upgrade/rollback artifacts on NFS share. +# Only runs when k8s-telemetry rollback completed successfully. +# Archives status files, backups, and OIM logs into a timestamped +# directory on the NFS share for root cause analysis, then cleans up +# staging artifacts, temp files, and lock files. +# ────────────────────────────────────────────────────────────────────── +- name: Archive upgrade artifacts on NFS + ansible.builtin.import_playbook: playbooks/archive_upgrade_artifacts.yml + tags: always + +# ────────────────────────────────────────────────────────────────────── +# Final rollback summary banner — displayed last so the user sees +# the important "NEXT REQUIRED STEP" message at the end. 
+# ──────────────────────────────────────────────────────────────────────
+- name: Display rollback summary  # last play: prints the banner from the _rollback_* facts saved earlier
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tags: always
+  tasks:
+    - name: Skip banner if finalize did not complete  # no saved status fact means nothing to report
+      ansible.builtin.meta: end_play
+      when: _rollback_new_status is not defined
+
+    - name: Display rollback summary banner  # msg is a list: common header + status-dependent tail
       ansible.builtin.debug:
-        msg:
-          - "========================================="
-          - " ROLLBACK {{ new_status | upper }}"
-          - "========================================="
-          - "Rollback ID: {{ rollback_manifest.rollback_id }}"
-          - "Source: {{ rollback_manifest.source_version }} → Target: {{ rollback_manifest.target_version }}"
-          - ""
-          - "Component Status:"
-          - " slurm: {{ cleaned_component_status.slurm }}"
-          - " k8s-telemetry: {{ cleaned_component_status['k8s-telemetry'] }}"
-          - " build_stream: {{ cleaned_component_status.build_stream }}"
-          - " oim: {{ cleaned_component_status.oim }}"
-          - ""
-          - "Upgrade manifest archived; next upgrade run will start fresh."
-          - ""
-          - "========================================="
-          - " NEXT REQUIRED STEP"
-          - "========================================="
-          - "Exit the omnia_core container and run the following command on the OIM host:"
-          - ""
-          - " sudo ./omnia.sh --rollback"
-          - ""
-          - "This will rollback the omnia_core container image to the previous version."
+        msg: >-  # '~' stringifies both operands, so a non-string manifest value cannot abort the banner
+          {{
+            ['=========================================',
+             ' ROLLBACK ' ~ (_rollback_new_status | upper),
+             '=========================================',
+             'Rollback ID: ' ~ _rollback_id,
+             'Source: ' ~ _rollback_source_version ~ ' → Target: ' ~ _rollback_target_version,
+             '',
+             'Component Status:',
+             ' slurm: ' ~ _rollback_component_status.slurm,
+             ' k8s-telemetry: ' ~ _rollback_component_status['k8s-telemetry'],
+             ' build_stream: ' ~ _rollback_component_status.build_stream,
+             ' oim: ' ~ _rollback_component_status.oim]
+            + (['',
+             'Upgrade manifest archived; next upgrade run will start fresh.',
+             '',
+             '=========================================',
+             ' NEXT REQUIRED STEP',
+             '=========================================',
+             'Exit the omnia_core container and run the following command on the OIM host:',
+             '',
+             ' sudo ./omnia.sh --rollback',
+             '',
+             'This will rollback the omnia_core container image to the previous version.']
+            if _rollback_new_status == 'completed'
+            else
+            ['',
+             'Some components are not yet rolled back.',
+             'Run rollback again to complete the remaining components.']
+            )
+          }}

 # ──────────────────────────────────────────────────────────────────────
 # Guaranteed terminal cleanup (C-30) — defense-in-depth against
diff --git a/upgrade/playbooks/upgrade_k8s.yml b/upgrade/playbooks/upgrade_k8s.yml
index d5cb163dda..4b9427472c 100644
--- a/upgrade/playbooks/upgrade_k8s.yml
+++ b/upgrade/playbooks/upgrade_k8s.yml
@@ -436,15 +436,133 @@
     - name: "Detect upgrade hop chain from cluster"
       ansible.builtin.include_tasks: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/detect_hop_chain_from_manifest.yml"

+    - name: "Set hop_chain_empty flag for other plays"
+      ansible.builtin.set_fact:
+        hop_chain_empty: "{{ hop_chain | length == 0 }}"
+
+- name: "Kubernetes Upgrade - Handle already at target version"
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  vars:
+    oim_data_path: "/opt/omnia/.data"
+    manifest_path: /opt/omnia/.data/upgrade_manifest.yml
component_name: k8s + input_project_dir: "/opt/omnia/input/project_default" + tasks: + - name: "Skip if service_k8s not configured or already completed" + ansible.builtin.meta: end_play + when: > + not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + + - name: Load variables from file + ansible.builtin.include_vars: + file: /tmp/k8s_vars.yml + + - name: "Check if cluster is already at target version" + when: hop_chain | length == 0 + block: + - name: Use cached storage_config + ansible.builtin.set_fact: + storage_config: "{{ hostvars['localhost']['cached_storage_config'] }}" + + - name: Use cached omnia_config + ansible.builtin.set_fact: + omnia_config: "{{ hostvars['localhost']['cached_omnia_config'] }}" + + - name: Set k8s_nfs_storage_name + ansible.builtin.set_fact: + k8s_nfs_storage_name: "{{ omnia_config.service_k8s_cluster[0].nfs_storage_name }}" + + - name: Set status_file path + ansible.builtin.set_fact: + status_file: >- + {{ (storage_config.mounts + | selectattr('name', 'equalto', k8s_nfs_storage_name) + | first).mount_point }}/upgrade/upgrade_status.yml + + - name: "Check if upgrade_status.yml exists" + ansible.builtin.stat: + path: "{{ status_file }}" + delegate_to: "{{ hostvars['localhost']['kube_vip'] }}" + register: _upgrade_status_stat + + - name: "Read existing upgrade_status.yml" + ansible.builtin.slurp: + src: "{{ status_file }}" + delegate_to: "{{ hostvars['localhost']['kube_vip'] }}" + register: _upgrade_status_slurp + when: _upgrade_status_stat.stat.exists | default(false) + + - name: "Parse existing upgrade status" + ansible.builtin.set_fact: + _existing_upgrade_status: "{{ _upgrade_status_slurp.content | b64decode | from_yaml }}" + when: _upgrade_status_stat.stat.exists | default(false) + + - name: "Check if previous upgrade is incomplete (failed or in_progress)" + ansible.builtin.set_fact: + _previous_upgrade_incomplete: >- + {{ 
_upgrade_status_stat.stat.exists | default(false) and + (_existing_upgrade_status.upgrade.status | default('') in ['failed', 'in_progress']) }} + + - name: "Display resuming upgrade message" + ansible.builtin.debug: + msg: + - "========================================================================" + - "[UPGRADE] Resuming previous upgrade to {{ k8s_target_version }}" + - "========================================================================" + - "Cluster is at target version but previous upgrade status: {{ _existing_upgrade_status.upgrade.status | default('unknown') }}" + - "Will re-run post-validation to verify cluster health." + - "========================================================================" + when: _previous_upgrade_incomplete | bool + + - name: "Set resume_post_validation_only flag" + ansible.builtin.set_fact: + resume_post_validation_only: true + when: _previous_upgrade_incomplete | bool + + - name: "Handle fresh run - cluster already at target" + when: not (_previous_upgrade_incomplete | bool) + block: + - name: "Display already at target message" + ansible.builtin.debug: + msg: + - "========================================================================" + - "[UPGRADE] Cluster already at target version {{ k8s_target_version }}" + - "========================================================================" + - "No upgrade needed. Marking K8s upgrade as completed." 
+ - "========================================================================" + + - name: "Load upgrade manifest" + ansible.builtin.set_fact: + manifest: "{{ lookup('file', manifest_path) | from_yaml }}" + + - name: "Mark K8s upgrade as completed" + ansible.builtin.copy: + content: >- + {{ manifest | combine({ + 'component_status': manifest.component_status | combine({ + component_name: 'completed' + }) + }) | to_nice_yaml }} + dest: "{{ manifest_path }}" + mode: '0644' + + - name: "Set skip flag for remaining plays" + ansible.builtin.set_fact: + k8s_upgrade_skip: true + - name: "Kubernetes Upgrade - Get cluster version for status" hosts: kube_vip_group gather_facts: false tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Get current cluster version ansible.builtin.command: >- @@ -470,11 +588,12 @@ vars: input_project_dir: "/opt/omnia/input/project_default" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Use cached storage_config ansible.builtin.set_fact: @@ -518,11 +637,12 @@ status_file: "{{ hostvars[kube_vip]['k8s_client_mount_path_kube_vip'] 
}}/upgrade/upgrade_status.yml" kube_vip: "{{ hostvars[kube_vip]['ansible_host'] }}" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Use cached storage_config ansible.builtin.set_fact: @@ -778,11 +898,12 @@ group_cp: "service_kube_control_plane_x86_64" group_worker: "service_kube_node_x86_64" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Use cached storage_config ansible.builtin.set_fact: @@ -916,11 +1037,12 @@ input_project_dir: "/opt/omnia/input/project_default" cluster_os_version: "{{ _software_config.cluster_os_version }}" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Load variables from file 
ansible.builtin.include_vars: @@ -969,11 +1091,12 @@ k8s_client_mount_path: "{{ hostvars[groups.kube_vip_group[0]]['k8s_client_mount_path_kube_vip'] }}" kube_vip: "{{ hostvars[groups.kube_vip_group[0]]['ansible_host'] }}" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Set backup paths from hostvars ansible.builtin.set_fact: @@ -990,11 +1113,12 @@ input_project_dir: "/opt/omnia/input/project_default" oim_provision_path: "/opt/omnia/provision" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Load software_config.json ansible.builtin.slurp: @@ -1098,11 +1222,12 @@ gather_facts: false strategy: linear tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + 
(hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Deploy upgrade repo file to node ansible.builtin.copy: @@ -1297,11 +1422,12 @@ input_project_dir: "/opt/omnia/input/project_default" cluster_os_version: "{{ _software_config.cluster_os_version }}" tasks: - - name: "Skip all tasks — service_k8s not configured or already completed" + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" ansible.builtin.meta: end_play when: > not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or - (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) - name: Load omnia_config.yml ansible.builtin.include_vars: diff --git a/upgrade/playbooks/upgrade_worker_first.yml b/upgrade/playbooks/upgrade_worker_first.yml index 59332ca243..9e0b4037d4 100644 --- a/upgrade/playbooks/upgrade_worker_first.yml +++ b/upgrade/playbooks/upgrade_worker_first.yml @@ -350,28 +350,8 @@ status: in_progress timestamp: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}" - - name: Reload systemd - ansible.builtin.systemd: - daemon_reload: true - - - name: Restart kubelet - ansible.builtin.systemd: - name: kubelet - state: restarted - enabled: true - - - name: Wait for node to become Ready with target version - ansible.builtin.command: >- - kubectl get node {{ node_ip }} - -o jsonpath="{.status.nodeInfo.kubeletVersion}:{range .status.conditions[?(@.type==\"Ready\")]}{.status}{end}" - delegate_to: "{{ kube_vip }}" - register: node_ready - until: - - node_ready.rc == 0 - - "'v' + k8s_target_version + ':True' in node_ready.stdout" - retries: "{{ kubelet_ready_retries }}" - delay: "{{ kubelet_ready_delay }}" - changed_when: false + - name: Execute kubelet restart (includes feature gate update) + ansible.builtin.include_tasks: "{{ playbook_dir 
}}/../roles/upgrade_k8s/tasks/step_kubelet_restart.yml" - name: Mark kubelet_restart completed ansible.builtin.include_tasks: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/update_node_status.yml" diff --git a/upgrade/playbooks/upgrade_workers.yml b/upgrade/playbooks/upgrade_workers.yml index 2530a0db48..d3146e578c 100644 --- a/upgrade/playbooks/upgrade_workers.yml +++ b/upgrade/playbooks/upgrade_workers.yml @@ -354,28 +354,8 @@ status: in_progress timestamp: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}" - - name: Reload systemd - ansible.builtin.systemd: - daemon_reload: true - - - name: Restart kubelet - ansible.builtin.systemd: - name: kubelet - state: restarted - enabled: true - - - name: Wait for node to become Ready with target version - ansible.builtin.command: >- - kubectl get node {{ node_ip }} - -o jsonpath="{.status.nodeInfo.kubeletVersion}:{range .status.conditions[?(@.type==\"Ready\")]}{.status}{end}" - delegate_to: "{{ kube_vip }}" - register: node_ready - until: - - node_ready.rc == 0 - - "'v' + k8s_target_version + ':True' in node_ready.stdout" - retries: "{{ kubelet_ready_retries }}" - delay: "{{ kubelet_ready_delay }}" - changed_when: false + - name: Execute kubelet restart (includes feature gate update) + ansible.builtin.include_tasks: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/step_kubelet_restart.yml" - name: Mark kubelet_restart completed ansible.builtin.include_tasks: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/update_node_status.yml" diff --git a/upgrade/roles/upgrade_k8s/tasks/main.yml b/upgrade/roles/upgrade_k8s/tasks/main.yml index b95a8a400e..e14b847f83 100644 --- a/upgrade/roles/upgrade_k8s/tasks/main.yml +++ b/upgrade/roles/upgrade_k8s/tasks/main.yml @@ -174,7 +174,16 @@ - "Kubernetes upgrade failed!" - "Status file: {{ status_file }}" - "Backup location: {{ backup_dir }}" - - "For rollback, run: ansible-playbook rollback/rollback.yml --tags k8s" + - "" + - "RECOMMENDED STEPS:" + - " 1. 
Check the upgrade status file for failed steps" + - " 2. Verify cluster health: kubectl get nodes, kubectl get pods -A" + - " 3. Fix any issues with the failed nodes" + - " 4. Re-run the upgrade: ansible-playbook upgrade/upgrade.yml" + - " (completed steps will be skipped automatically)" + - "" + - " If the issue persists after retry, proceed with rollback:" + - " ansible-playbook rollback/rollback.yml --tags k8s" - name: Fail the playbook ansible.builtin.fail: diff --git a/upgrade/roles/upgrade_k8s/tasks/post_validation.yml b/upgrade/roles/upgrade_k8s/tasks/post_validation.yml index f29f046d85..74d1e00cef 100644 --- a/upgrade/roles/upgrade_k8s/tasks/post_validation.yml +++ b/upgrade/roles/upgrade_k8s/tasks/post_validation.yml @@ -100,7 +100,7 @@ - "'is unhealthy' in etcd_health.stdout or etcd_health.rc != 0" # ── Calico pods check (Engineering Spec §4.7.5 Check 5) ─────────── -- name: Check Calico pods +- name: Wait for Calico pods to be Running delegate_to: "{{ kube_vip }}" ansible.builtin.shell: cmd: >- @@ -113,16 +113,19 @@ register: unhealthy_calico_pods changed_when: false failed_when: false + retries: 30 + delay: 10 + until: unhealthy_calico_pods.stdout | length == 0 - name: Fail if Calico pods are not Running ansible.builtin.fail: msg: >- - Post-validation failed: Some Calico pods are not Running. + Post-validation failed: Some Calico pods are not Running after waiting 5 minutes. 
{{ unhealthy_calico_pods.stdout }} when: unhealthy_calico_pods.stdout | length > 0 # ── MetalLB pods check (Engineering Spec §4.7.5 Check 6) ───────── -- name: Check MetalLB pods +- name: Wait for MetalLB pods to be Running delegate_to: "{{ kube_vip }}" ansible.builtin.shell: cmd: >- @@ -135,11 +138,14 @@ register: unhealthy_metallb_pods changed_when: false failed_when: false + retries: 30 + delay: 10 + until: unhealthy_metallb_pods.stdout | length == 0 - name: Fail if MetalLB pods are not Running ansible.builtin.fail: msg: >- - Post-validation failed: Some MetalLB pods are not Running. + Post-validation failed: Some MetalLB pods are not Running after waiting 5 minutes. {{ unhealthy_metallb_pods.stdout }} when: unhealthy_metallb_pods.stdout | length > 0 diff --git a/upgrade/roles/upgrade_k8s/tasks/step_etcd_health_check.yml b/upgrade/roles/upgrade_k8s/tasks/step_etcd_health_check.yml index 0e8f04c71c..e8b4400974 100644 --- a/upgrade/roles/upgrade_k8s/tasks/step_etcd_health_check.yml +++ b/upgrade/roles/upgrade_k8s/tasks/step_etcd_health_check.yml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -- name: Get etcd pod name on {{ current_node_name }} +- name: Wait for etcd pod on {{ current_node_name }} ansible.builtin.command: cmd: >- kubectl get pods -n kube-system @@ -22,6 +22,11 @@ delegate_to: "{{ kube_vip }}" register: etcd_pod changed_when: false + retries: 30 + delay: 10 + until: + - etcd_pod.rc == 0 + - etcd_pod.stdout | length > 0 - name: Check etcd cluster health after upgrade (kubectl exec) - {{ current_node_name }} ansible.builtin.command: diff --git a/upgrade/roles/upgrade_k8s/tasks/step_kubelet_restart.yml b/upgrade/roles/upgrade_k8s/tasks/step_kubelet_restart.yml index f88d250e20..8cbea6d96a 100644 --- a/upgrade/roles/upgrade_k8s/tasks/step_kubelet_restart.yml +++ b/upgrade/roles/upgrade_k8s/tasks/step_kubelet_restart.yml @@ -12,27 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -# Part 1: Reload systemd on the node +# Part 1: Update kubelet config.yaml parameters +- name: Update kubelet config.yaml on {{ current_node_name }} + ansible.builtin.include_tasks: update_kubelet_config.yml + +# Part 2: Update kubelet feature gates +- name: Update kubelet feature gates on {{ current_node_name }} + ansible.builtin.include_tasks: update_kubelet_feature_gates.yml + +# Part 3: Reload systemd on the node - name: Reload systemd on {{ current_node_name }} ansible.builtin.systemd: daemon_reload: true delegate_to: "{{ current_node_name }}" -# Part 2: Restart crio service +# Part 4: Restart crio service - name: Restart crio on {{ current_node_name }} ansible.builtin.systemd: name: crio state: restarted delegate_to: "{{ current_node_name }}" -# Part 3: Restart kubelet service +# Part 5: Restart kubelet service - name: Restart kubelet on {{ current_node_name }} ansible.builtin.systemd: name: kubelet state: restarted delegate_to: "{{ current_node_name }}" -# Part 4: Wait for node to become Ready with correct version +# Part 6: Wait for node to become Ready with correct version - name: Wait 
for node to become Ready ansible.builtin.command: >- kubectl get node {{ node_ip }} diff --git a/upgrade/roles/upgrade_k8s/tasks/step_upgrade_apply.yml b/upgrade/roles/upgrade_k8s/tasks/step_upgrade_apply.yml index ee49cd0829..99f6fcb144 100644 --- a/upgrade/roles/upgrade_k8s/tasks/step_upgrade_apply.yml +++ b/upgrade/roles/upgrade_k8s/tasks/step_upgrade_apply.yml @@ -26,3 +26,6 @@ ansible.builtin.command: kubeadm upgrade apply v{{ k8s_target_version }} --yes register: upgrade_apply_result changed_when: true + +- name: Update kube-controller-manager arguments + ansible.builtin.include_tasks: update_kcm_arguments.yml diff --git a/upgrade/roles/upgrade_k8s/tasks/step_upgrade_node.yml b/upgrade/roles/upgrade_k8s/tasks/step_upgrade_node.yml index 3abde668e7..9112773a77 100644 --- a/upgrade/roles/upgrade_k8s/tasks/step_upgrade_node.yml +++ b/upgrade/roles/upgrade_k8s/tasks/step_upgrade_node.yml @@ -17,3 +17,6 @@ delegate_to: "{{ current_node_name }}" register: upgrade_node_result changed_when: true + +- name: Update kube-controller-manager arguments on {{ current_node_name }} + ansible.builtin.include_tasks: update_kcm_arguments.yml diff --git a/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml b/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml new file mode 100644 index 0000000000..5d53d30e3a --- /dev/null +++ b/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml @@ -0,0 +1,146 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +--- +# ============================================================================ +# update_kcm_arguments.yml — Update kube-controller-manager arguments +# ============================================================================ +# This task file updates kube-controller-manager manifest to ensure all +# required node monitoring and eviction arguments are present. +# +# This should be called on control plane nodes after kubeadm upgrade apply. +# ============================================================================ + +- name: Check if kube-controller-manager manifest exists + ansible.builtin.stat: + path: /etc/kubernetes/manifests/kube-controller-manager.yaml + register: _kcm_manifest_stat + +- name: Backup kube-controller-manager manifest + ansible.builtin.copy: + src: /etc/kubernetes/manifests/kube-controller-manager.yaml + dest: "/tmp/kube-controller-manager-backup-{{ now(utc=true).strftime('%Y%m%d%H%M%S') }}.yaml" + remote_src: true + mode: '0644' + when: _kcm_manifest_stat.stat.exists + +- name: Read current kube-controller-manager manifest + ansible.builtin.slurp: + path: /etc/kubernetes/manifests/kube-controller-manager.yaml + register: _kcm_manifest_content + when: _kcm_manifest_stat.stat.exists + +- name: Decode kube-controller-manager manifest + ansible.builtin.set_fact: + _kcm_manifest: "{{ _kcm_manifest_content.content | b64decode }}" + when: _kcm_manifest_stat.stat.exists + +- name: Define required kube-controller-manager arguments + ansible.builtin.set_fact: + _required_kcm_args: + - "--node-monitor-period=5s" + - "--node-monitor-grace-period=40s" + - "--node-eviction-rate=1" + - "--secondary-node-eviction-rate=1" + - "--terminated-pod-gc-threshold=50" + - "--controllers=*,nodeipam,nodelifecycle,bootstrapsigner,tokencleaner" + +- name: Initialize update flag + ansible.builtin.set_fact: + _needs_update: false + +- name: Check each 
required argument + ansible.builtin.set_fact: + _needs_update: true + when: + - _kcm_manifest_stat.stat.exists + - item not in _kcm_manifest + loop: "{{ _required_kcm_args }}" + +- name: Update kube-controller-manager manifest with required arguments + ansible.builtin.shell: | + set -eo pipefail + MANIFEST="/etc/kubernetes/manifests/kube-controller-manager.yaml" + + # Function to add or update argument + add_or_update_arg() { + local arg_name="$1" + local arg_value="$2" + local full_arg="--${arg_name}=${arg_value}" + + # Check if argument exists + if grep -q "^\s*- --${arg_name}=" "$MANIFEST"; then + # Update existing argument + sed -i "s|^\(\s*\)- --${arg_name}=.*|\1- ${full_arg}|" "$MANIFEST" + echo "Updated: ${full_arg}" + else + # Add new argument after the command line + sed -i "/^\s*- kube-controller-manager$/a\ - ${full_arg}" "$MANIFEST" + echo "Added: ${full_arg}" + fi + } + + # Update/add each argument + add_or_update_arg "node-monitor-period" "5s" + add_or_update_arg "node-monitor-grace-period" "40s" + add_or_update_arg "node-eviction-rate" "1" + add_or_update_arg "secondary-node-eviction-rate" "1" + add_or_update_arg "terminated-pod-gc-threshold" "50" + add_or_update_arg "controllers" "*,nodeipam,nodelifecycle,bootstrapsigner,tokencleaner" + + echo "kube-controller-manager manifest updated successfully" + args: + executable: /bin/bash + changed_when: _kcm_update_result.rc == 0 + when: + - _kcm_manifest_stat.stat.exists + - _needs_update | bool + register: _kcm_update_result + +- name: Display update result + ansible.builtin.debug: + msg: "{{ _kcm_update_result.stdout_lines }}" + when: + - _kcm_update_result is defined + - _kcm_update_result is changed + +- name: Wait for kube-controller-manager to restart (kubelet auto-restarts static pods) + ansible.builtin.pause: + seconds: 30 + when: + - _kcm_update_result is defined + - _kcm_update_result is changed + +- name: Get node IP for pod name + ansible.builtin.set_fact: + _node_ip: "{{ node_ip | 
default(current_node_name) | default(inventory_hostname) }}" + +- name: Verify kube-controller-manager is running + ansible.builtin.shell: | + kubectl get pod kube-controller-manager-{{ _node_ip }} -n kube-system -o jsonpath='{.status.phase}' + changed_when: false + register: _kcm_status + retries: 10 + delay: 5 + until: _kcm_status.stdout == "Running" + delegate_to: "{{ kube_vip }}" + when: + - _kcm_update_result is defined + - _kcm_update_result is changed + +- name: Display success message + ansible.builtin.debug: + msg: "kube-controller-manager arguments successfully updated on {{ _node_ip }}" + when: + - _kcm_update_result is defined + - _kcm_update_result is changed diff --git a/upgrade/roles/upgrade_k8s/tasks/update_kubelet_config.yml b/upgrade/roles/upgrade_k8s/tasks/update_kubelet_config.yml new file mode 100644 index 0000000000..ce47d671c9 --- /dev/null +++ b/upgrade/roles/upgrade_k8s/tasks/update_kubelet_config.yml @@ -0,0 +1,116 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# ============================================================================ +# update_kubelet_config.yml — Update kubelet config.yaml parameters +# ============================================================================ +# This task file updates kubelet configuration parameters in +# /var/lib/kubelet/config.yaml to match the settings applied during +# fresh provisioning via cloud-init templates. 
+# +# This should be called after kubelet is upgraded to ensure consistency +# between provisioned and upgraded nodes. +# ============================================================================ + +- name: Check if /var/lib/kubelet/config.yaml exists + ansible.builtin.stat: + path: /var/lib/kubelet/config.yaml + register: _kubelet_config_stat + +- name: Determine node role from inventory groups + ansible.builtin.set_fact: + _is_cp_first: "{{ 'k8s_control_plane_first' in group_names }}" + _is_cp_additional: "{{ 'k8s_control_plane' in group_names }}" + _is_worker: "{{ 'k8s_workers' in group_names }}" + +- name: Update kubelet config.yaml parameters + when: _kubelet_config_stat.stat.exists + block: + # Shutdown grace period settings (all nodes) + - name: Update shutdownGracePeriod in kubelet config + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^shutdownGracePeriod:' + line: 'shutdownGracePeriod: 30s' + + - name: Update shutdownGracePeriodCriticalPods in kubelet config + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^shutdownGracePeriodCriticalPods:' + line: 'shutdownGracePeriodCriticalPods: 10s' + + # Node status and sync frequency settings + # All control planes (first + additional): 10s / 60s / 60s + # Workers: 5s / 10s / 10s + + # ── Control plane nodes (first + additional) ── + - name: Set nodeStatusUpdateFrequency for control plane nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusUpdateFrequency:' + line: 'nodeStatusUpdateFrequency: 10s' + when: (_is_cp_first | bool) or (_is_cp_additional | bool) + + - name: Set nodeStatusReportFrequency for control plane nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusReportFrequency:' + line: 'nodeStatusReportFrequency: 60s' + when: (_is_cp_first | bool) or (_is_cp_additional | bool) + + - name: Set syncFrequency for control plane nodes + ansible.builtin.lineinfile: + 
path: /var/lib/kubelet/config.yaml + regexp: '^syncFrequency:' + line: 'syncFrequency: 60s' + when: (_is_cp_first | bool) or (_is_cp_additional | bool) + + # ── Worker nodes ── + - name: Set nodeStatusUpdateFrequency for worker nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusUpdateFrequency:' + line: 'nodeStatusUpdateFrequency: 5s' + when: _is_worker | bool + + - name: Set nodeStatusReportFrequency for worker nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^nodeStatusReportFrequency:' + line: 'nodeStatusReportFrequency: 10s' + when: _is_worker | bool + + - name: Set syncFrequency for worker nodes + ansible.builtin.lineinfile: + path: /var/lib/kubelet/config.yaml + regexp: '^syncFrequency:' + line: 'syncFrequency: 10s' + when: _is_worker | bool + +# Note: daemon-reload and kubelet restart are handled by the caller +# (step_kubelet_restart.yml). No restart here to avoid double restart. + +- name: Determine node role label + ansible.builtin.set_fact: + _node_role: >- + {{ 'cp_first' if (_is_cp_first | bool) + else 'cp_additional' if (_is_cp_additional | bool) + else 'worker' }} + +- name: Display kubelet config update status + ansible.builtin.debug: + msg: >- + Updated /var/lib/kubelet/config.yaml + on {{ inventory_hostname }} (role: {{ _node_role }}) + when: _kubelet_config_stat.stat.exists diff --git a/upgrade/roles/upgrade_k8s/tasks/update_kubelet_feature_gates.yml b/upgrade/roles/upgrade_k8s/tasks/update_kubelet_feature_gates.yml new file mode 100644 index 0000000000..286fa614f7 --- /dev/null +++ b/upgrade/roles/upgrade_k8s/tasks/update_kubelet_feature_gates.yml @@ -0,0 +1,72 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# ============================================================================ +# update_kubelet_feature_gates.yml — Update kubelet feature gates +# ============================================================================ +# This task file updates kubelet feature gates to enable old K8s behavior +# where pod status is reset on kubelet restart. +# +# This should be called after kubelet is upgraded to ensure the feature gate +# is present on all nodes. +# ============================================================================ + +- name: Check if /etc/sysconfig/kubelet exists + ansible.builtin.stat: + path: /etc/sysconfig/kubelet + register: _kubelet_sysconfig_stat + +- name: Read current KUBELET_EXTRA_ARGS + ansible.builtin.slurp: + path: /etc/sysconfig/kubelet + register: _kubelet_extra_args_content + when: _kubelet_sysconfig_stat.stat.exists + +- name: Decode and check current KUBELET_EXTRA_ARGS + ansible.builtin.set_fact: + _current_extra_args: "{{ (_kubelet_extra_args_content.content | b64decode).strip() }}" + _feature_gate_present: "{{ 'ChangeContainerStatusOnKubeletRestart' in (_kubelet_extra_args_content.content | b64decode) }}" + when: _kubelet_sysconfig_stat.stat.exists + +- name: Set default if file doesn't exist + ansible.builtin.set_fact: + _current_extra_args: "" + _feature_gate_present: false + when: not _kubelet_sysconfig_stat.stat.exists + +- name: Display current status + ansible.builtin.debug: + msg: "Feature gate ChangeContainerStatusOnKubeletRestart already present on {{ inventory_hostname }}" + when: _feature_gate_present | 
bool + +- name: Update /etc/sysconfig/kubelet with feature gate + ansible.builtin.copy: + content: | + KUBELET_EXTRA_ARGS="--feature-gates=ChangeContainerStatusOnKubeletRestart=true" + dest: /etc/sysconfig/kubelet + owner: root + group: root + mode: '0644' + when: not (_feature_gate_present | bool) + register: _kubelet_config_updated + +# Note: daemon-reload and kubelet restart are handled by the caller +# (step_kubelet_restart.yml). No restart here to avoid double restart. + +- name: Display update status + ansible.builtin.debug: + msg: >- + {{ 'Updated /etc/sysconfig/kubelet with ChangeContainerStatusOnKubeletRestart feature gate' + if _kubelet_config_updated is changed + else 'Feature gate already present, no changes needed' }} From 08f3f26bb7090272eec4209dc67a6406587fb1bc Mon Sep 17 00:00:00 2001 From: Sujit Jadhav Date: Fri, 5 Jun 2026 09:56:14 +0530 Subject: [PATCH 18/78] =?UTF-8?q?fix(openchami):=20use=20coresmd=20rule-le?= =?UTF-8?q?vel=20subnet=20keys=20for=20multi-subnet=20P=E2=80=A6=20(#4643)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(openchami): use coresmd rule-level subnet keys for multi-subnet PXE boot The coresmd v0.5+ rich rules API uses subnet: match keys within rules instead of separate subnet= config directives. The previous template generated subnet=CIDR,ROUTER lines that coresmd ignored with 'unknown config key' errors, causing nodes in additional subnets to receive the wrong router (primary subnet gateway) and fail PXE boot with 0-byte boot files. 
Changes: - coredhcp.yaml.j2: Replace subnet= directives with per-subnet rules using subnet:/routers:/cidr: keys to set correct DHCP options 3 and 1 - configs.yaml.j2: Add netmask_bits to coredhcp_subnets variable for use in cidr: action key - deploy_openchami.yml: Add static routes for additional subnets so TFTP/iPXE responses route via the admin NIC instead of management Signed-off-by: Sujit Jadhav * fix(openchami): resolve multi-subnet hostname assignment using nid instead of pxe_mapping_file hostnames Root cause: During provisioning, per-node hostname data (hostname.yaml) was loaded into cloud-init-server BEFORE defaults and group configs. The later 'ochami cloud-init defaults set' and 'ochami cloud-init group set' calls overwrote the per-node instance data, resetting local-hostname to the NID-based default (e.g. nid006 instead of sn1). Changes: - Re-apply hostname.yaml after all cloud-init defaults and group configs are set in configure_bss_cloud_init.yml, matching the upgrade reload order in reload_cloud_init_data.yml - Change boot param datasource from ds=nocloud to ds=nocloud-net for proper network-based metadata fetch (required for cloud-init < 21.3) - Add cloud-init=enabled to boot params to ensure cloud-init activation in the live image dracut environment - Remove hostname:nid{03d} format from multi-subnet coresmd DHCP rules to stop DHCP from forcing NID-based hostnames via option 12, aligning with single-subnet behavior where cloud-init handles hostname assignment Signed-off-by: Sujit Jadhav --------- Signed-off-by: Sujit Jadhav Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> Signed-off-by: Nagachandan P Signed-off-by: Kratika Patidar Signed-off-by: Rajeshkumar S Signed-off-by: Kratika.Patidar Signed-off-by: Abhishek S A Signed-off-by: Katakam-Rakesh Signed-off-by: sujit-jadhav Signed-off-by: Jagadeesh N V Signed-off-by: balajikumaran.cs Signed-off-by: sakshi-singla-1735 Signed-off-by: priti-parate 
<140157516+priti-parate@users.noreply.github.com> Signed-off-by: Nagachandan-P Signed-off-by: mithileshreddy04 Signed-off-by: Jagadeesh N V Signed-off-by: pullan1 Co-authored-by: Abhishek S A Co-authored-by: venu <236371043+Venu-p1@users.noreply.github.com> Co-authored-by: Nagachandan P Co-authored-by: Kratika Patidar Co-authored-by: snarthan Co-authored-by: Rajeshkumar-s2 Co-authored-by: Jagadeesh N V Co-authored-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Co-authored-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Co-authored-by: Jagadeesh N V Co-authored-by: balajikumaran.cs Co-authored-by: sakshi-singla-1735 Co-authored-by: priti-parate <140157516+priti-parate@users.noreply.github.com> Co-authored-by: Mithilesh Reddy Co-authored-by: pullan1 --- .../openchami/tasks/deploy_openchami.yml | 24 +++++++++++++++++++ .../openchami/templates/configs.yaml.j2 | 3 ++- .../templates/coredhcp/coredhcp.yaml.j2 | 11 +++++---- .../tasks/configure_bss_cloud_init.yml | 16 +++++++++++++ .../roles/configure_ochami/vars/main.yml | 2 +- 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml index 160ff38073..bebfa01c88 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml @@ -119,6 +119,30 @@ {{ (admin_nic_ip + '/' + network_data.admin_network.netmask_bits) | ansible.utils.ipaddr('netmask') }} coredhcp_lease_duration: "{{ default_lease_time }}s" +- name: Configure static routes for additional subnets + when: network_data.admin_network.additional_subnets | default([]) | length > 0 + block: + - name: Determine admin network gateway + ansible.builtin.set_fact: + admin_gateway: "{{ network_data.admin_network.subnet | ansible.utils.ipmath(1) }}" + + - name: Check existing 
routes on admin NIC + ansible.builtin.command: nmcli -g ipv4.routes connection show {{ admin_nic }} + register: existing_routes + changed_when: false + + - name: Add persistent routes for additional subnets via NetworkManager + ansible.builtin.command: >- + nmcli connection modify {{ admin_nic }} + +ipv4.routes "{{ item.subnet }}/{{ item.netmask_bits }} {{ admin_gateway }}" + loop: "{{ network_data.admin_network.additional_subnets }}" + when: (item.subnet + '/' + item.netmask_bits) not in existing_routes.stdout + changed_when: true + + - name: Apply route changes to admin NIC + ansible.builtin.command: nmcli device reapply {{ admin_nic }} + changed_when: true + - name: Set s3_access_id and s3_secret_key ansible.builtin.set_fact: s3_access_id: "{{ hostvars['localhost']['s3_access_id'] | default('admin', true) }}" diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 index 472eac8283..a7ddc7be0d 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 +++ b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 @@ -14,11 +14,12 @@ cluster_nidlength: 3 {% set additional = network_data.admin_network.additional_subnets | default([]) %} {% if additional | length > 0 %} -# Multi-subnet support (coresmd subnet= directives) +# Multi-subnet support (coresmd rule-level subnet: match keys) coredhcp_subnets: {% for s in additional %} - cidr: "{{ s.subnet }}/{{ s.netmask_bits }}" router: "{{ s.router }}" + netmask_bits: "{{ s.netmask_bits }}" {% endfor %} # Multi-subnet bootloop pools (bootloop subnet_pool= directives) diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 index 523d4be376..9dd198a59e 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 +++ 
b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 @@ -7,7 +7,9 @@ server4: - router: {{ coredhcp_router }} - netmask: {{ coredhcp_netmask }} {% if coredhcp_subnets | default([]) | length > 0 %} - # Multi-subnet mode: uses key=value config format (requires coresmd with multi-subnet support) + # Multi-subnet mode: uses key=value config format (requires coresmd v0.5+ rich rules) + # Subnet-specific rules use subnet: match keys with routers:/cidr: action keys + # to override DHCP options 3 (router) and 1 (netmask) per relay subnet. - coresmd: | svc_base_uri=https://{{ cluster_name }}.{{ cluster_domain }}:8443 ipxe_base_uri=http://{{ cluster_boot_ip }}:8081 @@ -16,10 +18,11 @@ server4: lease_time={{ coredhcp_lease_duration }} single_port={{ coredhcp_tftp_single_port_mode | lower }} {% for s in coredhcp_subnets %} - subnet={{ s.cidr }},{{ s.router }} + rule=subnet:{{ s.cidr }},type:Node,routers:{{ s.router }},cidr:{{ s.netmask_bits }} + rule=subnet:{{ s.cidr }},type:NodeBMC,routers:{{ s.router }},cidr:{{ s.netmask_bits }} {% endfor %} - rule=type:Node,hostname:{{ cluster_shortname }}{{'{'}}0{{ coredhcp_nidlength | default(cluster_nidlength | default(3)) }}d{{'}'}} - rule=type:NodeBMC,hostname:bmc{{'{'}}0{{ coredhcp_nidlength | default(cluster_nidlength | default(3)) }}d{{'}'}} + rule=type:Node + rule=type:NodeBMC rule=hostname:unknown-{{'{'}}04d{{'}'}} - bootloop: | lease_file=/tmp/coredhcp.db diff --git a/provision/roles/configure_ochami/tasks/configure_bss_cloud_init.yml b/provision/roles/configure_ochami/tasks/configure_bss_cloud_init.yml index 04e10ed205..171224dd21 100644 --- a/provision/roles/configure_ochami/tasks/configure_bss_cloud_init.yml +++ b/provision/roles/configure_ochami/tasks/configure_bss_cloud_init.yml @@ -111,6 +111,22 @@ ansible.builtin.include_tasks: configure_cloud_init_group.yml with_items: "{{ hostvars['localhost']['functional_groups'] | map(attribute='name') | list }}" +# Re-apply per-node hostname data after 
all defaults and group configs are set. +# The cloud-init-server stores data in-memory and the ordering matters: +# defaults/groups must be loaded before per-node instance overrides. +# This matches the reload order used in upgrade (reload_cloud_init_data.yml). +- name: Check hostname.yaml exists + ansible.builtin.stat: + path: "{{ openchami_hostname_vars_path }}" + register: hostname_yaml_stat + +- name: Re-apply per-node hostname data into cloud-init-server + ansible.builtin.command: /usr/bin/ochami cloud-init node set -f yaml -d @"{{ openchami_hostname_vars_path }}" + changed_when: true + when: + - hostname_yaml_stat.stat.exists | default(false) + - not (hostvars['localhost']['upgrade_mode'] | default(false) | bool) + - name: Set openchami SELinux context for Local flow ansible.builtin.command: chcon -R system_u:object_r:container_file_t:s0 "{{ hostvars['localhost']['oim_shared_path'] }}/omnia/openchami" changed_when: true diff --git a/provision/roles/configure_ochami/vars/main.yml b/provision/roles/configure_ochami/vars/main.yml index d9866fe7d4..04de41c179 100644 --- a/provision/roles/configure_ochami/vars/main.yml +++ b/provision/roles/configure_ochami/vars/main.yml @@ -38,7 +38,7 @@ common_cloud_init_groups: # Usage: configure_bss_group.yml, configure_bss_cloud_init.yml bss_template: bss/bss.yaml.j2 bss_dir: "{{ openchami_work_dir }}/boot" -bss_params_cloud_init: 'ds=nocloud;s=http://{{ cluster_boot_ip }}:8081/cloud-init/' +bss_params_cloud_init: 'cloud-init=enabled ds=nocloud-net;s=http://{{ cluster_boot_ip }}:8081/cloud-init/' bss_params_opts: 'ip=dhcp rd.live.image rd.live.ram rd.neednet=1 rd.driver.blacklist=ccp,edac_core,power_meter,ahci,megaraid_sas modprobe.blacklist=ccp,edac_core,power_meter,ahci,megaraid_sas libata.force=1:disable,2:disable,3:disable,4:disable rd.luks=0 rd.md=0 rd.dm=0 console=tty0 console=ttyS0,115200 selinux=0 apparmor=0 ip6=off cloud-init=enabled' # noqa: yaml[line-length] image_missing_fail_msg: "Failed to set kernel or initrd. 
Create the image using build_image.yml and try again." From b4f8d2581423eb919c0c07974b838b0e5979aa49 Mon Sep 17 00:00:00 2001 From: Mithilesh Reddy Date: Fri, 5 Jun 2026 12:05:18 +0530 Subject: [PATCH 19/78] Rollback summary formatting fixes (#4683) * Rollback summary formatting fixes Signed-off-by: mithileshreddy04 * Update upgrade_build_stream.yml Signed-off-by: mithileshreddy04 --------- Signed-off-by: mithileshreddy04 --- .../rollback_buildstream/tasks/status_update.yml | 10 ++++++---- .../templates/rollback_summary.j2 | 1 + rollback/roles/rollback_k8s/tasks/main.yml | 10 ++++++---- rollback/roles/rollback_k8s/vars/main.yml | 16 ++++++++++++---- .../rollback_openchami/tasks/rollback_status.yml | 10 ++++++---- upgrade/playbooks/upgrade_build_stream.yml | 1 + 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/rollback/roles/rollback_buildstream/tasks/status_update.yml b/rollback/roles/rollback_buildstream/tasks/status_update.yml index ff07c04951..179c9a5a28 100644 --- a/rollback/roles/rollback_buildstream/tasks/status_update.yml +++ b/rollback/roles/rollback_buildstream/tasks/status_update.yml @@ -44,9 +44,11 @@ mode: '0644' - name: "Phase 9 | Display rollback summary" - ansible.builtin.debug: - msg: "{{ lookup('template', 'rollback_summary.j2') }}" + ansible.builtin.pause: + seconds: 1 + prompt: "{{ lookup('template', 'rollback_summary.j2') }}" - name: "Phase 9 | Display rollback status" - ansible.builtin.debug: - msg: "[ROLLBACK] Component '{{ buildstream_orchestrator_component_name }}' — status changed to: {{ _rollback_final_status }}" + ansible.builtin.pause: + seconds: 1 + prompt: "[ROLLBACK] Component '{{ buildstream_orchestrator_component_name }}' — status changed to: {{ _rollback_final_status }}" diff --git a/rollback/roles/rollback_buildstream/templates/rollback_summary.j2 b/rollback/roles/rollback_buildstream/templates/rollback_summary.j2 index 527d3b2013..4d0eaa6081 100644 --- 
a/rollback/roles/rollback_buildstream/templates/rollback_summary.j2 +++ b/rollback/roles/rollback_buildstream/templates/rollback_summary.j2 @@ -22,4 +22,5 @@ Directories: All build_stream NFS directories removed omnia.target: BuildStream/watcher entries removed {% endif %} + ════════════════════════════════════════════════════════ diff --git a/rollback/roles/rollback_k8s/tasks/main.yml b/rollback/roles/rollback_k8s/tasks/main.yml index a5bdae2e88..bb03673eb0 100644 --- a/rollback/roles/rollback_k8s/tasks/main.yml +++ b/rollback/roles/rollback_k8s/tasks/main.yml @@ -232,8 +232,9 @@ completed_at: "{{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}" - name: Display rollback success - ansible.builtin.debug: - msg: "{{ msg_rollback_complete }}" + ansible.builtin.pause: + seconds: 1 + prompt: "{{ msg_rollback_complete }}" rescue: - name: Mark rollback as failed @@ -252,8 +253,9 @@ msg: "Could not update rollback failed status file." - name: Display rollback failure - ansible.builtin.debug: - msg: "{{ msg_rollback_failed }}" + ansible.builtin.pause: + seconds: 1 + prompt: "{{ msg_rollback_failed }}" - name: Fail the playbook ansible.builtin.fail: diff --git a/rollback/roles/rollback_k8s/vars/main.yml b/rollback/roles/rollback_k8s/vars/main.yml index fce2df04c5..0892fa9041 100644 --- a/rollback/roles/rollback_k8s/vars/main.yml +++ b/rollback/roles/rollback_k8s/vars/main.yml @@ -116,12 +116,20 @@ msg_etcd_quorum_lost: >- msg_rollback_backup_missing: >- Required backup not found: {{ _missing_backup }}. Cannot proceed with rollback. Ensure upgrade backups exist. -msg_rollback_complete: >- - K8s rollback from {{ k8s_current_version }} to {{ k8s_rollback_version }} - completed successfully. All nodes Ready. 
-msg_rollback_failed: >- +msg_rollback_complete: | + ============================================================ + [ROLLBACK] K8S ROLLBACK COMPLETED SUCCESSFULLY + ============================================================ + Rolled back from {{ k8s_current_version }} to {{ k8s_rollback_version }}. + All nodes Ready. + ============================================================ +msg_rollback_failed: | + ============================================================ + [ROLLBACK] K8S ROLLBACK FAILED + ============================================================ K8s rollback failed. Check rollback_status.yml for details. Status file: {{ rollback_status_file }} + ============================================================ msg_metallb_cleanup: >- Stale MetalLB IPs detected. Removing secondary IPs from node interfaces to prevent split-brain when speakers restart. diff --git a/rollback/roles/rollback_openchami/tasks/rollback_status.yml b/rollback/roles/rollback_openchami/tasks/rollback_status.yml index 81e93963e2..c04222fdc4 100644 --- a/rollback/roles/rollback_openchami/tasks/rollback_status.yml +++ b/rollback/roles/rollback_openchami/tasks/rollback_status.yml @@ -23,13 +23,15 @@ - name: Report rollback result block: - name: Report rollback failure - ansible.builtin.debug: - msg: "{{ rollback_messages.status.failure }}" + ansible.builtin.pause: + seconds: 1 + prompt: "{{ rollback_messages.status.failure }}" when: openchami_rollback_failed | default(false) | bool - name: Report rollback success - ansible.builtin.debug: - msg: "{{ rollback_messages.status.success }}" + ansible.builtin.pause: + seconds: 1 + prompt: "{{ rollback_messages.status.success }}" when: - not (openchami_rollback_failed | default(false) | bool) diff --git a/upgrade/playbooks/upgrade_build_stream.yml b/upgrade/playbooks/upgrade_build_stream.yml index d5953a3e4b..9f2c202a58 100644 --- a/upgrade/playbooks/upgrade_build_stream.yml +++ b/upgrade/playbooks/upgrade_build_stream.yml @@ -246,6 +246,7 @@ - 
"Detected: {{ 'Upgrade existing installation' if upgrade_path == 'upgrade_existing' else 'Fresh install' }}" - "BuildStream: installed={{ buildstream_service_exists }}, active={{ buildstream_service_active }}" - "Postgres: installed={{ postgres_service_exists }}, active={{ postgres_service_active }}" + - "============================================================" # ── Set manifest to in-progress ── - name: Read manifest From 90517756ed98be8ef03b49017ba23d3215c01c0f Mon Sep 17 00:00:00 2001 From: Nagachandan P Date: Fri, 5 Jun 2026 13:14:25 +0530 Subject: [PATCH 20/78] Update check_slurm_cluster.yml Signed-off-by: Nagachandan P --- .../roles/upgrade_slurm/tasks/check_slurm_cluster.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml b/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml index f79fab053e..4d59ba5ec5 100644 --- a/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml +++ b/upgrade/roles/upgrade_slurm/tasks/check_slurm_cluster.yml @@ -160,9 +160,12 @@ - name: Abort upgrade — compute nodes not in idle state ansible.builtin.fail: msg: >- - Upgrade aborted: {{ slurm_non_idle_nodes | length }} compute node(s) not idle: - {{ slurm_non_idle_nodes | join(', ') }}. - Drain nodes and cancel jobs, then re-run the upgrade playbook. + Upgrade aborted: One or more compute nodes are not in the IDLE state + (e.g. allocated, mixed, draining, drained, or down). + Upgrade can only proceed when all compute nodes are IDLE. + Please resolve the node states — complete or cancel running jobs, and + bring any DOWN nodes back online — so all nodes return to IDLE, + then re-run the upgrade playbook. 
- name: Create groups for slurm nodes reboot play ansible.builtin.add_host: From b10ed1d6ee5591ae4b90fc9f43ef793e5f348fe3 Mon Sep 17 00:00:00 2001 From: Mithilesh Reddy Date: Fri, 5 Jun 2026 13:38:48 +0530 Subject: [PATCH 21/78] Update omnia.sh (#4684) Signed-off-by: Mithilesh Reddy --- omnia.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/omnia.sh b/omnia.sh index aa42a62656..0b04bd94c5 100755 --- a/omnia.sh +++ b/omnia.sh @@ -1299,8 +1299,6 @@ post_setup_config() { cp -r /omnia/input/* /opt/omnia/input/project_default rm -rf /omnia/input rm -rf /omnia/omnia.sh" - - init_ssh_config } validate_nfs_server() { From c679902ccc87645f3cd969d2b15d17952ac3c950 Mon Sep 17 00:00:00 2001 From: sujit-jadhav Date: Thu, 4 Jun 2026 12:05:54 +0530 Subject: [PATCH 22/78] fix(dns): propagate dns_enabled to OIM and configure resolv.conf for coresmd When dns_enabled: true in provision_config.yml, /etc/hosts was still populated on OIM and compute nodes because dns_enabled was only loaded on localhost (omnia_core container), not on the oim host where the roles actually run. Additionally, the OIM host and omnia_core container had no DNS path to coresmd for cluster hostname resolution. 
Changes: - Add dns_enabled to configs.yaml.j2 so it flows into configs_vars.yaml for cloud-init templates rendered on oim - Load provision_config.yml on oim host in provision.yml pre_tasks so slurm_config and other roles have dns_enabled available - Configure OIM /etc/resolv.conf to prepend coresmd as primary nameserver when dns_enabled=true (preserves existing DNS entries for internet connectivity); omnia_core inherits via Network=host - Guard /etc/hosts population in configure_oim_ssh.yml with dns_enabled - Proxy admin_nic_ip and domain_name from localhost to configure_ochami role vars so cloud-init DNS templates resolve correctly on oim - Override dns_forwarders with network_spec dns servers when configured - Include dns_enabled in upgrade configs_vars.yaml regeneration Fixes: OMN01D-2505 Signed-off-by: sujit-jadhav --- .../openchami/tasks/deploy_openchami.yml | 5 +++ .../openchami/templates/configs.yaml.j2 | 1 + provision/provision.yml | 43 +++++++++++++++++++ .../roles/configure_ochami/vars/main.yml | 4 ++ .../tasks/configure_oim_ssh.yml | 6 ++- .../tasks/upgrade_openchami_containers.yml | 1 + 6 files changed, 58 insertions(+), 2 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml index bebfa01c88..b7b9b64bb2 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml @@ -143,6 +143,11 @@ ansible.builtin.command: nmcli device reapply {{ admin_nic }} changed_when: true +- name: Override DNS forwarders from network_spec if configured + ansible.builtin.set_fact: + dns_forwarders: "{{ network_data.admin_network.dns }}" + when: (network_data.admin_network.dns | default([])) | length > 0 + - name: Set s3_access_id and s3_secret_key ansible.builtin.set_fact: s3_access_id: "{{ hostvars['localhost']['s3_access_id'] | default('admin', true) }}" 
diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 index a7ddc7be0d..3e13c46ff4 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 +++ b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 @@ -11,6 +11,7 @@ data_s3_dir: "{{ data_s3_dir }}" s3_work_dir: "{{ s3_work_dir }}" cluster_shortname: "nid" cluster_nidlength: 3 +dns_enabled: {{ dns_enabled | default(false) | bool }} {% set additional = network_data.admin_network.additional_subnets | default([]) %} {% if additional | length > 0 %} diff --git a/provision/provision.yml b/provision/provision.yml index e476c50835..f94f1fa594 100644 --- a/provision/provision.yml +++ b/provision/provision.yml @@ -139,6 +139,49 @@ hosts: oim connection: ssh pre_tasks: + - name: Load provision_config.yml on OIM + ansible.builtin.include_vars: + file: "{{ hostvars['localhost']['input_project_dir'] }}/provision_config.yml" + + - name: Configure OIM DNS resolution for CoreDNS (when dns_enabled) + when: dns_enabled | default(false) | bool + block: + - name: Remove immutable flag from /etc/resolv.conf if present + ansible.builtin.command: chattr -i /etc/resolv.conf + changed_when: true + failed_when: false + + - name: Prepend coresmd as primary nameserver in OIM /etc/resolv.conf + ansible.builtin.shell: | + set -o pipefail + tmpfile=$(mktemp) + + # Merge cluster domain into existing search line (preserve existing domains) + existing_search=$(grep '^search' /etc/resolv.conf 2>/dev/null | head -n1 | sed 's/^search //') + if echo " ${existing_search} " | grep -qw '{{ hostvars["localhost"]["domain_name"] }}'; then + echo "search ${existing_search}" > "$tmpfile" + else + echo "search {{ hostvars['localhost']['domain_name'] }} ${existing_search}" > "$tmpfile" + fi + + # Prepend coresmd as primary nameserver + echo "nameserver {{ hostvars['localhost']['admin_nic_ip'] }}" >> "$tmpfile" + 
+ # Keep existing nameservers and options (skip search lines and coresmd duplicate) + grep -v '^search' /etc/resolv.conf 2>/dev/null | \ + grep -Fxv "nameserver {{ hostvars['localhost']['admin_nic_ip'] }}" | \ + grep -v '^$' >> "$tmpfile" || true + + # Deduplicate while preserving order + awk '!seen[$0]++' "$tmpfile" > /etc/resolv.conf + rm -f "$tmpfile" + changed_when: true + + - name: Prevent NetworkManager from overwriting resolv.conf + ansible.builtin.command: chattr +i /etc/resolv.conf + changed_when: true + failed_when: false + - name: Provision nodes ansible.builtin.include_role: name: configure_ochami diff --git a/provision/roles/configure_ochami/vars/main.yml b/provision/roles/configure_ochami/vars/main.yml index 04de41c179..982a164460 100644 --- a/provision/roles/configure_ochami/vars/main.yml +++ b/provision/roles/configure_ochami/vars/main.yml @@ -88,6 +88,10 @@ file_mode_600: "0600" ip_timeout: 10 ip_wait_loop: 60 +# DNS-related variables (set on localhost by provision_validations, needed by cloud-init templates) +admin_nic_ip: "{{ hostvars['localhost']['admin_nic_ip'] }}" +domain_name: "{{ hostvars['localhost']['domain_name'] }}" + # Hostname lists for stack-specific SSH configs (populated by passwordless_ssh role) k8s_cluster_hostnames: "{{ hostvars['localhost']['k8s_cluster_hostnames'] | default([]) }}" slurm_cluster_hostnames: "{{ hostvars['localhost']['slurm_cluster_hostnames'] | default([]) }}" diff --git a/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml b/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml index 6c7c297724..d1635b4706 100644 --- a/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml +++ b/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml @@ -56,7 +56,7 @@ }} -- name: Manage /etc/hosts entries on OIM for Omnia cluster nodes +- name: Manage /etc/hosts entries on OIM for Omnia cluster nodes (skipped when CoreDNS is enabled) ansible.builtin.blockinfile: path: /etc/hosts create: true @@ -66,7 
+66,9 @@ {% for h in omnia_hosts_map | dict2items %} {{ h.value }} {{ h.key }} {% endfor %} - when: omnia_hosts_map | default({}) | length > 0 + when: + - omnia_hosts_map | default({}) | length > 0 + - not (hostvars['localhost']['dns_enabled'] | default(false) | bool) # - name: DEBUG configure_oim_ssh facts # ansible.builtin.debug: diff --git a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml index e2ff8bd7fa..aee7a69eae 100644 --- a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml +++ b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml @@ -283,6 +283,7 @@ coredhcp_lease_duration: "{{ default_lease_time | default('86400') }}s" cluster_shortname: "nid" cluster_nidlength: 3 + dns_enabled: {{ dns_enabled | default(false) | bool }} dest: "{{ openchami_config_vars_path }}" mode: "{{ file_permissions_644 }}" when: oim_metadata_stat.stat.exists or network_spec_stat.stat.exists From 0bdd71be8a71b2c0edae26d7b25d29a33f1e1a14 Mon Sep 17 00:00:00 2001 From: Super User Date: Fri, 5 Jun 2026 13:41:11 +0530 Subject: [PATCH 23/78] fix(dns): namespace include_vars to prevent variable collision with mount_config role The include_vars of provision_config.yml on oim host was overriding pxe_mapping_file_path from mount_config/vars/main.yml due to Ansible variable precedence (include_vars level 18 > role vars level 15). Load provision_config.yml into _provision_config_oim namespace to prevent polluting the oim host variable scope. 
--- provision/provision.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/provision/provision.yml b/provision/provision.yml index f94f1fa594..7f1af02b5b 100644 --- a/provision/provision.yml +++ b/provision/provision.yml @@ -139,12 +139,13 @@ hosts: oim connection: ssh pre_tasks: - - name: Load provision_config.yml on OIM + - name: Load dns_enabled from provision_config.yml on OIM ansible.builtin.include_vars: file: "{{ hostvars['localhost']['input_project_dir'] }}/provision_config.yml" + name: _provision_config_oim - name: Configure OIM DNS resolution for CoreDNS (when dns_enabled) - when: dns_enabled | default(false) | bool + when: _provision_config_oim.dns_enabled | default(false) | bool block: - name: Remove immutable flag from /etc/resolv.conf if present ansible.builtin.command: chattr -i /etc/resolv.conf From cb4089e601cc7ac12296c3dd01c0df9662427454 Mon Sep 17 00:00:00 2001 From: Super User Date: Fri, 5 Jun 2026 13:56:39 +0530 Subject: [PATCH 24/78] Revert "fix(dns): namespace include_vars to prevent variable collision with mount_config role" This reverts commit 0bdd71be8a71b2c0edae26d7b25d29a33f1e1a14. 
--- provision/provision.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/provision/provision.yml b/provision/provision.yml index 7f1af02b5b..f94f1fa594 100644 --- a/provision/provision.yml +++ b/provision/provision.yml @@ -139,13 +139,12 @@ hosts: oim connection: ssh pre_tasks: - - name: Load dns_enabled from provision_config.yml on OIM + - name: Load provision_config.yml on OIM ansible.builtin.include_vars: file: "{{ hostvars['localhost']['input_project_dir'] }}/provision_config.yml" - name: _provision_config_oim - name: Configure OIM DNS resolution for CoreDNS (when dns_enabled) - when: _provision_config_oim.dns_enabled | default(false) | bool + when: dns_enabled | default(false) | bool block: - name: Remove immutable flag from /etc/resolv.conf if present ansible.builtin.command: chattr -i /etc/resolv.conf From 1bd333271a76afc042baeebc9f67edc32c78165d Mon Sep 17 00:00:00 2001 From: Super User Date: Fri, 5 Jun 2026 13:56:40 +0530 Subject: [PATCH 25/78] Revert "fix(dns): propagate dns_enabled to OIM and configure resolv.conf for coresmd" This reverts commit c679902ccc87645f3cd969d2b15d17952ac3c950. 
--- .../openchami/tasks/deploy_openchami.yml | 5 --- .../openchami/templates/configs.yaml.j2 | 1 - provision/provision.yml | 43 ------------------- .../roles/configure_ochami/vars/main.yml | 4 -- .../tasks/configure_oim_ssh.yml | 6 +-- .../tasks/upgrade_openchami_containers.yml | 1 - 6 files changed, 2 insertions(+), 58 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml index b7b9b64bb2..bebfa01c88 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml @@ -143,11 +143,6 @@ ansible.builtin.command: nmcli device reapply {{ admin_nic }} changed_when: true -- name: Override DNS forwarders from network_spec if configured - ansible.builtin.set_fact: - dns_forwarders: "{{ network_data.admin_network.dns }}" - when: (network_data.admin_network.dns | default([])) | length > 0 - - name: Set s3_access_id and s3_secret_key ansible.builtin.set_fact: s3_access_id: "{{ hostvars['localhost']['s3_access_id'] | default('admin', true) }}" diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 index 3e13c46ff4..a7ddc7be0d 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 +++ b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 @@ -11,7 +11,6 @@ data_s3_dir: "{{ data_s3_dir }}" s3_work_dir: "{{ s3_work_dir }}" cluster_shortname: "nid" cluster_nidlength: 3 -dns_enabled: {{ dns_enabled | default(false) | bool }} {% set additional = network_data.admin_network.additional_subnets | default([]) %} {% if additional | length > 0 %} diff --git a/provision/provision.yml b/provision/provision.yml index f94f1fa594..e476c50835 100644 --- a/provision/provision.yml +++ b/provision/provision.yml @@ -139,49 +139,6 @@ 
hosts: oim connection: ssh pre_tasks: - - name: Load provision_config.yml on OIM - ansible.builtin.include_vars: - file: "{{ hostvars['localhost']['input_project_dir'] }}/provision_config.yml" - - - name: Configure OIM DNS resolution for CoreDNS (when dns_enabled) - when: dns_enabled | default(false) | bool - block: - - name: Remove immutable flag from /etc/resolv.conf if present - ansible.builtin.command: chattr -i /etc/resolv.conf - changed_when: true - failed_when: false - - - name: Prepend coresmd as primary nameserver in OIM /etc/resolv.conf - ansible.builtin.shell: | - set -o pipefail - tmpfile=$(mktemp) - - # Merge cluster domain into existing search line (preserve existing domains) - existing_search=$(grep '^search' /etc/resolv.conf 2>/dev/null | head -n1 | sed 's/^search //') - if echo " ${existing_search} " | grep -qw '{{ hostvars["localhost"]["domain_name"] }}'; then - echo "search ${existing_search}" > "$tmpfile" - else - echo "search {{ hostvars['localhost']['domain_name'] }} ${existing_search}" > "$tmpfile" - fi - - # Prepend coresmd as primary nameserver - echo "nameserver {{ hostvars['localhost']['admin_nic_ip'] }}" >> "$tmpfile" - - # Keep existing nameservers and options (skip search lines and coresmd duplicate) - grep -v '^search' /etc/resolv.conf 2>/dev/null | \ - grep -Fxv "nameserver {{ hostvars['localhost']['admin_nic_ip'] }}" | \ - grep -v '^$' >> "$tmpfile" || true - - # Deduplicate while preserving order - awk '!seen[$0]++' "$tmpfile" > /etc/resolv.conf - rm -f "$tmpfile" - changed_when: true - - - name: Prevent NetworkManager from overwriting resolv.conf - ansible.builtin.command: chattr +i /etc/resolv.conf - changed_when: true - failed_when: false - - name: Provision nodes ansible.builtin.include_role: name: configure_ochami diff --git a/provision/roles/configure_ochami/vars/main.yml b/provision/roles/configure_ochami/vars/main.yml index 982a164460..04de41c179 100644 --- a/provision/roles/configure_ochami/vars/main.yml +++ 
b/provision/roles/configure_ochami/vars/main.yml @@ -88,10 +88,6 @@ file_mode_600: "0600" ip_timeout: 10 ip_wait_loop: 60 -# DNS-related variables (set on localhost by provision_validations, needed by cloud-init templates) -admin_nic_ip: "{{ hostvars['localhost']['admin_nic_ip'] }}" -domain_name: "{{ hostvars['localhost']['domain_name'] }}" - # Hostname lists for stack-specific SSH configs (populated by passwordless_ssh role) k8s_cluster_hostnames: "{{ hostvars['localhost']['k8s_cluster_hostnames'] | default([]) }}" slurm_cluster_hostnames: "{{ hostvars['localhost']['slurm_cluster_hostnames'] | default([]) }}" diff --git a/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml b/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml index d1635b4706..6c7c297724 100644 --- a/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml +++ b/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml @@ -56,7 +56,7 @@ }} -- name: Manage /etc/hosts entries on OIM for Omnia cluster nodes (skipped when CoreDNS is enabled) +- name: Manage /etc/hosts entries on OIM for Omnia cluster nodes ansible.builtin.blockinfile: path: /etc/hosts create: true @@ -66,9 +66,7 @@ {% for h in omnia_hosts_map | dict2items %} {{ h.value }} {{ h.key }} {% endfor %} - when: - - omnia_hosts_map | default({}) | length > 0 - - not (hostvars['localhost']['dns_enabled'] | default(false) | bool) + when: omnia_hosts_map | default({}) | length > 0 # - name: DEBUG configure_oim_ssh facts # ansible.builtin.debug: diff --git a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml index aee7a69eae..e2ff8bd7fa 100644 --- a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml +++ b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml @@ -283,7 +283,6 @@ coredhcp_lease_duration: "{{ default_lease_time | default('86400') }}s" cluster_shortname: "nid" cluster_nidlength: 3 
- dns_enabled: {{ dns_enabled | default(false) | bool }} dest: "{{ openchami_config_vars_path }}" mode: "{{ file_permissions_644 }}" when: oim_metadata_stat.stat.exists or network_spec_stat.stat.exists From 54725351237fd54bc170fbf703edd0518576d124 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Fri, 5 Jun 2026 14:15:49 +0530 Subject: [PATCH 26/78] Update regctl path in build image flow and update input validation logic (#4685) * regctl path update Signed-off-by: Abhishek S A * prepare oim update Signed-off-by: Abhishek S A * Update validate_input.py Signed-off-by: Abhishek S A * Update main.yml Signed-off-by: Abhishek S A * Update main.yml Signed-off-by: Abhishek S A * input validation update Signed-off-by: Abhishek S A * validation fix Signed-off-by: Abhishek S A --------- Signed-off-by: Abhishek S A --- .../image_creation/tasks/build_base_image.yml | 2 +- .../tasks/build_compute_image.yml | 2 +- .../image_creation/tasks/build_base_image.yml | 2 +- .../tasks/build_compute_image.yml | 2 +- common/library/modules/validate_input.py | 9 ++-- .../roles/validate_input/tasks/main.yml | 46 +++++++++++-------- .../openchami/tasks/configs/verify_ochami.yml | 12 +++++ 7 files changed, 50 insertions(+), 25 deletions(-) diff --git a/build_image_aarch64/roles/image_creation/tasks/build_base_image.yml b/build_image_aarch64/roles/image_creation/tasks/build_base_image.yml index e3d02d229f..42cdc9fef0 100644 --- a/build_image_aarch64/roles/image_creation/tasks/build_base_image.yml +++ b/build_image_aarch64/roles/image_creation/tasks/build_base_image.yml @@ -76,7 +76,7 @@ - name: Verify the aarch64 base osimage in registry ansible.builtin.command: - cmd: "regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" + cmd: "/usr/local/bin/regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" delegate_to: "{{ aarch64_build_host }}" connection: ssh changed_when: false diff --git a/build_image_aarch64/roles/image_creation/tasks/build_compute_image.yml 
b/build_image_aarch64/roles/image_creation/tasks/build_compute_image.yml index aed32c5660..0ca77db3df 100644 --- a/build_image_aarch64/roles/image_creation/tasks/build_compute_image.yml +++ b/build_image_aarch64/roles/image_creation/tasks/build_compute_image.yml @@ -102,7 +102,7 @@ - name: Verify aarch64 compute osimages in registry ansible.builtin.command: - cmd: "regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" + cmd: "/usr/local/bin/regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" delegate_to: "{{ aarch64_build_host }}" connection: ssh changed_when: false diff --git a/build_image_x86_64/roles/image_creation/tasks/build_base_image.yml b/build_image_x86_64/roles/image_creation/tasks/build_base_image.yml index 3f3cf0e3e2..7e4bfa445e 100644 --- a/build_image_x86_64/roles/image_creation/tasks/build_base_image.yml +++ b/build_image_x86_64/roles/image_creation/tasks/build_base_image.yml @@ -66,7 +66,7 @@ - name: Verify the x86_64 base osimage in registry ansible.builtin.command: - cmd: "regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" + cmd: "/usr/local/bin/regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" changed_when: false register: verify_base_osimage diff --git a/build_image_x86_64/roles/image_creation/tasks/build_compute_image.yml b/build_image_x86_64/roles/image_creation/tasks/build_compute_image.yml index aac981ad18..84f6a2b063 100644 --- a/build_image_x86_64/roles/image_creation/tasks/build_compute_image.yml +++ b/build_image_x86_64/roles/image_creation/tasks/build_compute_image.yml @@ -94,7 +94,7 @@ - name: Verify x86_64 compute osimages in registry ansible.builtin.command: - cmd: "regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" + cmd: "/usr/local/bin/regctl repo ls {{ oim_node_name }}.{{ domain_name }}:5000" changed_when: false register: verify_compute_osimages diff --git a/common/library/modules/validate_input.py b/common/library/modules/validate_input.py index 8411816a0c..54839ee417 100644 --- 
a/common/library/modules/validate_input.py +++ b/common/library/modules/validate_input.py @@ -76,7 +76,7 @@ def validate_csv_structure(csv_file_path, logger=None): for row in reader: if len(row) != expected_columns: error_msg = ( - f"CSV ERROR: Line {line_num} has {len(row)} columns, expected {expected_columns}. " + f"CSV ERROR: {csv_file_path}: Line {line_num} has {len(row)} columns, expected {expected_columns}. " f"Missing values in CSV row. Ensure each row has the correct number of values separated by commas." ) if logger: @@ -279,6 +279,7 @@ def main(): vstatus.append(True) except ValueError as csv_error: error_bucket = error_bucket + [str(csv_error)] + validation_status["Failed"].append(csv_file_path) vstatus.append(False) if not validation_status: @@ -298,14 +299,16 @@ def main(): f"Tag(s) run: {tag_names}. ", f"Look at the logs for more details: filename={log_file_name}"] - module.exit_json(failed=not status_bool, + module.exit_json( + changed=False, + validation_failed=not status_bool, error_msg=message, log_file=log_file_name, errors=error_bucket, valid_files=list(set(validation_status['Passed'])), invalid_files=list(set(validation_status['Failed'])), tags=tag_names - ) + ) if __name__ == "__main__": diff --git a/input_validation/roles/validate_input/tasks/main.yml b/input_validation/roles/validate_input/tasks/main.yml index a4e80fb6e2..45a6c68a22 100644 --- a/input_validation/roles/validate_input/tasks/main.yml +++ b/input_validation/roles/validate_input/tasks/main.yml @@ -22,22 +22,32 @@ validation_success_msg: "{{ messages.validation_success }}" validation_error_msg: "{{ messages.validation_error }}" -- name: Validate omnia input config - block: - - name: Run validation - validate_input: - omnia_base_dir: "{{ (input_dir + '/../') | ansible.builtin.realpath }}" - project_name: "{{ project_name }}" - tag_names: "{{ input_validate_tags }}" - module_utils_path: "{{ (role_path + '/../../../common/library/module_utils/') | ansible.builtin.realpath }}" - 
csv_file_path: "{{ csv_file_path }}" - register: validation_status - when: (input_validate_tags | length) > 0 or csv_file_path is defined +- name: Run validation + validate_input: + omnia_base_dir: "{{ (input_dir + '/../') | ansible.builtin.realpath }}" + project_name: "{{ project_name }}" + tag_names: "{{ input_validate_tags }}" + module_utils_path: "{{ (role_path + '/../../../common/library/module_utils/') | ansible.builtin.realpath }}" + csv_file_path: "{{ csv_file_path }}" + register: validation_status + when: (input_validate_tags | length) > 0 or csv_file_path is defined - - name: Debug validation status - ansible.builtin.debug: - msg: "{{ validation_success_msg }}" - rescue: - - name: Failed due to validation failure - ansible.builtin.fail: - msg: "{{ validation_error_msg }}" +- name: Validation passed + ansible.builtin.debug: + msg: "{{ validation_success_msg }}" + when: validation_status.validation_failed is defined and not validation_status.validation_failed + +- name: Failed due to validation failure + ansible.builtin.fail: + msg: >- + {{ validation_error_msg }} + {% if validation_status.errors is defined and validation_status.errors | length > 0 %} + Errors: {{ validation_status.errors | join('; ') }} + {% endif %} + {% if validation_status.invalid_files is defined and validation_status.invalid_files | length > 0 %} + Invalid files: {{ validation_status.invalid_files | join(', ') }} + {% endif %} + {% if validation_status.log_file is defined %} + Log file: {{ validation_status.log_file }} + {% endif %} + when: validation_status.validation_failed is defined and validation_status.validation_failed diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/configs/verify_ochami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/configs/verify_ochami.yml index 63f1e81948..33b3bfbda0 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/configs/verify_ochami.yml +++ 
b/prepare_oim/roles/deploy_containers/openchami/tasks/configs/verify_ochami.yml @@ -47,6 +47,12 @@ retries: "{{ max_retries }}" delay: "{{ max_delay }}" changed_when: false + failed_when: false + + - name: Fail if ochami bss status check failed + ansible.builtin.fail: + msg: "ochami bss service is not running after {{ max_retries }} retries. {{ openchami_bss_status.stderr | default('') }}" + when: openchami_bss_status.rc != 0 - name: Openchami bss status output ansible.builtin.debug: @@ -60,6 +66,12 @@ retries: "{{ max_retries }}" delay: "{{ max_delay }}" changed_when: false + failed_when: false + + - name: Fail if ochami smd status check failed + ansible.builtin.fail: + msg: "ochami smd service is not running after {{ max_retries }} retries. {{ openchami_smd_status.stderr | default('') }}" + when: openchami_smd_status.rc != 0 - name: Openchami smd status output ansible.builtin.debug: From 4d227ac8acc681285d09964f62a3de0f2939fdd9 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Fri, 5 Jun 2026 16:06:22 +0530 Subject: [PATCH 27/78] VAST support retained after pre upgrade Signed-off-by: Jagadeesh N V --- upgrade/playbooks/upgrade_slurm.yml | 7 +++---- upgrade/prepare_upgrade.yml | 4 ++-- upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml | 8 ++++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/upgrade/playbooks/upgrade_slurm.yml b/upgrade/playbooks/upgrade_slurm.yml index 05ab2b069a..94861387df 100644 --- a/upgrade/playbooks/upgrade_slurm.yml +++ b/upgrade/playbooks/upgrade_slurm.yml @@ -222,10 +222,9 @@ ============================================ 1. NODE REBOOT — All Slurm/login nodes will reboot. Ensure no critical jobs are running. 2. PXE MAPPING — Do not modify Slurm node entries until upgrade completes. - 3. NFS MOUNTS — Omnia 2.1 mount points are preserved. Do not modify during upgrade. - 4. VAST STORAGE — Vast storage is not supported during upgrade. Please remove it from omnia_config.yml. - 5. 
ROLLBACK SCOPE — New NFS mounts added during upgrade will NOT be retained on rollback. - 6. POST-UPGRADE — Rollback is NOT recommended once all nodes boot with cloud-init complete. + 3. NFS MOUNTS — Omnia 2.1 mount points are preserved. Do not modify the existing mounts during upgrade. + 4. ROLLBACK SCOPE — New NFS mounts added during upgrade will NOT be retained on rollback. + 5. POST-UPGRADE — Rollback is NOT recommended once all nodes boot with cloud-init complete. - name: Read oim_metadata for oim_node_name (standalone fallback) ansible.builtin.include_vars: diff --git a/upgrade/prepare_upgrade.yml b/upgrade/prepare_upgrade.yml index 4a1679a394..a0bdc886c2 100644 --- a/upgrade/prepare_upgrade.yml +++ b/upgrade/prepare_upgrade.yml @@ -100,7 +100,7 @@ - "File 2: storage_config.yml" - " 1. nfs_client_params is replaced by 'mounts' format" - " 2. mount_params profiles (nfs_default, vast_nfs, vast_rdma, vast_tcp)" - - " 3. powervault_config (now a list with mount_point)" + - " 3. powervault_config (now takes a list with mount_point)" - " 4. swap (new swap configuration)" - " 5. s3_configurations (new S3 storage - PowerScale or MinIO)" - " 6. VAST storage support (high-performance NFS for HPC tools)" @@ -151,7 +151,7 @@ - " {{ input_project_dir }}" - "" - " 2. Enable any new features you want" - - " (telemetry, storage profiles, IB network, etc.)" + - " (telemetry, storage, VAST for slurm, IB network, etc.)" - "" - " 3. 
Verify the upgrade summary shown above" - "" diff --git a/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml b/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml index 27bb58d9c8..dd841a54eb 100644 --- a/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml +++ b/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml @@ -36,10 +36,10 @@ ansible.builtin.set_fact: slurm_nfs: "{{ storage_config.mounts | selectattr('name', 'in', filter_slurm_nfs) | list }}" -- name: Vast storage not supported during upgrade - ansible.builtin.fail: - msg: "Vast storage is not supported during upgrade. Please remove the 'vast_storage_name' from the slurm_cluster configuration in omnia_config.yml." - when: "'vast_storage_name' in omnia_config.slurm_cluster[0]" +# - name: Vast storage not supported during upgrade +# ansible.builtin.fail: +# msg: "Vast storage is not supported during upgrade. Please remove the 'vast_storage_name' from the slurm_cluster configuration in omnia_config.yml." +# when: "'vast_storage_name' in omnia_config.slurm_cluster[0]" # MOUNTED and READY - name: Install NFS client with bolt-on support From ded516dfc47fa9005a127a0f8fbbb0050f9ecbd0 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:48:38 +0530 Subject: [PATCH 28/78] Update cleanup_stale_volume_attachments.yml Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- .../cleanup_stale_volume_attachments.yml | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml b/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml index 62a6a69dea..bae7d378cf 100644 --- a/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml +++ b/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml @@ -196,6 +196,25 @@ loop_control: label: "{{ item }}" + # ── Clean up old terminated CSI pods 
(Failed/Succeeded) ──────── + - name: Delete old terminated PowerScale CSI pods + ansible.builtin.shell: + cmd: >- + kubectl --server=https://{{ first_cp_ip }}:6443 + delete pods -A + -l 'app in (isilon-controller,isilon-node)' + --field-selector=status.phase=Failed + --ignore-not-found=true; + kubectl --server=https://{{ first_cp_ip }}:6443 + delete pods -A + -l 'app in (isilon-controller,isilon-node)' + --field-selector=status.phase=Succeeded + --ignore-not-found=true + delegate_to: "{{ groups_cp_first | first }}" + changed_when: true + failed_when: false + when: "'csi-isilon.dellemc.com' in (_csi_drivers.stdout | default(''))" + # ── Wait for CSI driver pods to be ready (PowerScale) ───────── - name: Verify PowerScale CSI driver pods are Running ansible.builtin.shell: @@ -229,6 +248,25 @@ changed_when: false failed_when: false + # ── Clean up old terminated NFS provisioner pods ───────────── + - name: Delete old terminated NFS provisioner pods + ansible.builtin.shell: + cmd: >- + kubectl --server=https://{{ first_cp_ip }}:6443 + delete pods -A + -l app=nfs-subdir-external-provisioner + --field-selector=status.phase=Failed + --ignore-not-found=true; + kubectl --server=https://{{ first_cp_ip }}:6443 + delete pods -A + -l app=nfs-subdir-external-provisioner + --field-selector=status.phase=Succeeded + --ignore-not-found=true + delegate_to: "{{ groups_cp_first | first }}" + changed_when: true + failed_when: false + when: _nfs_provisioner_deploy.stdout_lines | default([]) | length > 0 + - name: Verify NFS client provisioner pod is Running ansible.builtin.shell: cmd: >- From 392839fc71ea2ed2b8ef380b006232f6798da4c6 Mon Sep 17 00:00:00 2001 From: mithileshreddy04 Date: Fri, 5 Jun 2026 17:55:20 +0530 Subject: [PATCH 29/78] Input file upgrade template fixes Signed-off-by: mithileshreddy04 --- .../tasks/post_rollback_health_check.yml | 1 - .../tasks/restore_database.yml | 1 - .../templates/local_repo_config.j2 | 12 + .../templates/omnia_config.j2 | 150 ++++------- 
.../templates/storage_config.j2 | 252 +++++++++++------- .../tasks/migrate_database.yml | 8 +- .../tasks/post_upgrade_health_check.yml | 8 +- .../tasks/pre_upgrade_health_check.yml | 3 +- 8 files changed, 228 insertions(+), 207 deletions(-) diff --git a/rollback/roles/rollback_openchami/tasks/post_rollback_health_check.yml b/rollback/roles/rollback_openchami/tasks/post_rollback_health_check.yml index 6ff5b94d23..e4073d14fe 100644 --- a/rollback/roles/rollback_openchami/tasks/post_rollback_health_check.yml +++ b/rollback/roles/rollback_openchami/tasks/post_rollback_health_check.yml @@ -198,7 +198,6 @@ - "v2.2 coresmd-coredns: not running (confirmed removed)" - "SMD API: reachable at {{ openchami_smd_endpoint }}" - "BSS API: reachable at {{ openchami_bss_endpoint }}" - - "Node count: {{ rb_node_count.stdout | default('0') | trim }}" - "openchami.target: references coresmd.service (v2.1 pattern)" - >- Cloud-init-server: {{ rb_ci_container_status.stdout | default('unknown') }} diff --git a/rollback/roles/rollback_openchami/tasks/restore_database.yml b/rollback/roles/rollback_openchami/tasks/restore_database.yml index 7bcc974354..4f5573ac09 100644 --- a/rollback/roles/rollback_openchami/tasks/restore_database.yml +++ b/rollback/roles/rollback_openchami/tasks/restore_database.yml @@ -179,7 +179,6 @@ ansible.builtin.debug: msg: - "{{ rollback_messages.restore.database_success }}" - - "Node count after restore: {{ rb_post_restore_node_count.stdout | default('0') | trim }}" rescue: - name: Database restore failed diff --git a/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 b/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 index 7daf9b941d..d7752615b8 100644 --- a/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/local_repo_config.j2 @@ -51,6 +51,7 @@ # - Do not use Jinja variables in this configuration. # - Omit SSL fields entirely if SSL is not in use. 
# - Its a mandatory field in case of slurm_custom with name as 'slurm_custom' +# - Version-specific naming (e.g., 'kubernetes-v1-35', 'cri-o-v1-35') is used only for service_k8s # # 3. user_repo_url_aarch64 #--------------------------- @@ -210,6 +211,15 @@ rhel_os_url_aarch64: rhel_subscription_repo_config_x86_64: rhel_subscription_repo_config_aarch64: # Making incorrect changes to this variable can cause omnia failure. Please edit cautiously. +# ============================================================================ +# VERSIONED REPOSITORY NAMING CONVENTION (Omnia 2.2+) +# ============================================================================ +# Starting from Omnia 2.2, repositories use versioned naming: +# - kubernetes-v- (e.g., kubernetes-v1-35) +# - cri-o-v- (e.g., cri-o-v1-35) +# Version-specific naming is used only for service_k8s components (kubernetes, cri-o) +# Other components (doca, cuda, slurm_custom) use non-versioned naming +# ============================================================================ omnia_repo_url_rhel_x86_64: - { url: "https://download.docker.com/linux/centos/10/x86_64/stable/", gpgkey: "https://download.docker.com/linux/centos/gpg", name: "docker-ce"} - { url: "https://dl.fedoraproject.org/pub/epel/10/Everything/x86_64/", gpgkey: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-10", name: "epel"} @@ -217,11 +227,13 @@ omnia_repo_url_rhel_x86_64: - { url: "https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v1.35/rpm/", gpgkey: "https://download.opensuse.org/repositories/isv:/cri-o:/stable:/v1.35/rpm/repodata/repomd.xml.key", name: "cri-o-v1-35"} - { url: "https://linux.mellanox.com/public/repo/doca/3.2.1/rhel10/x86_64/", gpgkey: "https://linux.mellanox.com/public/repo/doca/3.2.1/rhel10/x86_64/repodata/repomd.xml.key", name: "doca"} - { url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel10/x86_64/", gpgkey: 
"https://developer.download.nvidia.com/compute/cuda/repos/rhel10/x86_64/repodata/repomd.xml.key", name: "cuda"} + - { url: "https://developer.download.nvidia.com/hpc-sdk/rhel/x86_64", gpgkey: "https://developer.download.nvidia.com/hpc-sdk/rhel/RPM-GPG-KEY-NVIDIA-HPC-SDK", name: "nvidia-hpc-sdk"} omnia_repo_url_rhel_aarch64: - { url: "https://download.docker.com/linux/centos/10/aarch64/stable/", gpgkey: "https://download.docker.com/linux/centos/gpg", name: "docker-ce"} - { url: "https://dl.fedoraproject.org/pub/epel/10/Everything/aarch64/", gpgkey: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-10", name: "epel"} - { url: "https://linux.mellanox.com/public/repo/doca/3.2.1/rhel10/arm64-sbsa/", gpgkey: "https://linux.mellanox.com/public/repo/doca/3.2.1/rhel10/arm64-sbsa/repodata/repomd.xml.key", name: "doca"} - { url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel10/sbsa/", gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/rhel10/sbsa/repodata/repomd.xml.key", name: "cuda"} + - { url: "https://developer.download.nvidia.com/hpc-sdk/rhel/aarch64", gpgkey: "https://developer.download.nvidia.com/hpc-sdk/rhel/RPM-GPG-KEY-NVIDIA-HPC-SDK", name: "nvidia-hpc-sdk"} # Example: # additional_repos_x86_64: # - { url: "https://rpm.grafana.com/", gpgkey: "", name: "grafana" } diff --git a/upgrade/roles/import_input_parameters/templates/omnia_config.j2 b/upgrade/roles/import_input_parameters/templates/omnia_config.j2 index 0c86f50913..35afbe5671 100644 --- a/upgrade/roles/import_input_parameters/templates/omnia_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/omnia_config.j2 @@ -31,35 +31,42 @@ # Storage name corresponding to the VAST storage to be used by slurm cluster # This should match with exactly with a entry in storage_config.yml # The following directories will be mounted on the VAST storage: -# - /scratch -# - /tmp -# - /home -# - /apps -# - /projects +# - /scratch +# - /tmp +# - /home +# - /apps +# - /projects +# +# -> 
Centralized repository for HPC tools (UCX, OpenMPI, CUDA, benchmarks) that are +# bind-mounted to /hpc_tools on compute and login nodes. This storage must be +# accessible from OIM during provisioning to copy hpc_tools content to the cluster. +# -> Must match the 'name' field of the VAST storage entry(mounts) in storage_config.yml +# -> The VAST server must be reachable from OIM to enable hpc_tools population during +# cluster provisioning. Ensure network connectivity and mount permissions are configured. # skip_merge -# Variable indicates whether a specific configuration file path -# under config_sources should be used as-is without merging -# If skip_merge is set to true for a configuration source path, -# that configuration file will be applied directly -# without merging with defaults or existing configurations -# It accepts true and false values -# Default value is false +# Variable indicates whether a specific configuration file path +# under config_sources should be used as-is without merging +# If skip_merge is set to true for a configuration source path, +# that configuration file will be applied directly +# without merging with defaults or existing configurations +# It accepts true and false values +# Default value is false # node_discovery_mode -# Controls how hardware specifications are discovered for Slurm compute nodes -# Options: "heterogeneous" or "homogeneous" -# - heterogeneous: Discovers each node individually via iDRAC (1 call per node) -# Best for: Mixed hardware environments with different node configurations -# - homogeneous: Groups nodes by hardware type for optimized discovery -# Best for: Standardized hardware groups (grp0-grp100 in pxe_mapping_file.csv) +# Controls how hardware specifications are discovered for Slurm compute nodes +# Options: "heterogeneous" or "homogeneous" +# - heterogeneous: Discovers each node individually via iDRAC (1 call per node) +# Best for: Mixed hardware environments with different node configurations +# - 
homogeneous: Groups nodes by hardware type for optimized discovery +# Best for: Standardized hardware groups (grp0-grp100 in pxe_mapping_file.csv) # Performance: 0 iDRAC calls (with specs) or 1 call per group (without specs) -# Default value is heterogeneous +# Default value is heterogeneous # node_hardware_defaults -# Optional: Pre-define hardware specifications for homogeneous node groups -# Only used when node_discovery_mode is set to "homogeneous" -# Key: GROUP_NAME from pxe_mapping_file.csv (e.g., grp0, grp1, grp2, etc.) +# Optional: Pre-define hardware specifications for homogeneous node groups +# Only used when node_discovery_mode is set to "homogeneous" +# Key: GROUP_NAME from pxe_mapping_file.csv (e.g., grp0, grp1, grp2, etc.) # Value: Hardware specifications for all nodes in that group # - sockets: Number of CPU sockets per node (integer, minimum 1) # - cores_per_socket: Number of CPU cores per socket (integer, minimum 1) @@ -91,18 +98,6 @@ # This path can be any path inside the omnia_core container. 
# The default input path "/opt/omnia/input/project_default" # can also be used to place the custom conf files -# Example (slurm mapping): -# config_sources: -# slurm: -# SlurmctldTimeout: 60 -# SlurmdTimeout: 150 -# NodeName: -# - NodeName: node1 -# CPUs: 16 -# RealMemory: 64000 -# - NodeName: node2 -# CPUs: 16 -# RealMemory: 64000 # The conf files supported by slurm are # slurm # cgroup @@ -115,44 +110,41 @@ # oci # topology # burst_buffer -# Thes files will be written into the slurm_config directory with .conf suffix +# +# These files will be written into the slurm_config directory with .conf suffix +# config_sources: +# slurm: +# SlurmctldTimeout: 60 +# SlurmdTimeout: 150 +# NodeName: +# - NodeName: newnode1 +# CPUs: 16 +# RealMemory: 64000 +# - NodeName: newnode2 +# CPUs: 16 +# RealMemory: 64000 +# cgroup: +# CgroupPlugin: autodetect +# ConstrainCores: True +# ConstrainDevices: True +# ConstrainRAMSpace: True +# ConstrainSwapSpace: True +# +# OR +# +# config_sources: +# slurm: /opt/omnia/input/project_default/slurm.conf +# cgroup: /opt/omnia/input/project_default/cgroup.conf +# slurmdbd: /opt/omnia/input/project_default/slurmdbd.conf slurm_cluster: {% set _slurm_cluster = omnia_slurm_cluster | default([], true) %} {% if (_slurm_cluster | length) > 0 %} {% for _cluster in _slurm_cluster %} - cluster_name: {{ _cluster.cluster_name | default('') }} - # nfs_storage_name: References the NFS storage defined in storage_config.yml for Slurm controller data nfs_storage_name: {{ _cluster.nfs_storage_name | default('') }} - - # vast_storage_name: References the VAST high-performance NFS storage defined in storage_config.yml - # Purpose: Centralized repository for HPC tools (UCX, OpenMPI, CUDA, benchmarks) that are - # bind-mounted to /hpc_tools on compute and login nodes. This storage must be - # accessible from OIM during provisioning to copy hpc_tools content to the cluster. 
- # Configuration: Must match the 'name' field of the VAST storage entry in storage_config.yml - # Note: The VAST server must be reachable from OIM to enable hpc_tools population during - # cluster provisioning. Ensure network connectivity and mount permissions are configured. - # Uncomment this line to enable VAST storage for hpc_tools # vast_storage_name: "vast_storage" - # skip_merge: {{ _cluster.skip_merge | default(false) }} - - # Uncomment to enable homogeneous discovery mode - # node_discovery_mode: "homogeneous" - # Uncomment to provide hardware specs for homogeneous groups - # node_hardware_defaults: - # grp1: - # sockets: 2 - # cores_per_socket: 64 - # threads_per_core: 2 - # real_memory: 512000 - # gres: "gpu:4" - # grp2: - # sockets: 2 - # cores_per_socket: 32 - # threads_per_core: 2 - # real_memory: 256000 - {% if _cluster.config_sources is defined and (_cluster.config_sources | length > 0) %} config_sources: @@ -166,40 +158,8 @@ slurm_cluster: {{ _conf_name }}: {{ _conf_val }} {% endif %} {% endfor %} - # OR - - # config_sources: - # slurm: /path/to/custom_slurm.conf - # cgroup: /path/to/custom_cgroup.conf - # slurmdbd: /path/to/custom_slurmdbd.conf - # gres: /path/to/custom_gres.conf {% else %} - # config_sources: - # slurm: - # SlurmctldTimeout: 60 - # SlurmdTimeout: 150 - # NodeName: - # - NodeName: newnode1 - # CPUs: 16 - # RealMemory: 64000 - # - NodeName: newnode2 - # CPUs: 16 - # RealMemory: 64000 - # cgroup: - # CgroupPlugin: autodetect - # ConstrainCores: True - # ConstrainDevices: True - # ConstrainRAMSpace: True - # ConstrainSwapSpace: True - - # OR - - # config_sources: - # slurm: /path/to/custom_slurm.conf - # cgroup: /path/to/custom_cgroup.conf - # slurmdbd: /path/to/custom_slurmdbd.conf - # gres: /path/to/custom_gres.conf {% endif %} {% endfor %} {% endif %} diff --git a/upgrade/roles/import_input_parameters/templates/storage_config.j2 b/upgrade/roles/import_input_parameters/templates/storage_config.j2 index 1a0a0c617d..ba87a57b3f 
100644 --- a/upgrade/roles/import_input_parameters/templates/storage_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/storage_config.j2 @@ -16,49 +16,75 @@ # mounts # Configure mount points compatible with cloud-init mounts module. # Source must be known at boot time (NFS paths, UUIDs, local devices). -# For runtime-discovered sources (iSCSI/multipath), use powervault_config above. +# For runtime-discovered sources (iSCSI/multipath), use powervault_config below. # -# Each mount entry contains the following fields (matching /etc/fstab format): -# - name: Unique identifier for this mount entry. Required -# - source: Device or network path (e.g., /dev/sdc, UUID=xxx, 192.168.1.100:/share). Required -# - mount_point: Mount point path (e.g., /mnt, /opt/data). Required -# - fs_type: Filesystem type (e.g., ext4, xfs, nfs, nfs4, cifs, auto). Optional -# - If specified, takes PRIORITY over mount_params profile -# - mnt_opts: Mount options (e.g., defaults,noexec,nofail). Optional -# - If specified, takes PRIORITY over mount_params profile -# - dump_freq: Dump frequency (usually "0"). Optional -# - If specified, takes PRIORITY over mount_params profile -# - fsck_pass: Fsck pass number (usually "0" or "2"). Optional -# - If specified, takes PRIORITY over mount_params profile -# - mount_params: Name of a profile in mount_params section. Optional -# - Used ONLY for fields not explicitly specified in the mount entry -# - permissions: Directory ownership and mode for the mount point. Optional -# - owner: User owner (name or numeric UID, e.g., "root", "slurm", "1001"). Default: "root" -# - group: Group owner (name or numeric GID, e.g., "root", "slurm", "1001"). Default: "root" -# - mode: Octal permission string (e.g., "0755", "1777"). Default: "0755" -# - Applied via cloud-init runcmd (chown + chmod) after mount -# - node_key: cloud-init datasource variable accessible via 'cloud-init query ' (e.g., "local_hostname", "ds.meta_data.instance_data.local_ipv4"). 
Optional -# - When present, implies per-node bind mount where source path is constructed using the queried variable value -# - The variable chosen must be unique per host to ensure isolation between nodes -# - fs_type forced to "none", mnt_opts forced to "bind" (automatic) -# - Source becomes: // -# - node_mount_point: List of bind mount targets. Required when node_key is set -# - Each target gets: // -> -# - functional_group_prefix: List of functional group prefixes. Optional -# - All nodes whose role starts with any prefix get this mount -# - e.g., ["slurm"] matches slurm_control_node, slurm_node, etc. -# - If omitted (and no group), mount applies to all nodes -# - Mutually exclusive with group -# - groups: List of GROUP_NAME values from pxe_mapping_file.csv. Optional -# - Mutually exclusive with functional_group_prefix -# - e.g., groups: ["grp1", "grp2"] targets only nodes in those groups +# Mandatory fields: +# - name: Unique identifier for this mount entry. Mandatory +# Pattern: [a-zA-Z0-9_-], length 1-64 +# - source: Device or network path. Mandatory +# For NFS: server_ip:/export/path (e.g., 192.168.1.100:/export/share, nfs-server.example.com:/home) +# For local: /dev/sdc, UUID=xxx, LABEL=xxx +# For CIFS: //server/share +# Note: NFS paths must be resolvable at boot time (use IP or DNS-resolvable hostname) +# - mount_point: Absolute path for the mount point. Mandatory +# Must be an absolute path starting with / (e.g., /home, /mnt/vast, /opt/data) +# Avoid system directories (/etc, /sys, /proc, /boot, /root, /tmp) +# Common patterns: /mnt/*, /opt/*, /home, /var/lib/* +# Note: Path must be unique across all mount entries +# +# Optional fields: +# - mount_params: Named profile key from mount_params section. Optional +# Used ONLY for fields not explicitly set in the mount entry +# - fs_type: Filesystem type. Optional. 
Default: "auto" +# Allowed: auto, ext2, ext3, ext4, xfs, nfs, nfs4, cifs, tmpfs, +# cephfs, vfat, ntfs, none, fuse.s3fs +# If specified, takes PRIORITY over mount_params profile +# - mnt_opts: Mount options string (e.g., "defaults,noexec,nofail"). Optional +# If specified, takes PRIORITY over mount_params profile +# - dump_freq: Dump frequency. Optional. Default: "0". Allowed values: "0"-"2" +# If specified, takes PRIORITY over mount_params profile +# - fsck_pass: Fsck pass number. Optional. Default: "0". Allowed values: "0"-"9" +# If specified, takes PRIORITY over mount_params profile +# - mount_on_oim: Mount this filesystem on the OIM node as well. Optional. Default: false +# Ensure storage is network-accessible from OIM before enabling +# +# Node-specific bind mounts (node_key and node_mount_point are paired): +# - node_key: Per-node subdirectory isolation variable. Optional +# Allowed values: +# - "local_hostname" - hostname of the node +# - "local_ipv4" - IPv4 address of the node +# - "instance_id" - instance ID of the node from cloud-init +# Default: "local_hostname" +# When set, node_mount_point is MANDATORY +# Generates bind mounts: <mount_point>/<node_key_value>/<node_mount_point> -> <node_mount_point> +# - node_mount_point: List of bind mount target paths. Mandatory when node_key is set +# Minimum 1 entry, values must be unique absolute paths +# Each entry: <mount_point>/<node_key_value>/<node_mount_point> -> <node_mount_point> +# +# Targeting — exactly ONE of the following is required (mutually exclusive): +# - functional_group_prefix: List of functional group name prefixes. Mandatory (one of) +# MUTUALLY EXCLUSIVE with groups +# All nodes whose group name starts with any listed prefix receive this mount +# e.g., ["slurm"] matches slurm_control_node, slurm_node, etc. +# - groups: List of GROUP_NAME values from pxe_mapping_file.csv.
Mandatory (one of) +# MUTUALLY EXCLUSIVE with functional_group_prefix +# Only nodes assigned to the listed PXE groups receive this mount +# e.g., ["grp1", "grp2"] targets only nodes in those groups +# +# Permissions (optional sub-object, applied via chown + chmod after mount): +# - permissions.owner: User owner (name or numeric UID). Optional. Default: "root" +# - permissions.group: Group owner (name or numeric GID). Optional. Default: "root" +# - permissions.mode: Octal permission string (3-4 digits). Optional. Default: "0755" +# e.g., "0755", "1777" # # Example: static mount with all explicit params (no profile) +# which will be mounted on functional groups +# slurm_control_node_x86_64, slurm_node_x86_64, slurm_login_x86_64 # mounts: # Example: static mount using profile # - name: "vast_home" -# source: "192.168.1.100:/home" +# source: "192.168.1.100:/share/slurm_home" # mount_point: "/home" # mount_params: "vast_nfs" # functional_group_prefix: ["slurm"] @@ -69,22 +95,19 @@ # # Example: per-node bind mount (node_key triggers bind behavior) # - name: "scratch_isolation" -# source: "/mnt/scratch" -# mount_point: "/mnted/scratch" +# source: "192.168.1.102:/share/vast" +# mount_point: "/mnt/vast" +# fs_type: "nfs" +# mnt_opts: "nosuid,rw,sync,hard" # node_key: "local_hostname" # node_mount_point: # - /scratch # - /tmp - # functional_group_prefix: ["slurm_node"] -# # On node001 generates fstab: -# # /mnted/scratch/node001/scratch /scratch none bind 0 0 -# # /mnted/scratch/node001/tmp /tmp none bind 0 0 -# # slurm.conf: SlurmdSpoolDir=/scratch,/tmp - -# /mnt/scratch /mnted/sctratch nfs4 defaults,nofail,_netdev,x-systemd.after=cloud-init-network.service 0 0 -# /mnted/scratch/node001/var/loig/state /var/log/state none bind 0 0 +# # On node001 generates fstab: +# # /mnt/vast/node001/scratch /scratch none bind 0 0 +# # /mnt/vast/node001/tmp /tmp none bind 0 0 {% if slurm_nfs_client_params or k8s_nfs_client_params %} mounts: @@ -126,27 +149,27 @@ mounts: # 
functional_group_prefix: ["slurm_node", "login"] # -----------------------------Mount Params (Profiles)------------------------------- -# mount_params: Dictionary of named mount configuration profiles for reuse across mounts. -# Each profile is a named dictionary with optional fields: -# - fs_type: Filesystem type (nfs, nfs4, xfs, ext4, ext3, ext2, cifs, tmpfs, cephfs, vfat, ntfs, none, fuse.s3fs) -# - mnt_opts: Mount options string (comma-separated, e.g., "nosuid,rw,sync,hard,intr") -# - dump_freq: Dump frequency for fstab (usually "0"). Default: "0" -# - fsck_pass: Fsck pass number for fstab (usually "0" or "2"). Default: "0" -# - Custom fields: Any additional backend-specific metadata (e.g., vast_nfs_ip, rdma_port) +# mount_params: Named default profiles for mount configurations. +# Profiles are referenced by name from mount entries via the mount_params field. +# +# Mandatory profile fields: +# - fs_type: Default filesystem type. Mandatory +# Allowed: auto, ext2, ext3, ext4, xfs, nfs, nfs4, cifs, tmpfs, +# cephfs, vfat, ntfs, none, fuse.s3fs +# - mnt_opts: Default mount options string. Mandatory +# +# Optional profile fields: +# - dump_freq: Default dump frequency. Optional. Allowed values: "0"-"2" +# - fsck_pass: Default fsck pass number. Optional. Allowed values: "0"-"9" mount_params: - # Standard NFS mount with security and performance tuning + # Default NFS mount nfs_default: fs_type: "nfs" - mnt_opts: "nosuid,rw,sync,hard,intr" + mnt_opts: "nosuid,rw,sync,hard" dump_freq: "0" fsck_pass: "0" - # VAST NFS storage - standard TCP configuration - vast_nfs: - fs_type: "nfs" - mnt_opts: "nosuid,rw,sync,hard,intr" - # VAST NFS RDMA storage over IB - standard configuration vast_rdma: fs_type: "nfs" @@ -164,31 +187,51 @@ mount_params: # The runcmd script handles: iscsid enable, initiator name, discovery, login, # multipathd, volume_id matching, partitioning, formatting, mount, and bind mounts. 
# -# Mandatory parameters: -# - name: Unique identifier for this powervault entry. Required -# - ip: List of PowerVault controller IPv4 addresses for iSCSI target discovery. Required -# - iscsi_initiator: InitiatorName IQN for the host. Required -# - volume_id: WWN/identifier for the volume (used for multipath device matching). Required -# - mount_point: Where the discovered device gets mounted. Required - -# Optional parameters: -# - port: TCP port for iSCSI target service. Default: 3260 -# - mount_params: Named profile for fs_type/mnt_opts (read by the runcmd script). Optional -# - node_key: ds.meta_data key for per-node bind mounts (e.g., "local_hostname", "ds.meta_data.instance_data.local_ipv4"). Optional -# - When present, implies bind mount: // -> -# - fs_type forced to "none", mnt_opts forced to "bind" (automatic) -# - node_mount_point: List of bind mount targets. Required when node_key is set -# - Pattern: // -> -# - functional_group_prefix: List of functional group prefixes for node targeting. Mutually exclusive with group. -# - permissions: Directory ownership and mode for the mount point. Optional -# - owner: User owner (name or UID). Default: "root" -# - group: Group owner (name or GID). Default: "root" -# - mode: Octal permission string (e.g., "0755"). Default: "0755" +# NOTE: The groups field is NOT supported for powervault_config entries. +# Only functional_group_prefix is available for node targeting. +# +# Mandatory fields: +# - name: Unique identifier for this PowerVault entry. Mandatory +# Pattern: [a-zA-Z0-9_-], length 1-64 +# - ip: List of PowerVault controller IPv4 addresses for iSCSI discovery. Mandatory +# Minimum 1 address, values must be unique +# - iscsi_initiator: iSCSI initiator IQN for the host. Mandatory +# Pattern: iqn..: +# - volume_id: Volume WWN/identifier for multipath device matching. Mandatory +# Pattern: hex string [a-fA-F0-9]+ +# - mount_point: Absolute path where the discovered device gets mounted. 
Mandatory +# - functional_group_prefix: List of oChaMI functional group name prefixes. Mandatory +# All nodes whose group name starts with any listed prefix receive this entry +# +# Optional fields: +# - port: TCP port for iSCSI target service. Optional. Default: 3260. Range: 1-65535 +# - fs_type: Filesystem type. Optional. Default: "xfs" +# Allowed: xfs, ext4, ext3, ext2, nfs, nfs4, cifs, ntfs, auto +# If specified, takes PRIORITY over mount_params profile +# - mnt_opts: Mount options string. Optional +# If specified, takes PRIORITY over mount_params profile +# - dump_freq: Dump frequency. Optional. Default: "0". Allowed values: "0"-"2" +# - fsck_pass: Fsck pass number. Optional. Default: "0". Allowed values: "0"-"9" +# - mount_params: Named profile key from mount_params section. Optional +# +# Node-specific bind mounts (node_key and node_mount_point are paired): +# - node_key: Per-node subdirectory isolation variable. Optional +# Allowed values: "local_hostname", "local_ipv4", "instance_id" +# Default: "local_hostname" +# When set, node_mount_point is MANDATORY +# - node_mount_point: List of bind mount target paths. Mandatory when node_key is set +# Pattern: // -> +# +# Permissions (optional sub-object, applied via chown + chmod after mount): +# - permissions.owner: User owner (name or UID). Optional. Default: "root" +# - permissions.group: Group owner (name or GID). Optional. Default: "root" +# - permissions.mode: Octal permission string (3-4 digits). Optional. 
Default: "0755" # powervault_config: # # This mounts the whole powervault volume with to /mnt/slurm # # followed by bind creation of dir under /mnt/slurm # # node_key is the key in cloud-init so that its unique per host + {% set pv = storage_powervault_config | default({}, true) %} {% if pv %} powervault_config: @@ -201,12 +244,16 @@ powervault_config: iscsi_initiator: {{ pv.iscsi_initiator | default('') }} volume_id: {{ pv.volume_id | default('') }} mount_point: "/mnt/slurm" - fs_type: "xfs" - node_key: "local_hostname" - node_mount_point: - - "/var/lib/mysql" - - "/var/spool/slurm" + mount_params: "powervault_iscsi" + node_key: "local_hostname" # per_node_id,node_subdir_key + node_mount_point: # bind_paths, sub_mounts + - "/var/lib/mysql" # /mnt/slurm//var/lib/mysql + - "/var/spool/slurm" # /mnt/slurm//var/spool/slurm functional_group_prefix: ["slurm_control_node"] + permissions: + owner: "slurm" + group: "slurm" + mode: "0750" {% else %} # powervault_config: # - name: powervault1 @@ -217,23 +264,36 @@ powervault_config: # volume_id: 00c0ff4343f1f1f1001c8c4e6901000000 # # mount params # mount_point: "/mnt/slurm" -# fs_type: "xfs" -# node_key: "local_hostname" -# node_mount_point: -# - "/var/lib/mysql" -# - "/var/spool/slurm" +# mount_params: "powervault_iscsi" +# node_key: "local_hostname" # per_node_id,node_subdir_key +# node_mount_point: # bind_paths, sub_mounts +# - "/var/lib/mysql" # /mnt/slurm//var/lib/mysql +# - "/var/spool/slurm" # /mnt/slurm//var/spool/slurm # functional_group_prefix: ["slurm_control_node"] +# permissions: +# owner: "slurm" +# group: "slurm" +# mode: "0750" {% endif %} # -----------------------------Swap------------------------------------------------- # swap: Swap file configuration (list of swap configurations) -# Each swap entry contains: -# - name: Unique identifier. Required -# - filename: Path to the swap file (e.g., /swapfile). Required -# - size: Size in bytes, 'auto', or human-readable (e.g., "2G", "512M"). 
Required -# - maxsize: Max size (used with size: auto). Optional -# - functional_group_prefix: List of functional group prefixes. +# +# NOTE: The groups field is NOT supported for swap entries. +# Only functional_group_prefix is available for node targeting. +# +# Mandatory fields: +# - filename: Path to the swap file to create. Mandatory +# Pattern: /path/to/swapfile (absolute path) +# - size: Swap file size. Mandatory +# Values: "auto", a byte integer, or human-readable (e.g., "2G", "512M") +# - functional_group_prefix: List of oChaMI functional group name prefixes. Mandatory +# All nodes whose group name starts with any listed prefix receive this swap +# +# Optional fields: +# - maxsize: Maximum swap size. Optional. Used only when size is "auto" +# Format: byte integer or human-readable (e.g., "4G") # swap: # - name: "compute_swap" diff --git a/upgrade/roles/upgrade_openchami/tasks/migrate_database.yml b/upgrade/roles/upgrade_openchami/tasks/migrate_database.yml index 71e18922a2..09c343e316 100644 --- a/upgrade/roles/upgrade_openchami/tasks/migrate_database.yml +++ b/upgrade/roles/upgrade_openchami/tasks/migrate_database.yml @@ -143,7 +143,7 @@ - name: Verify node count preserved across migration ansible.builtin.debug: - verbosity: 1 + verbosity: 2 msg: >- Node count check — Pre-upgrade: {{ pre_upgrade_node_count | default('0') | trim }}, @@ -154,6 +154,7 @@ - name: Warn if node count decreased after migration ansible.builtin.debug: + verbosity: 2 msg: >- WARNING: Node count decreased from {{ pre_upgrade_node_count | default('0') | trim }} to {{ post_migration_node_count.stdout | default('0') | trim }}. 
@@ -179,11 +180,6 @@ SMD startup: {{ 'no migration failures detected' if smd_logs.stdout is not defined or ('FATAL' not in smd_logs.stdout | default('') and 'migration failed' not in (smd_logs.stdout | default('') | lower)) else 'WARNING — check podman logs smd' }} - - >- - Node count — Pre: {{ pre_upgrade_node_count | default('0') | trim }}, - Post: {{ post_migration_node_count.stdout | default('0') | trim }}{{ - ' (prepare_oim-only)' if (pre_upgrade_node_count | default('0') | trim | int) == 0 - and (post_migration_node_count.stdout | default('0') | trim | int) == 0 else '' }} - "SMD/BSS API: checked in post-upgrade health check" rescue: diff --git a/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml b/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml index df8aae5793..88c8eac4a6 100644 --- a/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml +++ b/upgrade/roles/upgrade_openchami/tasks/post_upgrade_health_check.yml @@ -151,7 +151,7 @@ - name: Compare node counts ansible.builtin.debug: - verbosity: 1 + verbosity: 2 msg: >- Node count — Pre-upgrade: {{ pre_upgrade_node_count | default('0') | trim }}, Post-upgrade: {{ post_upgrade_node_count.stdout | default('0') | trim }}. @@ -161,6 +161,7 @@ - name: Warn if node count decreased ansible.builtin.debug: + verbosity: 2 msg: >- WARNING: Node count decreased from {{ pre_upgrade_node_count | default('0') | trim }} to {{ post_upgrade_node_count.stdout | default('0') | trim }}. 
@@ -386,11 +387,6 @@ - "Legacy coresmd: removed" - "SMD API: reachable at {{ openchami_smd_endpoint }}" - "BSS API: reachable at {{ openchami_bss_endpoint }}" - - >- - Node count — Pre: {{ pre_upgrade_node_count | default('0') | trim }}, - Post: {{ post_upgrade_node_count.stdout | default('0') | trim }}{{ - ' (prepare_oim-only)' if (pre_upgrade_node_count | default('0') | trim | int) == 0 - and (post_upgrade_node_count.stdout | default('0') | trim | int) == 0 else '' }} - "Systemd deps: {{ 'verified' if openchami_deps.rc | default(1) == 0 else 'check skipped' }}" - "ochami CLI: {{ 'v' ~ ochami_version_check.stdout | default('unknown') | trim if ochami_version_check.rc | default(1) == 0 else 'not installed' }}" - "S3 buckets: {{ 'verified' if s3cfg_stat.stat.exists | default(false) else 'not configured (non-fatal)' }}" diff --git a/upgrade/roles/upgrade_openchami/tasks/pre_upgrade_health_check.yml b/upgrade/roles/upgrade_openchami/tasks/pre_upgrade_health_check.yml index 293bc49b3e..9ffe9e9b34 100644 --- a/upgrade/roles/upgrade_openchami/tasks/pre_upgrade_health_check.yml +++ b/upgrade/roles/upgrade_openchami/tasks/pre_upgrade_health_check.yml @@ -266,7 +266,7 @@ - name: Display baseline node count ansible.builtin.debug: - verbosity: 1 + verbosity: 2 msg: >- Pre-upgrade baseline node count: {{ pre_upgrade_node_count }}. 
{{ '(empty database — prepare_oim-only scenario, no nodes discovered yet)' @@ -287,7 +287,6 @@ S3/MinIO: {{ 'accessible' if (pre_s3cfg_stat.stat.exists | default(false) and 's3_unreachable' not in pre_s3_check.stdout | default('s3_unreachable')) else 'not configured (non-fatal)' }} - - "Node count: {{ pre_upgrade_node_count }}{{ ' (prepare_oim-only)' if (pre_upgrade_node_count | int) == 0 else '' }}" - "OpenCHAMI RPM: {{ current_openchami_rpm | default('not installed') }}" - "ochami CLI RPM: {{ current_ochami_rpm | default('not installed') }}" - "════════════════════════════════════════════" From 6fb344b5f5232987273bcce1626565c4743277a2 Mon Sep 17 00:00:00 2001 From: Sujit Jadhav Date: Fri, 5 Jun 2026 18:10:25 +0530 Subject: [PATCH 30/78] fix(dns): propagate dns_enabled to OIM and configure resolv.conf for coresmd (#4671) * fix(dns): propagate dns_enabled to OIM and configure resolv.conf for coresmd When dns_enabled: true in provision_config.yml, /etc/hosts was still populated on OIM and compute nodes because dns_enabled was only loaded on localhost (omnia_core container), not on the oim host where the roles actually run. Additionally, the OIM host and omnia_core container had no DNS path to coresmd for cluster hostname resolution. 
Changes: - Add dns_enabled to configs.yaml.j2 so it flows into configs_vars.yaml for cloud-init templates rendered on oim - Load provision_config.yml on oim host in provision.yml pre_tasks so slurm_config and other roles have dns_enabled available - Configure OIM /etc/resolv.conf to prepend coresmd as primary nameserver when dns_enabled=true (preserves existing DNS entries for internet connectivity); omnia_core inherits via Network=host - Guard /etc/hosts population in configure_oim_ssh.yml with dns_enabled - Proxy admin_nic_ip and domain_name from localhost to configure_ochami role vars so cloud-init DNS templates resolve correctly on oim - Override dns_forwarders with network_spec dns servers when configured - Include dns_enabled in upgrade configs_vars.yaml regeneration Fixes: OMN01D-2505 Signed-off-by: sujit-jadhav * fix(dns): namespace include_vars to prevent variable collision with mount_config role The include_vars of provision_config.yml on oim host was overriding pxe_mapping_file_path from mount_config/vars/main.yml due to Ansible variable precedence (include_vars level 18 > role vars level 15). Load provision_config.yml into _provision_config_oim namespace to prevent polluting the oim host variable scope. 
Signed-off-by: sujit-jadhav * fix(dns): configure omnia_core resolv.conf and always populate /etc/hosts - Add new play to configure omnia_core resolv.conf with CoreDNS as primary nameserver when dns_enabled is true - Remove dns_enabled guard from /etc/hosts management tasks so custom hostnames from PXE mapping are always resolvable (hybrid mode: CoreDNS for nidXXX + /etc/hosts for custom names) Signed-off-by: sujit-jadhav --------- Signed-off-by: sujit-jadhav Co-authored-by: Super User --- .../openchami/tasks/deploy_openchami.yml | 5 ++ .../openchami/templates/configs.yaml.j2 | 1 + provision/provision.yml | 77 +++++++++++++++++++ .../roles/configure_ochami/vars/main.yml | 4 + .../tasks/configure_oim_ssh.yml | 3 +- .../tasks/update_hosts.yml | 3 +- .../tasks/upgrade_openchami_containers.yml | 1 + 7 files changed, 91 insertions(+), 3 deletions(-) diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml index bebfa01c88..b7b9b64bb2 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml @@ -143,6 +143,11 @@ ansible.builtin.command: nmcli device reapply {{ admin_nic }} changed_when: true +- name: Override DNS forwarders from network_spec if configured + ansible.builtin.set_fact: + dns_forwarders: "{{ network_data.admin_network.dns }}" + when: (network_data.admin_network.dns | default([])) | length > 0 + - name: Set s3_access_id and s3_secret_key ansible.builtin.set_fact: s3_access_id: "{{ hostvars['localhost']['s3_access_id'] | default('admin', true) }}" diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 index a7ddc7be0d..3e13c46ff4 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 +++ 
b/prepare_oim/roles/deploy_containers/openchami/templates/configs.yaml.j2 @@ -11,6 +11,7 @@ data_s3_dir: "{{ data_s3_dir }}" s3_work_dir: "{{ s3_work_dir }}" cluster_shortname: "nid" cluster_nidlength: 3 +dns_enabled: {{ dns_enabled | default(false) | bool }} {% set additional = network_data.admin_network.additional_subnets | default([]) %} {% if additional | length > 0 %} diff --git a/provision/provision.yml b/provision/provision.yml index e476c50835..53868cfb68 100644 --- a/provision/provision.yml +++ b/provision/provision.yml @@ -135,10 +135,87 @@ vars: oim_node_name: "{{ hostvars['localhost']['oim_node_name'] }}" +- name: Configure omnia_core DNS resolution for CoreDNS + hosts: localhost + connection: local + tasks: + - name: Configure omnia_core resolv.conf for CoreDNS (when dns_enabled) + when: dns_enabled | default(false) | bool + block: + - name: Prepend CoreDNS as primary nameserver in omnia_core /etc/resolv.conf + ansible.builtin.shell: | + set -o pipefail + tmpfile=$(mktemp) + + # Merge cluster domain into existing search line (preserve existing domains) + existing_search=$(grep '^search' /etc/resolv.conf 2>/dev/null | head -n1 | sed 's/^search //') + if echo " ${existing_search} " | grep -qw '{{ domain_name }}'; then + echo "search ${existing_search}" > "$tmpfile" + else + echo "search {{ domain_name }} ${existing_search}" > "$tmpfile" + fi + + # Prepend CoreDNS as primary nameserver + echo "nameserver {{ admin_nic_ip }}" >> "$tmpfile" + + # Keep existing nameservers and options (skip search lines and CoreDNS duplicate) + grep -v '^search' /etc/resolv.conf 2>/dev/null | \ + grep -Fxv "nameserver {{ admin_nic_ip }}" | \ + grep -v '^$' >> "$tmpfile" || true + + # Deduplicate while preserving order + awk '!seen[$0]++' "$tmpfile" > /etc/resolv.conf + rm -f "$tmpfile" + changed_when: true + - name: Provision nodes, configure bss and cloud-init hosts: oim connection: ssh pre_tasks: + - name: Load dns_enabled from provision_config.yml on OIM + 
ansible.builtin.include_vars: + file: "{{ hostvars['localhost']['input_project_dir'] }}/provision_config.yml" + name: _provision_config_oim + + - name: Configure OIM DNS resolution for CoreDNS (when dns_enabled) + when: _provision_config_oim.dns_enabled | default(false) | bool + block: + - name: Remove immutable flag from /etc/resolv.conf if present + ansible.builtin.command: chattr -i /etc/resolv.conf + changed_when: true + failed_when: false + + - name: Prepend coresmd as primary nameserver in OIM /etc/resolv.conf + ansible.builtin.shell: | + set -o pipefail + tmpfile=$(mktemp) + + # Merge cluster domain into existing search line (preserve existing domains) + existing_search=$(grep '^search' /etc/resolv.conf 2>/dev/null | head -n1 | sed 's/^search //') + if echo " ${existing_search} " | grep -qw '{{ hostvars["localhost"]["domain_name"] }}'; then + echo "search ${existing_search}" > "$tmpfile" + else + echo "search {{ hostvars['localhost']['domain_name'] }} ${existing_search}" > "$tmpfile" + fi + + # Prepend coresmd as primary nameserver + echo "nameserver {{ hostvars['localhost']['admin_nic_ip'] }}" >> "$tmpfile" + + # Keep existing nameservers and options (skip search lines and coresmd duplicate) + grep -v '^search' /etc/resolv.conf 2>/dev/null | \ + grep -Fxv "nameserver {{ hostvars['localhost']['admin_nic_ip'] }}" | \ + grep -v '^$' >> "$tmpfile" || true + + # Deduplicate while preserving order + awk '!seen[$0]++' "$tmpfile" > /etc/resolv.conf + rm -f "$tmpfile" + changed_when: true + + - name: Prevent NetworkManager from overwriting resolv.conf + ansible.builtin.command: chattr +i /etc/resolv.conf + changed_when: true + failed_when: false + - name: Provision nodes ansible.builtin.include_role: name: configure_ochami diff --git a/provision/roles/configure_ochami/vars/main.yml b/provision/roles/configure_ochami/vars/main.yml index 04de41c179..982a164460 100644 --- a/provision/roles/configure_ochami/vars/main.yml +++ 
b/provision/roles/configure_ochami/vars/main.yml @@ -88,6 +88,10 @@ file_mode_600: "0600" ip_timeout: 10 ip_wait_loop: 60 +# DNS-related variables (set on localhost by provision_validations, needed by cloud-init templates) +admin_nic_ip: "{{ hostvars['localhost']['admin_nic_ip'] }}" +domain_name: "{{ hostvars['localhost']['domain_name'] }}" + # Hostname lists for stack-specific SSH configs (populated by passwordless_ssh role) k8s_cluster_hostnames: "{{ hostvars['localhost']['k8s_cluster_hostnames'] | default([]) }}" slurm_cluster_hostnames: "{{ hostvars['localhost']['slurm_cluster_hostnames'] | default([]) }}" diff --git a/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml b/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml index 6c7c297724..8b91e7ea07 100644 --- a/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml +++ b/provision/roles/passwordless_ssh/tasks/configure_oim_ssh.yml @@ -66,7 +66,8 @@ {% for h in omnia_hosts_map | dict2items %} {{ h.value }} {{ h.key }} {% endfor %} - when: omnia_hosts_map | default({}) | length > 0 + when: + - omnia_hosts_map | default({}) | length > 0 # - name: DEBUG configure_oim_ssh facts # ansible.builtin.debug: diff --git a/provision/roles/provision_validations/tasks/update_hosts.yml b/provision/roles/provision_validations/tasks/update_hosts.yml index 8110097cbe..3d6391413f 100644 --- a/provision/roles/provision_validations/tasks/update_hosts.yml +++ b/provision/roles/provision_validations/tasks/update_hosts.yml @@ -19,8 +19,7 @@ grep -qxF '127.0.0.1 localhost.localdomain localhost' {{ hosts_file_path }} || echo '127.0.0.1 localhost.localdomain localhost' >> {{ hosts_file_path }} changed_when: true -- name: Update OIM /etc/hosts (skipped when CoreDNS is enabled) - when: not (dns_enabled | default(false) | bool) +- name: Update /etc/hosts with PXE mapping hostnames block: - name: Remove stale entries for IPs and hostnames that are being updated ansible.builtin.shell: | diff --git 
a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml index e2ff8bd7fa..aee7a69eae 100644 --- a/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml +++ b/upgrade/roles/upgrade_openchami/tasks/upgrade_openchami_containers.yml @@ -283,6 +283,7 @@ coredhcp_lease_duration: "{{ default_lease_time | default('86400') }}s" cluster_shortname: "nid" cluster_nidlength: 3 + dns_enabled: {{ dns_enabled | default(false) | bool }} dest: "{{ openchami_config_vars_path }}" mode: "{{ file_permissions_644 }}" when: oim_metadata_stat.stat.exists or network_spec_stat.stat.exists From 71c13849c496b7452075cd69a83c4f44a006e364 Mon Sep 17 00:00:00 2001 From: mithileshreddy04 Date: Fri, 5 Jun 2026 18:21:21 +0530 Subject: [PATCH 31/78] Update omnia_config.j2 Signed-off-by: mithileshreddy04 --- upgrade/roles/import_input_parameters/templates/omnia_config.j2 | 2 -- 1 file changed, 2 deletions(-) diff --git a/upgrade/roles/import_input_parameters/templates/omnia_config.j2 b/upgrade/roles/import_input_parameters/templates/omnia_config.j2 index 35afbe5671..b1bc6048ad 100644 --- a/upgrade/roles/import_input_parameters/templates/omnia_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/omnia_config.j2 @@ -158,8 +158,6 @@ slurm_cluster: {{ _conf_name }}: {{ _conf_val }} {% endif %} {% endfor %} -{% else %} - {% endif %} {% endfor %} {% endif %} From a498c7debff10616428359f3f5f613612b7c4d03 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:22:46 +0530 Subject: [PATCH 32/78] Update slurm_backup.yml Signed-off-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> --- upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml b/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml index 
dd841a54eb..6e13c24f98 100644 --- a/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml +++ b/upgrade/roles/upgrade_slurm/tasks/slurm_backup.yml @@ -127,3 +127,11 @@ ansible.builtin.fail: msg: "SLURM: MySQL datadir not found in nfs" when: not is_mysql_datadir + +- name: Remove hpc_tools tracking files if present + ansible.builtin.file: + path: "{{ slurm_nfs_mounted_path }}{{ item }}" + state: absent + force: true + loop: "{{ hpc_tools_tracking_files }}" + failed_when: false From 789891542cf6f0d45f116ad701f22c6c61a370f6 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:23:34 +0530 Subject: [PATCH 33/78] Update main.yml Signed-off-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> --- upgrade/roles/upgrade_slurm/vars/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/upgrade/roles/upgrade_slurm/vars/main.yml b/upgrade/roles/upgrade_slurm/vars/main.yml index 1086db92fd..61a359b244 100644 --- a/upgrade/roles/upgrade_slurm/vars/main.yml +++ b/upgrade/roles/upgrade_slurm/vars/main.yml @@ -20,3 +20,6 @@ oim_metadata_path: "/opt/omnia/.data/oim_metadata.yml" backup_slurm_nfs_contents: true # To create a copy of all slurm contents on the nfs share input_project_dir: "{{ hostvars['localhost']['input_project_dir'] | default('/opt/omnia/input/project_default') }}" oim_nfs_fail_msg: "Failed to mount NFS on oim" +hpc_tools_tracking_files: + - /hpc_tools/.done_cuda + - /hpc_tools/cuda/bin/nvcc From c240b49cdef8f467c1c1f770355ea3bcfef79a94 Mon Sep 17 00:00:00 2001 From: Venu-p1 <236371043+Venu-p1@users.noreply.github.com> Date: Fri, 5 Jun 2026 22:35:22 +0530 Subject: [PATCH 34/78] Remove duplicate NFS cleanup prompt and duplicate credential utility import from upgrade workflow (#4694) Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> --- .../tasks/fresh_install.yml | 7 ---- upgrade/upgrade.yml | 38 ------------------- 2 files changed, 45 deletions(-) diff --git 
a/upgrade/roles/upgrade_build_stream/tasks/fresh_install.yml b/upgrade/roles/upgrade_build_stream/tasks/fresh_install.yml index 6f10defd45..c284fbe029 100644 --- a/upgrade/roles/upgrade_build_stream/tasks/fresh_install.yml +++ b/upgrade/roles/upgrade_build_stream/tasks/fresh_install.yml @@ -24,13 +24,6 @@ - "BuildStream was NOT enabled in 2.1 — performing fresh installation" - "This will deploy Postgres, BuildStream, watcher service, and automation" -- name: Prompt for NFS cleanup confirmation - ansible.builtin.pause: - prompt: "Have you cleaned/backed up the NFS share? Press Enter to continue or Ctrl+C to abort" - when: hostvars['localhost']['skip_approval'] is not defined or not hostvars['localhost']['skip_approval'] | bool - delegate_to: localhost - connection: local - # ══════════════════════════════════════════════════════════════════ # Step 1: Deploy Postgres via prepare_oim role # (handles: stop existing, image pull, quadlet, start, DB init) diff --git a/upgrade/upgrade.yml b/upgrade/upgrade.yml index 7d15568575..c97c234bad 100644 --- a/upgrade/upgrade.yml +++ b/upgrade/upgrade.yml @@ -288,44 +288,6 @@ vars: oim_group: true -# ────────────────────────────────────────────────────────────────────── -# Load credentials via credential utility (for build_stream and gitlab) -# Set upgrade_mode to bypass upgrade_checkup guard within credential utility -# Set dynamic run tags based on enable_build_stream flag -# ────────────────────────────────────────────────────────────────────── -- name: Set upgrade mode flag and dynamic tags for credential utility - hosts: localhost - connection: local - gather_facts: false - tags: always - tasks: - - name: Read build_stream_config.yml to check enable_build_stream flag - ansible.builtin.include_vars: - file: "{{ input_project_dir }}/build_stream_config.yml" - name: _bs_config_check - failed_when: false - - - name: Enable upgrade_mode to bypass credential utility guard - ansible.builtin.set_fact: - upgrade_mode: true - cacheable: 
true - - - name: Set dynamic run tags including build_stream and gitlab - ansible.builtin.set_fact: - omnia_run_tags: >- - {{ - ( - ansible_run_tags | default([]) | list + - ['build_stream', 'gitlab'] - ) | unique - }} - cacheable: true - when: _bs_config_check.enable_build_stream | default(false) | bool - -- name: Load Omnia credential utility - ansible.builtin.import_playbook: ../utils/credential_utility/get_config_credentials.yml - tags: [build_stream, gitlab] - # ────────────────────────────────────────────────────────────────────── # BuildStream Terminal Gate (C-24): Read build_stream_config.yml and # set build_stream_terminal fact. When enable_build_stream=true AND From 69ceb7c924b11a9147e5aa9ac3fe4d5d064f486f Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Sat, 6 Jun 2026 08:21:02 +0530 Subject: [PATCH 35/78] kernel version update support (#4692) Signed-off-by: Abhishek S A --- .../common_utils/en_us_validation_msg.py | 5 ++ .../schema/provision_config.json | 6 +++ .../validation_flows/provision_validation.py | 12 +++++ input/provision_config.yml | 6 +++ .../tasks/configure_bss_group.yml | 10 +++- .../tasks/provision_mapping_nodes.yml | 49 ++++++++++++++----- .../tasks/validate_image.yml | 14 ++++++ .../roles/provision_validations/vars/main.yml | 6 +++ .../tasks/transform_provision_config.yml | 2 + .../templates/provision_config.j2 | 6 +++ .../import_input_parameters/vars/main.yml | 2 + 11 files changed, 103 insertions(+), 15 deletions(-) diff --git a/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py b/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py index f17d9c568c..305e743809 100644 --- a/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py +++ b/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py @@ -186,6 +186,11 @@ ENABLE_SWITCH_BASED_FAIL_MSG = "enable_switch_based must be set to either true or false." 
LANGUAGE_FAIL_MSG = "Only en_US.UTF-8 language supported" LANGUAGE_EMPTY_MSG = "Language setting cannot be empty" +KERNEL_VERSION_OVERRIDE_FAIL_MSG = ( + "kernel_version_override must be either empty or a valid kernel version " + "string (e.g. '6.12.0-55.76.1.el10_0.x86_64'). " + "The format must be: ..-." +) PUBLIC_NIC_FAIL_MSG = "public_nic is empty. Please provide a public_nic value." PXE_MAPPING_FILE_PATH_FAIL_MSG = ( "File path is invalid. Please ensure the file path specified in " diff --git a/common/library/module_utils/input_validation/schema/provision_config.json b/common/library/module_utils/input_validation/schema/provision_config.json index 0f154d8870..bf313a9a94 100644 --- a/common/library/module_utils/input_validation/schema/provision_config.json +++ b/common/library/module_utils/input_validation/schema/provision_config.json @@ -21,6 +21,12 @@ "type": "boolean", "description": "Enable DNS-based hostname resolution via coresmd.", "default": false + }, + "kernel_version_override": { + "type": "string", + "description": "Optional kernel version to pin for boot image selection. 
Leave empty to auto-select latest.", + "pattern": "^(|[0-9]+\\.[0-9]+\\.[0-9]+-.+)$", + "default": "" } }, "required": [ diff --git a/common/library/module_utils/input_validation/validation_flows/provision_validation.py b/common/library/module_utils/input_validation/validation_flows/provision_validation.py index 6b7fdabcef..1a894f4749 100644 --- a/common/library/module_utils/input_validation/validation_flows/provision_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/provision_validation.py @@ -1111,6 +1111,18 @@ def validate_provision_config( en_us_validation_msg.DEFAULT_LEASE_TIME_FAIL_MSG, ) ) + + kernel_version_override = data.get("kernel_version_override", "") + if kernel_version_override: + if not re.match(r"^[0-9]+\.[0-9]+\.[0-9]+-.+$", kernel_version_override): + errors.append( + create_error_msg( + "kernel_version_override", + kernel_version_override, + en_us_validation_msg.KERNEL_VERSION_OVERRIDE_FAIL_MSG, + ) + ) + return errors def validate_network_spec( diff --git a/input/provision_config.yml b/input/provision_config.yml index 14b946ad8a..91184df878 100644 --- a/input/provision_config.yml +++ b/input/provision_config.yml @@ -46,3 +46,9 @@ default_lease_time: "86400" # The cluster domain is read from OIM metadata (domain_name). # Default: false dns_enabled: false + +#### Optional +# Pin a specific kernel version for boot image selection. +# Leave empty ("") to auto-select the latest available image from S3. 
+# Example: kernel_version_override: "6.12.0-55.76.1.el10_0.x86_64" +kernel_version_override: "" diff --git a/provision/roles/configure_ochami/tasks/configure_bss_group.yml b/provision/roles/configure_ochami/tasks/configure_bss_group.yml index 5e1a19d904..6453fb4848 100644 --- a/provision/roles/configure_ochami/tasks/configure_bss_group.yml +++ b/provision/roles/configure_ochami/tasks/configure_bss_group.yml @@ -36,7 +36,10 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}{{ compute_image_suffix }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | awk '{print $4}' | sed 's|s3://||' + {% if hostvars['localhost']['kernel_version_override'] | default('') | length > 0 %} + grep "{{ hostvars['localhost']['kernel_version_override'] }}" | \ + {% endif %} + grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false register: verify_s3_image_build_stream @@ -49,7 +52,10 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | awk '{print $4}' | sed 's|s3://||' + {% if hostvars['localhost']['kernel_version_override'] | default('') | length > 0 %} + grep "{{ hostvars['localhost']['kernel_version_override'] }}" | \ + {% endif %} + grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false register: verify_s3_image diff --git a/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml b/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml index e8180ffb61..101c3b9d3d 100644 --- a/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml +++ b/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml @@ -77,20 +77,43 @@ delay: "{{ service_retry_interval }}" until: cloud_init_server_status is success - - name: Wait for 
cloud-init-server to be ready - ansible.builtin.command: /usr/bin/ochami cloud-init service status - register: cloud_init_status - retries: "{{ service_retries }}" - delay: "{{ service_retry_interval }}" - until: cloud_init_status.rc == 0 - failed_when: false - changed_when: false + - name: Verify cloud-init-server is reachable + block: + - name: Wait for cloud-init-server to be ready + ansible.builtin.command: /usr/bin/ochami cloud-init service status + register: cloud_init_status + retries: "{{ service_retries }}" + delay: "{{ service_retry_interval }}" + until: cloud_init_status.rc == 0 + changed_when: false + rescue: + - name: Restart openchami.target to recover container networking + ansible.builtin.service: + name: openchami.target + state: restarted - - name: Fail if cloud-init-server is not running - ansible.builtin.fail: - msg: "{{ cloud_init_failed_msg }}" - when: - - cloud_init_status.rc != 0 + - name: Wait for openchami.target to be ready after restart + ansible.builtin.service: + name: openchami.target + state: started + register: openchami_target_recovery + retries: "{{ service_retries }}" + delay: "{{ service_retry_interval }}" + until: openchami_target_recovery is success + + - name: Retry cloud-init-server readiness check after openchami restart + ansible.builtin.command: /usr/bin/ochami cloud-init service status + register: cloud_init_status + retries: "{{ service_retries }}" + delay: "{{ service_retry_interval }}" + until: cloud_init_status.rc == 0 + failed_when: false + changed_when: false + + - name: Fail if cloud-init-server is still not reachable + ansible.builtin.fail: + msg: "{{ cloud_init_failed_msg }}" + when: cloud_init_status.rc != 0 - name: Check whether openchami.target is up ansible.builtin.service: diff --git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index 1bda498a73..8745ec6a06 100644 --- 
a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -53,6 +53,9 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}" | \ + {% if hostvars['localhost']['kernel_version_override'] | default('') | length > 0 %} + grep "{{ hostvars['localhost']['kernel_version_override'] }}" | \ + {% endif %} grep {{ hostvars['localhost']['cluster_os_version'] }} | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false @@ -74,6 +77,17 @@ initrd: "{{ verify_s3_image.stdout_lines | select('search', 'initramfs') | list | first }}" when: verify_s3_image.stdout_lines | length > 1 +- name: Fail if kernel override did not match any S3 image + ansible.builtin.fail: + msg: >- + {{ kernel_override_no_match_msg }} + Specified: '{{ hostvars['localhost']['kernel_version_override'] }}'. + Pattern: '{{ image_search_pattern }}'. + Verify the kernel version exists in s3://boot-images. + when: + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | length < 1 or initrd | length < 1 + - name: Fail if kernel or initrd length less than 1 ansible.builtin.fail: msg: "{{ image_missing_fail_msg.splitlines() | join(' ') }}" diff --git a/provision/roles/provision_validations/vars/main.yml b/provision/roles/provision_validations/vars/main.yml index 65aedf8f56..499f52ff2c 100644 --- a/provision/roles/provision_validations/vars/main.yml +++ b/provision/roles/provision_validations/vars/main.yml @@ -62,6 +62,12 @@ image_missing_fail_msg: | Please create the image by running the corresponding build playbook (build_image_x86_64.yml for x86_64 or build_image_aarch64.yml for aarch64) and re-run the provision.yml playbook. +kernel_override_no_match_msg: >- + Error: kernel_version_override did not match any image in S3 + for functional group '{{ functional_group_name }}'. 
+ Either set kernel_version_override to "" in provision_config.yml to auto-select the latest, + or provide a valid kernel version that exists in s3://boot-images. + # Usage: validate_telemetry.yml warning_idrac_telemetry_support_false: | "[WARNING] telemetry_sources.idrac.metrics_enabled is set to false in telemetry_config.yml. This means iDRAC telemetry will not be activated. diff --git a/upgrade/roles/import_input_parameters/tasks/transform_provision_config.yml b/upgrade/roles/import_input_parameters/tasks/transform_provision_config.yml index d39476241a..a6fae3e3b8 100644 --- a/upgrade/roles/import_input_parameters/tasks/transform_provision_config.yml +++ b/upgrade/roles/import_input_parameters/tasks/transform_provision_config.yml @@ -47,6 +47,7 @@ provision_pxe_mapping_file_path: "{{ backup_provision_config.pxe_mapping_file_path | default(provision_default_pxe_mapping_file_path) }}" provision_language: "{{ backup_provision_config.language | default(provision_default_language) }}" provision_default_lease_time: "{{ backup_provision_config.default_lease_time | default(provision_default_lease_time) }}" + provision_kernel_version_override: "{{ backup_provision_config.kernel_version_override | default(provision_default_kernel_version_override) }}" - name: Fail if pxe_mapping_file_path is missing ansible.builtin.fail: @@ -62,6 +63,7 @@ provision_pxe_mapping_file_path: "{{ provision_pxe_mapping_file_path }}" provision_language: "{{ provision_language }}" provision_default_lease_time: "{{ provision_default_lease_time }}" + provision_kernel_version_override: "{{ provision_kernel_version_override }}" - name: Validate YAML syntax of transformed provision_config.yml ansible.builtin.command: diff --git a/upgrade/roles/import_input_parameters/templates/provision_config.j2 b/upgrade/roles/import_input_parameters/templates/provision_config.j2 index ae0257216f..004f8e1359 100644 --- a/upgrade/roles/import_input_parameters/templates/provision_config.j2 +++ 
b/upgrade/roles/import_input_parameters/templates/provision_config.j2 @@ -46,3 +46,9 @@ default_lease_time: "{{ provision_default_lease_time }}" # The cluster domain is read from OIM metadata (domain_name). # Default: false dns_enabled: false + +#### Optional +# Pin a specific kernel version for boot image selection. +# Leave empty ("") to auto-select the latest available image from S3. +# Example: kernel_version_override: "6.12.0-55.76.1.el10_0.x86_64" +kernel_version_override: "{{ provision_kernel_version_override }}" diff --git a/upgrade/roles/import_input_parameters/vars/main.yml b/upgrade/roles/import_input_parameters/vars/main.yml index 4423147336..4f20a0d8ed 100644 --- a/upgrade/roles/import_input_parameters/vars/main.yml +++ b/upgrade/roles/import_input_parameters/vars/main.yml @@ -284,6 +284,7 @@ msg_provision_config_transform_summary: | Backup preserved at: {{ backup_location }}/provision_config.yml Changes: - Ensured pxe_mapping_file_path, language, and default_lease_time are present + - Added kernel_version_override (default: empty, auto-selects latest kernel) # Restore summary message for storage config transformation msg_storage_config_transform_summary: | @@ -387,6 +388,7 @@ gitlab_default_sidekiq_concurrency: 10 provision_default_pxe_mapping_file_path: "pxe_mapping_file.csv" provision_default_language: "en_US.UTF-8" provision_default_lease_time: "86400" +provision_default_kernel_version_override: "" # Network Config Defaults network_default_netmask_bits: "24" From 0cbfd24ccb1ccce36301d9d461e4466c8498db86 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Sat, 6 Jun 2026 09:54:06 +0530 Subject: [PATCH 36/78] Enhance K8s upgrade/rollback with validation and cleanup improvements (#4690) * Add VIP validation against PXE mapping file Add new validation module to check VIP conflicts with ADMIN_IPs from PXE mapping file and ensure all ADMIN_IPs are in the same subnet as VIP. 
Signed-off-by: Katakam-Rakesh * Improve VIP conflict error messages with specific source Signed-off-by: Katakam-Rakesh * Add cluster node validation for upgrade Signed-off-by: Katakam-Rakesh * Fix rollback playbook role paths using playbook_dir Signed-off-by: Katakam-Rakesh * Enhance rollback with cleanup and improved node inventory Signed-off-by: Katakam-Rakesh * Fix upgrade playbook role paths and debug messages Signed-off-by: Katakam-Rakesh * Remove duplicate NFS provisioner cleanup task Signed-off-by: Katakam-Rakesh * Fix pylint issues in vip_pxe_validation module Signed-off-by: Katakam-Rakesh * Fix function name: use validate_ipv4 instead of is_valid_ipv4 Signed-off-by: Katakam-Rakesh * Fix ansible-lint issues: trailing spaces, line length, pipefail Signed-off-by: Katakam-Rakesh * Remove hardcoded input_project_dir from cluster validation play Signed-off-by: Katakam-Rakesh * Fix cluster node validation for IP-named nodes - Cluster nodes are named by IP addresses (e.g., 10.60.0.101) - nodes.yaml has hostname -> IP mapping (e.g., kcp1 -> 10.60.0.102) - Compare nodes_yaml IPs against cluster node names (which are IPs) - Simplified kubectl query (removed InternalIP since node name IS the IP) - Added IP to hostname reverse mapping for display - Improved validation output to show IP mappings clearly Signed-off-by: Katakam-Rakesh * Fix display banners to use multi-line string format Signed-off-by: Katakam-Rakesh * Improve display banner formatting for SSH, Ready, and Pod health checks Signed-off-by: Katakam-Rakesh * Remove duplicate nodes.yaml reading from load_status.yml - load_status.yml now uses validated nodes from validate_cluster_nodes.yml - Removed redundant nodes.yaml reading that was overwriting validated nodes - Added assertion to ensure cluster validation was completed - Variables used: groups_cp_first, groups_cp, groups_worker, all_upgrade_nodes, node_ips Signed-off-by: Katakam-Rakesh * Fix load_status.yml to run validation if nodes not available 
Signed-off-by: Katakam-Rakesh * Fix display banner formatting issues Signed-off-by: Katakam-Rakesh --------- Signed-off-by: Katakam-Rakesh --- .../high_availability_validation.py | 50 +- .../validation_flows/vip_pxe_validation.py | 122 ++++ rollback/playbooks/rollback_k8s.yml | 2 +- rollback/playbooks/rollback_oim.yml | 2 +- rollback/playbooks/rollback_slurm.yml | 2 +- .../cleanup_stale_volume_attachments.yml | 1 + .../tasks/load_rollback_status.yml | 155 ++++- rollback/roles/rollback_k8s/tasks/main.yml | 3 + .../tasks/restore_bss_cloud_init.yml | 2 +- rollback/rollback.yml | 17 +- upgrade/playbooks/upgrade_k8s.yml | 183 +++--- upgrade/playbooks/upgrade_oim.yml | 2 +- upgrade/playbooks/upgrade_provision.yml | 4 +- upgrade/playbooks/upgrade_slurm.yml | 2 +- upgrade/playbooks/upgrade_telemetry.yml | 2 +- .../roles/upgrade_k8s/tasks/load_status.yml | 62 +- .../tasks/validate_cluster_nodes.yml | 600 ++++++++++++++++++ upgrade/roles/upgrade_k8s/vars/main.yml | 14 + upgrade/upgrade.yml | 17 +- 19 files changed, 1040 insertions(+), 202 deletions(-) create mode 100644 common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py create mode 100644 upgrade/roles/upgrade_k8s/tasks/validate_cluster_nodes.yml diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index 0d1c571e92..d4ce3f0a32 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -21,6 +21,10 @@ from ansible.module_utils.input_validation.common_utils import validation_utils from ansible.module_utils.input_validation.common_utils import config from ansible.module_utils.input_validation.common_utils import en_us_validation_msg +from 
ansible.module_utils.input_validation.validation_flows.vip_pxe_validation import ( + validate_vip_vs_pxe_mapping_host_ips, + validate_all_host_ips_same_subnet_as_vip +) file_names = config.files create_error_msg = validation_utils.create_error_msg @@ -291,11 +295,12 @@ def validate_vip_address( admin_network, pod_external_ip_list, admin_netmaskbits, - oim_admin_ip + oim_admin_ip, + pxe_mapping_file_path=None ): """ Validate a virtual IP address against a list of existing service node VIPs, - admin network static and dynamic ranges, and admin subnet. + admin network static and dynamic ranges, admin subnet, and PXE mapping file. Parameters: - errors (list): A list to store error messages. @@ -305,6 +310,7 @@ def validate_vip_address( - admin_network (dict): A dictionary containing admin network configuration. - admin_netmaskbits (str): The netmask bits value of the admin network. - oim_admin_ip (str): The IP address of the OIM admin interface. + - pxe_mapping_file_path (str, optional): Path to PXE mapping file for additional validation. 
Returns: - None: The function does not return any value, it only appends @@ -359,6 +365,14 @@ def validate_vip_address( ) ) + # Validate VIP against PXE mapping file (if provided) + if pxe_mapping_file_path: + # Check VIP doesn't conflict with any HOST_IP in PXE mapping + validate_vip_vs_pxe_mapping_host_ips(errors, config_type, vip_address, pxe_mapping_file_path) + + # Check all HOST_IPs are in same subnet as VIP + validate_all_host_ips_same_subnet_as_vip(errors, vip_address, pxe_mapping_file_path, admin_netmaskbits) + def validate_service_k8s_cluster_ha( errors, config_type, @@ -414,13 +428,28 @@ def validate_service_k8s_cluster_ha( vip_address = hdata.get("virtual_ip_address") # Find the intersection if vip_address: - for ip_list in (ha_node_vip_list, pxe_admin_ips, pxe_bmc_ips): - if vip_address in ip_list: - errors.append( - create_error_msg( - f"{config_type} virtual_ip_duplicate", - vip_address, - en_us_validation_msg.DUPLICATE_VIRTUAL_IP)) + if vip_address in ha_node_vip_list: + errors.append( + create_error_msg( + f"{config_type} virtual_ip_duplicate", + vip_address, + en_us_validation_msg.DUPLICATE_VIRTUAL_IP)) + if vip_address in pxe_admin_ips: + errors.append( + create_error_msg( + f"{config_type} virtual_ip_duplicate", + vip_address, + f"virtual_ip_address '{vip_address}' conflicts with an ADMIN_IP " + "in pxe_mapping_file.csv. The VIP must not match any node's " + "ADMIN_IP. Please use a different virtual IP address.")) + if vip_address in pxe_bmc_ips: + errors.append( + create_error_msg( + f"{config_type} virtual_ip_duplicate", + vip_address, + f"virtual_ip_address '{vip_address}' conflicts with a BMC_IP " + "in pxe_mapping_file.csv. The VIP must not match any node's " + "BMC_IP. 
Please use a different virtual IP address.")) validate_vip_address( errors, config_type, @@ -428,7 +457,8 @@ def validate_service_k8s_cluster_ha( admin_network, pod_external_ip_list, admin_netmaskbits, - oim_admin_ip + oim_admin_ip, + prov_cfg.get('pxe_mapping_file_path') ) diff --git a/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py b/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py new file mode 100644 index 0000000000..8866447423 --- /dev/null +++ b/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py @@ -0,0 +1,122 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=too-many-arguments,import-error,no-name-in-module +# pylint: disable=too-many-locals,too-many-positional-arguments +""" +This module contains functions for validating VIP against PXE mapping file. +""" +import csv +import os +from ansible.module_utils.input_validation.common_utils import validation_utils + +create_error_msg = validation_utils.create_error_msg + + +def extract_host_ips_from_pxe_mapping(pxe_mapping_file_path): + """ + Extract all ADMIN_IP values from PXE mapping file. 
+ + Parameters: + pxe_mapping_file_path (str): Path to the PXE mapping file + + Returns: + list: List of ADMIN_IP values (node admin IPs) + """ + host_ips = [] + + if not pxe_mapping_file_path or not os.path.isfile(pxe_mapping_file_path): + return host_ips + + try: + with open(pxe_mapping_file_path, "r", encoding="utf-8") as fh: + raw_lines = fh.readlines() + + non_comment_lines = [ + ln for ln in raw_lines + if ln.strip() and not ln.strip().startswith('#') + ] + reader = csv.DictReader(non_comment_lines) + + fieldname_map = {fn.strip().upper(): fn for fn in reader.fieldnames} + admin_ip_col = fieldname_map.get("ADMIN_IP") + + if admin_ip_col: + for row in reader: + admin_ip_value = row.get(admin_ip_col, "").strip() \ + if row.get(admin_ip_col) else "" + if admin_ip_value and \ + validation_utils.validate_ipv4(admin_ip_value): + host_ips.append(admin_ip_value) + + except (OSError, csv.Error): + # If file can't be read, return empty list + pass + + return host_ips + + +def validate_vip_vs_pxe_mapping_host_ips( + errors, config_type, vip_address, pxe_mapping_file_path): + """ + Validate that VIP doesn't conflict with any ADMIN_IP in PXE mapping file. + + Parameters: + errors (list): List to append error messages + config_type (str): Configuration type for error reporting + vip_address (str): VIP address to validate + pxe_mapping_file_path (str): Path to PXE mapping file + """ + host_ips = extract_host_ips_from_pxe_mapping(pxe_mapping_file_path) + + for host_ip in host_ips: + if vip_address == host_ip: + errors.append( + create_error_msg( + f"{config_type} virtual_ip_address", + vip_address, + "VIP cannot be the same as any ADMIN_IP in PXE " + f"mapping file. VIP {vip_address} conflicts with " + f"node ADMIN_IP {host_ip}. " + "Please use a different VIP address." 
+ ) + ) + break # Only need to report once + + +def validate_all_host_ips_same_subnet_as_vip( + errors, vip_address, pxe_mapping_file_path, admin_netmaskbits): + """ + Validate that all ADMIN_IPs in PXE mapping are in same subnet as VIP. + + Parameters: + errors (list): List to append error messages + vip_address (str): VIP address to validate against + pxe_mapping_file_path (str): Path to PXE mapping file + admin_netmaskbits (str): Netmask bits for subnet validation + """ + host_ips = extract_host_ips_from_pxe_mapping(pxe_mapping_file_path) + + for host_ip in host_ips: + if not validation_utils.is_ip_in_subnet( + vip_address, admin_netmaskbits, host_ip): + errors.append( + create_error_msg( + "ADMIN_IP subnet consistency", + host_ip, + f"Node ADMIN_IP {host_ip} must be in the same " + f"subnet as VIP {vip_address}. " + "Please ensure all ADMIN_IPs in PXE mapping file " + "are in the same subnet as the VIP." + ) + ) \ No newline at end of file diff --git a/rollback/playbooks/rollback_k8s.yml b/rollback/playbooks/rollback_k8s.yml index 2833803cfa..2708c9b658 100644 --- a/rollback/playbooks/rollback_k8s.yml +++ b/rollback/playbooks/rollback_k8s.yml @@ -143,7 +143,7 @@ block: - name: Rollback Kubernetes cluster ansible.builtin.include_role: - name: rollback_k8s + name: "{{ playbook_dir }}/../roles/rollback_k8s" - name: Display K8s rollback outcome ansible.builtin.debug: diff --git a/rollback/playbooks/rollback_oim.yml b/rollback/playbooks/rollback_oim.yml index ebb9ecc6c6..05eb34d68f 100644 --- a/rollback/playbooks/rollback_oim.yml +++ b/rollback/playbooks/rollback_oim.yml @@ -71,7 +71,7 @@ block: - name: Rollback OpenCHAMI containers and services ansible.builtin.include_role: - name: ../roles/rollback_openchami + name: "{{ playbook_dir }}/../roles/rollback_openchami" - name: Display OpenCHAMI rollback outcome ansible.builtin.debug: diff --git a/rollback/playbooks/rollback_slurm.yml b/rollback/playbooks/rollback_slurm.yml index c25e43383c..8d0bc9374c 100644 --- 
a/rollback/playbooks/rollback_slurm.yml +++ b/rollback/playbooks/rollback_slurm.yml @@ -262,7 +262,7 @@ - name: Include slurm rollback role ansible.builtin.include_role: - name: ../roles/rollback_slurm + name: "{{ playbook_dir }}/../roles/rollback_slurm" tags: slurm - name: Reboot Slurm nodes and validate services diff --git a/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml b/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml index bae7d378cf..f1dab88382 100644 --- a/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml +++ b/rollback/roles/rollback_k8s/tasks/cleanup_stale_volume_attachments.yml @@ -235,6 +235,7 @@ until: _csi_pods_check.stdout | trim | length == 0 when: "'csi-isilon.dellemc.com' in (_csi_drivers.stdout | default(''))" + # ── Verify NFS client provisioner pod is Running ────────────── - name: Check if NFS client provisioner exists ansible.builtin.command: diff --git a/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml b/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml index d0291d0102..aef3411564 100644 --- a/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml +++ b/rollback/roles/rollback_k8s/tasks/load_rollback_status.yml @@ -22,38 +22,46 @@ ansible_ssh_common_args: "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" groups: kube_vip_group -# ── Read nodes.yaml for inventory ────────────────────────────────── -- name: Read nodes.yaml - ansible.builtin.slurp: - src: "{{ nodes_yaml_path }}" - register: nodes_slurp - changed_when: false - -- name: Parse nodes.yaml - ansible.builtin.set_fact: - parsed_nodes: "{{ nodes_slurp.content | b64decode | from_yaml }}" +# ══════════════════════════════════════════════════════════════════════════════ +# Build node inventory from upgrade_status.nodes (nodes that were actually upgraded) +# This ensures rollback targets the same nodes that were part of the upgrade +# 
══════════════════════════════════════════════════════════════════════════════ + +- name: Display upgrade status nodes info + ansible.builtin.debug: + msg: "{{ nodes_info_banner }}" + vars: + nodes_info_banner: + - "════════════════════════════════════════════════════════════════════════════════" + - "[ROLLBACK] BUILDING NODE INVENTORY FROM UPGRADE STATUS" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "Using nodes from upgrade_status.yml (nodes that were part of the upgrade)" + - "Total nodes in upgrade status: {{ upgrade_status.nodes | length }}" + - "" + - "════════════════════════════════════════════════════════════════════════════════" -# ── Build node lists by role ──────────────────────────────────────── -- name: Build first control plane list +# ── Build node lists from upgrade_status.nodes ─────────────────────── +- name: Build first control plane list from upgrade_status ansible.builtin.set_fact: groups_cp_first: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp_first) - | map(attribute='name') | list }} + {{ upgrade_status.nodes | dict2items + | selectattr('value.role', 'equalto', 'control_plane_first') + | map(attribute='key') | list }} -- name: Build additional control plane list +- name: Build additional control plane list from upgrade_status ansible.builtin.set_fact: groups_cp: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp) - | map(attribute='name') | list }} + {{ upgrade_status.nodes | dict2items + | selectattr('value.role', 'equalto', 'control_plane') + | map(attribute='key') | list }} -- name: Build worker list +- name: Build worker list from upgrade_status ansible.builtin.set_fact: groups_worker: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_worker) - | map(attribute='name') | list }} + {{ upgrade_status.nodes | dict2items + | selectattr('value.role', 'equalto', 'worker') + | map(attribute='key') | list }} - name: Build all nodes 
list ansible.builtin.set_fact: @@ -61,18 +69,105 @@ all_worker_nodes: "{{ groups_worker }}" all_rollback_nodes: "{{ groups_cp_first + groups_cp + groups_worker }}" -# ── Build node IP map ─────────────────────────────────────────────── -- name: Build node name-to-IP mapping +# ── Build node IP map from upgrade_status.nodes ────────────────────── +- name: Build node name-to-IP mapping from upgrade_status ansible.builtin.set_fact: node_ips: >- {{ node_ips | default({}) | combine({ - item.name: (item.interfaces | first).ip_addrs - | selectattr('name', 'equalto', 'management') - | map(attribute='ip_addr') | first + item.key: item.value.ip }) }} - loop: "{{ parsed_nodes.nodes }}" + loop: "{{ upgrade_status.nodes | dict2items }}" + loop_control: + label: "{{ item.key }}" + +# ── Display rollback node inventory ────────────────────────────────── +- name: Display rollback node inventory + ansible.builtin.debug: + msg: "{{ inventory_banner }}" + vars: + inventory_banner: + - "════════════════════════════════════════════════════════════════════════════════" + - "[ROLLBACK] NODE INVENTORY" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "Total nodes for rollback: {{ all_rollback_nodes | length }}" + - "" + - "Control plane (first): {{ groups_cp_first | length }} node(s)" + - "{% for node in groups_cp_first %} - {{ node }} ({{ node_ips[node] }})\n{% endfor %}" + - "" + - "Control plane (additional): {{ groups_cp | length }} node(s)" + - "{% for node in groups_cp %} - {{ node }} ({{ node_ips[node] }})\n{% endfor %}" + - "" + - "Workers: {{ groups_worker | length }} node(s)" + - "{% for node in groups_worker %} - {{ node }} ({{ node_ips[node] }})\n{% endfor %}" + - "" + - "════════════════════════════════════════════════════════════════════════════════" + +# ── Validate SSH connectivity to all rollback nodes ────────────────── +- name: Display SSH connectivity check message + ansible.builtin.debug: + msg: "Checking SSH 
connectivity to {{ all_rollback_nodes | length }} rollback nodes..." + +- name: Check SSH connectivity to all rollback nodes + ansible.builtin.wait_for: + host: "{{ node_ips[item] }}" + port: 22 + timeout: 10 + state: started + loop: "{{ all_rollback_nodes }}" loop_control: - label: "{{ item.name }}" + label: "{{ item }} ({{ node_ips[item] }})" + register: ssh_check_results + ignore_errors: true + +- name: Build list of nodes with SSH failures + ansible.builtin.set_fact: + ssh_failed_nodes: >- + {{ ssh_check_results.results + | selectattr('failed', 'defined') + | selectattr('failed', 'equalto', true) + | map(attribute='item') + | list }} + +- name: Display SSH check results + ansible.builtin.debug: + msg: "{{ ssh_results_banner }}" + vars: + ssh_results_banner: + - "════════════════════════════════════════════════════════════════════════════════" + - "[SSH] CONNECTIVITY CHECK RESULTS" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "Nodes with SSH accessible: {{ all_rollback_nodes | difference(ssh_failed_nodes) | length }}" + - "Nodes with SSH failed: {{ ssh_failed_nodes | length }}" + - "{% if ssh_failed_nodes | length > 0 %} Failed nodes: {{ ssh_failed_nodes | join(', ') }}{% endif %}" + - "" + - "════════════════════════════════════════════════════════════════════════════════" + +- name: Fail if any node has SSH connectivity failure + ansible.builtin.fail: + msg: | + ════════════════════════════════════════════════════════════════════════════════ + [ERROR] SSH CONNECTIVITY FAILURE + ════════════════════════════════════════════════════════════════════════════════ + The following nodes failed SSH connectivity check: + {% for node in ssh_failed_nodes %} + - {{ node }} ({{ node_ips[node] }}) + {% endfor %} + + Cannot proceed with rollback. Please ensure: + 1. All nodes are powered on and accessible + 2. SSH service is running on all nodes + 3. Network connectivity exists between control node and target nodes + 4. 
SSH keys are properly configured + + Fix the connectivity issues and re-run the rollback. + ════════════════════════════════════════════════════════════════════════════════ + when: ssh_failed_nodes | length > 0 + +- name: Display all nodes SSH accessible + ansible.builtin.debug: + msg: "[OK] SSH connectivity verified for all {{ all_rollback_nodes | length }} rollback nodes" # ── Add all nodes to Ansible inventory ────────────────────────────── - name: Add K8s nodes to inventory diff --git a/rollback/roles/rollback_k8s/tasks/main.yml b/rollback/roles/rollback_k8s/tasks/main.yml index bb03673eb0..db660afd3a 100644 --- a/rollback/roles/rollback_k8s/tasks/main.yml +++ b/rollback/roles/rollback_k8s/tasks/main.yml @@ -67,6 +67,9 @@ k8s_config_backup_dir: "{{ k8s_client_mount_path }}/upgrade/backup/configs" # ── Resolve first CP node for delegation (avoids kube_vip dependency) ─ +# NOTE: This reads nodes.yaml ONLY to get the first CP node for initial delegation. +# The actual rollback inventory is built from upgrade_status.nodes in load_rollback_status.yml +# to ensure we rollback the same nodes that were part of the upgrade. 
- name: Read nodes.yaml for early CP resolution ansible.builtin.slurp: src: "{{ nodes_yaml_path }}" diff --git a/rollback/roles/rollback_k8s/tasks/restore_bss_cloud_init.yml b/rollback/roles/rollback_k8s/tasks/restore_bss_cloud_init.yml index 44ecab5a66..27e60765fc 100644 --- a/rollback/roles/rollback_k8s/tasks/restore_bss_cloud_init.yml +++ b/rollback/roles/rollback_k8s/tasks/restore_bss_cloud_init.yml @@ -141,7 +141,7 @@ + ([] if (groups_cp | length == 0) else [group_cp]) + [group_worker] }} - - name: "Restore BSS and cloud-init for {{ item }}" + - name: Restore BSS and cloud-init for functional groups ansible.builtin.shell: cmd: > set -o pipefail && ansible-playbook diff --git a/rollback/rollback.yml b/rollback/rollback.yml index e280609fcd..9f75363ced 100644 --- a/rollback/rollback.yml +++ b/rollback/rollback.yml @@ -95,12 +95,14 @@ - name: Abort rollback if upgrade completed successfully ansible.builtin.fail: - msg: | - The previous upgrade completed successfully (upgrade_status: completed). - Rollback after a successful upgrade is not recommended because all - components were upgraded consistently. - If you need to rollback despite successful completion: - ansible-playbook rollback/rollback.yml -e force_rollback=true + msg: + - "The previous upgrade completed successfully (upgrade_status: completed)." + - "Rollback after a successful upgrade is not recommended because all" + - "components were upgraded consistently." + - "" + - "If you need to rollback despite successful completion:" + - " cd /omnia/rollback" + - " ansible-playbook rollback.yml -e force_rollback=true" when: - upgrade_manifest is defined - upgrade_manifest.upgrade_status | default('') == 'completed' @@ -129,7 +131,8 @@ The previous rollback already completed successfully. 
Rollback ID: {{ prior_rollback.rollback_id | default('N/A') }} If you need to force a new rollback: - ansible-playbook rollback/rollback.yml -e force_rollback=true + cd /omnia/rollback + ansible-playbook rollback.yml -e force_rollback=true when: - existing_rollback.stat.exists - prior_rollback.rollback_status | default('') == 'completed' diff --git a/upgrade/playbooks/upgrade_k8s.yml b/upgrade/playbooks/upgrade_k8s.yml index 4b9427472c..a85da33da7 100644 --- a/upgrade/playbooks/upgrade_k8s.yml +++ b/upgrade/playbooks/upgrade_k8s.yml @@ -624,6 +624,44 @@ ansible.builtin.set_fact: k8s_client_mount_path_kube_vip: "{{ k8s_client_mount_path }}" +# ══════════════════════════════════════════════════════════════════════════════ +# Validate cluster nodes against nodes.yaml before proceeding +# ══════════════════════════════════════════════════════════════════════════════ +- name: "Kubernetes Upgrade - Validate cluster nodes against nodes.yaml" + hosts: localhost + connection: local + gather_facts: false + vars: + nodes_yaml_path: "/opt/omnia/openchami/workdir/nodes/nodes.yaml" + group_cp_first: "service_kube_control_plane_first_x86_64" + group_cp: "service_kube_control_plane_x86_64" + group_worker: "service_kube_node_x86_64" + kube_vip: "{{ hostvars['localhost']['kube_vip'] }}" + tasks: + - name: "Skip all tasks — service_k8s not configured, already completed, or resuming post-validation" + ansible.builtin.meta: end_play + when: > + not (hostvars['localhost']['k8s_upgrade_enabled'] | default(true)) or + (hostvars['localhost']['k8s_upgrade_skip'] | default(false)) or + (hostvars['localhost']['resume_post_validation_only'] | default(false)) + + - name: "Load upgrade role variables" + ansible.builtin.include_vars: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/vars/main.yml" + + - name: "Validate cluster nodes against nodes.yaml" + ansible.builtin.include_tasks: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/validate_cluster_nodes.yml" + + - name: "Cache validated node 
groups for subsequent plays" + ansible.builtin.set_fact: + validated_groups_cp_first: "{{ groups_cp_first }}" + validated_groups_cp: "{{ groups_cp }}" + validated_groups_worker: "{{ groups_worker }}" + validated_all_upgrade_nodes: "{{ all_upgrade_nodes }}" + validated_node_ips: "{{ node_ips }}" + validated_parsed_nodes: "{{ parsed_nodes }}" + cacheable: true + - name: "Kubernetes Upgrade - Initialize upgrade status file" hosts: localhost connection: local @@ -663,52 +701,26 @@ | selectattr('name', 'equalto', k8s_nfs_storage_name) | first).mount_point }} - - name: Read nodes.yaml - ansible.builtin.slurp: - src: "{{ nodes_yaml_path }}" - register: nodes_slurp - changed_when: false + - name: "Use validated node groups from cluster validation" + ansible.builtin.set_fact: + groups_cp_first: "{{ hostvars['localhost']['validated_groups_cp_first'] }}" + groups_cp: "{{ hostvars['localhost']['validated_groups_cp'] }}" + groups_worker: "{{ hostvars['localhost']['validated_groups_worker'] }}" + all_upgrade_nodes: "{{ hostvars['localhost']['validated_all_upgrade_nodes'] }}" + node_ips: "{{ hostvars['localhost']['validated_node_ips'] }}" + parsed_nodes: "{{ hostvars['localhost']['validated_parsed_nodes'] }}" + when: hostvars['localhost']['cluster_validation_completed'] | default(false) - - name: Parse nodes.yaml - ansible.builtin.set_fact: - parsed_nodes: "{{ nodes_slurp.content | b64decode | from_yaml }}" - - - name: Build node lists by role - ansible.builtin.set_fact: - groups_cp_first: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp_first) - | map(attribute='name') | list }} - groups_cp: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp) - | map(attribute='name') | list }} - groups_worker: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_worker) - | map(attribute='name') | list }} - all_upgrade_nodes: >- - {{ (parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp_first) - | map(attribute='name') | list) + 
- (parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp) - | map(attribute='name') | list) + - (parsed_nodes.nodes - | selectattr('group', 'equalto', group_worker) - | map(attribute='name') | list) }} - - - name: Build node IP map - ansible.builtin.set_fact: - node_ips: >- - {{ node_ips | default({}) | combine({ - item.name: (item.interfaces | first).ip_addrs - | selectattr('name', 'equalto', 'management') - | map(attribute='ip_addr') | first - }) }} - loop: "{{ parsed_nodes.nodes }}" - loop_control: - label: "{{ item.name }}" + - name: "Run cluster validation if not already done" + when: not (hostvars['localhost']['cluster_validation_completed'] | default(false)) + block: + - name: "Load upgrade role variables for validation" + ansible.builtin.include_vars: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/vars/main.yml" + + - name: "Run cluster node validation" + ansible.builtin.include_tasks: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/validate_cluster_nodes.yml" - name: Load variables from file ansible.builtin.include_vars: @@ -917,36 +929,25 @@ ansible.builtin.set_fact: k8s_nfs_storage_name: "{{ omnia_config.service_k8s_cluster[0].nfs_storage_name }}" - - name: Read nodes.yaml - ansible.builtin.slurp: - src: "{{ nodes_yaml_path }}" - register: nodes_slurp - changed_when: false - - - name: Parse nodes.yaml + - name: "Use validated node groups from cluster validation" ansible.builtin.set_fact: - parsed_nodes: "{{ nodes_slurp.content | b64decode | from_yaml }}" + groups_cp_first: "{{ hostvars['localhost']['validated_groups_cp_first'] }}" + groups_cp: "{{ hostvars['localhost']['validated_groups_cp'] }}" + groups_worker: "{{ hostvars['localhost']['validated_groups_worker'] }}" + parsed_nodes: "{{ hostvars['localhost']['validated_parsed_nodes'] }}" + node_ips: "{{ hostvars['localhost']['validated_node_ips'] }}" + when: hostvars['localhost']['cluster_validation_completed'] | default(false) - - name: Build first control plane list - 
ansible.builtin.set_fact: - groups_cp_first: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp_first) - | map(attribute='name') | list }} - - - name: Build additional control plane list - ansible.builtin.set_fact: - groups_cp: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp) - | map(attribute='name') | list }} + - name: "Run cluster validation if not already done" + when: not (hostvars['localhost']['cluster_validation_completed'] | default(false)) + block: + - name: "Load upgrade role variables for validation" + ansible.builtin.include_vars: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/vars/main.yml" - - name: Build worker list - ansible.builtin.set_fact: - groups_worker: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_worker) - | map(attribute='name') | list }} + - name: "Run cluster node validation" + ansible.builtin.include_tasks: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/validate_cluster_nodes.yml" - name: Add first control plane to inventory ansible.builtin.add_host: @@ -1440,7 +1441,7 @@ - name: Call upgrade_k8s role ansible.builtin.include_role: - name: ../roles/upgrade_k8s + name: "{{ playbook_dir }}/../roles/upgrade_k8s" - name: "Kubernetes Upgrade - Post-Validation" hosts: localhost @@ -1498,27 +1499,25 @@ ansible.builtin.set_fact: status_file: "{{ k8s_client_mount_path }}/upgrade/upgrade_status.yml" - - name: Read nodes.yaml for all_upgrade_nodes - ansible.builtin.slurp: - src: "/opt/omnia/openchami/workdir/nodes/nodes.yaml" - register: nodes_slurp - - - name: Parse nodes.yaml + - name: "Use validated node groups from cluster validation" ansible.builtin.set_fact: - parsed_nodes: "{{ nodes_slurp.content | b64decode | from_yaml }}" + all_upgrade_nodes: "{{ hostvars['localhost']['validated_all_upgrade_nodes'] }}" + when: hostvars['localhost']['cluster_validation_completed'] | default(false) - - name: Build all_upgrade_nodes list - ansible.builtin.set_fact: - all_upgrade_nodes: 
>- - {{ (parsed_nodes.nodes - | selectattr('group', 'equalto', 'service_kube_control_plane_first_x86_64') - | map(attribute='name') | list) + - (parsed_nodes.nodes - | selectattr('group', 'equalto', 'service_kube_control_plane_x86_64') - | map(attribute='name') | list) + - (parsed_nodes.nodes - | selectattr('group', 'equalto', 'service_kube_node_x86_64') - | map(attribute='name') | list) }} + - name: "Run cluster validation if not already done" + when: not (hostvars['localhost']['cluster_validation_completed'] | default(false)) + block: + - name: "Load upgrade role variables for validation" + ansible.builtin.include_vars: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/vars/main.yml" + + - name: "Run cluster node validation" + ansible.builtin.include_tasks: + file: "{{ playbook_dir }}/../roles/upgrade_k8s/tasks/validate_cluster_nodes.yml" + + - name: "Set all_upgrade_nodes from validation" + ansible.builtin.set_fact: + all_upgrade_nodes: "{{ groups_cp_first + groups_cp + groups_worker }}" - name: Get k8s_from_version from status file ansible.builtin.slurp: diff --git a/upgrade/playbooks/upgrade_oim.yml b/upgrade/playbooks/upgrade_oim.yml index 9d23f656a3..5342650777 100644 --- a/upgrade/playbooks/upgrade_oim.yml +++ b/upgrade/playbooks/upgrade_oim.yml @@ -76,7 +76,7 @@ block: - name: Upgrade OpenCHAMI containers and services ansible.builtin.include_role: - name: ../roles/upgrade_openchami + name: "{{ playbook_dir }}/../roles/upgrade_openchami" - name: Display OpenCHAMI upgrade outcome ansible.builtin.debug: diff --git a/upgrade/playbooks/upgrade_provision.yml b/upgrade/playbooks/upgrade_provision.yml index f55f377e34..99d9b29a5a 100644 --- a/upgrade/playbooks/upgrade_provision.yml +++ b/upgrade/playbooks/upgrade_provision.yml @@ -153,13 +153,13 @@ - name: Include required input to resolve kube_vip ansible.builtin.include_role: - name: ../roles/upgrade_telemetry + name: "{{ playbook_dir }}/../roles/upgrade_telemetry" tasks_from: include_required_input.yml when: 
k8s_configured | default(false) | bool - name: Backup telemetry scripts before provisioning target version ansible.builtin.include_role: - name: ../roles/upgrade_telemetry + name: "{{ playbook_dir }}/../roles/upgrade_telemetry" tasks_from: backup_telemetry.yml when: k8s_configured | default(false) | bool diff --git a/upgrade/playbooks/upgrade_slurm.yml b/upgrade/playbooks/upgrade_slurm.yml index 94861387df..538892c5e7 100644 --- a/upgrade/playbooks/upgrade_slurm.yml +++ b/upgrade/playbooks/upgrade_slurm.yml @@ -246,7 +246,7 @@ - name: Include slurm upgrade role ansible.builtin.include_role: - name: ../roles/upgrade_slurm + name: "{{ playbook_dir }}/../roles/upgrade_slurm" tags: slurm - name: Update cloud-init and BSS for slurm diff --git a/upgrade/playbooks/upgrade_telemetry.yml b/upgrade/playbooks/upgrade_telemetry.yml index fd21e71f78..46fa8af052 100644 --- a/upgrade/playbooks/upgrade_telemetry.yml +++ b/upgrade/playbooks/upgrade_telemetry.yml @@ -137,7 +137,7 @@ block: - name: Invoke upgrade_telemetry role ansible.builtin.include_role: - name: ../roles/upgrade_telemetry + name: "{{ playbook_dir }}/../roles/upgrade_telemetry" - name: Mark telemetry upgrade as completed ansible.builtin.copy: diff --git a/upgrade/roles/upgrade_k8s/tasks/load_status.yml b/upgrade/roles/upgrade_k8s/tasks/load_status.yml index d579bd47a6..a637eaa69d 100644 --- a/upgrade/roles/upgrade_k8s/tasks/load_status.yml +++ b/upgrade/roles/upgrade_k8s/tasks/load_status.yml @@ -42,55 +42,23 @@ ansible_ssh_common_args: "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" groups: kube_vip_group -# ── Read nodes.yaml for inventory ────────────────────────────────── -- name: Read nodes.yaml - ansible.builtin.slurp: - src: "{{ nodes_yaml_path }}" - register: nodes_slurp - changed_when: false +# ══════════════════════════════════════════════════════════════════════════════ +# Use validated nodes if available, otherwise run validation +# 
══════════════════════════════════════════════════════════════════════════════ -- name: Parse nodes.yaml - ansible.builtin.set_fact: - parsed_nodes: "{{ nodes_slurp.content | b64decode | from_yaml }}" - -# ── Build node lists by role ──────────────────────────────────────── -- name: Build first control plane list - ansible.builtin.set_fact: - groups_cp_first: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp_first) - | map(attribute='name') | list }} - -- name: Build additional control plane list - ansible.builtin.set_fact: - groups_cp: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_cp) - | map(attribute='name') | list }} - -- name: Build worker list - ansible.builtin.set_fact: - groups_worker: >- - {{ parsed_nodes.nodes - | selectattr('group', 'equalto', group_worker) - | map(attribute='name') | list }} - -- name: Build all nodes list - ansible.builtin.set_fact: - all_upgrade_nodes: "{{ groups_cp_first + groups_cp + groups_worker }}" +- name: Check if validated nodes are available + ansible.builtin.debug: + msg: "Using {{ all_upgrade_nodes | length }} validated nodes" + when: + - all_upgrade_nodes is defined + - all_upgrade_nodes | length > 0 + - node_ips is defined -# ── Build node IP map ─────────────────────────────────────────────── -- name: Build node name-to-IP mapping - ansible.builtin.set_fact: - node_ips: >- - {{ node_ips | default({}) | combine({ - item.name: (item.interfaces | first).ip_addrs - | selectattr('name', 'equalto', 'management') - | map(attribute='ip_addr') | first - }) }} - loop: "{{ parsed_nodes.nodes }}" - loop_control: - label: "{{ item.name }}" +- name: Run cluster validation if nodes not available + when: all_upgrade_nodes is not defined or (all_upgrade_nodes | length == 0) or node_ips is not defined + block: + - name: "Run cluster node validation" + ansible.builtin.include_tasks: validate_cluster_nodes.yml # ── Add all nodes to Ansible inventory ────────────────────────────── - name: Add K8s 
nodes to inventory diff --git a/upgrade/roles/upgrade_k8s/tasks/validate_cluster_nodes.yml b/upgrade/roles/upgrade_k8s/tasks/validate_cluster_nodes.yml new file mode 100644 index 0000000000..af98d72a13 --- /dev/null +++ b/upgrade/roles/upgrade_k8s/tasks/validate_cluster_nodes.yml @@ -0,0 +1,600 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# Validate nodes.yaml against actual cluster nodes and build validated inventory +# This task file: +# 1. Reads nodes.yaml to get expected nodes +# 2. Queries the cluster to get actual nodes +# 3. Compares and shows discrepancies +# 4. Prompts user for confirmation +# 5. 
Builds inventory from actual cluster nodes only + +# ── Read nodes.yaml for expected nodes ────────────────────────────── +- name: Read nodes.yaml + ansible.builtin.slurp: + src: "{{ nodes_yaml_path }}" + register: nodes_slurp + changed_when: false + +- name: Parse nodes.yaml + ansible.builtin.set_fact: + parsed_nodes: "{{ nodes_slurp.content | b64decode | from_yaml }}" + +# ── Build expected node lists from nodes.yaml ──────────────────────── +- name: Build expected node lists from nodes.yaml + ansible.builtin.set_fact: + expected_cp_first: >- + {{ parsed_nodes.nodes + | selectattr('group', 'equalto', group_cp_first) + | map(attribute='name') | list }} + expected_cp: >- + {{ parsed_nodes.nodes + | selectattr('group', 'equalto', group_cp) + | map(attribute='name') | list }} + expected_workers: >- + {{ parsed_nodes.nodes + | selectattr('group', 'equalto', group_worker) + | map(attribute='name') | list }} + +- name: Build all expected nodes list + ansible.builtin.set_fact: + all_expected_nodes: "{{ expected_cp_first + expected_cp + expected_workers }}" + +# ── Build node IP map from nodes.yaml ──────────────────────────────── +- name: Build node name-to-IP mapping from nodes.yaml + ansible.builtin.set_fact: + nodes_yaml_ips: >- + {{ nodes_yaml_ips | default({}) | combine({ + item.name: (item.interfaces | first).ip_addrs + | selectattr('name', 'equalto', 'management') + | map(attribute='ip_addr') | first + }) }} + loop: "{{ parsed_nodes.nodes }}" + loop_control: + label: "{{ item.name }}" + +# ── Query actual cluster nodes ─────────────────────────────────────── +# Cluster nodes are typically named by their IP addresses (e.g., 10.60.0.101) +- name: Get actual cluster nodes with roles + ansible.builtin.command: + cmd: >- + kubectl get nodes + -o jsonpath='{range .items[*]}{.metadata.name}|{.metadata.labels.node-role\.kubernetes\.io/control-plane}{"\n"}{end}' + delegate_to: "{{ kube_vip }}" + register: cluster_nodes_raw + changed_when: false + +- name: Parse cluster 
nodes + ansible.builtin.set_fact: + cluster_nodes_parsed: >- + {{ cluster_nodes_parsed | default([]) + [{ + 'name': item.split('|')[0], + 'is_control_plane': (item.split('|')[1] | default('') | length > 0) + }] }} + loop: "{{ cluster_nodes_raw.stdout_lines | select('match', '.+') | list }}" + loop_control: + label: "{{ item.split('|')[0] }}" + +- name: Build cluster node lists by role + ansible.builtin.set_fact: + cluster_control_planes: >- + {{ cluster_nodes_parsed | selectattr('is_control_plane', 'equalto', true) | map(attribute='name') | list }} + cluster_workers: >- + {{ cluster_nodes_parsed | selectattr('is_control_plane', 'equalto', false) | map(attribute='name') | list }} + cluster_all_nodes: >- + {{ cluster_nodes_parsed | map(attribute='name') | list }} + +# ══════════════════════════════════════════════════════════════════════════════ +# Compare nodes.yaml with cluster nodes +# ══════════════════════════════════════════════════════════════════════════════ +# nodes.yaml has: hostname -> IP mapping (e.g., kcp1 -> 10.60.0.102) +# Cluster has: node names which ARE the IPs (e.g., 10.60.0.102) +# So we compare nodes_yaml IPs against cluster node names (which are IPs) + +- name: Build IP list from nodes.yaml + ansible.builtin.set_fact: + nodes_yaml_ip_list: "{{ nodes_yaml_ips.values() | list }}" + +# cluster_all_nodes contains IPs (since nodes are named by IP) +- name: Build IP list from cluster (node names are IPs) + ansible.builtin.set_fact: + cluster_ip_list: "{{ cluster_all_nodes }}" + +# Find nodes.yaml entries whose IP is NOT in the cluster +- name: Find nodes in nodes.yaml but NOT in cluster (by IP) + ansible.builtin.set_fact: + nodes_only_in_yaml: >- + {{ nodes_yaml_ips | dict2items + | rejectattr('value', 'in', cluster_ip_list) + | map(attribute='key') | list }} + +# Find cluster nodes (IPs) that are NOT in nodes.yaml +- name: Find nodes in cluster but NOT in nodes.yaml (by IP) + ansible.builtin.set_fact: + nodes_only_in_cluster: >- + {{ 
cluster_all_nodes | reject('in', nodes_yaml_ip_list) | list }} + +# Find nodes.yaml entries whose IP IS in the cluster +- name: Find nodes present in both (by IP) - get nodes.yaml names + ansible.builtin.set_fact: + nodes_in_both: >- + {{ nodes_yaml_ips | dict2items + | selectattr('value', 'in', cluster_ip_list) + | map(attribute='key') | list }} + +# Build reverse mapping: cluster IP -> nodes.yaml hostname (for display) +- name: Build IP to hostname mapping + ansible.builtin.set_fact: + ip_to_hostname: >- + {{ ip_to_hostname | default({}) | combine({ + item.value: item.key + }) }} + loop: "{{ nodes_yaml_ips | dict2items }}" + loop_control: + label: "{{ item.key }}" + +- name: Determine if there are discrepancies + ansible.builtin.set_fact: + has_discrepancies: "{{ (nodes_only_in_yaml | length > 0) or (nodes_only_in_cluster | length > 0) }}" + +# ── Build validation display lines ─────────────────────────────────── +- name: Build IP mapping display lines + ansible.builtin.set_fact: + ip_mapping_lines: [] + +- name: Add IP mapping lines + ansible.builtin.set_fact: + ip_mapping_lines: "{{ ip_mapping_lines + [' - ' + item + ': ' + nodes_yaml_ips[item]] }}" + loop: "{{ all_expected_nodes }}" + +- name: Build matched nodes display lines + ansible.builtin.set_fact: + matched_nodes_lines: [] + +- name: Add matched nodes lines + ansible.builtin.set_fact: + matched_nodes_lines: "{{ matched_nodes_lines + [' - ' + item + ' -> ' + nodes_yaml_ips[item]] }}" + loop: "{{ nodes_in_both }}" + +# ── Display validation results ─────────────────────────────────────── +- name: Display cluster node validation results + ansible.builtin.debug: + msg: "{{ validation_banner }}" + vars: + validation_banner: >- + {{ + ['════════════════════════════════════════════════════════════════════════════════', + '[UPGRADE] CLUSTER NODE VALIDATION', + '════════════════════════════════════════════════════════════════════════════════', + '', + 'Expected nodes from nodes.yaml: ' ~ (all_expected_nodes | 
length), + ' - Control plane (first): ' ~ (expected_cp_first | join(', ') | default('none', true)), + ' - Control plane (additional): ' ~ (expected_cp | join(', ') | default('none', true)), + ' - Workers: ' ~ (expected_workers | join(', ') | default('none', true)), + '', + 'nodes.yaml IP mappings:'] + + ip_mapping_lines + + ['Actual nodes in cluster (by IP): ' ~ (cluster_all_nodes | length), + ' - Cluster IPs: ' ~ (cluster_all_nodes | join(', ') | default('none', true)), + '', + 'Matched nodes (nodes.yaml hostname -> cluster IP): ' ~ (nodes_in_both | length)] + + matched_nodes_lines + + ['════════════════════════════════════════════════════════════════════════════════'] + }} + +- name: Display discrepancy warning + ansible.builtin.debug: + msg: "{{ discrepancy_banner }}" + vars: + discrepancy_banner: + - "════════════════════════════════════════════════════════════════════════════════" + - "[WARNING] NODE DISCREPANCIES DETECTED" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "Nodes in nodes.yaml but NOT in cluster (will be SKIPPED):" + - " {{ nodes_only_in_yaml | join(', ') | default('none', true) }}" + - "" + - "Nodes in cluster but NOT in nodes.yaml (will be SKIPPED - no IP mapping):" + - " {{ nodes_only_in_cluster | join(', ') | default('none', true) }}" + - "" + - "Nodes that will be upgraded (present in both):" + - " {{ nodes_in_both | join(', ') }}" + - "" + - "════════════════════════════════════════════════════════════════════════════════" + when: has_discrepancies + +- name: Display no discrepancies message + ansible.builtin.debug: + msg: + - "════════════════════════════════════════════════════════════════════════════════" + - "[OK] All nodes in nodes.yaml are present in the cluster" + - "════════════════════════════════════════════════════════════════════════════════" + when: not has_discrepancies + +# ── Build validated inventory (only nodes whose IP is in cluster) ──── +- name: Build validated control plane first
list + ansible.builtin.set_fact: + validated_cp_first: >- + {{ expected_cp_first | select('in', nodes_in_both) | list }} + +- name: Build validated additional control plane list + ansible.builtin.set_fact: + validated_cp: >- + {{ expected_cp | select('in', nodes_in_both) | list }} + +- name: Build validated worker list + ansible.builtin.set_fact: + validated_workers: >- + {{ expected_workers | select('in', nodes_in_both) | list }} + +- name: Build all validated nodes list + ansible.builtin.set_fact: + all_validated_nodes: "{{ validated_cp_first + validated_cp + validated_workers }}" + +# ── Validate SSH connectivity to all validated nodes ───────────────── +- name: Display SSH connectivity check message + ansible.builtin.debug: + msg: "Checking SSH connectivity to {{ all_validated_nodes | length }} validated nodes..." + +- name: Check SSH connectivity to all validated nodes + ansible.builtin.wait_for: + host: "{{ nodes_yaml_ips[item] }}" + port: 22 + timeout: 10 + state: started + loop: "{{ all_validated_nodes }}" + loop_control: + label: "{{ item }} ({{ nodes_yaml_ips[item] }})" + register: ssh_check_results + ignore_errors: true + +- name: Build list of nodes with SSH failures + ansible.builtin.set_fact: + ssh_failed_nodes: >- + {{ ssh_check_results.results + | selectattr('failed', 'defined') + | selectattr('failed', 'equalto', true) + | map(attribute='item') + | list }} + +- name: Display SSH check results + ansible.builtin.debug: + msg: + - "════════════════════════════════════════════════════════════════════════════════" + - "[SSH] CONNECTIVITY CHECK RESULTS" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "Nodes with SSH accessible: {{ all_validated_nodes | difference(ssh_failed_nodes) | length }}" + - "Nodes with SSH failed: {{ ssh_failed_nodes | length }}" + - "{{ 'Failed nodes: ' + (ssh_failed_nodes | join(', ')) if ssh_failed_nodes | length > 0 else ' (all nodes accessible)' }}" + - 
"════════════════════════════════════════════════════════════════════════════════" + +- name: Fail if any node has SSH connectivity failure + ansible.builtin.fail: + msg: | + ════════════════════════════════════════════════════════════════════════════════ + [ERROR] SSH CONNECTIVITY FAILURE + ════════════════════════════════════════════════════════════════════════════════ + The following nodes failed SSH connectivity check: + {% for node in ssh_failed_nodes %} + - {{ node }} ({{ nodes_yaml_ips[node] }}) + {% endfor %} + + Cannot proceed with upgrade. Please ensure: + 1. All nodes are powered on and accessible + 2. SSH service is running on all nodes + 3. Network connectivity exists between control node and target nodes + 4. SSH keys are properly configured + + Fix the connectivity issues and re-run the upgrade. + ════════════════════════════════════════════════════════════════════════════════ + when: ssh_failed_nodes | length > 0 + +- name: Display all nodes SSH accessible + ansible.builtin.debug: + msg: "[OK] SSH connectivity verified for all {{ all_validated_nodes | length }} validated nodes" + +# ══════════════════════════════════════════════════════════════════════════════ +# Validate all nodes are in Ready state +# ══════════════════════════════════════════════════════════════════════════════ +- name: Display node Ready status check message + ansible.builtin.debug: + msg: "Checking all nodes are in Ready state..." 
+ +- name: Get node Ready status from cluster + ansible.builtin.command: + cmd: >- + kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}|{range .status.conditions[?(@.type=="Ready")]}{.status}{end}{"\n"}{end}' + delegate_to: "{{ kube_vip }}" + register: node_ready_status + changed_when: false + +- name: Parse node Ready status + ansible.builtin.set_fact: + nodes_not_ready: >- + {{ nodes_not_ready | default([]) + ( + [item.split('|')[0]] if (item.split('|')[1] | default('False')) != 'True' else [] + ) }} + loop: "{{ node_ready_status.stdout_lines | select('match', '.+') | list }}" + loop_control: + label: "{{ item.split('|')[0] }}" + +- name: Filter not-ready nodes to only validated nodes + ansible.builtin.set_fact: + validated_nodes_not_ready: "{{ all_validated_nodes | select('in', nodes_yaml_ips | dict2items | selectattr('value', 'in', nodes_not_ready | default([])) | map(attribute='key') | list) | list }}" + +- name: Display node Ready status results + ansible.builtin.debug: + msg: + - "════════════════════════════════════════════════════════════════════════════════" + - "[NODE STATUS] READY CHECK RESULTS" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "Nodes in Ready state: {{ all_validated_nodes | difference(validated_nodes_not_ready) | length }}" + - "Nodes NOT Ready: {{ validated_nodes_not_ready | length }}" + - "{{ 'Not Ready nodes: ' + (validated_nodes_not_ready | join(', ')) if validated_nodes_not_ready | length > 0 else ' (all nodes ready)' }}" + - "════════════════════════════════════════════════════════════════════════════════" + +- name: Fail if any validated node is not Ready + ansible.builtin.fail: + msg: | + ════════════════════════════════════════════════════════════════════════════════ + [ERROR] NODES NOT IN READY STATE + ════════════════════════════════════════════════════════════════════════════════ + The following nodes are not in Ready state: + {% for node in validated_nodes_not_ready %} + - {{ node }} + {% endfor %} + + Cannot proceed with upgrade. Please ensure: + 1.
All nodes are healthy and running + 2. Kubelet is running on all nodes + 3. Network connectivity between nodes is working + 4. Check node conditions: kubectl describe node + + Fix the node issues and re-run the upgrade. + ════════════════════════════════════════════════════════════════════════════════ + when: validated_nodes_not_ready | length > 0 + +- name: Display all nodes Ready + ansible.builtin.debug: + msg: "[OK] All {{ all_validated_nodes | length }} validated nodes are in Ready state" + when: validated_nodes_not_ready | length == 0 + +# ══════════════════════════════════════════════════════════════════════════════ +# Validate all pods in the cluster are healthy +# Healthy STATUS values: Running, Completed, Succeeded +# Unhealthy: CrashLoopBackOff, Error, ImagePullBackOff, Pending, Failed, etc. +# ══════════════════════════════════════════════════════════════════════════════ +- name: Display pod health check message + ansible.builtin.debug: + msg: "Checking all pods in the cluster are healthy..." 
+ +# Get all pods with their STATUS (same as kubectl get pods output) +# Healthy phases: Running, Succeeded (Completed jobs show as Succeeded) +- name: Get all pods status across all namespaces + ansible.builtin.shell: + cmd: | + set -o pipefail + kubectl get pods --all-namespaces --no-headers \ + -o custom-columns='NS:.metadata.namespace,NAME:.metadata.name,PHASE:.status.phase,READY:.status.containerStatuses[*].ready' \ + 2>/dev/null | \ + while read ns name phase ready; do + # Healthy: Running with all containers ready, or Succeeded (completed) + if [[ "$phase" == "Running" && "$ready" == *"false"* ]]; then + echo "$ns|$name|ContainersNotReady" + elif [[ "$phase" != "Running" && "$phase" != "Succeeded" ]]; then + echo "$ns|$name|$phase" + fi + done + args: + executable: /bin/bash + delegate_to: "{{ kube_vip }}" + register: unhealthy_pods_check + changed_when: false + failed_when: false + +- name: Get total pod count + ansible.builtin.shell: + cmd: | + set -o pipefail + kubectl get pods --all-namespaces --no-headers | wc -l + args: + executable: /bin/bash + delegate_to: "{{ kube_vip }}" + register: total_pods_count + changed_when: false + +- name: Parse unhealthy pods from output + ansible.builtin.set_fact: + all_unhealthy_pods: [] + +- name: Build unhealthy pods list + ansible.builtin.set_fact: + all_unhealthy_pods: >- + {{ all_unhealthy_pods + [item.split('|')[0] + '/' + item.split('|')[1] + ' (' + item.split('|')[2] + ')'] }} + loop: "{{ unhealthy_pods_check.stdout_lines | default([]) | select('match', '.+') | list }}" + when: "'|' in item" + +- name: Set total pods count + ansible.builtin.set_fact: + total_pods: "{{ total_pods_count.stdout | trim }}" + +- name: Count healthy pods + ansible.builtin.set_fact: + healthy_pods: "{{ (total_pods | int) - (all_unhealthy_pods | length) }}" + +- name: Display pod health check results + ansible.builtin.debug: + msg: "{{ pod_health_banner }}" + vars: + pod_health_banner: >- + {{ + 
['════════════════════════════════════════════════════════════════════════════════', + '[POD STATUS] CLUSTER-WIDE HEALTH CHECK', + '════════════════════════════════════════════════════════════════════════════════', + '', + 'Total pods in cluster: ' + (total_pods | string), + 'Healthy pods: ' + (healthy_pods | string), + 'Unhealthy pods: ' + (all_unhealthy_pods | length | string)] + + (['Problem pods (namespace/name):'] + (all_unhealthy_pods | map('regex_replace', '^', ' - ') | list) + if all_unhealthy_pods | length > 0 + else [' (all pods healthy)']) + + ['════════════════════════════════════════════════════════════════════════════════'] + }} + +- name: Fail if any pods are not healthy + ansible.builtin.fail: + msg: | + ════════════════════════════════════════════════════════════════════════════════ + [ERROR] UNHEALTHY PODS DETECTED + ════════════════════════════════════════════════════════════════════════════════ + The following pods are not healthy: + {% for pod in all_unhealthy_pods %} + - {{ pod }} + {% endfor %} + + Healthy pods must have STATUS: Running or Succeeded + with all containers in Ready state. + + Cannot proceed with upgrade. Please fix these issues: + 1. Check pod status: kubectl get pods --all-namespaces + 2. Check pod logs: kubectl logs -n + 3. Check pod events: kubectl describe pod -n + + Re-run the upgrade after fixing the pod issues. 
+ ════════════════════════════════════════════════════════════════════════════════ + when: all_unhealthy_pods | length > 0 + +- name: Display all pods healthy + ansible.builtin.debug: + msg: "[OK] All {{ total_pods }} pods in the cluster are healthy" + when: all_unhealthy_pods | length == 0 + +# ── Fail if no control plane first node is available ───────────────── +- name: Fail if no first control plane node is available for upgrade + ansible.builtin.fail: + msg: | + ════════════════════════════════════════════════════════════════════════════════ + [ERROR] NO FIRST CONTROL PLANE NODE AVAILABLE + ════════════════════════════════════════════════════════════════════════════════ + The first control plane node(s) from nodes.yaml are not present in the cluster: + Expected: {{ expected_cp_first | join(', ') }} + In cluster: {{ cluster_control_planes | join(', ') }} + + Cannot proceed with upgrade without a first control plane node. + Please verify your cluster state and nodes.yaml configuration. 
+ ════════════════════════════════════════════════════════════════════════════════ + when: validated_cp_first | length == 0 + +# ── Build inventory summary node lines ──────────────────────────────── +- name: Build inventory node display lines + ansible.builtin.set_fact: + inv_cp_first_lines: "{{ validated_cp_first | map('regex_replace', '^(.*)$', ' - \\1') | list }}" + inv_cp_lines: "{{ validated_cp | map('regex_replace', '^(.*)$', ' - \\1') | list }}" + inv_worker_lines: "{{ validated_workers | map('regex_replace', '^(.*)$', ' - \\1') | list }}" + +- name: Add IPs to control plane first lines + ansible.builtin.set_fact: + inv_cp_first_display: >- + {{ inv_cp_first_display | default([]) + [' - ' + item + ' (' + nodes_yaml_ips[item] + ')'] }} + loop: "{{ validated_cp_first }}" + +- name: Add IPs to control plane additional lines + ansible.builtin.set_fact: + inv_cp_display: >- + {{ inv_cp_display | default([]) + [' - ' + item + ' (' + nodes_yaml_ips[item] + ')'] }} + loop: "{{ validated_cp }}" + +- name: Add IPs to worker lines + ansible.builtin.set_fact: + inv_worker_display: >- + {{ inv_worker_display | default([]) + [' - ' + item + ' (' + nodes_yaml_ips[item] + ')'] }} + loop: "{{ validated_workers }}" + +# ── Display upgrade inventory summary ──────────────────────────────── +- name: Display upgrade inventory summary + ansible.builtin.debug: + msg: "{{ inventory_summary }}" + vars: + inventory_summary: >- + {{ + ['════════════════════════════════════════════════════════════════════════════════', + '[UPGRADE] PROCEEDING WITH THE FOLLOWING NODES', + '════════════════════════════════════════════════════════════════════════════════', + '', + 'Total nodes for upgrade: ' ~ (all_validated_nodes | length), + '', + 'Control plane (first): ' ~ (validated_cp_first | length) ~ ' node(s)'] + + (inv_cp_first_display | default([])) + + ['Control plane (additional): ' ~ (validated_cp | length) ~ ' node(s)'] + + (inv_cp_display | default([' (none)']) if validated_cp | length > 
0 else [' (none)']) + + ['Workers: ' ~ (validated_workers | length) ~ ' node(s)'] + + (inv_worker_display | default([' (none)']) if validated_workers | length > 0 else [' (none)']) + + ['════════════════════════════════════════════════════════════════════════════════'] + }} + +# ── Pause to allow user to review discrepancies ────────────────────── +- name: Display proceeding message with discrepancies + ansible.builtin.debug: + msg: "{{ proceed_banner }}" + vars: + proceed_banner: + - "" + - "════════════════════════════════════════════════════════════════════════════════" + - "[NOTICE] PROCEEDING WITH CLUSTER NODES ONLY" + - "════════════════════════════════════════════════════════════════════════════════" + - "" + - "The following nodes from nodes.yaml are NOT in the cluster (will be SKIPPED):" + - " {{ nodes_only_in_yaml | join(', ') | default('none') }}" + - "" + - "The following nodes are in cluster but NOT in nodes.yaml (will be SKIPPED):" + - " {{ nodes_only_in_cluster | join(', ') | default('none') }}" + - "" + - "Upgrade will proceed with {{ all_validated_nodes | length }} nodes:" + - " - Control plane (first): {{ validated_cp_first | join(', ') }}" + - " - Control plane (additional): {{ validated_cp | join(', ') | default('none') }}" + - " - Workers: {{ validated_workers | join(', ') | default('none') }}" + - "" + - "Pausing for {{ cluster_validation_pause_seconds | default(10) }} seconds..." + - "Press Ctrl+C then 'A' to abort if this is not correct." 
+ - "════════════════════════════════════════════════════════════════════════════════" + - "" + when: + - has_discrepancies + - not (skip_cluster_validation_pause | default(false)) + +- name: Pause to allow review of discrepancies + ansible.builtin.pause: + seconds: "{{ cluster_validation_pause_seconds | default(10) }}" + when: + - has_discrepancies + - not (skip_cluster_validation_pause | default(false)) + +- name: Log proceeding with validated nodes + ansible.builtin.debug: + msg: "[OK] Proceeding with upgrade on {{ all_validated_nodes | length }} validated cluster nodes" + when: has_discrepancies + +# ── Set validated groups for use by other tasks ────────────────────── +- name: Set validated node groups for inventory + ansible.builtin.set_fact: + groups_cp_first: "{{ validated_cp_first }}" + groups_cp: "{{ validated_cp }}" + groups_worker: "{{ validated_workers }}" + all_upgrade_nodes: "{{ all_validated_nodes }}" + node_ips: "{{ nodes_yaml_ips }}" + cluster_validation_completed: true + nodes_only_in_yaml_final: "{{ nodes_only_in_yaml }}" + nodes_only_in_cluster_final: "{{ nodes_only_in_cluster }}" diff --git a/upgrade/roles/upgrade_k8s/vars/main.yml b/upgrade/roles/upgrade_k8s/vars/main.yml index 7b27f37e8f..2efe076c4f 100644 --- a/upgrade/roles/upgrade_k8s/vars/main.yml +++ b/upgrade/roles/upgrade_k8s/vars/main.yml @@ -85,6 +85,20 @@ reboot_connect_timeout: 300 # play with serial: N. Override via --extra-vars worker_parallel_count=3. worker_parallel_count: 1 +# --------------------------------------------------------------------------- +# Cluster validation settings +# --------------------------------------------------------------------------- +# When cluster nodes don't match nodes.yaml, the playbook pauses to allow review. 
+# +# cluster_validation_pause_seconds: How long to pause (default: 10 seconds) +# Override via --extra-vars cluster_validation_pause_seconds=30 +# +# skip_cluster_validation_pause: Skip the pause entirely (default: false) +# Set to true for automated/CI pipelines. +# Override via --extra-vars skip_cluster_validation_pause=true +cluster_validation_pause_seconds: 10 +skip_cluster_validation_pause: false + # --------------------------------------------------------------------------- # Step definitions per role # --------------------------------------------------------------------------- diff --git a/upgrade/upgrade.yml b/upgrade/upgrade.yml index c97c234bad..b101d0d2e2 100644 --- a/upgrade/upgrade.yml +++ b/upgrade/upgrade.yml @@ -19,11 +19,12 @@ # Supports --tags for selective execution of upgrade sub-flows. # # Usage: -# ansible-playbook upgrade/upgrade.yml # Full upgrade -# ansible-playbook upgrade/upgrade.yml --tags oim # OIM only -# ansible-playbook upgrade/upgrade.yml --tags k8s # K8s only -# ansible-playbook upgrade/upgrade.yml --tags provision -# ansible-playbook upgrade/upgrade.yml --tags "k8s,telemetry" +# cd /omnia/upgrade +# ansible-playbook upgrade.yml # Full upgrade +# ansible-playbook upgrade.yml --tags oim # OIM only +# ansible-playbook upgrade.yml --tags k8s # K8s only +# ansible-playbook upgrade.yml --tags provision +# ansible-playbook upgrade.yml --tags "k8s,telemetry" # # IMPORTANT: Must be invoked from the parent directory containing upgrade/, # rollback/, and playbooks/ folders. Internal playbooks are imported via @@ -122,7 +123,8 @@ Source: {{ existing_manifest.source_version | default('N/A') }} → Target: {{ existing_manifest.target_version | default('N/A') }} Re-running upgrade after successful completion is not allowed. 
If you need to force a new upgrade cycle: - ansible-playbook upgrade/upgrade.yml -e force_upgrade=true + cd /omnia/upgrade + ansible-playbook upgrade.yml -e force_upgrade=true when: - manifest_stat.stat.exists - existing_manifest.upgrade_status | default('') == 'completed' @@ -520,7 +522,8 @@ Please re-run the upgrade playbook and type 'yes' at the confirmation prompt: - ansible-playbook upgrade/upgrade.yml + cd /omnia/upgrade + ansible-playbook upgrade.yml ══════════════════════════════════════════════════════════════ From 3ad89bfc1f59b492b663f0b615cfbf5f1eb56dda Mon Sep 17 00:00:00 2001 From: "balajikumaran.cs" Date: Sat, 6 Jun 2026 10:02:17 +0530 Subject: [PATCH 37/78] Update get_powerscale_telemetry_dependencies.yml (#4696) Signed-off-by: balajikumaran.cs --- .../get_powerscale_telemetry_dependencies.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/provision/roles/telemetry/tasks/get_powerscale_telemetry_dependencies.yml b/provision/roles/telemetry/tasks/get_powerscale_telemetry_dependencies.yml index 0ebb5ac196..3a83ea7470 100644 --- a/provision/roles/telemetry/tasks/get_powerscale_telemetry_dependencies.yml +++ b/provision/roles/telemetry/tasks/get_powerscale_telemetry_dependencies.yml @@ -89,13 +89,13 @@ - name: Check if cert-manager is disabled in values file ansible.builtin.set_fact: cert_manager_disabled: >- - {%- if csm_observability_values_file_path | default('') != '' -%} - {%- set values_content = lookup('file', csm_observability_values_file_path, errors='ignore') -%} - {%- if values_content is not none and 'cert-manager:' in values_content and 'enabled: false' in values_content -%} - true{%- else -%} - false{%- endif -%} - {%- else -%} - false{%- endif -%} + {{- + (csm_observability_values_file_path | default('') != '') and + (lookup('file', csm_observability_values_file_path, errors='ignore') is not none) and + ('cert-manager:' in lookup('file', csm_observability_values_file_path, errors='ignore')) and + 
('enabled: false' in lookup('file', csm_observability_values_file_path, errors='ignore')) + | bool + -}} - name: Display cert-manager dependency status ansible.builtin.debug: From 1f1f88114709f5a7b4f56bc6a696effb8be8a0ce Mon Sep 17 00:00:00 2001 From: "balajikumaran.cs" Date: Sat, 6 Jun 2026 10:25:47 +0530 Subject: [PATCH 38/78] Fix DNF5 compatibility issues in local_repo RPM downloads (#4695) * Restore SELinux policy fix task for aarch64 Reintroduced pre-flight SELinux policy fix for aarch64 node. Signed-off-by: balajikumaran.cs * Update config.py Signed-off-by: balajikumaran.cs * Update download_rpm.py Signed-off-by: balajikumaran.cs * Update download_rpm.py Signed-off-by: balajikumaran.cs * Update process_rpm_config.py Signed-off-by: balajikumaran.cs --------- Signed-off-by: balajikumaran.cs --- .../library/module_utils/local_repo/config.py | 2 +- .../module_utils/local_repo/download_rpm.py | 58 ++++++++++++++----- common/library/modules/process_rpm_config.py | 46 ++++++++++----- 3 files changed, 76 insertions(+), 30 deletions(-) diff --git a/common/library/module_utils/local_repo/config.py b/common/library/module_utils/local_repo/config.py index aa05abf2dd..5a8d4a63f5 100644 --- a/common/library/module_utils/local_repo/config.py +++ b/common/library/module_utils/local_repo/config.py @@ -79,7 +79,7 @@ ("never", True): "streamed" } DNF_COMMANDS = { - "x86_64": ["dnf", "download", "--resolve", "--alldeps", "--arch=x86_64,noarch", "--disablerepo=*", "--enablerepo=x86_64_*"], + "x86_64": ["dnf", "download", "--resolve", "--alldeps", "--arch=x86_64", "--arch=noarch", "--disablerepo=*", "--enablerepo=x86_64_*"], "aarch64": ["dnf", "download", "--forcearch", "aarch64", "--resolve", "--alldeps", "--exclude=*.x86_64", "--disablerepo=*", "--enablerepo=aarch64_*"] } DNF_INFO_COMMANDS = { diff --git a/common/library/module_utils/local_repo/download_rpm.py b/common/library/module_utils/local_repo/download_rpm.py index 57c54922de..e6fe9603a2 100644 --- 
a/common/library/module_utils/local_repo/download_rpm.py +++ b/common/library/module_utils/local_repo/download_rpm.py @@ -1,4 +1,4 @@ -# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved. +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ import subprocess import os +import glob import shutil from pathlib import Path from ansible.module_utils.local_repo.config import ( @@ -28,6 +29,36 @@ file_lock = Lock() +def _check_rpm_downloaded(rpm_directory, pkg_name): + """ + Check if an RPM file for the given package exists in the directory. + This is more reliable than parsing DNF output which varies between DNF4 and DNF5. + + Args: + rpm_directory (str): Directory where RPMs are downloaded + pkg_name (str): Package name to check for + + Returns: + bool: True if RPM file exists for the package + """ + # Look for RPM files that start with the package name + # Pattern: pkg_name-version-release.arch.rpm + pattern = os.path.join(rpm_directory, f"{pkg_name}-[0-9]*.rpm") + matches = glob.glob(pattern) + if matches: + return True + + # Also check for exact match pattern (some packages have numbers in name) + pattern2 = os.path.join(rpm_directory, f"{pkg_name}-*.rpm") + for match in glob.glob(pattern2): + # Extract just the filename + filename = os.path.basename(match) + # Check if filename starts with pkg_name followed by a dash and version + if filename.startswith(f"{pkg_name}-"): + return True + + return False + def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, cluster_os_version, repo_config_value, arc, logger): """ @@ -93,21 +124,18 @@ def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, failed = [] # Detect successes/failures from combined run + # Use filesystem check instead of parsing output (works with both DNF4 and DNF5) for pkg in 
rpm_list: # Get repo_name for this specific RPM from mapping pkg_repo_name = repo_mapping.get(pkg, "") - # Check if package was downloaded successfully - # Look for "Already downloaded" or actual .rpm file in output - pkg_downloaded = False - for line in stdout_lines + stderr_lines: - if pkg in line and (".rpm" in line or "Already downloaded" in line): - pkg_downloaded = True - break - # Also check for "No match for argument" or "No package" errors + # Check if package was downloaded by looking for the RPM file + pkg_downloaded = _check_rpm_downloaded(rpm_directory, pkg) + + # Also check for "No match for argument" or "No package" errors in stderr pkg_not_found = False for line in stderr_lines: - if pkg in line and ("No match for argument" in line or + if pkg in line and ("No match for argument" in line or "No package" in line or "not found" in line.lower()): pkg_not_found = True @@ -116,6 +144,7 @@ def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, if pkg_downloaded and not pkg_not_found: downloaded.append(pkg) write_status_to_file(status_file_path, pkg, "rpm", "Success", logger, file_lock, pkg_repo_name) + logger.info(f"Package '{pkg}' downloaded successfully.") else: failed.append(pkg) if pkg_not_found: @@ -139,7 +168,8 @@ def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, "unable to find a match" ]) - if retry_res.returncode == 0 and ".rpm" in retry_res.stdout + retry_res.stderr: + # Check if RPM file exists after retry (works with both DNF4 and DNF5) + if retry_res.returncode == 0 and _check_rpm_downloaded(rpm_directory, pkg): downloaded.append(pkg) failed.remove(pkg) write_status_to_file(status_file_path, pkg, "rpm", "Success", logger, file_lock, pkg_repo_name) @@ -170,7 +200,7 @@ def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, for pkg in package["rpm_list"]: # Get repo_name for this specific RPM from mapping pkg_repo_name = repo_mapping.get(pkg, "") - + # Validate package using 
dnf info with specific repo only if pkg_repo_name: # Apply architecture prefixing if needed @@ -193,7 +223,7 @@ def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, # Package exists and is available valid_packages.append(pkg) write_status_to_file( - status_file_path, pkg, "rpm", "Success", + status_file_path, pkg, "rpm", "Success", logger, file_lock, pkg_repo_name ) logger.info(f"Package '{pkg}' validated successfully") @@ -201,7 +231,7 @@ def process_rpm(package, repo_store_path, status_file_path, cluster_os_type, # Package not found or invalid invalid_packages.append(pkg) write_status_to_file( - status_file_path, pkg, "rpm", "Failed", + status_file_path, pkg, "rpm", "Failed", logger, file_lock, pkg_repo_name ) logger.error( diff --git a/common/library/modules/process_rpm_config.py b/common/library/modules/process_rpm_config.py index 4a5b42f539..34d650d4a3 100644 --- a/common/library/modules/process_rpm_config.py +++ b/common/library/modules/process_rpm_config.py @@ -244,12 +244,12 @@ def create_rpm_remote(repo,log): repo_name = f"{repo_name}_{version}" remote_name = repo_name - + # Check if remote already exists - skip if it does if show_rpm_remote(remote_name, log): log.info("Remote '%s' already exists. 
Skipping.", remote_name) return True, repo_name - + # Remote doesn't exist - create it repo_keys = repo.keys() if "ca_cert" in repo_keys and repo["ca_cert"]: @@ -326,7 +326,7 @@ def sync_rpm_repository(repo,log, resync_repos=None): log.info("Starting synchronization for RPM repository") # Determine if we should skip sync check force_sync = False - + # Normalize resync_repos: convert comma-separated string to list resync_list = None if resync_repos == "all": @@ -458,9 +458,25 @@ def check_publication_exists(repo_name, log): try: command = pulp_rpm_commands["check_publication"] % repo_name log.info("Checking if publication exists for repository '%s'", repo_name) - result = execute_command(command, log) - # The command returns a list - if empty, no publication exists - return bool(result) + cmd_list = shlex.split(command) + result = subprocess.run(cmd_list, shell=False, capture_output=True, text=True) + log.info("check_publication_exists command return code: %s", result.returncode) + + if result.returncode != 0: + log.info("Publication check command failed for '%s'", repo_name) + return False + + # Parse the JSON output - publication list returns [] when empty + publications = json.loads(result.stdout) + if publications: + log.info("Publication exists for '%s' (%d found)", repo_name, len(publications)) + return True + else: + log.info("No publications found for '%s' (empty list)", repo_name) + return False + except (json.JSONDecodeError, ValueError) as e: + log.error("Error parsing publication list for '%s': %s", repo_name, str(e)) + return False except Exception as e: log.error("Error checking publication for '%s': %s", repo_name, str(e)) return False @@ -574,7 +590,7 @@ def delete_old_publications(repo_name, log): log.warning("Failed to delete publication %s: %s", pub_href, delete_result.stderr) else: log.info("Successfully deleted publication: %s", pub_href) - + return True except Exception as e: log.error("Error deleting publications for '%s': %s", repo_name, 
str(e)) @@ -601,10 +617,10 @@ def create_publication(repo,log, resync_repos=None): repo_name = f"{repo_name}_{version}" log.info("Processing publication for repository: '%s'", repo_name) - + # Check if version changed during sync (passed via _version_changed flag) version_changed = repo.get("_version_changed", True) # Default True for safety - + # If publication exists and version didn't change, keep existing publication if check_publication_exists(repo_name, log): if not version_changed: @@ -865,7 +881,7 @@ def process_sync_results(sync_results, rpm_config, resync_repos, log): # Get list of repos where version changed (need new publication) version_changed_repos = [name for success, name, actually_synced, version_changed in sync_results if success and actually_synced and version_changed] log.info(f"Repos with version change: {len(version_changed_repos)} - {version_changed_repos}") - + # If no versions changed, check for missing publication/distribution # This handles the crash recovery case: process failed after sync but before pub/dist if not version_changed_repos: @@ -983,7 +999,7 @@ def delete_aggregated_repo(repo_name, log): """ Delete the aggregated repository, its remotes, and distribution for a given architecture. This is called before recreating the aggregated repo to ensure a clean state. - + Note: This only deletes the distribution with the exact repo_name. Old distributions with different naming conventions are preserved to allow coexistence during upgrades. 
@@ -1397,7 +1413,7 @@ def manage_rpm_repositories_multiprocess(rpm_config, log, sw_archs=None, resync_ # Process sync results and get repos for publication/distribution repos_for_pub_dist, should_skip, skip_message = process_sync_results(sync_results, rpm_config, resync_repos, log) - + # Only run publication/distribution if repos need it if not should_skip: # Step 4: Concurrent publication creation @@ -1432,7 +1448,7 @@ def manage_rpm_repositories_multiprocess(rpm_config, log, sw_archs=None, resync_ if not base_urls: log.error("No base URLs retrieved from Pulp. Cannot create repo file.") return False, "Base URLs fetch failed — repo file not created." - + log.info(f"Fetched {len(base_urls)} base URLs from Pulp.") create_yum_repo_file(base_urls, log) log.info("Successfully created/updated pulp.repo file with fetched base URLs.") @@ -1440,7 +1456,7 @@ def manage_rpm_repositories_multiprocess(rpm_config, log, sw_archs=None, resync_ # Return appropriate success message based on resync_repos and skip status if should_skip: return True, skip_message - + if resync_repos == "all": return True, "Resync completed successfully for all repositories" elif resync_repos: @@ -1449,7 +1465,7 @@ def manage_rpm_repositories_multiprocess(rpm_config, log, sw_archs=None, resync_ else: repos_list = ", ".join(resync_repos) return True, f"Resync completed successfully for specified repositories: {repos_list}" - + return True, "RPM repository sync and configuration completed successfully" def main(): From 06b96a8a8352e44c00796097dd2e3c5fb29f56ac Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Sat, 6 Jun 2026 19:14:17 +0530 Subject: [PATCH 39/78] kernel version update check (#4697) --- .../tasks/configure_bss_group.yml | 35 +++++++++++++++---- .../tasks/provision_mapping_nodes.yml | 12 ++++--- .../tasks/validate_image.yml | 20 ++++++++--- .../roles/provision_validations/vars/main.yml | 10 ++++-- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git 
a/provision/roles/configure_ochami/tasks/configure_bss_group.yml b/provision/roles/configure_ochami/tasks/configure_bss_group.yml index 6453fb4848..d32776bdf7 100644 --- a/provision/roles/configure_ochami/tasks/configure_bss_group.yml +++ b/provision/roles/configure_ochami/tasks/configure_bss_group.yml @@ -36,9 +36,6 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}{{ compute_image_suffix }}" | \ - {% if hostvars['localhost']['kernel_version_override'] | default('') | length > 0 %} - grep "{{ hostvars['localhost']['kernel_version_override'] }}" | \ - {% endif %} grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false @@ -52,9 +49,6 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}" | \ - {% if hostvars['localhost']['kernel_version_override'] | default('') | length > 0 %} - grep "{{ hostvars['localhost']['kernel_version_override'] }}" | \ - {% endif %} grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false @@ -69,6 +63,21 @@ when: - enable_build_stream - (compute_image_suffix | default('')) != '' + - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 + +- name: Set kernel and initrd variables (build stream - kernel override) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _bs_lines: "{{ verify_s3_image_build_stream.stdout_lines }}" + ansible.builtin.set_fact: + kernel: "{{ _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: + - enable_build_stream + - (compute_image_suffix | default('')) != '' + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - _bs_lines | select('search', 'vmlinuz') | 
select('search', _kvo) | list | length > 0 + - _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - name: Set kernel and initrd variables (default) ansible.builtin.set_fact: @@ -76,6 +85,20 @@ initrd: "{{ verify_s3_image.stdout_lines | select('search', 'initramfs') | list | first }}" when: - not enable_build_stream + - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 + +- name: Set kernel and initrd variables (default - kernel override) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _s3_lines: "{{ verify_s3_image.stdout_lines }}" + ansible.builtin.set_fact: + kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: + - not enable_build_stream + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 + - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - name: Load bss template - {{ functional_group_name }} ansible.builtin.template: diff --git a/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml b/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml index 101c3b9d3d..23f22bed7a 100644 --- a/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml +++ b/provision/roles/configure_ochami/tasks/provision_mapping_nodes.yml @@ -64,12 +64,13 @@ ansible.builtin.include_tasks: delete_smd_config.yml - name: Restart the cloud-init service to refresh node instance data - ansible.builtin.service: + ansible.builtin.systemd: name: cloud-init-server state: restarted + daemon_reload: true - name: Check if cloud-init-server is running - ansible.builtin.service: + ansible.builtin.systemd: name: cloud-init-server state: started register: cloud_init_server_status @@ 
-88,12 +89,13 @@ changed_when: false rescue: - name: Restart openchami.target to recover container networking - ansible.builtin.service: + ansible.builtin.systemd: name: openchami.target state: restarted + daemon_reload: true - name: Wait for openchami.target to be ready after restart - ansible.builtin.service: + ansible.builtin.systemd: name: openchami.target state: started register: openchami_target_recovery @@ -116,7 +118,7 @@ when: cloud_init_status.rc != 0 - name: Check whether openchami.target is up - ansible.builtin.service: + ansible.builtin.systemd: name: openchami.target state: started register: openchami_target_status diff --git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index 8745ec6a06..e00d49826a 100644 --- a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -53,9 +53,6 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}" | \ - {% if hostvars['localhost']['kernel_version_override'] | default('') | length > 0 %} - grep "{{ hostvars['localhost']['kernel_version_override'] }}" | \ - {% endif %} grep {{ hostvars['localhost']['cluster_os_version'] }} | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false @@ -75,7 +72,22 @@ ansible.builtin.set_fact: kernel: "{{ verify_s3_image.stdout_lines | select('search', 'vmlinuz') | list | first }}" initrd: "{{ verify_s3_image.stdout_lines | select('search', 'initramfs') | list | first }}" - when: verify_s3_image.stdout_lines | length > 1 + when: + - verify_s3_image.stdout_lines | length > 1 + - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 + +- name: Set kernel and initrd variables (kernel override) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _s3_lines: "{{ verify_s3_image.stdout_lines }}" + ansible.builtin.set_fact: + kernel: 
"{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: + - verify_s3_image.stdout_lines | length > 1 + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 + - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - name: Fail if kernel override did not match any S3 image ansible.builtin.fail: diff --git a/provision/roles/provision_validations/vars/main.yml b/provision/roles/provision_validations/vars/main.yml index 499f52ff2c..1038ab4b2d 100644 --- a/provision/roles/provision_validations/vars/main.yml +++ b/provision/roles/provision_validations/vars/main.yml @@ -65,8 +65,14 @@ image_missing_fail_msg: | kernel_override_no_match_msg: >- Error: kernel_version_override did not match any image in S3 for functional group '{{ functional_group_name }}'. - Either set kernel_version_override to "" in provision_config.yml to auto-select the latest, - or provide a valid kernel version that exists in s3://boot-images. + Before running provision.yml, ensure the following steps are completed in order: + (1) Run local_repo.yml to sync the latest BaseOS repository so the desired + kernel packages are available locally. + (2) Run build_image_x86_64.yml/build_image_aarch64.yml to build and upload the new kernel image + to s3://boot-images. + (3) Re-run provision.yml. + Alternatively, set kernel_version_override to "" in provision_config.yml + to auto-select the latest available image. 
# Usage: validate_telemetry.yml warning_idrac_telemetry_support_false: | From 23418471f72e9163da67642983c81becaf5ff4a3 Mon Sep 17 00:00:00 2001 From: "balajikumaran.cs" Date: Mon, 8 Jun 2026 11:35:46 +0530 Subject: [PATCH 40/78] Fix: Cloud-Init runcmd YAML parsing errors on aarch64 templates (#4699) --- ...-group-login_compiler_node_aarch64.yaml.j2 | 20 ++++++++--------- .../ci-group-slurm_node_aarch64.yaml.j2 | 22 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 index 5a5b73da5e..0ac3d9f7a4 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-login_compiler_node_aarch64.yaml.j2 @@ -135,7 +135,7 @@ owner: root:root permissions: '{{ file_mode_755 }}' content: | - {{ lookup('template', 'templates/hpc_tools/setup_doca_mpi_env.sh.j2') | indent(12) }} + {{ lookup('template', 'templates/hpc_tools/setup_doca_mpi_env.sh.j2') | indent(12) }} {% if dns_enabled | default(false) | bool %} - path: /etc/resolv.conf @@ -181,7 +181,7 @@ permissions: '0644' content: | {{ lookup('template', 'templates/nodes/apptainer_mirror.conf.j2') | indent(12) }} - + # NVIDIA HPC SDK script (run manually: /usr/local/bin/setup_nvhpc_sdk.sh --install) - path: /usr/local/bin/setup_nvhpc_sdk.sh owner: root:root @@ -317,38 +317,38 @@ - firewall-cmd --permanent --add-port={{ slurm_conf_dict.SrunPortRange }}/udp - firewall-cmd --permanent --add-port={{ slurm_conf_dict.SlurmdPort }}/tcp - firewall-cmd --permanent --add-port={{ slurm_conf_dict.SlurmdPort }}/udp - + # Add PXE network to trusted zone for ORTE communication - echo "[INFO] Adding PXE network to trusted zone for ORTE communication" - | bash -c ' ADMIN_IP="{{ 
hostvars['localhost']['admin_nic_ip'] }}" NETMASK_BITS="{{ hostvars['localhost']['admin_netmask_bits'] }}" - + # Convert IP to integer and calculate network address ip_to_int() { local IFS=. read -r a b c d <<< "$1" echo $(( (a << 24) + (b << 16) + (c << 8) + d )) } - + int_to_ip() { local ip=$1 - echo "$(( (ip >> 24) & 255 )).$(( (ip >> 16) & 255 )).$(( (ip >> 8) & 255 )).$(( ip & 255 ))" + echo "$(( (ip >> 24) & 255 )).$(( (ip >> 16) & 255 )).$(( (ip >> 8) & 255 )).$(( ip & 255 ))" } - + ADMIN_IP_INT=$(ip_to_int "$ADMIN_IP") HOST_BITS=$(( 32 - NETMASK_BITS )) HOST_MASK=$(( (1 << HOST_BITS) - 1 )) NETWORK_MASK=$(( ~HOST_MASK & 0xFFFFFFFF )) NETWORK_INT=$(( ADMIN_IP_INT & NETWORK_MASK )) NETWORK_IP=$(int_to_ip "$NETWORK_INT") - + PXE_SUBNET="$NETWORK_IP/$NETMASK_BITS" echo "[INFO] Admin IP: $ADMIN_IP, Netmask: /$NETMASK_BITS, PXE Subnet: $PXE_SUBNET" firewall-cmd --zone=trusted --add-source="$PXE_SUBNET" --permanent ' - + - firewall-cmd --reload - systemctl enable sshd - systemctl start sshd @@ -389,4 +389,4 @@ # NVIDIA HPC SDK: Script deployed to /usr/local/bin/setup_nvhpc_sdk.sh # User must run manually after cloud-init completes - echo "Cloud-Init has completed successfully." 
- - echo "To install NVIDIA HPC SDK, run: /usr/local/bin/setup_nvhpc_sdk.sh --install" + - echo "Run /usr/local/bin/setup_nvhpc_sdk.sh --install to install NVIDIA HPC SDK" diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 index 2f7f52c930..096829024f 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-slurm_node_aarch64.yaml.j2 @@ -291,36 +291,36 @@ firewall-cmd --permanent --add-service=ssh firewall-cmd --permanent --add-port="${SRUN_RANGE}"/tcp firewall-cmd --permanent --add-port="${SLURMD_PORT}"/tcp - + # Add PXE network to trusted zone for ORTE communication echo "[INFO] Adding PXE network to trusted zone for ORTE communication" # Calculate PXE subnet using admin IP and netmask bits ADMIN_IP="{{ hostvars['localhost']['admin_nic_ip'] }}" NETMASK_BITS="{{ hostvars['localhost']['admin_netmask_bits'] }}" - + # Convert IP to integer and calculate network address ip_to_int() { local IFS=. 
read -r a b c d <<< "$1" echo $(( (a << 24) + (b << 16) + (c << 8) + d )) } - + int_to_ip() { local ip=$1 echo "$(( (ip >> 24) & 255 )).$(( (ip >> 16) & 255 )).$(( (ip >> 8) & 255 )).$(( ip & 255 ))" } - + ADMIN_IP_INT=$(ip_to_int "$ADMIN_IP") HOST_BITS=$(( 32 - NETMASK_BITS )) HOST_MASK=$(( (1 << HOST_BITS) - 1 )) NETWORK_MASK=$(( ~HOST_MASK & 0xFFFFFFFF )) NETWORK_INT=$(( ADMIN_IP_INT & NETWORK_MASK )) NETWORK_IP=$(int_to_ip "$NETWORK_INT") - + PXE_SUBNET="$NETWORK_IP/$NETMASK_BITS" echo "[INFO] Admin IP: $ADMIN_IP, Netmask: /$NETMASK_BITS, PXE Subnet: $PXE_SUBNET" firewall-cmd --zone=trusted --add-source="$PXE_SUBNET" --permanent - + firewall-cmd --reload echo "[INFO] Unmounting controller slurm.conf directory from $CTLD_SLURM_DIR_MNT" @@ -379,19 +379,19 @@ permissions: '0644' content: | {{ lookup('template', 'templates/nodes/apptainer_mirror.conf.j2') | indent(12) }} - + - path: /usr/local/bin/configure_ucx_openmpi_env.sh owner: root:root permissions: '{{ file_mode_755 }}' content: | {{ lookup('template', 'templates/hpc_tools/configure_ucx_openmpi_env.sh.j2') | indent(12) }} - + - path: /usr/local/bin/setup_doca_mpi_env.sh owner: root:root permissions: '{{ file_mode_755 }}' content: | {{ lookup('template', 'templates/hpc_tools/setup_doca_mpi_env.sh.j2') | indent(12) }} - + # NVIDIA HPC SDK setup script (run manually: /usr/local/bin/setup_nvhpc_sdk.sh) - path: /usr/local/bin/setup_nvhpc_sdk.sh owner: root:root @@ -419,7 +419,7 @@ {% if hostvars['localhost']['openmpi_support'] %} - bash /usr/local/bin/setup_doca_mpi_env.sh || echo "DOCA MPI environment setup failed (non-critical)" {% endif %} - + {# Mount-specific runcmd entries - moved after DOCA to ensure RDMA is available #} {%- if cloud_init_groups_dict[functional_group_name].runcmd is defined and cloud_init_groups_dict[functional_group_name].runcmd is not none %} {% for cmd in cloud_init_groups_dict[functional_group_name].runcmd %} @@ -528,4 +528,4 @@ - systemctl restart slurmd - echo "Cloud-Init has 
completed successfully." - - echo "To setup NVIDIA HPC SDK, run: /usr/local/bin/setup_nvhpc_sdk.sh" + - echo "Run /usr/local/bin/setup_nvhpc_sdk.sh to setup NVIDIA HPC SDK" From 3f73926574d79ac8942a7b1de59d95d9e2922d45 Mon Sep 17 00:00:00 2001 From: "balajikumaran.cs" Date: Mon, 8 Jun 2026 12:09:15 +0530 Subject: [PATCH 41/78] Fix: Multiple Ansible 2.14+ compatibility issues after upgrade (#4698) * Update initiate_telemetry_service_cluster.yml Signed-off-by: balajikumaran.cs * Update update_metadata_file.yml Signed-off-by: balajikumaran.cs --------- Signed-off-by: balajikumaran.cs --- .../tasks/initiate_telemetry_service_cluster.yml | 2 +- .../tasks/update_metadata_file.yml | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry_service_cluster.yml b/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry_service_cluster.yml index 7078a2f056..d466ecc573 100644 --- a/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry_service_cluster.yml +++ b/telemetry/roles/idrac_telemetry/tasks/initiate_telemetry_service_cluster.yml @@ -50,7 +50,7 @@ "iDRAC IPs for pod '{{ item.key }}': {{ item.value | join(', ') }}" verbosity: 2 loop: "{{ idrac_podname_idracips.idrac_podname_ips | dict2items }}" - when: idrac_podname_idracips.idrac_podname_ips is defined and idrac_podname_idracips.idrac_podname_ips + when: idrac_podname_idracips.idrac_podname_ips is defined and (idrac_podname_idracips.idrac_podname_ips | length > 0) - name: Read the existing BMC IP's from mysqlDB of the idrac telemetry pods block: diff --git a/telemetry/roles/service_k8s_telemetry/tasks/update_metadata_file.yml b/telemetry/roles/service_k8s_telemetry/tasks/update_metadata_file.yml index b9a7277055..8516e6e9d3 100644 --- a/telemetry/roles/service_k8s_telemetry/tasks/update_metadata_file.yml +++ b/telemetry/roles/service_k8s_telemetry/tasks/update_metadata_file.yml @@ -1,4 +1,4 @@ -# Copyright 2025 Dell Inc. 
or its subsidiaries. All Rights Reserved. +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,14 +76,10 @@ kind: StatefulSet name: idrac-telemetry namespace: telemetry - patch: | - [ - { - "op": "replace", - "path": "/spec/replicas", - "value": {{ (idrac_telemetry_replicas | int) + 1 }} - } - ] + patch: + - op: replace + path: /spec/replicas + value: "{{ (idrac_telemetry_replicas | int) + 1 }}" - name: Generating iDRAC telemetry pods names ansible.builtin.set_fact: From 64b0a7690ebcd025e2a5be2b68493c70785db68d Mon Sep 17 00:00:00 2001 From: Sujit Jadhav Date: Mon, 8 Jun 2026 15:22:47 +0530 Subject: [PATCH 42/78] fix(openchami): resolve missing service units in multi-subnet deployment (OMN01D-2513) (#4700) The OpenCHAMI deployment fails with 'Unit opaal.service not found' and 'Unit bss.service not found' when starting openchami.target in multi-subnet environments. This occurs because: 1. The openchami-certificate-update script reads configs from /etc/openchami/configs/ to generate quadlet .container files. Custom coredhcp.yaml was dropped BEFORE the cert update ran, and the multi-subnet key=value format caused the script to fail, leaving quadlet files ungenerated. 2. The coredhcp_subnets variable used in the template was written to a file (configs_vars.yaml) but never loaded into Ansible's variable scope via set_fact, so multi-subnet config blocks never rendered. 3. Input validation rejected ADMIN_IPs in additional subnets because validate_all_host_ips_same_subnet_as_vip only checked the primary admin subnet. Changes: - ochami.yml: Reorder cert update before config template drops so the script uses RPM default configs. Add conditional quadlet stat check + RPM reinstall fallback for multi-subnet deployments only. 
- deploy_openchami.yml: Add set_fact for additional_subnets so the coredhcp template can access subnet data. - coredhcp.yaml.j2: Version-aware template that uses single-subnet positional format for coresmd v0.4.x (with commented multi-subnet config when additional_subnets are configured) and functional multi-subnet key=value format for coresmd v0.6.x+. - main.yml: Retain install guard (when: not openchami_install_status) to avoid unnecessary reinstalls. Add separate refresh task that updates network configs on re-runs when openchami is already running. - refresh_openchami_configs.yml: New task file that reloads network_spec and provision_config, recomputes network facts, and refreshes coredhcp/coredns templates. Only restarts services if configs actually changed. - vip_pxe_validation.py: Accept additional_subnets parameter and validate ADMIN_IPs against all known subnets. - high_availability_validation.py: Add get_additional_subnets helper, include it in load_network_spec, and pass through the call chain. All changes are backward-compatible with single-subnet/flat network deployments. No impact on local_repo, build_image, provision, or other input_validation flows. 
Signed-off-by: Sujit Jadhav --- .../high_availability_validation.py | 29 +++++- .../validation_flows/vip_pxe_validation.py | 32 ++++++- .../common/tasks/aarch64_prereq.yml | 5 + .../openchami/tasks/configs/ochami.yml | 24 ++++- .../openchami/tasks/configs/regctl.yml | 5 + .../openchami/tasks/deploy_openchami.yml | 4 + .../openchami/tasks/main.yml | 4 + .../tasks/refresh_openchami_configs.yml | 94 +++++++++++++++++++ .../templates/coredhcp/coredhcp.yaml.j2 | 54 +++++++++-- 9 files changed, 228 insertions(+), 23 deletions(-) create mode 100644 prepare_oim/roles/deploy_containers/openchami/tasks/refresh_openchami_configs.yml diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index d4ce3f0a32..1630e5e9cc 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -231,6 +231,24 @@ def get_primary_oim_admin_ip(network_spec_json): return oim_admin_ip +def get_additional_subnets(network_spec_json): + """ + Retrieves the additional_subnets list from the admin_network configuration. + + Args: + network_spec_json (dict): The JSON object containing the network specifications. + + Returns: + list: The additional_subnets list, or an empty list if not configured. + """ + for network in network_spec_json["Networks"]: + for key, value in network.items(): + if key == "admin_network": + additional = value.get("additional_subnets", []) + return additional if additional else [] + return [] + + def is_service_tag_present(service_tags_list, input_service_tag): """ Checks if a service tag is present in a given list of service tags. 
@@ -296,7 +314,8 @@ def validate_vip_address( pod_external_ip_list, admin_netmaskbits, oim_admin_ip, - pxe_mapping_file_path=None + pxe_mapping_file_path=None, + additional_subnets=None ): """ Validate a virtual IP address against a list of existing service node VIPs, @@ -371,7 +390,7 @@ def validate_vip_address( validate_vip_vs_pxe_mapping_host_ips(errors, config_type, vip_address, pxe_mapping_file_path) # Check all HOST_IPs are in same subnet as VIP - validate_all_host_ips_same_subnet_as_vip(errors, vip_address, pxe_mapping_file_path, admin_netmaskbits) + validate_all_host_ips_same_subnet_as_vip(errors, vip_address, pxe_mapping_file_path, admin_netmaskbits, additional_subnets) def validate_service_k8s_cluster_ha( errors, @@ -458,7 +477,8 @@ def validate_service_k8s_cluster_ha( pod_external_ip_list, admin_netmaskbits, oim_admin_ip, - prov_cfg.get('pxe_mapping_file_path') + prov_cfg.get('pxe_mapping_file_path'), + network_spec_data.get("additional_subnets", []) ) @@ -483,7 +503,8 @@ def load_network_spec(input_file_path): "admin_uncorrelated_node_start_ip": get_admin_uncorrelated_node_start_ip( network_spec_json ), - "oim_admin_ip": get_primary_oim_admin_ip(network_spec_json) + "oim_admin_ip": get_primary_oim_admin_ip(network_spec_json), + "additional_subnets": get_additional_subnets(network_spec_json) } return network_spec_info diff --git a/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py b/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py index 8866447423..1cd060a991 100644 --- a/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/vip_pxe_validation.py @@ -95,28 +95,50 @@ def validate_vip_vs_pxe_mapping_host_ips( def validate_all_host_ips_same_subnet_as_vip( - errors, vip_address, pxe_mapping_file_path, admin_netmaskbits): + errors, vip_address, pxe_mapping_file_path, admin_netmaskbits, + 
additional_subnets=None): """ - Validate that all ADMIN_IPs in PXE mapping are in same subnet as VIP. + Validate that all ADMIN_IPs in PXE mapping are in a known subnet + (primary admin subnet or any additional subnet). Parameters: errors (list): List to append error messages vip_address (str): VIP address to validate against pxe_mapping_file_path (str): Path to PXE mapping file admin_netmaskbits (str): Netmask bits for subnet validation + additional_subnets (list, optional): List of additional subnet + dicts with 'subnet' and 'netmask_bits' keys. """ host_ips = extract_host_ips_from_pxe_mapping(pxe_mapping_file_path) + if additional_subnets is None: + additional_subnets = [] for host_ip in host_ips: - if not validation_utils.is_ip_in_subnet( + # Check if host_ip is in the primary admin subnet (VIP subnet) + if validation_utils.is_ip_in_subnet( vip_address, admin_netmaskbits, host_ip): + continue + + # Check if host_ip is in any additional subnet + in_additional = False + for subnet_entry in additional_subnets: + subnet_addr = subnet_entry.get("subnet", "") + subnet_bits = subnet_entry.get("netmask_bits", "") + if subnet_addr and subnet_bits: + if validation_utils.is_ip_in_subnet( + subnet_addr, subnet_bits, host_ip): + in_additional = True + break + + if not in_additional: errors.append( create_error_msg( "ADMIN_IP subnet consistency", host_ip, f"Node ADMIN_IP {host_ip} must be in the same " - f"subnet as VIP {vip_address}. " + f"subnet as VIP {vip_address} or in one of the " + "configured additional_subnets. " "Please ensure all ADMIN_IPs in PXE mapping file " - "are in the same subnet as the VIP." + "are in a known subnet." 
) ) \ No newline at end of file diff --git a/prepare_oim/roles/deploy_containers/common/tasks/aarch64_prereq.yml b/prepare_oim/roles/deploy_containers/common/tasks/aarch64_prereq.yml index f5eae768bb..5f4d21a7cd 100644 --- a/prepare_oim/roles/deploy_containers/common/tasks/aarch64_prereq.yml +++ b/prepare_oim/roles/deploy_containers/common/tasks/aarch64_prereq.yml @@ -24,3 +24,8 @@ url: "{{ regctl_aarch64_url }}" dest: "{{ ochami_aarch64_dir }}/regctl" mode: "{{ dir_permissions_755 }}" + timeout: 120 + register: regctl_aarch64_download + retries: 5 + delay: 10 + until: regctl_aarch64_download is succeeded diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/configs/ochami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/configs/ochami.yml index b6aeec581a..de7b998c93 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/configs/ochami.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/configs/ochami.yml @@ -29,6 +29,26 @@ state: present disable_gpg_check: true +- name: Check if OpenCHAMI quadlet files exist + ansible.builtin.stat: + path: /etc/containers/systemd/opaal.container + register: openchami_quadlet_check + when: additional_subnets | default([]) | length > 0 + +- name: Reinstall openchami RPM to restore missing quadlet files + ansible.builtin.command: >- + dnf reinstall -y + {{ openchami_work_dir }}/{{ openchami_rpm_name }} + --nogpgcheck + changed_when: true + when: + - additional_subnets | default([]) | length > 0 + - not (openchami_quadlet_check.stat.exists | default(true)) + +- name: Configure cluster FQDN for certificates + ansible.builtin.command: sudo openchami-certificate-update update {{ cluster_name }}.{{ cluster_domain }} + changed_when: true + - name: Download the ochami client RPM file ansible.builtin.get_url: url: "{{ ochami_client_rpm_url }}" @@ -66,7 +86,3 @@ owner: root group: root mode: "{{ file_perm_rw }}" - -- name: Configure cluster FQDN for certificates - ansible.builtin.command: sudo 
openchami-certificate-update update {{ cluster_name }}.{{ cluster_domain }} - changed_when: true diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/configs/regctl.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/configs/regctl.yml index 94ce6965d5..27a724787d 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/configs/regctl.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/configs/regctl.yml @@ -18,6 +18,11 @@ url: "{{ regctl_url }}" dest: "{{ regctl_dest }}" mode: "{{ file_perm_rwx }}" + timeout: 120 + register: regctl_download + retries: 5 + delay: 10 + until: regctl_download is succeeded - name: Configure regctl # noqa: command-instead-of-shell ansible.builtin.shell: | diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml index b7b9b64bb2..506f804ea8 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/deploy_openchami.yml @@ -119,6 +119,10 @@ {{ (admin_nic_ip + '/' + network_data.admin_network.netmask_bits) | ansible.utils.ipaddr('netmask') }} coredhcp_lease_duration: "{{ default_lease_time }}s" +- name: Set multi-subnet coredhcp facts + ansible.builtin.set_fact: + additional_subnets: "{{ network_data.admin_network.additional_subnets | default([]) }}" + - name: Configure static routes for additional subnets when: network_data.admin_network.additional_subnets | default([]) | length > 0 block: diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/main.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/main.yml index 97170c2966..4c3dc90d9e 100644 --- a/prepare_oim/roles/deploy_containers/openchami/tasks/main.yml +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/main.yml @@ -22,3 +22,7 @@ - name: Deploy openchami ansible.builtin.include_tasks: deploy_openchami.yml when: not 
hostvars['oim']['openchami_install_status'] + +- name: Refresh openchami network configs + ansible.builtin.include_tasks: refresh_openchami_configs.yml + when: hostvars['oim']['openchami_install_status'] diff --git a/prepare_oim/roles/deploy_containers/openchami/tasks/refresh_openchami_configs.yml b/prepare_oim/roles/deploy_containers/openchami/tasks/refresh_openchami_configs.yml new file mode 100644 index 0000000000..49d3fa94e6 --- /dev/null +++ b/prepare_oim/roles/deploy_containers/openchami/tasks/refresh_openchami_configs.yml @@ -0,0 +1,94 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# Refresh openchami network configs when openchami is already installed. +# This allows network_spec changes (e.g. additional_subnets) to be +# reapplied on re-runs without triggering a full reinstall. 
+ +- name: Include provision_config.yml + block: + - name: Include provision_config.yml file + ansible.builtin.include_vars: "{{ provision_config }}" + no_log: true + rescue: + - name: Failed to include provision_config.yml + ansible.builtin.fail: + msg: "{{ provision_config_syntax_fail_msg }}" + +- name: Include network_spec.yml + block: + - name: Include network_spec file + ansible.builtin.include_vars: "{{ network_spec }}" + no_log: true + rescue: + - name: Failed to include network_spec.yml + ansible.builtin.fail: + msg: "{{ network_spec_syntax_fail_msg }}" + +- name: Parse network_spec data + ansible.builtin.set_fact: + network_data: "{{ network_data | default({}) | combine({item.key: item.value}) }}" + with_dict: "{{ Networks }}" + +- name: Set openchami cluster configuration facts + ansible.builtin.set_fact: + cluster_name: "{{ oim_node_name }}" + cluster_domain: "{{ domain_name }}" + cluster_boot_ip: "{{ admin_nic_ip }}" + cluster_boot_interface: "{{ admin_nic }}" + cluster_shortname: "nid" + cluster_nidlength: 3 + +- name: Set openchami network configuration facts + ansible.builtin.set_fact: + coredhcp_dhcp_pool: >- + {{ network_data.admin_network.dynamic_range | split('-') | first }} + {{ network_data.admin_network.dynamic_range | split('-') | last }} + coredhcp_netmask: >- + {{ (admin_nic_ip + '/' + network_data.admin_network.netmask_bits) | ansible.utils.ipaddr('netmask') }} + coredhcp_lease_duration: "{{ default_lease_time }}s" + +- name: Set multi-subnet coredhcp facts + ansible.builtin.set_fact: + additional_subnets: "{{ network_data.admin_network.additional_subnets | default([]) }}" + +- name: Override DNS forwarders from network_spec if configured + ansible.builtin.set_fact: + dns_forwarders: "{{ network_data.admin_network.dns }}" + when: (network_data.admin_network.dns | default([])) | length > 0 + +- name: Refresh coredhcp config file + ansible.builtin.template: + src: coredhcp/coredhcp.yaml.j2 + dest: "{{ openchami_config_dir }}/coredhcp.yaml" + 
owner: root + group: root + mode: "{{ file_perm_rw }}" + register: coredhcp_config_result + +- name: Refresh coredns Corefile config + ansible.builtin.template: + src: coredns/Corefile.j2 + dest: "{{ openchami_config_dir }}/Corefile" + owner: root + group: root + mode: "{{ file_perm_rw }}" + register: coredns_config_result + +- name: Restart openchami services if configs changed + ansible.builtin.systemd: + name: openchami.target + state: restarted + daemon_reload: true + when: coredhcp_config_result.changed or coredns_config_result.changed diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 index 9dd198a59e..d5ded992bb 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 +++ b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 @@ -1,3 +1,6 @@ +{% set coresmd_ver = openchami_coresmd_tag | regex_replace('^v', '') %} +{% set multisubnet_native = coresmd_ver is version('0.6.0', '>=') %} +{% set has_additional_subnets = additional_subnets | default([]) | length > 0 %} server4: listen: - "%{{ cluster_boot_interface }}" @@ -6,10 +9,8 @@ server4: - dns: {{ coredhcp_dns_server }} - router: {{ coredhcp_router }} - netmask: {{ coredhcp_netmask }} -{% if coredhcp_subnets | default([]) | length > 0 %} - # Multi-subnet mode: uses key=value config format (requires coresmd v0.5+ rich rules) - # Subnet-specific rules use subnet: match keys with routers:/cidr: action keys - # to override DHCP options 3 (router) and 1 (netmask) per relay subnet. 
+{% if multisubnet_native and has_additional_subnets %} + # Multi-subnet mode: key=value config format (coresmd v0.6.x+) - coresmd: | svc_base_uri=https://{{ cluster_name }}.{{ cluster_domain }}:8443 ipxe_base_uri=http://{{ cluster_boot_ip }}:8081 @@ -17,9 +18,9 @@ server4: cache_valid={{ coredhcp_cache_validity }} lease_time={{ coredhcp_lease_duration }} single_port={{ coredhcp_tftp_single_port_mode | lower }} -{% for s in coredhcp_subnets %} - rule=subnet:{{ s.cidr }},type:Node,routers:{{ s.router }},cidr:{{ s.netmask_bits }} - rule=subnet:{{ s.cidr }},type:NodeBMC,routers:{{ s.router }},cidr:{{ s.netmask_bits }} +{% for s in additional_subnets %} + rule=subnet:{{ s.subnet }}/{{ s.netmask_bits }},type:Node,routers:{{ s.router }},cidr:{{ s.netmask_bits }} + rule=subnet:{{ s.subnet }}/{{ s.netmask_bits }},type:NodeBMC,routers:{{ s.router }},cidr:{{ s.netmask_bits }} {% endfor %} rule=type:Node rule=type:NodeBMC @@ -28,11 +29,44 @@ server4: lease_file=/tmp/coredhcp.db script_path={{ coredhcp_custom_ipxe }} lease_time={{ coredhcp_tmp_lease_duration }} -{% for sp in coredhcp_subnet_pools %} - subnet_pool={{ sp.cidr }},{{ sp.start }},{{ sp.end }} +{% for s in additional_subnets %} +{% set range_parts = s.dynamic_range | split('-') %} + subnet_pool={{ s.subnet }}/{{ s.netmask_bits }},{{ range_parts[0] }},{{ range_parts[1] }} {% endfor %} {% else %} - # Single-subnet mode: positional argument format compatible with coresmd v0.4.x + # Single-subnet mode: positional argument format (coresmd v0.4.x) - coresmd: https://{{ cluster_name }}.{{ cluster_domain }}:8443 http://{{ cluster_boot_ip }}:8081 /root_ca/root_ca.crt {{ coredhcp_cache_validity }} {{ coredhcp_lease_duration }} {{ coredhcp_tftp_single_port_mode | lower }} - bootloop: /tmp/coredhcp.db {{ coredhcp_custom_ipxe }} {{ coredhcp_tmp_lease_duration }} {{ coredhcp_dhcp_pool }} +{% if has_additional_subnets %} + # ------------------------------------------------------------------- + # Multi-subnet configuration 
(requires coresmd v0.6.x+) + # To enable multi-subnet DHCP: + # 1. Pull the new coresmd image: podman pull ghcr.io/openchami/coresmd:v0.6.x + # 2. Comment out the single-subnet coresmd and bootloop lines above + # 3. Uncomment the multi-subnet coresmd and bootloop blocks below + # 4. Restart services: systemctl restart openchami.target + # ------------------------------------------------------------------- + # - coresmd: | + # svc_base_uri=https://{{ cluster_name }}.{{ cluster_domain }}:8443 + # ipxe_base_uri=http://{{ cluster_boot_ip }}:8081 + # ca_cert=/root_ca/root_ca.crt + # cache_valid={{ coredhcp_cache_validity }} + # lease_time={{ coredhcp_lease_duration }} + # single_port={{ coredhcp_tftp_single_port_mode | lower }} +{% for s in additional_subnets %} + # rule=subnet:{{ s.subnet }}/{{ s.netmask_bits }},type:Node,routers:{{ s.router }},cidr:{{ s.netmask_bits }} + # rule=subnet:{{ s.subnet }}/{{ s.netmask_bits }},type:NodeBMC,routers:{{ s.router }},cidr:{{ s.netmask_bits }} +{% endfor %} + # rule=type:Node + # rule=type:NodeBMC + # rule=hostname:unknown-{{'{'}}04d{{'}'}} + # - bootloop: | + # lease_file=/tmp/coredhcp.db + # script_path={{ coredhcp_custom_ipxe }} + # lease_time={{ coredhcp_tmp_lease_duration }} +{% for s in additional_subnets %} +{% set range_parts = s.dynamic_range | split('-') %} + # subnet_pool={{ s.subnet }}/{{ s.netmask_bits }},{{ range_parts[0] }},{{ range_parts[1] }} +{% endfor %} +{% endif %} {% endif %} From 71b899fed803754579e78a16e0fa75706171ec6d Mon Sep 17 00:00:00 2001 From: pullan1 Date: Mon, 8 Jun 2026 16:00:13 +0530 Subject: [PATCH 43/78] localrepo checkmarx fixes (#4703) Signed-off-by: pullan1 --- .../module_utils/local_repo/download_image.py | 12 ++++-- .../local_repo/process_parallel.py | 43 +++++++++++-------- common/library/modules/parallel_tasks.py | 4 +- .../tasks/check_rhel_subscription.yml | 11 +++++ 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/common/library/module_utils/local_repo/download_image.py 
b/common/library/module_utils/local_repo/download_image.py index 98a1cb5b66..793999772a 100644 --- a/common/library/module_utils/local_repo/download_image.py +++ b/common/library/module_utils/local_repo/download_image.py @@ -19,6 +19,7 @@ import json from multiprocessing import Lock from jinja2 import Template +from ansible.module_utils.local_repo.process_parallel import docker_password_cipher from ansible.module_utils.local_repo.standard_logger import setup_standard_logger from ansible.module_utils.local_repo.parse_and_download import execute_command,write_status_to_file from ansible.module_utils.local_repo.user_image_utility import handle_user_image_registry @@ -40,7 +41,7 @@ file_lock = Lock() def create_container_remote_with_auth(remote_name, remote_url, package, policy_type, - tag, logger, docker_username, docker_password): + tag, logger, docker_username, docker_secret_token): """ Create a container remote with authentication. @@ -61,6 +62,9 @@ def create_container_remote_with_auth(remote_name, remote_url, package, policy_t bool: True if the container remote was created or updated successfully, False otherwise. """ try: + docker_password = docker_password_cipher.decrypt( + docker_secret_token.encode("utf-8") + ).decode("utf-8") remote_exists = execute_command(pulp_container_commands["show_container_remote"] % remote_name, logger) if not remote_exists: tags_json = json.dumps([tag]) # --> '["1.25.2-alpine"]' @@ -263,7 +267,7 @@ def get_repo_url_and_content(package): # raise ValueError(f"Unsupported package prefix for package: {package}") def process_image(package, status_file_path, version_variables, - user_registries,docker_username, docker_password, logger): + user_registries,docker_username, docker_secret_token, logger): """ Process an image. 
Args: @@ -331,10 +335,10 @@ def process_image(package, status_file_path, version_variables, package_identifier += f":{package['tag']}" with remote_creation_lock: - if package['package'].startswith('docker.io/') and docker_username and docker_password: + if package['package'].startswith('docker.io/') and docker_username and docker_secret_token: result = create_container_remote_with_auth( remote_name, base_url, package_content, policy_type, - tag_val, logger, docker_username, docker_password + tag_val, logger, docker_username, docker_secret_token ) else: result = create_container_remote( diff --git a/common/library/module_utils/local_repo/process_parallel.py b/common/library/module_utils/local_repo/process_parallel.py index 63c8560ba2..b6a9cc68c6 100644 --- a/common/library/module_utils/local_repo/process_parallel.py +++ b/common/library/module_utils/local_repo/process_parallel.py @@ -23,8 +23,9 @@ import traceback import json import yaml -import json import requests +from pathlib import Path +from cryptography.fernet import Fernet from jinja2 import Template from ansible.module_utils.local_repo.common_functions import ( load_yaml_file, @@ -39,6 +40,7 @@ ) # Global lock for logging synchronization log_lock = multiprocessing.Lock() +docker_password_cipher = Fernet(Fernet.generate_key()) def load_docker_credentials(vault_yml_path, vault_password_file): """ @@ -83,18 +85,24 @@ def load_docker_credentials(vault_yml_path, vault_password_file): ) data = yaml.safe_load(result.stdout) else: - with open(vault_yml_path, "r", encoding="utf-8") as fh: - data = yaml.safe_load(fh) + data = yaml.safe_load(Path(vault_yml_path).read_text(encoding="utf-8")) docker_username = data.get("docker_username") - docker_password = data.get("docker_password") + docker_secret_token = None + if data.get("docker_password"): + docker_secret_token = docker_password_cipher.encrypt( + data.get("docker_password").encode("utf-8") + ).decode("utf-8") # If either credential is missing, skip validation - 
if not docker_username or not docker_password: + if not docker_username or not docker_secret_token: return None, None # Validate credentials using Docker Hub API try: - payload = json.dumps({"username": docker_username, "password": docker_password}) + validation_secret = docker_password_cipher.decrypt( + docker_secret_token.encode("utf-8") + ).decode("utf-8") + payload = json.dumps({"username": docker_username, "password": validation_secret}) response = requests.post( "https://hub.docker.com/v2/users/login/", data=payload, @@ -106,7 +114,7 @@ def load_docker_credentials(vault_yml_path, vault_password_file): ) if response.status_code == 200: - return docker_username, docker_password + return docker_username, docker_secret_token if response.status_code == 429: raise RuntimeError("Docker Hub rate limit exceeded. Please try again later.") @@ -186,7 +194,7 @@ def setup_logger(log_dir,log_file_path): def execute_task(task, determine_function, user_data, version_variables, arc, repo_store_path, csv_file_path,logger, user_registries, - docker_username, docker_password, timeout=None): + docker_username, docker_secret_token, timeout=None): """ Executes a task by determining the appropriate function to call, managing execution time, handling timeouts, and logging the results. 
@@ -221,7 +229,7 @@ def execute_task(task, determine_function, user_data, version_variables, arc, # Determine the function and its arguments using the provided `determine_function` function, args = determine_function(task, repo_store_path, csv_file_path, user_data, - version_variables, arc, user_registries, docker_username, docker_password) + version_variables, arc, user_registries, docker_username, docker_secret_token) while True: elapsed_time = time.time() - start_time # Calculate elapsed time @@ -276,7 +284,7 @@ def execute_task(task, determine_function, user_data, version_variables, arc, } def worker_process(task, determine_function, user_data, version_variables, arc, repo_store_path, csv_file_path, log_dir, result_queue, user_registries, - docker_username, docker_password, timeout): + omnia_credentials_yaml_path, omnia_credentials_vault_path, timeout): """ Executes a task in a separate worker process, logs the process execution, and puts the result in a result queue. @@ -293,7 +301,8 @@ def worker_process(task, determine_function, user_data, version_variables, arc, result_queue (multiprocessing.Queue): Queue for putting the result of the task execution (used for inter-process communication). docker_username: Docker username provided by the user - docker_password: Docker password for the provided username + omnia_credentials_yaml_path: Path to the Omnia credentials YAML file + omnia_credentials_vault_path: Path to the Omnia credentials vault password file user_registries (str): List of user registries timeout (float): The maximum allowed time for the task execution. 
Returns: @@ -307,10 +316,12 @@ def worker_process(task, determine_function, user_data, version_variables, arc, # Log the start of the worker process execution with log_lock: logger.info(f"Worker process {os.getpid()} started execution.") + docker_username, docker_secret_token = load_docker_credentials(omnia_credentials_yaml_path, + omnia_credentials_vault_path) # Execute the task by calling the `execute_task` function and passing necessary arguments result = execute_task(task, determine_function, user_data, version_variables, arc, repo_store_path, csv_file_path, logger, user_registries, - docker_username, docker_password, timeout) + docker_username, docker_secret_token, timeout) result["logname"] = f"package_status_{os.getpid()}.log" # Put the result of the task execution into the result_queue for further processing result_queue.put(result) @@ -389,12 +400,6 @@ def execute_parallel( # registry["username"] = creds.get("username") # registry["password"] = creds.get("password") - - try: - docker_username, docker_password = load_docker_credentials(omnia_credentials_yaml_path, - omnia_credentials_vault_path) - except RuntimeError as e: - raise # Create a pool of worker processes to handle the tasks with multiprocessing.Pool(processes=nthreads) as pool: task_results = [] # List to hold references to the async results of the tasks @@ -406,7 +411,7 @@ def execute_parallel( task['package'] = package_name task_results.append(pool.apply_async(worker_process, (task, determine_function, user_data, version_variables, arc, repo_store_path, csv_file_path, log_dir, result_queue, - user_registries,docker_username, docker_password, timeout))) + user_registries, omnia_credentials_yaml_path, omnia_credentials_vault_path, timeout))) pool.close() # Close the pool to new tasks once all have been submitted start_time = time.time() # Start time for overall task execution diff --git a/common/library/modules/parallel_tasks.py b/common/library/modules/parallel_tasks.py index 
48b5d6b5dc..e696256bed 100644 --- a/common/library/modules/parallel_tasks.py +++ b/common/library/modules/parallel_tasks.py @@ -131,7 +131,7 @@ def update_status_csv(csv_dir, software, overall_status,slogger): def determine_function( task, repo_store_path, csv_file_path, user_data, version_variables, arc, - user_registries, docker_username, docker_password + user_registries, docker_username, docker_secret_token ): """ Determines the appropriate function and its arguments to process a given task. @@ -211,7 +211,7 @@ def determine_function( if task_type == "image": return process_image, [ task, status_file, version_variables, user_registries, - docker_username, docker_password + docker_username, docker_secret_token ] if task_type == "rpm_file": return process_rpm_file, [ diff --git a/input_validation/roles/validate_subscription/tasks/check_rhel_subscription.yml b/input_validation/roles/validate_subscription/tasks/check_rhel_subscription.yml index 31007eb059..09ad929f51 100644 --- a/input_validation/roles/validate_subscription/tasks/check_rhel_subscription.yml +++ b/input_validation/roles/validate_subscription/tasks/check_rhel_subscription.yml @@ -75,6 +75,17 @@ state: directory mode: "{{ hostvars['localhost']['dir_permissions_755'] }}" + - name: Remove stale files from rhel_repo_cert_dir before fresh copy + ansible.builtin.file: + path: "{{ rhel_repo_cert_dir }}" + state: absent + + - name: Recreate rhel_repo_cert_dir after cleanup + ansible.builtin.file: + path: "{{ rhel_repo_cert_dir }}" + state: directory + mode: "{{ hostvars['localhost']['dir_permissions_755'] }}" + - name: Find entitlement certs on oim ansible.builtin.shell: | set -o pipefail From c24c64efa0b63ee7e0fab1cbdb06f48c8f0f71f3 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 8 Jun 2026 16:52:18 +0530 Subject: [PATCH 44/78] Update etcd-disk-setup.sh.j2 Signed-off-by: Katakam Rakesh Naga Sai 
<125246792+Katakam-Rakesh@users.noreply.github.com> --- .../templates/k8s-scripts/etcd-disk-setup.sh.j2 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/provision/roles/configure_ochami/templates/k8s-scripts/etcd-disk-setup.sh.j2 b/provision/roles/configure_ochami/templates/k8s-scripts/etcd-disk-setup.sh.j2 index 4bfef08b50..7f708ff62f 100644 --- a/provision/roles/configure_ochami/templates/k8s-scripts/etcd-disk-setup.sh.j2 +++ b/provision/roles/configure_ochami/templates/k8s-scripts/etcd-disk-setup.sh.j2 @@ -40,6 +40,13 @@ dump_fstab_entry() { echo "" echo "$(ts) [INFO] ===== START etcd-disk-setup =====" +echo "Loading Drivers" +modprobe nvme +modprobe ahci +modprobe sd_mod +modprobe megaraid_sas +echo "Drivers Loaded" +sleep 20 # ── Detect root disk ───────────────────────────────────────────────────────── From 72109176ec437a6bc74953c69626e5e843a698c0 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 8 Jun 2026 16:52:46 +0530 Subject: [PATCH 45/78] Update etcd-fstab-update.sh.j2 Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- .../templates/k8s-scripts/etcd-fstab-update.sh.j2 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/provision/roles/configure_ochami/templates/k8s-scripts/etcd-fstab-update.sh.j2 b/provision/roles/configure_ochami/templates/k8s-scripts/etcd-fstab-update.sh.j2 index 2bcf47673b..46c7d29e8e 100644 --- a/provision/roles/configure_ochami/templates/k8s-scripts/etcd-fstab-update.sh.j2 +++ b/provision/roles/configure_ochami/templates/k8s-scripts/etcd-fstab-update.sh.j2 @@ -39,6 +39,13 @@ dump_fstab_entry() { echo "" echo "$(ts) [INFO] ===== START diskless-update-etcd-mount-fstab =====" echo "$(ts) [INFO] NO wipe | NO format | detect → mount → fstab" +echo "Loading Drivers" +modprobe nvme +modprobe ahci +modprobe sd_mod +modprobe megaraid_sas +echo "Drivers Loaded" +sleep 20 # ── Detect root disk 
───────────────────────────────────────────────────────── echo "$(ts) [STEP] Detecting root disk" From 690ee6bb472a7ae94155641697003d06a5906a48 Mon Sep 17 00:00:00 2001 From: mithileshreddy04 Date: Mon, 8 Jun 2026 17:07:27 +0530 Subject: [PATCH 46/78] Backup permission issue fix and recheck for rollback openchami Signed-off-by: mithileshreddy04 --- omnia.sh | 21 ++- .../roles/rollback_openchami/tasks/main.yml | 7 + .../tasks/normalize_permissions.yml | 139 ++++++++++++++++++ .../roles/rollback_openchami/vars/main.yml | 30 ++++ .../tasks/backup_openchami.yml | 12 ++ 5 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 rollback/roles/rollback_openchami/tasks/normalize_permissions.yml diff --git a/omnia.sh b/omnia.sh index 0b04bd94c5..783d3766e9 100755 --- a/omnia.sh +++ b/omnia.sh @@ -1858,6 +1858,14 @@ backup_openchami_data() { echo "[INFO] [ORCHESTRATOR] Quadlet .network files backed up" fi + # Normalize permissions on the openchami backup tree so rollback can + # read it even under NFS root_squash. cp -a preserves the source + # permissions which may be restrictive (container-created files). 
+ podman exec -u root omnia_core bash -c " + find '${backup_base%/}/openchami' -type d -exec chmod 0755 {} + 2>/dev/null || true + find '${backup_base%/}/openchami' -type f -exec chmod 0644 {} + 2>/dev/null || true + " 2>/dev/null || true + echo "[INFO] [ORCHESTRATOR] OpenCHAMI data backup completed: ${backup_base}/openchami/" return 0 } @@ -1881,17 +1889,18 @@ phase3_backup_creation() { set -e rm -rf '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs' mkdir -p '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs' - chmod 0700 '${backup_base%/}' '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs' + chmod 0755 '${backup_base%/}' + chmod 0755 '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs' if [ -f '$CONTAINER_INPUT_DIR/default.yml' ]; then cp -a '$CONTAINER_INPUT_DIR/default.yml' '${backup_base%/}/input/' - chmod 0600 '${backup_base%/}/input/default.yml' + chmod 0644 '${backup_base%/}/input/default.yml' fi if [ -d '$CONTAINER_INPUT_DIR/project_default' ]; then cp -a '$CONTAINER_INPUT_DIR/project_default' '${backup_base%/}/input/' - chmod -R 0600 '${backup_base%/}/input/project_default'/* - find '${backup_base%/}/input/project_default' -type d -exec chmod 0700 {} \; + chmod -R 0644 '${backup_base%/}/input/project_default'/* + find '${backup_base%/}/input/project_default' -type d -exec chmod 0755 {} \; fi if [ ! 
-f '$CONTAINER_METADATA_FILE' ]; then @@ -1899,7 +1908,7 @@ phase3_backup_creation() { exit 1 fi cp -a '$CONTAINER_METADATA_FILE' '${backup_base%/}/metadata/oim_metadata.yml' - chmod 0600 '${backup_base%/}/metadata/oim_metadata.yml' + chmod 0644 '${backup_base%/}/metadata/oim_metadata.yml' "; then echo "[ERROR] [ORCHESTRATOR] Backup failed; cleaning up partial backup" podman exec -u root omnia_core bash -c "rm -rf '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'" >/dev/null 2>&1 || true @@ -1912,7 +1921,7 @@ phase3_backup_creation() { podman exec -u root omnia_core bash -c "rm -rf '${backup_base%/}/input' '${backup_base%/}/metadata' '${backup_base%/}/configs'" >/dev/null 2>&1 || true return 1 fi - podman exec -u root omnia_core chmod 0600 "${backup_base%/}/configs/omnia_core.container" 2>/dev/null || true + podman exec -u root omnia_core chmod 0644 "${backup_base%/}/configs/omnia_core.container" 2>/dev/null || true fi echo "[INFO] [ORCHESTRATOR] Backup created at: $backup_base" diff --git a/rollback/roles/rollback_openchami/tasks/main.yml b/rollback/roles/rollback_openchami/tasks/main.yml index 74d54ed148..244dc7d930 100644 --- a/rollback/roles/rollback_openchami/tasks/main.yml +++ b/rollback/roles/rollback_openchami/tasks/main.yml @@ -40,6 +40,13 @@ - name: OpenCHAMI rollback workflow block: + # Normalize backup + /etc/openchami permissions BEFORE any checks or + # restores. The backup is created with `cp -a` which preserves restrictive + # source modes; on the shared NFS path those bits cause permission-denied + # when the rollback reads/restores configs, pg-init, and the pg_dump. 
+ - name: Normalize backup and config permissions + ansible.builtin.include_tasks: normalize_permissions.yml + - name: Pre-rollback validation checks ansible.builtin.include_tasks: pre_rollback_checks.yml diff --git a/rollback/roles/rollback_openchami/tasks/normalize_permissions.yml b/rollback/roles/rollback_openchami/tasks/normalize_permissions.yml new file mode 100644 index 0000000000..937601d340 --- /dev/null +++ b/rollback/roles/rollback_openchami/tasks/normalize_permissions.yml @@ -0,0 +1,139 @@ +# Copyright 2026 Dell Inc. or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +# ============================================================================ +# normalize_permissions.yml — Normalize Backup & Config Permissions +# ============================================================================ +# The pre-upgrade backup is created with `cp -a`, which PRESERVES the source +# permissions of /etc/openchami and its contents. Several of those files are +# created by containers running as non-root UIDs (e.g. configs/, pg-init/, +# postgresql_backup/openchami.sql) and may carry restrictive modes. On the +# shared NFS backup path (root_squash), those restrictive bits cause +# "permission denied" when the rollback later reads the backup or restores +# /etc/openchami — which makes the final OpenCHAMI rollback step fail. 
+# +# This task normalizes permissions BEFORE the restore runs: +# - Directories -> 0755 +# - Files -> 0644 +# for two roots: +# 1. Backup root on the core container : {{ rollback_backup_dir }}/openchami +# 2. /etc/openchami on the OIM host : {{ openchami_etc_dir }} +# +# Behaviour: +# - If permissions are simply wrong, they are corrected automatically and +# the rollback proceeds with no user action. +# - If any directory/file CANNOT be made readable (chmod denied / immutable +# / read-only mount), a clear error is shown asking the operator to fix +# the permissions manually and re-run the OpenCHAMI/OIM rollback with +# `--tags oim`. +# ============================================================================ + +- name: Normalize backup and /etc/openchami permissions + block: + # ── Resolve the two roots to normalize ────────────────────────────── + - name: Set permission normalization targets + ansible.builtin.set_fact: + normalize_backup_root: "{{ rollback_backup_dir }}/openchami" + normalize_etc_root: "{{ openchami_etc_dir }}" + + # ── 1. Backup root (core container — local, on shared NFS path) ────── + - name: Check backup root exists (core container) + ansible.builtin.stat: + path: "{{ normalize_backup_root }}" + register: normalize_backup_root_stat + + - name: Normalize permissions on backup root (core container) + ansible.builtin.shell: | + set -o pipefail + find "{{ normalize_backup_root }}" -type d -exec chmod {{ dir_permissions_755 }} {} + 2>/dev/null || true + find "{{ normalize_backup_root }}" -type f -exec chmod {{ file_permissions_644 }} {} + 2>/dev/null || true + register: normalize_backup_chmod + changed_when: true + failed_when: false + when: normalize_backup_root_stat.stat.exists | default(false) + + - name: Detect unreadable items under backup root (core container) + ansible.builtin.shell: | + set -o pipefail + { + find "{{ normalize_backup_root }}" -type d \( ! -readable -o ! 
-executable \) -print + find "{{ normalize_backup_root }}" -type f ! -readable -print + } 2>/dev/null + register: normalize_backup_bad + changed_when: false + failed_when: false + when: normalize_backup_root_stat.stat.exists | default(false) + + # ── 2. /etc/openchami (OIM host — delegated) ──────────────────────── + - name: Check /etc/openchami exists (OIM host) + ansible.builtin.stat: + path: "{{ normalize_etc_root }}" + register: normalize_etc_root_stat + delegate_to: oim + delegate_facts: true + connection: ssh + + - name: Normalize permissions on /etc/openchami (OIM host) + ansible.builtin.shell: | + set -o pipefail + find "{{ normalize_etc_root }}" -type d -exec chmod {{ dir_permissions_755 }} {} + 2>/dev/null || true + find "{{ normalize_etc_root }}" -type f -exec chmod {{ file_permissions_644 }} {} + 2>/dev/null || true + register: normalize_etc_chmod + changed_when: true + failed_when: false + when: normalize_etc_root_stat.stat.exists | default(false) + delegate_to: oim + delegate_facts: true + connection: ssh + + - name: Detect unreadable items under /etc/openchami (OIM host) + ansible.builtin.shell: | + set -o pipefail + { + find "{{ normalize_etc_root }}" -type d \( ! -readable -o ! -executable \) -print + find "{{ normalize_etc_root }}" -type f ! -readable -print + } 2>/dev/null + register: normalize_etc_bad + changed_when: false + failed_when: false + when: normalize_etc_root_stat.stat.exists | default(false) + delegate_to: oim + delegate_facts: true + connection: ssh + + # ── 3. 
Aggregate results and fail with guidance if unfixable ──────── + - name: Build list of items that could not be normalized + ansible.builtin.set_fact: + normalize_unfixable_backup: "{{ (normalize_backup_bad.stdout_lines | default([])) | select('string') | reject('equalto', '') | list }}" + normalize_unfixable_etc: "{{ (normalize_etc_bad.stdout_lines | default([])) | select('string') | reject('equalto', '') | list }}" + + - name: Display permission normalization summary + ansible.builtin.debug: + verbosity: 1 + msg: + - "Backup root: {{ normalize_backup_root }} ({{ 'present' if normalize_backup_root_stat.stat.exists | default(false) else 'absent' }})" + - "/etc/openchami: {{ normalize_etc_root }} ({{ 'present' if normalize_etc_root_stat.stat.exists | default(false) else 'absent' }})" + - "Directories normalized to {{ dir_permissions_755 }}, files normalized to {{ file_permissions_644 }}." + - "Unreadable items remaining (backup): {{ normalize_unfixable_backup | length }}" + - "Unreadable items remaining (/etc/openchami): {{ normalize_unfixable_etc | length }}" + + - name: Fail if permissions could not be corrected + ansible.builtin.fail: + msg: "{{ rollback_messages.permissions.unfixable }}" + when: (normalize_unfixable_backup | length > 0) or (normalize_unfixable_etc | length > 0) + + - name: Display permission normalization success + ansible.builtin.debug: + msg: "{{ rollback_messages.permissions.normalized }}" diff --git a/rollback/roles/rollback_openchami/vars/main.yml b/rollback/roles/rollback_openchami/vars/main.yml index b6ead1de08..c6672f1d35 100644 --- a/rollback/roles/rollback_openchami/vars/main.yml +++ b/rollback/roles/rollback_openchami/vars/main.yml @@ -32,6 +32,7 @@ rollback_backup_dir_default: "/opt/omnia/backups/upgrade/version_2.1.0.0" # OpenCHAMI configuration paths openchami_config_dir: "/etc/openchami/configs" +openchami_etc_dir: "/etc/openchami" openchami_config_vars_path: "/opt/omnia/openchami/configs_vars.yaml" openchami_work_dir: 
"/opt/omnia/openchami/workdir" ochami_dir: "/etc/ochami" @@ -127,6 +128,35 @@ rollback_messages: OpenCHAMI containers are already running v2.1 images. Rollback is not needed. Skipping. backup_found: "Backup directory found. Proceeding with rollback." + permissions: + normalized: "Backup and /etc/openchami permissions verified (directories 0755, files 0644)." + unfixable: | + ════════════════════════════════════════════ + OPENCHAMI ROLLBACK BLOCKED — PERMISSION ISSUE + ════════════════════════════════════════════ + One or more files/directories in the backup or /etc/openchami could not + be made readable. The rollback cannot read these to restore them. + + This usually happens on the shared NFS backup path (root_squash) when + container-created files carry restrictive ownership/permissions. + + Fix the permissions MANUALLY, then re-run ONLY the OpenCHAMI/OIM rollback: + + On the OIM host (as a user that owns the files, e.g. the NFS owner): + chmod -R u+rwX,go+rX /etc/openchami + chmod -R u+rwX,go+rX {{ rollback_backup_dir | default(rollback_backup_dir_default) }}/openchami + + Ensure all directories are 0755 and all files are 0644, and that none + are in a permission-denied / immutable state. Then retry: + + cd /opt/omnia/... && ansible-playbook rollback/rollback.yml --tags oim + + Relevant directories that must be readable: + /etc/openchami, /etc/openchami/configs, /etc/openchami/pg-init + {{ rollback_backup_dir | default(rollback_backup_dir_default) }}/openchami/etc_openchami + {{ rollback_backup_dir | default(rollback_backup_dir_default) }}/openchami/etc_openchami/configs + {{ rollback_backup_dir | default(rollback_backup_dir_default) }}/openchami/etc_openchami/pg-init + {{ rollback_backup_dir | default(rollback_backup_dir_default) }}/openchami/postgresql_backup restore: quadlets_success: "Restored v2.1 quadlet files from backup." quadlets_failure: "Failed to restore v2.1 quadlet files." 
diff --git a/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml b/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml index e3b33530c4..72742b968a 100644 --- a/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml +++ b/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml @@ -256,6 +256,18 @@ delegate_facts: true connection: ssh + - name: Normalize permissions on /etc/openchami backup (dirs 0755, files 0644) + ansible.builtin.shell: | + set -o pipefail + find "{{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }}" -type d -exec chmod {{ dir_permissions_755 }} {} + 2>/dev/null || true + find "{{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }}" -type f -exec chmod {{ file_permissions_644 }} {} + 2>/dev/null || true + changed_when: true + failed_when: false + when: etc_openchami_backup_result is defined and etc_openchami_backup_result.rc | default(1) == 0 + delegate_to: oim + delegate_facts: true + connection: ssh + - name: Verify /etc/openchami backup was created ansible.builtin.stat: path: "{{ oim_host_backup_dir }}/{{ backup_etc_openchami_subpath }}" From 88deba9997e26434d0a247146be07c65edbc0ca8 Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Mon, 8 Jun 2026 17:08:18 +0530 Subject: [PATCH 47/78] input validation for ldms and vector (#4704) * telemetry rollback and upgrade changes Signed-off-by: Kratika.Patidar * rollback and upgrade comments fixes Signed-off-by: Kratika.Patidar * changing component and tag name from k8s -> k8s-telemetry Signed-off-by: Kratika.Patidar * lint Signed-off-by: Kratika.Patidar * upgrade telemetry refinement changes Signed-off-by: Kratika.Patidar * skip and end play tsks updated Signed-off-by: Kratika.Patidar * Update process_rpm_config.py Signed-off-by: Kratika Patidar * ldms metrics input validation Signed-off-by: Kratika_Patidar --------- Signed-off-by: Kratika.Patidar Signed-off-by: Kratika Patidar Signed-off-by: Kratika_Patidar --- .../validation_flows/telemetry_validation.py | 20 
++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/telemetry_validation.py b/common/library/module_utils/input_validation/validation_flows/telemetry_validation.py index 97d988132c..f0bb56533a 100644 --- a/common/library/module_utils/input_validation/validation_flows/telemetry_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/telemetry_validation.py @@ -620,21 +620,13 @@ def validate_telemetry_config( f"ldms_source.metrics_enabled={ldms_source_enabled}" ) - # Validation 2: If LDMS source is enabled, Vector-LDMS bridge must also be enabled - # (LDMS only supports Kafka collection, requires Vector bridge to reach VictoriaMetrics) - if ldms_source_enabled and not vector_ldms_enabled: - errors.append(create_error_msg( - "telemetry_sources.ldms.metrics_enabled", - "true", - "LDMS source is enabled but Vector-LDMS bridge is disabled. " - "LDMS metrics can only reach VictoriaMetrics via the Vector-LDMS bridge. " - "If you want to check LDMS Metrics on VicotriaMetircs then:" - "Set telemetry_bridges.vector_ldms.metrics_enabled to true in telemetry_config.yml" - )) - logger.error( - "LDMS source enabled without Vector-LDMS bridge: " + elif ldms_source_enabled and not vector_ldms_enabled: + logger.info( + "LDMS source is enabled without Vector-LDMS bridge: " f"ldms_source.metrics_enabled={ldms_source_enabled}, " - f"vector_ldms.metrics_enabled={vector_ldms_enabled}" + f"vector_ldms.metrics_enabled={vector_ldms_enabled}. " + "LDMS metrics will flow to Kafka only. " + "To also route metrics to VictoriaMetrics, set telemetry_bridges.vector_ldms.metrics_enabled to true." 
) # # Validation 3: Verify Kafka collection target for LDMS From 10a9b96d3810be5ff4f26478d8bf2b9cd27f9505 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 17:48:44 +0530 Subject: [PATCH 48/78] kernel 10.2 fix Signed-off-by: Abhishek S A --- .../tasks/configure_bss_group.yml | 58 +++++++++++++++++++ .../tasks/validate_image.yml | 26 +++++++++ 2 files changed, 84 insertions(+) diff --git a/provision/roles/configure_ochami/tasks/configure_bss_group.yml b/provision/roles/configure_ochami/tasks/configure_bss_group.yml index d32776bdf7..3cc37718d7 100644 --- a/provision/roles/configure_ochami/tasks/configure_bss_group.yml +++ b/provision/roles/configure_ochami/tasks/configure_bss_group.yml @@ -100,6 +100,64 @@ - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 +- name: Retry S3 search without OS version filter (build stream - kernel override cross-version) + ansible.builtin.shell: | + set -o pipefail && \ + s3cmd ls -Hr s3://boot-images | \ + grep "{{ image_search_pattern }}{{ compute_image_suffix }}" | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' + changed_when: false + failed_when: false + register: verify_s3_image_bs_retry + when: + - enable_build_stream | default(false) + - (compute_image_suffix | default('')) != '' + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | default('') | length < 1 or initrd | default('') | length < 1 + +- name: Set kernel and initrd from retry results (build stream - kernel override cross-version) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _bs_lines: "{{ verify_s3_image_bs_retry.stdout_lines }}" + ansible.builtin.set_fact: + kernel: "{{ _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: 
+ - enable_build_stream | default(false) + - (compute_image_suffix | default('')) != '' + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | default('') | length < 1 or initrd | default('') | length < 1 + - verify_s3_image_bs_retry.stdout_lines | default([]) | length > 1 + - _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 + - _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 + +- name: Retry S3 search without OS version filter (default - kernel override cross-version) + ansible.builtin.shell: | + set -o pipefail && \ + s3cmd ls -Hr s3://boot-images | \ + grep "{{ image_search_pattern }}" | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' + changed_when: false + failed_when: false + register: verify_s3_image_retry + when: + - not enable_build_stream + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | default('') | length < 1 or initrd | default('') | length < 1 + +- name: Set kernel and initrd from retry results (default - kernel override cross-version) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _s3_lines: "{{ verify_s3_image_retry.stdout_lines }}" + ansible.builtin.set_fact: + kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: + - not enable_build_stream + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | default('') | length < 1 or initrd | default('') | length < 1 + - verify_s3_image_retry.stdout_lines | default([]) | length > 1 + - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 + - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 + - name: Load bss template - {{ functional_group_name }} ansible.builtin.template: 
src: "{{ bss_template }}" diff --git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index e00d49826a..3ca394f862 100644 --- a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -89,6 +89,32 @@ - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 +- name: Retry S3 search without OS version filter (kernel override cross-version) + ansible.builtin.shell: | + set -o pipefail && \ + s3cmd ls -Hr s3://boot-images | \ + grep "{{ image_search_pattern }}" | awk '{print $4}' | sed 's|s3://||' + changed_when: false + failed_when: false + register: verify_s3_image_retry + when: + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | length < 1 or initrd | length < 1 + +- name: Set kernel and initrd from retry results (kernel override cross-version) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _s3_lines: "{{ verify_s3_image_retry.stdout_lines }}" + ansible.builtin.set_fact: + kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | length < 1 or initrd | length < 1 + - verify_s3_image_retry.stdout_lines | default([]) | length > 1 + - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 + - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 + - name: Fail if kernel override did not match any S3 image ansible.builtin.fail: msg: >- From 5e7348f4795fe76018545212cd95caa258ba4693 Mon Sep 17 00:00:00 2001 From: pullan1 Date: Mon, 8 
Jun 2026 17:57:40 +0530 Subject: [PATCH 49/78] checkmarx fixes (#4706) * localrepo checkmarx fixes Signed-off-by: pullan1 * Checkmarx fixes Signed-off-by: pullan1 --------- Signed-off-by: pullan1 --- .../library/module_utils/local_repo/software_utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/common/library/module_utils/local_repo/software_utils.py b/common/library/module_utils/local_repo/software_utils.py index 2c8a3b3da0..da20edea12 100644 --- a/common/library/module_utils/local_repo/software_utils.py +++ b/common/library/module_utils/local_repo/software_utils.py @@ -230,14 +230,14 @@ def is_remote_url_reachable(remote_url, timeout=10, verify=ca_cert, timeout=timeout ) - except requests.exceptions.SSLError as ssl_exc: + except requests.exceptions.SSLError: # Python 3.13+ rejects CA certs with non-critical Basic # Constraints (RFC 5280 strict mode). Retry against the # SAME CA with VERIFY_X509_STRICT cleared — still validates # the full chain and hostname, just relaxes the one check. logger.warning( - f"Strict SSL verification failed for {remote_url}: " - f"{ssl_exc}. Retrying with VERIFY_X509_STRICT cleared.") + f"Strict SSL verification failed for {remote_url}. 
" + "Retrying with VERIFY_X509_STRICT cleared.") session = requests.Session() adapter = _RelaxedCAAdapter( ca_cert, client_cert, client_key) @@ -250,10 +250,9 @@ def is_remote_url_reachable(remote_url, timeout=10, logger.error( f"URL {remote_url} returned HTTP {response.status_code}") return response.status_code == 200 - except Exception as exc: + except Exception: logger.error( - f"URL reachability exception for {remote_url}: " - f"{type(exc).__name__}: {exc}") + f"URL reachability check failed for {remote_url}") return False def transform_package_dict(data, arch_val,logger): From 71e6592bd5a99767b18042586cd46722e0ebf49f Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 18:43:46 +0530 Subject: [PATCH 50/78] remove duplicate kernel check Signed-off-by: Abhishek S A --- .../tasks/configure_bss_group.yml | 133 +----------------- .../tasks/validate_image.yml | 10 ++ 2 files changed, 13 insertions(+), 130 deletions(-) diff --git a/provision/roles/configure_ochami/tasks/configure_bss_group.yml b/provision/roles/configure_ochami/tasks/configure_bss_group.yml index 3cc37718d7..c011c8b998 100644 --- a/provision/roles/configure_ochami/tasks/configure_bss_group.yml +++ b/provision/roles/configure_ochami/tasks/configure_bss_group.yml @@ -26,137 +26,10 @@ if (item is match('service_kube_')) and (hostvars['localhost']['service_k8s_version'] | default('') | length > 0) else '' }} -- name: Build image search pattern +- name: Set kernel and initrd from validated images ansible.builtin.set_fact: - image_search_pattern: >- - rhel-{{ functional_group_name }}{{ naming_suffix }} - -- name: Verify image, kernel and initramfs in S3 (build stream and image-key) - ansible.builtin.shell: | - set -o pipefail && \ - s3cmd ls -Hr s3://boot-images | \ - grep "{{ image_search_pattern }}{{ compute_image_suffix }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' - changed_when: false - failed_when: false - register: 
verify_s3_image_build_stream - when: - - enable_build_stream | default(false) - - (compute_image_suffix | default('')) != '' - -- name: Verify image, kernel and initramfs in S3 (default) - ansible.builtin.shell: | - set -o pipefail && \ - s3cmd ls -Hr s3://boot-images | \ - grep "{{ image_search_pattern }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' - changed_when: false - failed_when: false - register: verify_s3_image - when: - - not enable_build_stream - -- name: Set kernel and initrd variables (build stream) - ansible.builtin.set_fact: - kernel: "{{ verify_s3_image_build_stream.stdout_lines | select('search', 'vmlinuz') | list | first }}" - initrd: "{{ verify_s3_image_build_stream.stdout_lines | select('search', 'initramfs') | list | first }}" - when: - - enable_build_stream - - (compute_image_suffix | default('')) != '' - - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 - -- name: Set kernel and initrd variables (build stream - kernel override) - vars: - _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" - _bs_lines: "{{ verify_s3_image_build_stream.stdout_lines }}" - ansible.builtin.set_fact: - kernel: "{{ _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" - initrd: "{{ _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" - when: - - enable_build_stream - - (compute_image_suffix | default('')) != '' - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - - _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - -- name: Set kernel and initrd variables (default) - ansible.builtin.set_fact: - kernel: "{{ verify_s3_image.stdout_lines | select('search', 'vmlinuz') | list | first }}" - initrd: "{{ verify_s3_image.stdout_lines | select('search', 
'initramfs') | list | first }}" - when: - - not enable_build_stream - - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 - -- name: Set kernel and initrd variables (default - kernel override) - vars: - _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" - _s3_lines: "{{ verify_s3_image.stdout_lines }}" - ansible.builtin.set_fact: - kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" - initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" - when: - - not enable_build_stream - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - -- name: Retry S3 search without OS version filter (build stream - kernel override cross-version) - ansible.builtin.shell: | - set -o pipefail && \ - s3cmd ls -Hr s3://boot-images | \ - grep "{{ image_search_pattern }}{{ compute_image_suffix }}" | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' - changed_when: false - failed_when: false - register: verify_s3_image_bs_retry - when: - - enable_build_stream | default(false) - - (compute_image_suffix | default('')) != '' - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - kernel | default('') | length < 1 or initrd | default('') | length < 1 - -- name: Set kernel and initrd from retry results (build stream - kernel override cross-version) - vars: - _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" - _bs_lines: "{{ verify_s3_image_bs_retry.stdout_lines }}" - ansible.builtin.set_fact: - kernel: "{{ _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" - initrd: "{{ _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" - when: - - enable_build_stream | 
default(false) - - (compute_image_suffix | default('')) != '' - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - kernel | default('') | length < 1 or initrd | default('') | length < 1 - - verify_s3_image_bs_retry.stdout_lines | default([]) | length > 1 - - _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - - _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - -- name: Retry S3 search without OS version filter (default - kernel override cross-version) - ansible.builtin.shell: | - set -o pipefail && \ - s3cmd ls -Hr s3://boot-images | \ - grep "{{ image_search_pattern }}" | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' - changed_when: false - failed_when: false - register: verify_s3_image_retry - when: - - not enable_build_stream - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - kernel | default('') | length < 1 or initrd | default('') | length < 1 - -- name: Set kernel and initrd from retry results (default - kernel override cross-version) - vars: - _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" - _s3_lines: "{{ verify_s3_image_retry.stdout_lines }}" - ansible.builtin.set_fact: - kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" - initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" - when: - - not enable_build_stream - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - kernel | default('') | length < 1 or initrd | default('') | length < 1 - - verify_s3_image_retry.stdout_lines | default([]) | length > 1 - - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 + kernel: "{{ validated_images[functional_group_name].kernel }}" + initrd: "{{ 
validated_images[functional_group_name].initrd }}" - name: Load bss template - {{ functional_group_name }} ansible.builtin.template: diff --git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index 3ca394f862..965e03d724 100644 --- a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -67,6 +67,7 @@ ansible.builtin.set_fact: kernel: "" initrd: "" + validated_images: "{}" - name: Set kernel and initrd variables ansible.builtin.set_fact: @@ -115,6 +116,15 @@ - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 +- name: Store validated kernel and initrd for functional group + ansible.builtin.set_fact: + validated_images: >- + {{ validated_images | default({}) | combine({ + functional_group_name: {'kernel': kernel, 'initrd': initrd} + }) + }} + when: kernel | length > 0 and initrd | length > 0 + - name: Fail if kernel override did not match any S3 image ansible.builtin.fail: msg: >- From 0797f45e382428e84dacac6e4c1e03675ec6e951 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 18:50:40 +0530 Subject: [PATCH 51/78] Update validate_image.yml Signed-off-by: Abhishek S A --- provision/roles/provision_validations/tasks/validate_image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index 965e03d724..a594f319d8 100644 --- a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -67,7 +67,7 @@ ansible.builtin.set_fact: kernel: "" initrd: "" - validated_images: "{}" + validated_images: "{{ validated_images | default({}) }}" - name: Set kernel and initrd 
variables ansible.builtin.set_fact: From 31bbeadc2c3b5f3f2f5bbb992335876b8114ae8d Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:57:38 +0530 Subject: [PATCH 52/78] Update ci-group-service_kube_control_plane_first_x86_64.yaml.j2 Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- .../ci-group-service_kube_control_plane_first_x86_64.yaml.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 index 811c2d4e1f..377fd8778a 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 @@ -573,7 +573,7 @@ "--node-monitor-grace-period=40s" "--node-eviction-rate=1" "--secondary-node-eviction-rate=1" - "--terminated-pod-gc-threshold=50" + "--terminated-pod-gc-threshold=5" ) echo "Backing up kube-controller-manager manifest..." 
@@ -784,6 +784,7 @@ PYEOF grep -q "^syncFrequency:" $CONFIG_FILE || echo "syncFrequency: 60s" >> $CONFIG_FILE # Restart kubelet to apply changes + systemctl daemon-reload systemctl restart kubelet echo "Installing plugins" From bcc60ca8b66290a223944baebcd177d04366391c Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:58:25 +0530 Subject: [PATCH 53/78] Update ci-group-service_kube_control_plane_x86_64.yaml.j2 Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- .../ci-group-service_kube_control_plane_x86_64.yaml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 index 782c7970b9..d79471ce96 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_x86_64.yaml.j2 @@ -467,7 +467,7 @@ "--node-monitor-grace-period=40s" "--node-eviction-rate=1" "--secondary-node-eviction-rate=1" - "--terminated-pod-gc-threshold=50" + "--terminated-pod-gc-threshold=5" ) echo "Backing up kube-controller-manager manifest..." 
From 33dc6a8cb6a553bda69b8f8e8c9a26bff1039fff Mon Sep 17 00:00:00 2001 From: priti-parate <140157516+priti-parate@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:59:28 +0530 Subject: [PATCH 54/78] Updating default storage size for idrac telemetry containers (#4702) * upgrade defects fixes and fix for crashloopback on pod restart Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * remove stale services and deployments for victoria Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert changes as it si taken care in another Pr Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert idrac terminationgraceperiod Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * ansible lint fixes Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * rescue block for upgrade telemetry Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert upgrade telemetry Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * default size of idrac telemetry containers Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * add new line Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * update values in upgrade path Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * updating values in integer instead decimal Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> * revert service k8s json file Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> --------- Signed-off-by: priti-parate <140157516+priti-parate@users.noreply.github.com> --- .../config/x86_64/rhel/10.0/service_k8s_v1.35.1.json | 2 +- input/telemetry_storage_config.yml | 12 
++++++------ provision/roles/telemetry/vars/main.yml | 12 ++++++------ .../templates/telemetry_storage_config.j2 | 12 ++++++------ 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json b/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json index 966a94d7b9..1fc9bd65ef 100644 --- a/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json +++ b/input/config/x86_64/rhel/10.0/service_k8s_v1.35.1.json @@ -118,4 +118,4 @@ { "package": "quay.io/metallb/controller", "tag": "v0.15.3", "type": "image" } ] } -} +} \ No newline at end of file diff --git a/input/telemetry_storage_config.yml b/input/telemetry_storage_config.yml index d44a7ec68f..c80dbdde65 100644 --- a/input/telemetry_storage_config.yml +++ b/input/telemetry_storage_config.yml @@ -168,10 +168,10 @@ idrac_telemetry_storage: resources: requests: cpu: "100m" - memory: "256Mi" + memory: "512Mi" limits: cpu: "500m" - memory: "512Mi" + memory: "1536Mi" receiver: resources: requests: @@ -184,18 +184,18 @@ idrac_telemetry_storage: resources: requests: cpu: "50m" - memory: "64Mi" + memory: "128Mi" limits: cpu: "200m" - memory: "256Mi" + memory: "512Mi" victoria_pump: resources: requests: cpu: "50m" - memory: "64Mi" + memory: "128Mi" limits: cpu: "200m" - memory: "256Mi" + memory: "512Mi" # Kafka Storage resources kafka_storage: diff --git a/provision/roles/telemetry/vars/main.yml b/provision/roles/telemetry/vars/main.yml index 55f0ac6534..4f1f29f9fa 100644 --- a/provision/roles/telemetry/vars/main.yml +++ b/provision/roles/telemetry/vars/main.yml @@ -82,10 +82,10 @@ idrac_telemetry_resources: activemq: requests: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.activemq.resources.requests.cpu | default('100m') }}" - memory: "{{ telemetry_storage_config.idrac_telemetry_storage.activemq.resources.requests.memory | default('256Mi') }}" + memory: "{{ telemetry_storage_config.idrac_telemetry_storage.activemq.resources.requests.memory | default('512Mi') 
}}" limits: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.activemq.resources.limits.cpu | default('500m') }}" - memory: "{{ telemetry_storage_config.idrac_telemetry_storage.activemq.resources.limits.memory | default('512Mi') }}" + memory: "{{ telemetry_storage_config.idrac_telemetry_storage.activemq.resources.limits.memory | default('1.5Gi') }}" receiver: requests: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.receiver.resources.requests.cpu | default('100m') }}" @@ -96,17 +96,17 @@ idrac_telemetry_resources: kafka_pump: requests: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.kafka_pump.resources.requests.cpu | default('50m') }}" - memory: "{{ telemetry_storage_config.idrac_telemetry_storage.kafka_pump.resources.requests.memory | default('64Mi') }}" + memory: "{{ telemetry_storage_config.idrac_telemetry_storage.kafka_pump.resources.requests.memory | default('128Mi') }}" limits: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.kafka_pump.resources.limits.cpu | default('200m') }}" - memory: "{{ telemetry_storage_config.idrac_telemetry_storage.kafka_pump.resources.limits.memory | default('256Mi') }}" + memory: "{{ telemetry_storage_config.idrac_telemetry_storage.kafka_pump.resources.limits.memory | default('512Mi') }}" victoria_pump: requests: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.victoria_pump.resources.requests.cpu | default('50m') }}" - memory: "{{ telemetry_storage_config.idrac_telemetry_storage.victoria_pump.resources.requests.memory | default('64Mi') }}" + memory: "{{ telemetry_storage_config.idrac_telemetry_storage.victoria_pump.resources.requests.memory | default('128Mi') }}" limits: cpu: "{{ telemetry_storage_config.idrac_telemetry_storage.victoria_pump.resources.limits.cpu | default('200m') }}" - memory: "{{ telemetry_storage_config.idrac_telemetry_storage.victoria_pump.resources.limits.memory | default('256Mi') }}" + memory: "{{ 
telemetry_storage_config.idrac_telemetry_storage.victoria_pump.resources.limits.memory | default('512Mi') }}" # Usage: kafka_deployment.yml kafka: diff --git a/upgrade/roles/import_input_parameters/templates/telemetry_storage_config.j2 b/upgrade/roles/import_input_parameters/templates/telemetry_storage_config.j2 index d44a7ec68f..c80dbdde65 100644 --- a/upgrade/roles/import_input_parameters/templates/telemetry_storage_config.j2 +++ b/upgrade/roles/import_input_parameters/templates/telemetry_storage_config.j2 @@ -168,10 +168,10 @@ idrac_telemetry_storage: resources: requests: cpu: "100m" - memory: "256Mi" + memory: "512Mi" limits: cpu: "500m" - memory: "512Mi" + memory: "1536Mi" receiver: resources: requests: @@ -184,18 +184,18 @@ idrac_telemetry_storage: resources: requests: cpu: "50m" - memory: "64Mi" + memory: "128Mi" limits: cpu: "200m" - memory: "256Mi" + memory: "512Mi" victoria_pump: resources: requests: cpu: "50m" - memory: "64Mi" + memory: "128Mi" limits: cpu: "200m" - memory: "256Mi" + memory: "512Mi" # Kafka Storage resources kafka_storage: From b4c70f594a7fbccb165f4c1bd394a223ce433467 Mon Sep 17 00:00:00 2001 From: "balajikumaran.cs" Date: Mon, 8 Jun 2026 19:03:15 +0530 Subject: [PATCH 55/78] fix: prevent telemetry pods from getting stuck in ContainerCreating state (#4709) * input_validation changes for the powerscale telemetry * Update en_us_validation_msg.py Signed-off-by: balajikumaran.cs * Update en_us_validation_msg.py Signed-off-by: balajikumaran.cs * Update en_us_validation_msg.py Signed-off-by: balajikumaran.cs --------- Signed-off-by: balajikumaran.cs --- .../common_utils/en_us_validation_msg.py | 8 +++++++ .../powerscale_telemetry_validation.py | 23 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py b/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py index 305e743809..d77c8e32db 100644 --- 
a/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py +++ b/common/library/module_utils/input_validation/common_utils/en_us_validation_msg.py @@ -842,3 +842,11 @@ def get_logic_success(input_file_path): "To fix: Either set telemetry_sources.ldms.metrics_enabled=true to enable LDMS data collection, " "or set telemetry_bridges.vector_ldms.metrics_enabled=false to disable the Vector-LDMS bridge." ) + +# CSM Observability - Unsupported metrics validation messages +def powerscale_unsupported_metrics_enabled_msg(component_name, section_name, values_file_path): + """Returns error message when unsupported CSM metrics components are enabled.""" + return ( + f"{component_name} metrics collection not supported. " + f"Set {section_name}.enabled to false in {values_file_path} and rerun the playbook." + ) diff --git a/common/library/module_utils/input_validation/validation_flows/powerscale_telemetry_validation.py b/common/library/module_utils/input_validation/validation_flows/powerscale_telemetry_validation.py index d72659d542..18cd8a6cf3 100644 --- a/common/library/module_utils/input_validation/validation_flows/powerscale_telemetry_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/powerscale_telemetry_validation.py @@ -320,6 +320,29 @@ def validate_powerscale_telemetry_config( f"skipping image version validation" ) + # Validate unsupported metrics are not enabled + # Only PowerScale metrics should be enabled; PowerFlex, PowerStore, PowerMax + # require their own CSI drivers which are not part of this deployment + unsupported_metrics = { + "karaviMetricsPowerflex": ("PowerFlex", "karaviMetricsPowerflex"), + "karaviMetricsPowerstore": ("PowerStore", "karaviMetricsPowerstore"), + "karaviMetricsPowermax": ("PowerMax", "karaviMetricsPowermax"), + } + for section_key, (component_name, section_name) in unsupported_metrics.items(): + section = csm_values.get(section_key, {}) + if isinstance(section, dict) and 
section.get("enabled", False): + errors.append(create_error_msg( + f"{section_name}.enabled", + "true", + en_us_validation_msg.powerscale_unsupported_metrics_enabled_msg( + component_name, section_name, csm_values_path + ) + )) + logger.error( + f"Unsupported metrics component {section_name} is enabled " + f"in CSM Observability values file" + ) + logger.info("CSM Observability values.yaml validation passed") except (yaml.YAMLError, IOError) as e: errors.append(create_error_msg( From a77e94f9f7c95c787f578e669b9d75efb2cce6e1 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:06:45 +0530 Subject: [PATCH 56/78] Update install_cuda_driver.sh.j2 Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- .../hpc_tools/install_cuda_driver.sh.j2 | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 b/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 index f0f3da1b82..0ea6ebc170 100644 --- a/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 +++ b/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 @@ -3,24 +3,42 @@ # Never touches NFS lock artifacts. Never touches /hpc_tools/cuda contents. set -euo pipefail +DRIVER_VERSION="580.159.04" LOGFILE="/var/log/nvidia_install.log" + exec > >(tee -a "$LOGFILE") 2>&1 -echo "===== NVIDIA driver install =====" +echo "===== NVIDIA DRIVER INSTALL START =====" +# Check GPU presence if ! lspci | grep -qi nvidia; then echo "[INFO] No NVIDIA GPU detected. Exiting." exit 0 fi +# Install only if nvidia-smi is missing if command -v nvidia-smi >/dev/null 2>&1; then echo "[INFO] NVIDIA driver already installed. Skipping." else echo "[INFO] Installing NVIDIA driver via dnf..." 
- dnf install -y cuda-drivers - command -v nvidia-smi >/dev/null 2>&1 || { echo "[ERROR] Driver install failed."; exit 1; } + + dnf install -y \ + nvidia-driver-${DRIVER_VERSION} \ + nvidia-driver-cuda-${DRIVER_VERSION} + dkms autoinstall fi +# Load modules (safe even if already loaded) +modprobe nvidia || true +modprobe nvidia_uvm || true +modprobe nvidia_modeset || true +modprobe nvidia_drm || true + +# Ensure device nodes +echo "[INFO] Ensuring /dev/nvidia devices" +nvidia-modprobe -u || true + +# Enable persistence mode nvidia-smi -pm 1 || true # Mount shared toolkit at /usr/local/cuda (harmless if already mounted) From d20165a9c6dee08aa4c1c256f0e6946b2ce6b5eb Mon Sep 17 00:00:00 2001 From: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:25:07 +0530 Subject: [PATCH 57/78] Update version for cuda aarch Signed-off-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> --- provision/roles/configure_ochami/vars/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/provision/roles/configure_ochami/vars/main.yml b/provision/roles/configure_ochami/vars/main.yml index 982a164460..70f7b34bf2 100644 --- a/provision/roles/configure_ochami/vars/main.yml +++ b/provision/roles/configure_ochami/vars/main.yml @@ -152,3 +152,6 @@ ochami_discovery_failed_msg: | 4. Check ochami CLI: /usr/bin/ochami smd service status 5. Restart openchami.target: systemctl restart openchami.target 6. 
Rerun provision.yml: ansible-playbook provision/provision.yml + +# templates/hpc_tools/install_cuda_driver.sh.j2 - add a hyphen so that empty will also work for latest +cuda_driver_version: "-3:580.159.04-1.el10" From 3c0e0c729587c6c26b53ff0a9d148e45895cb7e0 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:26:21 +0530 Subject: [PATCH 58/78] Cuda for aarch version install_cuda_driver.sh.j2 Signed-off-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> --- .../templates/hpc_tools/install_cuda_driver.sh.j2 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 b/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 index f0f3da1b82..7f8c2940d5 100644 --- a/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 +++ b/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 @@ -17,7 +17,10 @@ if command -v nvidia-smi >/dev/null 2>&1; then echo "[INFO] NVIDIA driver already installed. Skipping." else echo "[INFO] Installing NVIDIA driver via dnf..." 
- dnf install -y cuda-drivers + if [ "$(uname -m)" = "aarch64" ]; then + dnf install -y cuda-drivers{{ cuda_driver_version }} + else + dnf install -y cuda-drivers command -v nvidia-smi >/dev/null 2>&1 || { echo "[ERROR] Driver install failed."; exit 1; } fi From 51362abe863beeb09e288439261096c1be18987e Mon Sep 17 00:00:00 2001 From: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:30:21 +0530 Subject: [PATCH 59/78] Update install_cuda_driver.sh.j2 Default empty Signed-off-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> --- .../templates/hpc_tools/install_cuda_driver.sh.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 b/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 index 7f8c2940d5..e8f7fc2e11 100644 --- a/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 +++ b/provision/roles/configure_ochami/templates/hpc_tools/install_cuda_driver.sh.j2 @@ -18,7 +18,7 @@ if command -v nvidia-smi >/dev/null 2>&1; then else echo "[INFO] Installing NVIDIA driver via dnf..." 
if [ "$(uname -m)" = "aarch64" ]; then - dnf install -y cuda-drivers{{ cuda_driver_version }} + dnf install -y cuda-drivers{{ cuda_driver_version | default('') }} else dnf install -y cuda-drivers command -v nvidia-smi >/dev/null 2>&1 || { echo "[ERROR] Driver install failed."; exit 1; } From c845c83ad733268a7cfa0ee362f1130d65eb1055 Mon Sep 17 00:00:00 2001 From: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> Date: Mon, 8 Jun 2026 19:36:13 +0530 Subject: [PATCH 60/78] Update update_kcm_arguments.yml Signed-off-by: Katakam Rakesh Naga Sai <125246792+Katakam-Rakesh@users.noreply.github.com> --- .../tasks/update_kcm_arguments.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml b/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml index 5d53d30e3a..f08196ea94 100644 --- a/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml +++ b/upgrade/roles/upgrade_k8s/tasks/update_kcm_arguments.yml @@ -52,7 +52,7 @@ - "--node-monitor-grace-period=40s" - "--node-eviction-rate=1" - "--secondary-node-eviction-rate=1" - - "--terminated-pod-gc-threshold=50" + - "--terminated-pod-gc-threshold=5" - "--controllers=*,nodeipam,nodelifecycle,bootstrapsigner,tokencleaner" - name: Initialize update flag @@ -95,7 +95,7 @@ add_or_update_arg "node-monitor-grace-period" "40s" add_or_update_arg "node-eviction-rate" "1" add_or_update_arg "secondary-node-eviction-rate" "1" - add_or_update_arg "terminated-pod-gc-threshold" "50" + add_or_update_arg "terminated-pod-gc-threshold" "5" add_or_update_arg "controllers" "*,nodeipam,nodelifecycle,bootstrapsigner,tokencleaner" echo "kube-controller-manager manifest updated successfully" @@ -114,6 +114,21 @@ - _kcm_update_result is defined - _kcm_update_result is changed +- name: Reload systemd daemon + ansible.builtin.systemd: + daemon_reload: true + when: + - _kcm_update_result is defined + - _kcm_update_result is changed + +- 
name: Restart kubelet service + ansible.builtin.systemd: + name: kubelet + state: restarted + when: + - _kcm_update_result is defined + - _kcm_update_result is changed + - name: Wait for kube-controller-manager to restart (kubelet auto-restarts static pods) ansible.builtin.pause: seconds: 30 From 87d8ccd7999e88758ca70c558e2033bb7ebf56f8 Mon Sep 17 00:00:00 2001 From: pullan1 Date: Mon, 8 Jun 2026 19:44:30 +0530 Subject: [PATCH 61/78] vast metrics relabeling fix (#4712) * localrepo checkmarx fixes Signed-off-by: pullan1 * Checkmarx fixes Signed-off-by: pullan1 * vast metrics relabeling fix Signed-off-by: pullan1 --------- Signed-off-by: pullan1 --- .../victoria-operator-vmscrape.yaml.j2 | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/provision/roles/telemetry/templates/telemetry/victoria/victoria-operator-vmscrape.yaml.j2 b/provision/roles/telemetry/templates/telemetry/victoria/victoria-operator-vmscrape.yaml.j2 index 8de7d1b393..0fa6a2f930 100644 --- a/provision/roles/telemetry/templates/telemetry/victoria/victoria-operator-vmscrape.yaml.j2 +++ b/provision/roles/telemetry/templates/telemetry/victoria/victoria-operator-vmscrape.yaml.j2 @@ -146,18 +146,12 @@ spec: name: ufm-telemetry-credentials key: password {% endif %} - relabelings: - - action: replace - sourceLabels: [] - targetLabel: source + relabelConfigs: + - targetLabel: source replacement: ufm - - action: replace - sourceLabels: [] - targetLabel: subsystem + - targetLabel: subsystem replacement: infiniband - - action: replace - sourceLabels: [] - targetLabel: job + - targetLabel: job replacement: ufm-infiniband-metrics {% endif %} @@ -201,17 +195,13 @@ spec: name: vast-telemetry-credentials key: password {% endif %} - relabelings: - - action: replace - sourceLabels: [] - targetLabel: source - replacement: vast - - action: replace - sourceLabels: [] - targetLabel: subsystem + relabelConfigs: + - targetLabel: source_subsystem replacement: vast - - action: replace - 
sourceLabels: [] - targetLabel: job + - targetLabel: subsystem + replacement: storage + - sourceLabels: [__meta_kubernetes_service_label_app] + targetLabel: vast_domain + - targetLabel: job replacement: vast-storage-metrics {% endif %} From 1d1515dcbd008e57f1fc4bd3dc0b0dad546e6b33 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 21:08:55 +0530 Subject: [PATCH 62/78] Update validate_image.yml Signed-off-by: Abhishek S A --- provision/roles/provision_validations/tasks/validate_image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index a594f319d8..4fa7356b60 100644 --- a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -53,7 +53,7 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | awk '{print $4}' | sed 's|s3://||' + grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false register: verify_s3_image From 958493c1bc034e583b9cd254254c576d17359031 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 21:26:57 +0530 Subject: [PATCH 63/78] Kernel 10.2 provisioning support (#4707) * kernel 10.2 fix Signed-off-by: Abhishek S A * remove duplicate kernel check Signed-off-by: Abhishek S A * Update validate_image.yml Signed-off-by: Abhishek S A * Update validate_image.yml Signed-off-by: Abhishek S A --------- Signed-off-by: Abhishek S A --- .../tasks/configure_bss_group.yml | 75 +------------------ .../tasks/validate_image.yml | 38 +++++++++- 2 files changed, 40 insertions(+), 73 deletions(-) diff --git a/provision/roles/configure_ochami/tasks/configure_bss_group.yml b/provision/roles/configure_ochami/tasks/configure_bss_group.yml 
index d32776bdf7..c011c8b998 100644 --- a/provision/roles/configure_ochami/tasks/configure_bss_group.yml +++ b/provision/roles/configure_ochami/tasks/configure_bss_group.yml @@ -26,79 +26,10 @@ if (item is match('service_kube_')) and (hostvars['localhost']['service_k8s_version'] | default('') | length > 0) else '' }} -- name: Build image search pattern +- name: Set kernel and initrd from validated images ansible.builtin.set_fact: - image_search_pattern: >- - rhel-{{ functional_group_name }}{{ naming_suffix }} - -- name: Verify image, kernel and initramfs in S3 (build stream and image-key) - ansible.builtin.shell: | - set -o pipefail && \ - s3cmd ls -Hr s3://boot-images | \ - grep "{{ image_search_pattern }}{{ compute_image_suffix }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' - changed_when: false - failed_when: false - register: verify_s3_image_build_stream - when: - - enable_build_stream | default(false) - - (compute_image_suffix | default('')) != '' - -- name: Verify image, kernel and initramfs in S3 (default) - ansible.builtin.shell: | - set -o pipefail && \ - s3cmd ls -Hr s3://boot-images | \ - grep "{{ image_search_pattern }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' - changed_when: false - failed_when: false - register: verify_s3_image - when: - - not enable_build_stream - -- name: Set kernel and initrd variables (build stream) - ansible.builtin.set_fact: - kernel: "{{ verify_s3_image_build_stream.stdout_lines | select('search', 'vmlinuz') | list | first }}" - initrd: "{{ verify_s3_image_build_stream.stdout_lines | select('search', 'initramfs') | list | first }}" - when: - - enable_build_stream - - (compute_image_suffix | default('')) != '' - - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 - -- name: Set kernel and initrd variables (build stream - kernel override) - vars: - _kvo: "{{ 
hostvars['localhost']['kernel_version_override'] }}" - _bs_lines: "{{ verify_s3_image_build_stream.stdout_lines }}" - ansible.builtin.set_fact: - kernel: "{{ _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" - initrd: "{{ _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" - when: - - enable_build_stream - - (compute_image_suffix | default('')) != '' - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - _bs_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - - _bs_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 - -- name: Set kernel and initrd variables (default) - ansible.builtin.set_fact: - kernel: "{{ verify_s3_image.stdout_lines | select('search', 'vmlinuz') | list | first }}" - initrd: "{{ verify_s3_image.stdout_lines | select('search', 'initramfs') | list | first }}" - when: - - not enable_build_stream - - hostvars['localhost']['kernel_version_override'] | default('') | length == 0 - -- name: Set kernel and initrd variables (default - kernel override) - vars: - _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" - _s3_lines: "{{ verify_s3_image.stdout_lines }}" - ansible.builtin.set_fact: - kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" - initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" - when: - - not enable_build_stream - - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 - - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 + kernel: "{{ validated_images[functional_group_name].kernel }}" + initrd: "{{ validated_images[functional_group_name].initrd }}" - name: Load bss template - {{ functional_group_name }} ansible.builtin.template: diff 
--git a/provision/roles/provision_validations/tasks/validate_image.yml b/provision/roles/provision_validations/tasks/validate_image.yml index e00d49826a..4fa7356b60 100644 --- a/provision/roles/provision_validations/tasks/validate_image.yml +++ b/provision/roles/provision_validations/tasks/validate_image.yml @@ -53,7 +53,7 @@ set -o pipefail && \ s3cmd ls -Hr s3://boot-images | \ grep "{{ image_search_pattern }}" | \ - grep {{ hostvars['localhost']['cluster_os_version'] }} | awk '{print $4}' | sed 's|s3://||' + grep {{ hostvars['localhost']['cluster_os_version'] }} | sort -k1,2r | awk '{print $4}' | sed 's|s3://||' changed_when: false failed_when: false register: verify_s3_image @@ -67,6 +67,7 @@ ansible.builtin.set_fact: kernel: "" initrd: "" + validated_images: "{{ validated_images | default({}) }}" - name: Set kernel and initrd variables ansible.builtin.set_fact: @@ -89,6 +90,41 @@ - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 +- name: Retry S3 search without OS version filter (kernel override cross-version) + ansible.builtin.shell: | + set -o pipefail && \ + s3cmd ls -Hr s3://boot-images | \ + grep "{{ image_search_pattern }}" | awk '{print $4}' | sed 's|s3://||' + changed_when: false + failed_when: false + register: verify_s3_image_retry + when: + - hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | length < 1 or initrd | length < 1 + +- name: Set kernel and initrd from retry results (kernel override cross-version) + vars: + _kvo: "{{ hostvars['localhost']['kernel_version_override'] }}" + _s3_lines: "{{ verify_s3_image_retry.stdout_lines }}" + ansible.builtin.set_fact: + kernel: "{{ _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | first }}" + initrd: "{{ _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | first }}" + when: + - 
hostvars['localhost']['kernel_version_override'] | default('') | length > 0 + - kernel | length < 1 or initrd | length < 1 + - verify_s3_image_retry.stdout_lines | default([]) | length > 1 + - _s3_lines | select('search', 'vmlinuz') | select('search', _kvo) | list | length > 0 + - _s3_lines | select('search', 'initramfs') | select('search', _kvo) | list | length > 0 + +- name: Store validated kernel and initrd for functional group + ansible.builtin.set_fact: + validated_images: >- + {{ validated_images | default({}) | combine({ + functional_group_name: {'kernel': kernel, 'initrd': initrd} + }) + }} + when: kernel | length > 0 and initrd | length > 0 + - name: Fail if kernel override did not match any S3 image ansible.builtin.fail: msg: >- From 61acdb67b221e227b2416c96c2571c18950ff045 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 22:04:28 +0530 Subject: [PATCH 64/78] local repo fix Signed-off-by: Abhishek S A --- .../library/module_utils/local_repo/config.py | 26 ++++++ .../local_repo/download_common.py | 93 +++++++++++++++++-- common/library/modules/parallel_tasks.py | 3 +- 3 files changed, 111 insertions(+), 11 deletions(-) diff --git a/common/library/module_utils/local_repo/config.py b/common/library/module_utils/local_repo/config.py index 5a8d4a63f5..a1165e0509 100644 --- a/common/library/module_utils/local_repo/config.py +++ b/common/library/module_utils/local_repo/config.py @@ -60,6 +60,32 @@ USER_REPO_URL = "user_repo_url" ARCH_SUFFIXES = {"x86_64", "aarch64"} +# Target OS → Python version mapping for pip cross-version downloads. +# The omnia_core container (Fedora 42) runs Python 3.13, but target nodes +# run a different Python. pip must download wheels for the TARGET version. +OS_TARGET_PYTHON = { + "rhel": {"10": "3.12"}, +} + +# Architecture → manylinux platform tags for pip --platform flag. +# Multiple tags are listed most-specific-first; pip matches any of them. 
+ARCH_PIP_PLATFORMS = { + "x86_64": [ + "manylinux_2_34_x86_64", + "manylinux_2_28_x86_64", + "manylinux_2_17_x86_64", + "manylinux2014_x86_64", + "linux_x86_64", + ], + "aarch64": [ + "manylinux_2_34_aarch64", + "manylinux_2_28_aarch64", + "manylinux_2_17_aarch64", + "manylinux2014_aarch64", + "linux_aarch64", + ], +} + # ---------------------------- # Repo Naming Format # Controls the naming convention for Pulp repositories, remotes, and distributions. diff --git a/common/library/module_utils/local_repo/download_common.py b/common/library/module_utils/local_repo/download_common.py index 190bd3b953..83633d9030 100644 --- a/common/library/module_utils/local_repo/download_common.py +++ b/common/library/module_utils/local_repo/download_common.py @@ -42,7 +42,9 @@ FILE_TIMEOUT_MIN, TASK_POLL_INTERVAL, FILE_URI, - PULP_SSL_CA_CERT + PULP_SSL_CA_CERT, + OS_TARGET_PYTHON, + ARCH_PIP_PLATFORMS, ) from ansible.module_utils.local_repo.software_utils import build_repo_name @@ -996,7 +998,58 @@ def process_iso(package, status_file_path, logger.info("#" * 30 + f" {process_iso.__name__} end " + "#" * 30) # End of function return status -def process_pip(package, status_file_path, content_base_dir, repo_name, logger): +def _get_target_python_version(cluster_os_type, cluster_os_version, logger): + """Resolve the target Python version from OS type and version. + + Args: + cluster_os_type (str): e.g. 'rhel'. + cluster_os_version (str): e.g. '10.0'. + logger: Logger instance. + + Returns: + str or None: Python version string (e.g. '3.12') or None. + """ + os_map = OS_TARGET_PYTHON.get(cluster_os_type, {}) + major = cluster_os_version.split(".")[0] if cluster_os_version else "" + py_ver = os_map.get(major) + if py_ver: + logger.info(f"Target Python resolved: {cluster_os_type} {cluster_os_version} -> Python {py_ver}") + return py_ver + + +def _build_pip_platform_args(target_python, arc, logger): + """Build pip download flags for cross-version/cross-platform downloads. 
+ + Args: + target_python (str): e.g. '3.12'. + arc (str): e.g. 'x86_64'. + logger: Logger instance. + + Returns: + str: Extra CLI flags for pip download, or empty string. + """ + if not target_python: + return "" + + abi = "cp" + target_python.replace(".", "") + platforms = ARCH_PIP_PLATFORMS.get(arc, []) + parts = [ + f"--python-version {target_python}", + f"--implementation cp", + f"--abi {abi}", + "--only-binary=:all:", + ] + for plat in platforms: + parts.append(f"--platform {plat}") + + flags = " ".join(parts) + logger.info(f"Pip platform flags for {arc}/{target_python}: {flags}") + return flags + + +def process_pip(package, status_file_path, content_base_dir, repo_name, + cluster_os_type="", cluster_os_version="", arc="", + logger=None): """ Process a pip package using Pulp. @@ -1005,6 +1058,9 @@ def process_pip(package, status_file_path, content_base_dir, repo_name, logger): status_file_path (str): Path to log processing status. content_base_dir (str): Pre-built base directory for offline content. repo_name (str): Pre-built Pulp repository name. + cluster_os_type (str): Target OS type (e.g. 'rhel'). + cluster_os_version (str): Target OS version (e.g. '10.0'). + arc (str): Target architecture (e.g. 'x86_64'). logger (logging.Logger): The logger instance. Returns: @@ -1029,16 +1085,33 @@ def process_pip(package, status_file_path, content_base_dir, repo_name, logger): os.makedirs(pip_package_directory, exist_ok=True) # Ensure directory exists - # Step 1: Download the package + # Step 1: Download the package with target-platform-aware flags. + # The container runs Python 3.13 (Fedora 42) but targets run Python 3.12 + # (RHEL 10.0). Without --python-version, pip downloads cp313 wheels that + # cannot be installed on cp312 nodes (e.g. cffi, cryptography). 
logger.info("Step 1: Downloading package...") - download_command = f"pip download -d {shlex.quote(pip_package_directory)} {package_name}" - if version: - download_command += f"=={version}" + target_python = _get_target_python_version(cluster_os_type, cluster_os_version, logger) + platform_flags = _build_pip_platform_args(target_python, arc, logger) + + pkg_spec = f"{package_name}=={version}" if version else package_name + dest_flag = f"-d {shlex.quote(pip_package_directory)}" + + download_ok = False + if platform_flags: + download_command = f"pip download {dest_flag} {platform_flags} {pkg_spec}" + download_ok = execute_command(download_command, logger) + if not download_ok: + logger.warning( + f"Platform-aware download failed for {pkg_spec}; " + "retrying without platform flags (source fallback)." + ) - if not execute_command(download_command, logger): - status = "Failed" - logger.error(f"Failed to download {package_name}. Aborting process.") - return status # Stop further steps + if not download_ok: + download_command = f"pip download {dest_flag} {pkg_spec}" + if not execute_command(download_command, logger): + status = "Failed" + logger.error(f"Failed to download {package_name}. 
Aborting process.") + return status # Stop further steps # Step 2: Create the Pulp repository if it does not exist logger.info("Step 2: Checking repository existence...") diff --git a/common/library/modules/parallel_tasks.py b/common/library/modules/parallel_tasks.py index e696256bed..20268b10fa 100644 --- a/common/library/modules/parallel_tasks.py +++ b/common/library/modules/parallel_tasks.py @@ -206,7 +206,8 @@ def determine_function( ] if task_type == "pip_module": return process_pip, [ - task, status_file, content_base_dir, repo_name + task, status_file, content_base_dir, repo_name, + cluster_os_type, cluster_os_version, arc ] if task_type == "image": return process_image, [ From 631c5d8823657aea3a2b834ea9fb0541e7eb9f32 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Mon, 8 Jun 2026 23:03:47 +0530 Subject: [PATCH 65/78] pip fix Signed-off-by: Abhishek S A --- common/library/module_utils/local_repo/config.py | 4 ---- common/library/module_utils/local_repo/download_common.py | 7 +++++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/common/library/module_utils/local_repo/config.py b/common/library/module_utils/local_repo/config.py index a1165e0509..e3a1031528 100644 --- a/common/library/module_utils/local_repo/config.py +++ b/common/library/module_utils/local_repo/config.py @@ -74,15 +74,11 @@ "manylinux_2_34_x86_64", "manylinux_2_28_x86_64", "manylinux_2_17_x86_64", - "manylinux2014_x86_64", - "linux_x86_64", ], "aarch64": [ "manylinux_2_34_aarch64", "manylinux_2_28_aarch64", "manylinux_2_17_aarch64", - "manylinux2014_aarch64", - "linux_aarch64", ], } diff --git a/common/library/module_utils/local_repo/download_common.py b/common/library/module_utils/local_repo/download_common.py index 83633d9030..0eff437663 100644 --- a/common/library/module_utils/local_repo/download_common.py +++ b/common/library/module_utils/local_repo/download_common.py @@ -1035,7 +1035,7 @@ def _build_pip_platform_args(target_python, arc, logger): platforms = 
ARCH_PIP_PLATFORMS.get(arc, []) parts = [ f"--python-version {target_python}", - f"--implementation cp", + "--implementation cp", f"--abi {abi}", "--only-binary=:all:", ] @@ -1066,6 +1066,9 @@ def process_pip(package, status_file_path, content_base_dir, repo_name, Returns: str: "Success" if the process is successful, otherwise "Failed". """ + if logger is None: + import logging + logger = logging.getLogger(__name__) logger.info("#" * 30 + f" {process_pip.__name__} start " + "#" * 30) status = "Success" # Default status, updated if any step fails @@ -1110,7 +1113,7 @@ def process_pip(package, status_file_path, content_base_dir, repo_name, download_command = f"pip download {dest_flag} {pkg_spec}" if not execute_command(download_command, logger): status = "Failed" - logger.error(f"Failed to download {package_name}. Aborting process.") + logger.error(f"Failed to download {pkg_spec}. Aborting process.") return status # Stop further steps # Step 2: Create the Pulp repository if it does not exist From a2322be1c00868dba434a40faa0d8d5f68a9cbf6 Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 9 Jun 2026 02:59:28 +0530 Subject: [PATCH 66/78] pip3 package installation fix post python3.13 upgrade (#4713) * kernel 10.2 fix Signed-off-by: Abhishek S A * remove duplicate kernel check Signed-off-by: Abhishek S A * Update validate_image.yml Signed-off-by: Abhishek S A * Update validate_image.yml Signed-off-by: Abhishek S A * local repo fix Signed-off-by: Abhishek S A * pip fix Signed-off-by: Abhishek S A --------- Signed-off-by: Abhishek S A --- .../library/module_utils/local_repo/config.py | 22 +++++ .../local_repo/download_common.py | 96 +++++++++++++++++-- common/library/modules/parallel_tasks.py | 3 +- 3 files changed, 110 insertions(+), 11 deletions(-) diff --git a/common/library/module_utils/local_repo/config.py b/common/library/module_utils/local_repo/config.py index 5a8d4a63f5..e3a1031528 100644 --- a/common/library/module_utils/local_repo/config.py +++ 
b/common/library/module_utils/local_repo/config.py @@ -60,6 +60,28 @@ USER_REPO_URL = "user_repo_url" ARCH_SUFFIXES = {"x86_64", "aarch64"} +# Target OS → Python version mapping for pip cross-version downloads. +# The omnia_core container (Fedora 42) runs Python 3.13, but target nodes +# run a different Python. pip must download wheels for the TARGET version. +OS_TARGET_PYTHON = { + "rhel": {"10": "3.12"}, +} + +# Architecture → manylinux platform tags for pip --platform flag. +# Multiple tags are listed most-specific-first; pip matches any of them. +ARCH_PIP_PLATFORMS = { + "x86_64": [ + "manylinux_2_34_x86_64", + "manylinux_2_28_x86_64", + "manylinux_2_17_x86_64", + ], + "aarch64": [ + "manylinux_2_34_aarch64", + "manylinux_2_28_aarch64", + "manylinux_2_17_aarch64", + ], +} + # ---------------------------- # Repo Naming Format # Controls the naming convention for Pulp repositories, remotes, and distributions. diff --git a/common/library/module_utils/local_repo/download_common.py b/common/library/module_utils/local_repo/download_common.py index 190bd3b953..0eff437663 100644 --- a/common/library/module_utils/local_repo/download_common.py +++ b/common/library/module_utils/local_repo/download_common.py @@ -42,7 +42,9 @@ FILE_TIMEOUT_MIN, TASK_POLL_INTERVAL, FILE_URI, - PULP_SSL_CA_CERT + PULP_SSL_CA_CERT, + OS_TARGET_PYTHON, + ARCH_PIP_PLATFORMS, ) from ansible.module_utils.local_repo.software_utils import build_repo_name @@ -996,7 +998,58 @@ def process_iso(package, status_file_path, logger.info("#" * 30 + f" {process_iso.__name__} end " + "#" * 30) # End of function return status -def process_pip(package, status_file_path, content_base_dir, repo_name, logger): +def _get_target_python_version(cluster_os_type, cluster_os_version, logger): + """Resolve the target Python version from OS type and version. + + Args: + cluster_os_type (str): e.g. 'rhel'. + cluster_os_version (str): e.g. '10.0'. + logger: Logger instance. 
+ + Returns: + str or None: Python version string (e.g. '3.12') or None. + """ + os_map = OS_TARGET_PYTHON.get(cluster_os_type, {}) + major = cluster_os_version.split(".")[0] if cluster_os_version else "" + py_ver = os_map.get(major) + if py_ver: + logger.info(f"Target Python resolved: {cluster_os_type} {cluster_os_version} -> Python {py_ver}") + return py_ver + + +def _build_pip_platform_args(target_python, arc, logger): + """Build pip download flags for cross-version/cross-platform downloads. + + Args: + target_python (str): e.g. '3.12'. + arc (str): e.g. 'x86_64'. + logger: Logger instance. + + Returns: + str: Extra CLI flags for pip download, or empty string. + """ + if not target_python: + return "" + + abi = "cp" + target_python.replace(".", "") + platforms = ARCH_PIP_PLATFORMS.get(arc, []) + parts = [ + f"--python-version {target_python}", + "--implementation cp", + f"--abi {abi}", + "--only-binary=:all:", + ] + for plat in platforms: + parts.append(f"--platform {plat}") + + flags = " ".join(parts) + logger.info(f"Pip platform flags for {arc}/{target_python}: {flags}") + return flags + + +def process_pip(package, status_file_path, content_base_dir, repo_name, + cluster_os_type="", cluster_os_version="", arc="", + logger=None): """ Process a pip package using Pulp. @@ -1005,11 +1058,17 @@ def process_pip(package, status_file_path, content_base_dir, repo_name, logger): status_file_path (str): Path to log processing status. content_base_dir (str): Pre-built base directory for offline content. repo_name (str): Pre-built Pulp repository name. + cluster_os_type (str): Target OS type (e.g. 'rhel'). + cluster_os_version (str): Target OS version (e.g. '10.0'). + arc (str): Target architecture (e.g. 'x86_64'). logger (logging.Logger): The logger instance. Returns: str: "Success" if the process is successful, otherwise "Failed". 
""" + if logger is None: + import logging + logger = logging.getLogger(__name__) logger.info("#" * 30 + f" {process_pip.__name__} start " + "#" * 30) status = "Success" # Default status, updated if any step fails @@ -1029,16 +1088,33 @@ def process_pip(package, status_file_path, content_base_dir, repo_name, logger): os.makedirs(pip_package_directory, exist_ok=True) # Ensure directory exists - # Step 1: Download the package + # Step 1: Download the package with target-platform-aware flags. + # The container runs Python 3.13 (Fedora 42) but targets run Python 3.12 + # (RHEL 10.0). Without --python-version, pip downloads cp313 wheels that + # cannot be installed on cp312 nodes (e.g. cffi, cryptography). logger.info("Step 1: Downloading package...") - download_command = f"pip download -d {shlex.quote(pip_package_directory)} {package_name}" - if version: - download_command += f"=={version}" + target_python = _get_target_python_version(cluster_os_type, cluster_os_version, logger) + platform_flags = _build_pip_platform_args(target_python, arc, logger) + + pkg_spec = f"{package_name}=={version}" if version else package_name + dest_flag = f"-d {shlex.quote(pip_package_directory)}" + + download_ok = False + if platform_flags: + download_command = f"pip download {dest_flag} {platform_flags} {pkg_spec}" + download_ok = execute_command(download_command, logger) + if not download_ok: + logger.warning( + f"Platform-aware download failed for {pkg_spec}; " + "retrying without platform flags (source fallback)." + ) - if not execute_command(download_command, logger): - status = "Failed" - logger.error(f"Failed to download {package_name}. Aborting process.") - return status # Stop further steps + if not download_ok: + download_command = f"pip download {dest_flag} {pkg_spec}" + if not execute_command(download_command, logger): + status = "Failed" + logger.error(f"Failed to download {pkg_spec}. 
Aborting process.") + return status # Stop further steps # Step 2: Create the Pulp repository if it does not exist logger.info("Step 2: Checking repository existence...") diff --git a/common/library/modules/parallel_tasks.py b/common/library/modules/parallel_tasks.py index e696256bed..20268b10fa 100644 --- a/common/library/modules/parallel_tasks.py +++ b/common/library/modules/parallel_tasks.py @@ -206,7 +206,8 @@ def determine_function( ] if task_type == "pip_module": return process_pip, [ - task, status_file, content_base_dir, repo_name + task, status_file, content_base_dir, repo_name, + cluster_os_type, cluster_os_version, arc ] if task_type == "image": return process_image, [ From 5de6d24b108f00aaaae1d377d73fc53ebc8c9c45 Mon Sep 17 00:00:00 2001 From: Nagachandan-P Date: Tue, 9 Jun 2026 05:09:07 +0000 Subject: [PATCH 67/78] multi-ib ib ip supported Signed-off-by: Nagachandan-P --- .../doca-ofed/configure-ib-network.sh.j2 | 303 +++++++----------- 1 file changed, 121 insertions(+), 182 deletions(-) diff --git a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 index 9ecc30d17f..0bebb16c6d 100644 --- a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 +++ b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 @@ -165,211 +165,150 @@ else echo "WARNING: Proceeding with all mlx5 devices - may include Ethernet-only NICs" fi -# === DEVICE COUNT ANALYSIS AND SELECTION LOGIC === -echo "INFO: DEVICE ANALYSIS: Analyzing mlx5 device count for configuration strategy" +# ============================== +# SLOT-BASED DEVICE SELECTION (Integrated Logic) +# ============================== +echo "=============================================" +echo "🔍 SLOT-BASED IB DEVICE SELECTION" +echo "=============================================" +echo "INFO: Target Slot=$SLOT_NUMBER Port=$PORT_NUMBER IP=$IB_IP/$NETMASK_BITS" 
+echo + +# ============================== +# Requirements check for new dependencies (available from DOCA installation) +# ============================== +if ! command -v dmidecode >/dev/null; then + echo "WARNING: dmidecode not found - required for slot-based mapping. Ensure DOCA is installed properly." +fi +if ! command -v ibdev2netdev >/dev/null; then + echo "WARNING: ibdev2netdev not found - required for device mapping. Ensure DOCA is installed properly." +fi -if [ "$MLX5_DEVICE_COUNT" -eq 0 ]; then - echo "ERROR: DEVICE ANALYSIS: No mlx5 devices found on this system" - echo "ERROR: DEVICE ANALYSIS: Please check hardware and driver installation" +# ============================== +# STEP 1: Resolve SLOT → Full PCI Address +# ============================== +echo "---- STEP 1: Resolve Slot → PCI ----" +PCI_ADDR=$(dmidecode -t slot | awk -v slot="Slot $SLOT_NUMBER" ' + /Designation:/ { found = ($0 ~ slot) } + found && /Bus Address:/ { print $NF; exit } +') + +if [ -z "$PCI_ADDR" ]; then + echo "ERROR: Could not resolve PCI address for slot $SLOT_NUMBER" + echo "Available slots:" + dmidecode -t slot | grep -E "Designation|Bus Address" exit 1 +fi +echo "INFO: Slot $SLOT_NUMBER → PCI $PCI_ADDR" +echo + +# ============================== +# STEP 2: Select device by FULL PCI address (with bus:device fallback) +# ============================== +echo "---- STEP 2: Select device by PCI match ----" +SELECTED_DEVICE="" + +# Use filtered IB device list if available, otherwise use all mlx5 devices +search_devices=() +if [ -n "${IB_DEVICES[*]:-}" ]; then + for device in "${IB_DEVICES[@]}"; do + search_devices+=("/sys/class/infiniband/$device") + done +else + for mlx_device in /sys/class/infiniband/mlx5_*; do + [ -e "$mlx_device" ] && search_devices+=("$mlx_device") + done +fi -elif [ "$MLX5_DEVICE_COUNT" -eq 1 ]; then - echo "INFO: SINGLE-DEVICE MODE: Only one mlx5 device available - no slot-based mapping needed" - echo "INFO: SINGLE-DEVICE MODE: Using available device (slot 
number from PXE mapping ignored)" - echo "INFO: SINGLE-DEVICE MODE: This ensures robustness for single-device deployments" - - if [ -n "${SINGLE_IB_DEVICE:-}" ]; then - MLX5_DEVICE="$SINGLE_IB_DEVICE" - echo "INFO: SINGLE-DEVICE MODE: Using filtered InfiniBand device: $MLX5_DEVICE" - else - MLX5_DEVICE=$(ls /sys/class/infiniband/ | grep mlx5 | head -1) - echo "INFO: SINGLE-DEVICE MODE: Using first available mlx5 device: $MLX5_DEVICE" - fi - if [ -z "$MLX5_DEVICE" ]; then - echo "ERROR: DEVICE ANALYSIS: Failed to detect mlx5 device despite count showing 1" - exit 1 - fi - - echo "SUCCESS: SINGLE-DEVICE MODE: Selected device '$MLX5_DEVICE' (only available device)" - -elif [ "$MLX5_DEVICE_COUNT" -gt 1 ]; then - echo "INFO: MULTI-DEVICE MODE: Found $MLX5_DEVICE_COUNT active mlx5 devices" - echo "INFO: MULTI-DEVICE MODE: Checking all devices for port $PORT_NUMBER with GUID validation" - - # Array to store devices that have the specified port with valid GUID - MATCHING_DEVICES=() - - # Use filtered device list if available, otherwise check all devices - search_devices=() - if [ -n "${IB_DEVICES[*]:-}" ]; then - for device in "${IB_DEVICES[@]}"; do - search_devices+=("/sys/class/infiniband/$device") - done - else - for mlx_device in /sys/class/infiniband/mlx5_*; do - [ -e "$mlx_device" ] && search_devices+=("$mlx_device") - done +# --- Primary: exact full PCI match --- +for device_name in "${IB_DEVICES[@]:-}"; do + [ -z "$device_name" ] && continue + dev="/sys/class/infiniband/$device_name" + [ ! 
-e "$dev" ] && continue + dev_pci=$(basename "$(readlink -f "$dev/device")") + echo "DEBUG: $device_name → PCI $dev_pci (slot reports $PCI_ADDR)" + if [[ "$dev_pci" == "$PCI_ADDR" ]]; then + SELECTED_DEVICE="$device_name" + echo "✅ FULL PCI MATCH → $SELECTED_DEVICE" + break fi +done - for mlx_device in "${search_devices[@]}"; do - [ -e "$mlx_device" ] || continue - device_name=$(basename "$mlx_device") - - # Check if port exists on this device - PORT_PATH="/sys/class/infiniband/${device_name}/ports/${PORT_NUMBER}" - if [ ! -d "$PORT_PATH" ]; then - echo "DEBUG: Device $device_name - Port $PORT_NUMBER does not exist" - continue - fi - - # Check if GUID exists for this port - GUID_PATH="${PORT_PATH}/gids/0" - if [ ! -f "$GUID_PATH" ]; then - echo "DEBUG: Device $device_name - Port $PORT_NUMBER exists but GUID not found" - continue +# --- Fallback: bus:device match (handles dmidecode root-port quirk) --- +if [ -z "$SELECTED_DEVICE" ]; then + TARGET_BD=$(echo "$PCI_ADDR" | cut -d. -f1) # drop .function + echo "WARN: No exact PCI match; trying bus:device match on $TARGET_BD" + for device_name in "${IB_DEVICES[@]:-}"; do + [ -z "$device_name" ] && continue + dev="/sys/class/infiniband/$device_name" + [ ! -e "$dev" ] && continue + dev_pci=$(basename "$(readlink -f "$dev/device")") + dev_bd=$(echo "$dev_pci" | cut -d. 
-f1) + if [[ "$dev_bd" == "$TARGET_BD" ]]; then + SELECTED_DEVICE="$device_name" + echo "✅ BUS:DEVICE MATCH → $SELECTED_DEVICE" + break fi - - # Device has both port and GUID - MATCHING_DEVICES+=("$device_name") - echo "DEBUG: Device $device_name - Port $PORT_NUMBER with GUID found" done +fi - # Analyze matching results - MATCHING_COUNT=0 - for device in "${MATCHING_DEVICES[@]}"; do - MATCHING_COUNT=$((MATCHING_COUNT + 1)) +if [ -z "$SELECTED_DEVICE" ]; then + echo "ERROR: No matching IB device found for slot=$SLOT_NUMBER" + echo "Device PCI map:" + for device_name in "${IB_DEVICES[@]:-}"; do + [ -z "$device_name" ] && continue + dev="/sys/class/infiniband/$device_name" + [ -e "$dev" ] && echo " $device_name → $(basename "$(readlink -f "$dev/device")")" done - - if [ "$MATCHING_COUNT" -eq 0 ]; then - echo "ERROR: MULTI-DEVICE MODE: Port $PORT_NUMBER not found on any device (or no GUID available)" - echo "ERROR: MULTI-DEVICE MODE: Available devices and their ports:" - for mlx_device in "${search_devices[@]}"; do - [ -e "$mlx_device" ] || continue - device_name=$(basename "$mlx_device") - echo " - $device_name:" - if [ -d "/sys/class/infiniband/${device_name}/ports" ]; then - ls /sys/class/infiniband/${device_name}/ports/ 2>/dev/null | sed 's/^/ Port: /' - else - echo " No ports directory found" - fi - done - exit 1 - elif [ "$MATCHING_COUNT" -gt 1 ]; then - echo "ERROR: MULTI-DEVICE MODE: Port $PORT_NUMBER found on multiple devices" - echo "ERROR: MULTI-DEVICE MODE: Matching devices: ${MATCHING_DEVICES[*]}" - echo "ERROR: MULTI-DEVICE MODE: Cannot determine which device to use" - echo "ERROR: MULTI-DEVICE MODE: Please update PXE mapping to specify device uniquely" - exit 1 - fi - - # Use the single matching device - MLX5_DEVICE="${MATCHING_DEVICES[0]}" - echo "SUCCESS: MULTI-DEVICE MODE: Selected device '$MLX5_DEVICE' (only device with port $PORT_NUMBER and valid GUID)" + exit 1 fi +MLX5_DEVICE="$SELECTED_DEVICE" echo "INFO: FINAL DEVICE SELECTION: Using mlx5 device 
'$MLX5_DEVICE'" +echo -# Check if the specified port exists +# ============================== +# STEP 3: Validate port +# ============================== +echo "---- STEP 3: Validate port ----" PORT_PATH="/sys/class/infiniband/${MLX5_DEVICE}/ports/${PORT_NUMBER}" -echo "DEBUG: Checking if port $PORT_NUMBER exists on device $MLX5_DEVICE" -echo "DEBUG: Port path: $PORT_PATH" - if [ ! -d "$PORT_PATH" ]; then - echo "ERROR: PORT NOT FOUND: Port ${PORT_NUMBER} does not exist for device ${MLX5_DEVICE}" - echo "ERROR: PORT NOT FOUND: Available ports on device $MLX5_DEVICE:" - ls -la /sys/class/infiniband/${MLX5_DEVICE}/ports/ 2>/dev/null || echo "ERROR: No ports directory found" - exit 1 -else - echo "SUCCESS: Port validation passed - port $PORT_NUMBER exists on device $MLX5_DEVICE" -fi - -# Retrieve GUID from the specified port -GUID_PATH="${PORT_PATH}/gids/0" -echo "DEBUG: Retrieving GUID from port $PORT_NUMBER on device $MLX5_DEVICE" -echo "DEBUG: GUID path: $GUID_PATH" - -if [ ! -f "$GUID_PATH" ]; then - echo "ERROR: GUID NOT FOUND: GUID file not found at ${GUID_PATH}" - echo "ERROR: GUID NOT FOUND: Cannot proceed with interface matching without GUID" + echo "ERROR: Port $PORT_NUMBER not found on device $MLX5_DEVICE" + echo "Available ports:" + ls "/sys/class/infiniband/${MLX5_DEVICE}/ports/" exit 1 fi -GUID=$(cat "$GUID_PATH") -echo "SUCCESS: Retrieved GUID from hardware: $GUID" - -# Convert GUID to individual octets for comparison -# GUID format: fe80:0000:0000:0000:946d:ae03:008c:108c -# Interface format: 94:6d:ae:03:00:8c:10:8c (1-byte groups) -GUID_OCTETS=$(echo "$GUID" | python3 -c " -import sys -guid = sys.stdin.read().strip() -parts = guid.split(':') -if len(parts) >= 8: - # Take last 4 parts and convert each 4-char group to 2-char groups - last_parts = parts[4:8] - octets = [] - for part in last_parts: - octets.append(part[:2]) - octets.append(part[2:]) - print(':'.join(octets)) -") -echo "SUCCESS: GUID octets for interface comparison: $GUID_OCTETS" - -# Find 
the IB interface by comparing octets -echo "DEBUG: Starting IB interface search using GUID matching" -IB_INTERFACE="" - -# List all available IB interfaces first -echo "DEBUG: Available IB interfaces on this system:" -for iface in $(ip link show | grep -E "^[0-9]+: ib" | awk -F: '{print $2}'); do - echo "DEBUG: Found IB interface: $iface" -done +PORT_STATE=$(cat "$PORT_PATH/state" 2>/dev/null || echo "unknown") +echo "SUCCESS: Port $PORT_NUMBER exists (state: $PORT_STATE)" +echo -for iface in $(ip link show | grep -E "^[0-9]+: ib" | awk -F: '{print $2}'); do - echo "DEBUG: Checking interface $iface for GUID match" - - # Get the full address from the interface and extract the last 8 octets - full_addr=$(ip link show "$iface" | grep "link/infiniband" | sed 's/.*link\/infiniband //' | sed 's/ brd.*//') - echo "DEBUG: Interface $iface full address: $full_addr" - - # Extract the last 8 octets (fields 13-20) and format with colons for comparison - iface_octets=$(echo "$full_addr" | cut -d: -f13-20 | python3 -c " -import sys -parts = sys.stdin.read().strip().split(':') -if len(parts) >= 8: - formatted = f'{parts[0]}:{parts[1]}:{parts[2]}:{parts[3]}:{parts[4]}:{parts[5]}:{parts[6]}:{parts[7]}' - print(formatted) -") - echo "DEBUG: Interface $iface octets: $iface_octets (target: $GUID_OCTETS)" - - if [ "$iface_octets" = "$GUID_OCTETS" ]; then - echo "SUCCESS: GUID MATCH FOUND! 
Interface $iface matches hardware GUID" - IB_INTERFACE="$iface" - break - else - echo "DEBUG: GUID mismatch - interface $iface does not match target GUID" - fi -done +# ============================== +# STEP 4: Map device+port → netdev interface +# ============================== +echo "---- STEP 4: Map to interface ----" +IB_INTERFACE=$(ibdev2netdev | awk -v dev="$MLX5_DEVICE" -v port="$PORT_NUMBER" ' + $1==dev && $3==port {print $5} +') if [ -z "$IB_INTERFACE" ]; then - echo "ERROR: INTERFACE NOT FOUND: No IB interface found with GUID octets $GUID_OCTETS" - echo "ERROR: INTERFACE NOT FOUND: This indicates a hardware/software mismatch" - echo "ERROR: INTERFACE NOT FOUND: Available IB interfaces with their GUID octets:" - for iface in $(ip link show | grep -E "^[0-9]+: ib" | awk -F: '{print $2}'); do - full_addr=$(ip link show "$iface" | grep "link/infiniband" | sed 's/.*link\/infiniband //' | sed 's/ brd.*//') - iface_octets=$(echo "$full_addr" | cut -d: -f13-20 | python3 -c " -import sys -parts = sys.stdin.read().strip().split(':') -if len(parts) >= 8: - formatted = f'{parts[0]}{parts[1]}:{parts[2]}{parts[3]}:{parts[4]}{parts[5]}:{parts[6]}{parts[7]}' - print(formatted) -") - echo " - $iface: $iface_octets" - done + echo "ERROR: Could not map $MLX5_DEVICE port $PORT_NUMBER to interface" + echo "ibdev2netdev output:" + ibdev2netdev exit 1 fi - -echo "SUCCESS: INTERFACE SELECTION: Found IB interface $IB_INTERFACE matching hardware GUID" +echo "SUCCESS: Interface found → $IB_INTERFACE" +echo + +echo "=============================================" +echo "✅ IB DEVICE SELECTION SUCCESS" +echo "Slot : $SLOT_NUMBER" +echo "Port : $PORT_NUMBER" +echo "PCI Address : $PCI_ADDR" +echo "mlx device : $MLX5_DEVICE" +echo "Interface : $IB_INTERFACE" +echo "=============================================" +echo if command -v nmcli >/dev/null 2>&1; then echo "INFO: IP CONFIGURATION: Using NetworkManager to configure IB interface" From 708d9fa39fd4653ebd807e9c2c50698c16c19e8d Mon Sep 
17 00:00:00 2001 From: Sujit Jadhav Date: Tue, 9 Jun 2026 10:38:32 +0530 Subject: [PATCH 68/78] fix(discovery): prompt for OME credentials when not available (OMN01D-2517) When running discovery.yml without prior prepare_oim.yml execution, the playbook previously failed with a hard error instructing users to run prepare_oim.yml first. Now discovery.yml invokes the standard credential utility (get_config_credentials.yml) before the discovery role runs. This reuses the existing credential prompting framework that handles: - Creating credential files if they don't exist - Prompting for OME username/password (conditional_mandatory) - Encrypting credentials with ansible-vault The discovery config is loaded early to set enable_bmc_discovery before the credential utility runs, ensuring OME credentials are prompted when BMC discovery is enabled. This is consistent with how prepare_oim.yml and other Omnia playbooks handle credentials. Fixes: OMN01D-2517 Signed-off-by: Sujit Jadhav --- discovery/discovery.yml | 16 ++++++++++++---- .../ome_discovery/tasks/get_ome_credentials.yml | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/discovery/discovery.yml b/discovery/discovery.yml index 1dec83db67..c84d625cb3 100644 --- a/discovery/discovery.yml +++ b/discovery/discovery.yml @@ -30,10 +30,6 @@ omnia_run_tags: "{{ (ansible_run_tags | default([]) | list + ['discovery']) | unique }}" cacheable: true -- name: Invoke validate_config.yml to perform L1 and L2 validations with discovery tag - ansible.builtin.import_playbook: ../input_validation/validate_config.yml - tags: always - - name: Load discovery configuration hosts: localhost connection: local @@ -43,6 +39,18 @@ file: "{{ input_project_dir }}/discovery_config.yml" failed_when: false + - name: Set enable_bmc_discovery for credential utility + ansible.builtin.set_fact: + enable_bmc_discovery: "{{ true if (discovery_mechanism | default('')) == 'ome' else (enable_bmc_discovery | default(false) | bool) }}" + 
cacheable: true + +- name: Invoke validate_config.yml to perform L1 and L2 validations with discovery tag + ansible.builtin.import_playbook: ../input_validation/validate_config.yml + tags: always + +- name: Invoke get_config_credentials.yml + ansible.builtin.import_playbook: ../utils/credential_utility/get_config_credentials.yml + - name: BMC Discovery Playbook hosts: localhost connection: local diff --git a/discovery/roles/ome_discovery/tasks/get_ome_credentials.yml b/discovery/roles/ome_discovery/tasks/get_ome_credentials.yml index 425b02697b..6fac5b57c3 100644 --- a/discovery/roles/ome_discovery/tasks/get_ome_credentials.yml +++ b/discovery/roles/ome_discovery/tasks/get_ome_credentials.yml @@ -60,9 +60,9 @@ msg: "ome_ip must be provided in discovery_config.yml." when: ome_ip is not defined or ome_ip == '' -- name: Validate OME credentials are set +- name: Validate OME credentials are available ansible.builtin.fail: - msg: "OME credentials (ome_username, ome_password) must be provided. Run prepare_oim or get_config_credentials first." + msg: "OME credentials (ome_username, ome_password) are not set. Cannot proceed with discovery." 
when: > ome_username is not defined or ome_username == '' or ome_password is not defined or ome_password == '' From 05de5238bab18e9d60417ef2dc6d8c90b70a6d1f Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 9 Jun 2026 11:43:39 +0530 Subject: [PATCH 69/78] pulp fix Signed-off-by: Abhishek S A --- local_repo/pulp_cleanup.yml | 49 +++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/local_repo/pulp_cleanup.yml b/local_repo/pulp_cleanup.yml index 9a1f2cde5c..0b2eff9346 100644 --- a/local_repo/pulp_cleanup.yml +++ b/local_repo/pulp_cleanup.yml @@ -49,10 +49,16 @@ cluster_os_version: "{{ software_config.cluster_os_version }}" # Step 1: Input Validation + - name: Set cleanup input presence flag + ansible.builtin.set_fact: + has_cleanup_input: "{{ (cleanup_repos | default([]) | length > 0) or + (cleanup_containers | default([]) | length > 0) or + (cleanup_files | default([]) | length > 0) }}" + - name: Validate input - at least one cleanup type must be specified ansible.builtin.assert: that: - - (cleanup_repos | default([]) | length > 0) or (cleanup_containers | default([]) | length > 0) or (cleanup_files | default([]) | length > 0) + - has_cleanup_input | bool fail_msg: | No cleanup items specified. 
Please provide at least one of: cleanup_repos: ['repo1', 'repo2'] @@ -60,21 +66,40 @@ cleanup_files: ['file1', 'file2'] # Step 2: User Confirmation - - name: Parse cleanup lists + - name: Parse cleanup lists - repos + ansible.builtin.set_fact: + repo_list: >- + {{ cleanup_repos.split(',') | map('trim') | select | list + if (cleanup_repos is defined and cleanup_repos is string) + else (cleanup_repos | default([])) }} + + - name: Parse cleanup lists - containers + ansible.builtin.set_fact: + container_list: >- + {{ cleanup_containers.split(',') | map('trim') | select | list + if (cleanup_containers is defined and cleanup_containers is string) + else (cleanup_containers | default([])) }} + + - name: Parse cleanup lists - files ansible.builtin.set_fact: - repo_list: "{{ cleanup_repos.split(',') | map('trim') | list if cleanup_repos is string else (cleanup_repos | default([])) }}" - container_list: "{{ cleanup_containers.split(',') | map('trim') | list if cleanup_containers is string else (cleanup_containers | default([])) }}" - file_list: "{{ cleanup_files.split(',') | map('trim') | list if cleanup_files is string else (cleanup_files | default([])) }}" + file_list: >- + {{ cleanup_files.split(',') | map('trim') | select | list + if (cleanup_files is defined and cleanup_files is string) + else (cleanup_files | default([])) }} - name: Display cleanup summary ansible.builtin.debug: msg: - "========== CLEANUP SUMMARY ==========" - "Repositories : {{ (repo_list | default([]) | join(', ')) if repo_list | default([]) | length > 0 else 'None' }}" - - "Containers : {{ (container_list | default([]) | join(', ')) if cleanup_containers | default([]) | length > 0 else 'None' }}" - - "Files : {{ (file_list | default([]) | join(', ')) if cleanup_files | default([]) | length > 0 else 'None' }}" + - "Containers : {{ (container_list | default([]) | join(', ')) if container_list | default([]) | length > 0 else 'None' }}" + - "Files : {{ (file_list | default([]) | join(', ')) if file_list 
| default([]) | length > 0 else 'None' }}" - "=====================================" + - name: Set force skip confirmation flag + ansible.builtin.set_fact: + force_skip_confirmation: "{{ force | default(false) | bool }}" + - name: Get user confirmation ansible.builtin.pause: prompt: | @@ -83,14 +108,18 @@ This action cannot be undone. Type 'yes' to continue or press Ctrl+C to abort register: user_input - when: not (force | default(false)) | bool + when: not force_skip_confirmation + + - name: Set user confirmed flag + ansible.builtin.set_fact: + user_confirmed: "{{ (user_input.user_input | default('') | lower) == 'yes' }}" - name: Abort if not confirmed ansible.builtin.fail: msg: "Cleanup cancelled by user" when: - - not (force | default(false)) | bool - - user_input.user_input | default('') | lower != 'yes' + - not force_skip_confirmation + - not user_confirmed tasks: # Step 3: Call Python Module From b5dd8b4011998493033fa3113156eaf7b297a1c2 Mon Sep 17 00:00:00 2001 From: Nagachandan P Date: Tue, 9 Jun 2026 11:54:01 +0530 Subject: [PATCH 70/78] Update configure-ib-network.sh.j2 Signed-off-by: Nagachandan P --- .../templates/doca-ofed/configure-ib-network.sh.j2 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 index 0bebb16c6d..1aadb79172 100644 --- a/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 +++ b/provision/roles/configure_ochami/templates/doca-ofed/configure-ib-network.sh.j2 @@ -169,7 +169,7 @@ fi # SLOT-BASED DEVICE SELECTION (Integrated Logic) # ============================== echo "=============================================" -echo "🔍 SLOT-BASED IB DEVICE SELECTION" +echo "SLOT-BASED IB DEVICE SELECTION" echo "=============================================" echo "INFO: Target Slot=$SLOT_NUMBER Port=$PORT_NUMBER IP=$IB_IP/$NETMASK_BITS" echo 
@@ -229,7 +229,7 @@ for device_name in "${IB_DEVICES[@]:-}"; do echo "DEBUG: $device_name → PCI $dev_pci (slot reports $PCI_ADDR)" if [[ "$dev_pci" == "$PCI_ADDR" ]]; then SELECTED_DEVICE="$device_name" - echo "✅ FULL PCI MATCH → $SELECTED_DEVICE" + echo " FULL PCI MATCH → $SELECTED_DEVICE" break fi done @@ -246,7 +246,7 @@ if [ -z "$SELECTED_DEVICE" ]; then dev_bd=$(echo "$dev_pci" | cut -d. -f1) if [[ "$dev_bd" == "$TARGET_BD" ]]; then SELECTED_DEVICE="$device_name" - echo "✅ BUS:DEVICE MATCH → $SELECTED_DEVICE" + echo " BUS:DEVICE MATCH → $SELECTED_DEVICE" break fi done @@ -301,7 +301,7 @@ echo "SUCCESS: Interface found → $IB_INTERFACE" echo echo "=============================================" -echo "✅ IB DEVICE SELECTION SUCCESS" +echo " IB DEVICE SELECTION SUCCESS" echo "Slot : $SLOT_NUMBER" echo "Port : $PORT_NUMBER" echo "PCI Address : $PCI_ADDR" From 54fd1e085e01917aaf50df5b0954bff0dc1db5ae Mon Sep 17 00:00:00 2001 From: Abhishek S A Date: Tue, 9 Jun 2026 12:52:44 +0530 Subject: [PATCH 71/78] disable warning and failures in ansible.cfg Signed-off-by: Abhishek S A --- ansible.cfg | 5 +++-- build_image_aarch64/ansible.cfg | 2 ++ build_image_x86_64/ansible.cfg | 2 ++ discovery/ansible.cfg | 2 ++ gitlab/ansible.cfg | 2 ++ input_validation/ansible.cfg | 2 ++ local_repo/ansible.cfg | 2 ++ log_collector/ansible.cfg | 20 ++++++++++++++++++++ prepare_oim/ansible.cfg | 2 ++ provision/ansible.cfg | 2 ++ rollback/ansible.cfg | 2 ++ telemetry/ansible.cfg | 2 ++ upgrade/ansible.cfg | 2 ++ utils/ansible.cfg | 2 ++ utils/credential_utility/ansible.cfg | 2 ++ 15 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 log_collector/ansible.cfg diff --git a/ansible.cfg b/ansible.cfg index 259d15f7e2..5d6abe5216 100644 --- a/ansible.cfg +++ b/ansible.cfg @@ -7,8 +7,9 @@ forks = 5 timeout = 180 executable = /bin/bash display_skipped_hosts = false -library = discovery/library:common/library/modules -#inventory = /opt/omnia/omnia_inventory/cluster_layout 
+deprecation_warnings = false +show_task_path_on_failure = false +library = common/library/modules module_utils = common/library/module_utils [persistent_connection] diff --git a/build_image_aarch64/ansible.cfg b/build_image_aarch64/ansible.cfg index 2ddcf7b8bc..6b59ea34a3 100644 --- a/build_image_aarch64/ansible.cfg +++ b/build_image_aarch64/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/build_image_x86_64/ansible.cfg b/build_image_x86_64/ansible.cfg index 4d5729de9a..1e6bc196a4 100644 --- a/build_image_x86_64/ansible.cfg +++ b/build_image_x86_64/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/discovery/ansible.cfg b/discovery/ansible.cfg index b7f517ca21..8573a5fe3c 100644 --- a/discovery/ansible.cfg +++ b/discovery/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = library:../common/library/modules module_utils = ../common/library/module_utils diff --git a/gitlab/ansible.cfg b/gitlab/ansible.cfg index 2e53b46c4a..beeac5e8fc 100644 --- a/gitlab/ansible.cfg +++ b/gitlab/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/input_validation/ansible.cfg b/input_validation/ansible.cfg index f5adeaf305..c1fb788afb 100644 --- a/input_validation/ansible.cfg +++ 
b/input_validation/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/local_repo/ansible.cfg b/local_repo/ansible.cfg index 57097c487c..0580c918dc 100644 --- a/local_repo/ansible.cfg +++ b/local_repo/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = roles/parse_and_download/library:../common/library/modules module_utils = ../common/library/module_utils diff --git a/log_collector/ansible.cfg b/log_collector/ansible.cfg new file mode 100644 index 0000000000..d4161bca13 --- /dev/null +++ b/log_collector/ansible.cfg @@ -0,0 +1,20 @@ +[defaults] +log_path = /opt/omnia/log/core/playbooks/log_collector.log +remote_tmp = /opt/omnia/tmp/.ansible/tmp/ +host_key_checking = false +forks = 5 +timeout = 180 +executable = /bin/bash +interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false +library = ../common/library/modules +module_utils = ../common/library/module_utils + +[persistent_connection] +command_timeout = 180 +connect_timeout = 180 + +[ssh_connection] +retries = 3 +ssh_args = -o ControlMaster=auto -o ControlPersist=60 -o ConnectTimeout=60 diff --git a/prepare_oim/ansible.cfg b/prepare_oim/ansible.cfg index 84969280c3..e42f6fea14 100644 --- a/prepare_oim/ansible.cfg +++ b/prepare_oim/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/provision/ansible.cfg b/provision/ansible.cfg index b65edbf7ab..56c96f755c 100644 --- a/provision/ansible.cfg +++ 
b/provision/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = library:../common/library/modules module_utils = ../common/library/module_utils diff --git a/rollback/ansible.cfg b/rollback/ansible.cfg index c7116f280a..051ecdb550 100644 --- a/rollback/ansible.cfg +++ b/rollback/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false roles_path = roles:../upgrade/roles:../utils/roles library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/telemetry/ansible.cfg b/telemetry/ansible.cfg index 4b6f270146..e952b30779 100644 --- a/telemetry/ansible.cfg +++ b/telemetry/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/upgrade/ansible.cfg b/upgrade/ansible.cfg index ff4775869e..b52a289123 100644 --- a/upgrade/ansible.cfg +++ b/upgrade/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false roles_path = roles:../utils/roles:../prepare_oim/roles library = ../common/library/modules module_utils = ../common/library/module_utils diff --git a/utils/ansible.cfg b/utils/ansible.cfg index 2aec0b37f9..7b1befb966 100644 --- a/utils/ansible.cfg +++ b/utils/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../common/library/modules module_utils = ../common/library/module_utils diff --git 
a/utils/credential_utility/ansible.cfg b/utils/credential_utility/ansible.cfg index 505802ff42..92cfff7494 100644 --- a/utils/credential_utility/ansible.cfg +++ b/utils/credential_utility/ansible.cfg @@ -6,6 +6,8 @@ forks = 5 timeout = 180 executable = /bin/bash interpreter_python = /usr/bin/python3 +deprecation_warnings = false +show_task_path_on_failure = false library = ../../common/library/modules module_utils = ../../common/library/module_utils From 803d269bad0d693ad5eda7153e110a2f1f257fb8 Mon Sep 17 00:00:00 2001 From: mithileshreddy04 Date: Tue, 9 Jun 2026 15:11:36 +0530 Subject: [PATCH 72/78] Update reload_cloud_init_data.yml Signed-off-by: mithileshreddy04 --- .../roles/upgrade_openchami/tasks/reload_cloud_init_data.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upgrade/roles/upgrade_openchami/tasks/reload_cloud_init_data.yml b/upgrade/roles/upgrade_openchami/tasks/reload_cloud_init_data.yml index 098544305f..2099ea28a1 100644 --- a/upgrade/roles/upgrade_openchami/tasks/reload_cloud_init_data.yml +++ b/upgrade/roles/upgrade_openchami/tasks/reload_cloud_init_data.yml @@ -265,7 +265,7 @@ - name: Verify each cloud-init group config was loaded ansible.builtin.shell: | set -o pipefail - /usr/bin/ochami cloud-init group get config {{ item }} 2>&1 | head -3 + /usr/bin/ochami cloud-init group get config {{ item }} 2>&1 environment: "{{ ci_reload_ochami_env }}" loop: "{{ reloaded_ci_group_names }}" changed_when: false From 380d1523df48527d7d44985cce9bf2fb4fda4160 Mon Sep 17 00:00:00 2001 From: Sujit Jadhav Date: Tue, 9 Jun 2026 17:08:19 +0530 Subject: [PATCH 73/78] fix(provision): fix invalid YAML in cloud-init template when dns_enabled is true (OMN01D-2533) (#4720) The Python heredoc (<<'PYEOF') used for patching CoreDNS ConfigMap had content at column 0 (import sys, yaml, etc.), which prematurely terminated the outer YAML literal block scalar (content: |). 
This caused ochami cloud-init group set to fail with: 'yaml: line 1363: could not find expected :' Replace the heredoc with python3 -c '...' where all lines are properly indented (12 spaces) to stay within the YAML content: | block. Python treats the consistent leading whitespace as base-level indentation, so the code executes correctly. Fixes: OMN01D-2533 Signed-off-by: Sujit Jadhav --- ...ce_kube_control_plane_first_x86_64.yaml.j2 | 37 ++++++++----------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 index 377fd8778a..db721a3d86 100644 --- a/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 +++ b/provision/roles/configure_ochami/templates/cloud_init/ci-group-service_kube_control_plane_first_x86_64.yaml.j2 @@ -640,27 +640,22 @@ {% if dns_enabled | default(false) | bool %} # Forward cluster-internal DNS domain to OIM CoreDNS # This allows K8s pods to resolve Slurm/MPI hostnames via CoreDNS - python3 - "$cfg" << 'PYEOF' -import sys, yaml -cfg_path = sys.argv[1] -with open(cfg_path) as f: - doc = yaml.safe_load(f) -corefile = doc['data']['Corefile'] -fwd_block = """{{ domain_name }}:53 { - errors - cache 30 - forward . 
{{ admin_nic_ip }} -} -""" -if '{{ domain_name }}:53' not in corefile: - corefile = fwd_block + corefile - doc['data']['Corefile'] = corefile - with open(cfg_path, 'w') as f: - yaml.dump(doc, f, default_flow_style=False) - print("Added {{ domain_name }} forward zone to K8s CoreDNS") -else: - print("{{ domain_name }} forward zone already present in K8s CoreDNS") -PYEOF + python3 -c ' + import sys, yaml + cfg_path = sys.argv[1] + with open(cfg_path) as f: + doc = yaml.safe_load(f) + corefile = doc["data"]["Corefile"] + fwd_block = "{{ domain_name }}:53 {\n errors\n cache 30\n forward . {{ admin_nic_ip }}\n}\n" + if "{{ domain_name }}:53" not in corefile: + corefile = fwd_block + corefile + doc["data"]["Corefile"] = corefile + with open(cfg_path, "w") as f: + yaml.dump(doc, f, default_flow_style=False) + print("Added {{ domain_name }} forward zone to K8s CoreDNS") + else: + print("{{ domain_name }} forward zone already present in K8s CoreDNS") + ' "$cfg" {% endif %} # Apply the patched ConfigMap From 7966c5cfd9738fded2348816be82d6f9276ef4a6 Mon Sep 17 00:00:00 2001 From: Kratika Patidar Date: Tue, 9 Jun 2026 17:48:47 +0530 Subject: [PATCH 74/78] updating the comments (#4722) Signed-off-by: Kratika_Patidar --- .../openchami/templates/coredhcp/coredhcp.yaml.j2 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 index d5ded992bb..edfcb8c583 100644 --- a/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 +++ b/prepare_oim/roles/deploy_containers/openchami/templates/coredhcp/coredhcp.yaml.j2 @@ -44,7 +44,9 @@ server4: # 1. Pull the new coresmd image: podman pull ghcr.io/openchami/coresmd:v0.6.x # 2. Comment out the single-subnet coresmd and bootloop lines above # 3. Uncomment the multi-subnet coresmd and bootloop blocks below - # 4. 
Restart services: systemctl restart openchami.target + # 4. Replace the new coresmd image version in files: /etc/containers/systemd/coresmd-coredhcp.container /etc/containers/systemd/coresmd-coredns.container with the old version + # 5. Reload daemon: systemctl daemon-reload + # 6. Restart services: systemctl restart openchami.target # ------------------------------------------------------------------- # - coresmd: | # svc_base_uri=https://{{ cluster_name }}.{{ cluster_domain }}:8443 From 03fda4603809de0012b730cb0e093507454eaaa2 Mon Sep 17 00:00:00 2001 From: Mithilesh Reddy Date: Tue, 9 Jun 2026 17:49:29 +0530 Subject: [PATCH 75/78] postgres backup idempotency fix (#4721) Signed-off-by: mithileshreddy04 --- .../tasks/rollback_status.yml | 8 +++ .../roles/rollback_openchami/vars/main.yml | 4 ++ .../tasks/backup_openchami.yml | 58 +++++++++++++++++-- .../tasks/upgrade_status.yml | 9 +++ upgrade/roles/upgrade_openchami/vars/main.yml | 8 +++ 5 files changed, 82 insertions(+), 5 deletions(-) diff --git a/rollback/roles/rollback_openchami/tasks/rollback_status.yml b/rollback/roles/rollback_openchami/tasks/rollback_status.yml index c04222fdc4..c17d6ec8df 100644 --- a/rollback/roles/rollback_openchami/tasks/rollback_status.yml +++ b/rollback/roles/rollback_openchami/tasks/rollback_status.yml @@ -35,6 +35,14 @@ when: - not (openchami_rollback_failed | default(false) | bool) + - name: Clear pg_dump backup lock after successful rollback + ansible.builtin.file: + path: "{{ rollback_backup_dir | default(rollback_backup_dir_default) }}/{{ pgdump_backup_lock }}" + state: absent + when: + - not (openchami_rollback_failed | default(false) | bool) + failed_when: false + - name: Fail the play if rollback failed ansible.builtin.fail: msg: "OpenCHAMI rollback failed. See above for details." 
diff --git a/rollback/roles/rollback_openchami/vars/main.yml b/rollback/roles/rollback_openchami/vars/main.yml index c6672f1d35..73cf84e88e 100644 --- a/rollback/roles/rollback_openchami/vars/main.yml +++ b/rollback/roles/rollback_openchami/vars/main.yml @@ -94,6 +94,10 @@ rollback_v21_critical_containers: - registry - minio-server +# PostgreSQL backup lock file — cleared after successful rollback to allow +# fresh pg_dump on next upgrade cycle. See upgrade_openchami vars for details. +pgdump_backup_lock: "openchami/postgresql_backup/pgdump_v21_backup.lock" + # Backup sub-paths (relative to rollback_backup_dir) # Created by omnia.sh --upgrade (backup_openchami_data function) backup_quadlets_subpath: "openchami/quadlets" diff --git a/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml b/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml index 72742b968a..3da3739141 100644 --- a/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml +++ b/upgrade/roles/upgrade_openchami/tasks/backup_openchami.yml @@ -107,6 +107,23 @@ - name: Retrieve PostgreSQL credentials ansible.builtin.include_tasks: retrieve_postgres_credentials.yml + # ── Check if a valid pg_dump backup is already locked ────────────── + # If a previous upgrade run already captured the v2.1 pg_dump, a lock + # file prevents this run from overwriting it with a (possibly v2.2) + # schema dump. The lock is cleared on successful upgrade or rollback. + - name: Check if pg_dump backup lock exists + ansible.builtin.stat: + path: "{{ openchami_backup_dir }}/{{ pgdump_backup_lock }}" + register: pgdump_lock_stat + + - name: Skip pg_dump — existing v2.1 backup is locked + ansible.builtin.debug: + msg: >- + PostgreSQL backup lock found — a valid pre-upgrade pg_dump already + exists from a previous run. Skipping pg_dump to preserve the + original v2.1 database backup for rollback safety. 
+ when: pgdump_lock_stat.stat.exists | default(false) + # ── Verify disk space for pg_dump (df on backup target) ──────────── # /opt/omnia is bind-mounted — df works locally inside omnia_core. - name: Check available disk space on backup target @@ -115,6 +132,7 @@ df -BG "/opt/omnia" | tail -1 | awk '{print $4}' | sed 's/G//' register: backup_disk_space_gb changed_when: false + when: not (pgdump_lock_stat.stat.exists | default(false)) - name: Get current PostgreSQL database size ansible.builtin.shell: | @@ -127,6 +145,7 @@ delegate_to: oim delegate_facts: true connection: ssh + when: not (pgdump_lock_stat.stat.exists | default(false)) - name: Display disk space status ansible.builtin.debug: @@ -135,6 +154,7 @@ - "Backup target: {{ openchami_backup_dir }}" - "Available space: {{ backup_disk_space_gb.stdout | default('unknown') | trim }}GB" - "PostgreSQL database size: {{ postgres_db_size.stdout | default('unknown') | trim }}" + when: not (pgdump_lock_stat.stat.exists | default(false)) - name: Verify sufficient disk space for backup ansible.builtin.assert: @@ -146,8 +166,9 @@ Minimum required: 5GB Please free up space on /opt/omnia before proceeding. success_msg: "Sufficient disk space available for backup ({{ backup_disk_space_gb.stdout | default('0') | trim }}GB)" + when: not (pgdump_lock_stat.stat.exists | default(false)) - # ── PostgreSQL pg_dump ───────────────────────────────────────────── + # ── PostgreSQL pg_dump (skipped if lock exists) ──────────────────── # Dump the hmsds database inside the postgres container, then copy # the SQL file to the OIM host-side shared backup path. 
# For prepare_oim-only scenarios the DB may be empty — pg_dump still @@ -167,6 +188,7 @@ delegate_to: oim delegate_facts: true connection: ssh + when: not (pgdump_lock_stat.stat.exists | default(false)) - name: Copy pg_dump from postgres container to shared backup path ansible.builtin.shell: | @@ -176,7 +198,9 @@ register: pgdump_copy_result changed_when: pgdump_copy_result.rc == 0 failed_when: false - when: pgdump_result.rc == 0 + when: + - not (pgdump_lock_stat.stat.exists | default(false)) + - pgdump_result.rc | default(1) == 0 delegate_to: oim delegate_facts: true connection: ssh @@ -189,7 +213,9 @@ -- stderr: {{ pgdump_result.stderr | default('') | trim }} dest: "{{ openchami_backup_dir }}/openchami/postgresql_backup/openchami.sql" mode: "{{ file_permissions_644 }}" - when: pgdump_result.rc | default(1) != 0 + when: + - not (pgdump_lock_stat.stat.exists | default(false)) + - pgdump_result.rc | default(1) != 0 - name: Display pg_dump warning if it failed ansible.builtin.debug: @@ -197,7 +223,9 @@ WARNING: pg_dump returned rc={{ pgdump_result.rc | default('unknown') }}. This is acceptable for prepare_oim-only setups with empty databases. stderr: {{ pgdump_result.stderr | default('none') | trim }} - when: pgdump_result.rc | default(1) != 0 + when: + - not (pgdump_lock_stat.stat.exists | default(false)) + - pgdump_result.rc | default(1) != 0 - name: Clean up temporary pg_dump file inside postgres container ansible.builtin.shell: | @@ -208,19 +236,39 @@ delegate_to: oim delegate_facts: true connection: ssh + when: not (pgdump_lock_stat.stat.exists | default(false)) - name: Verify pg_dump output exists locally ansible.builtin.stat: path: "{{ openchami_backup_dir }}/openchami/postgresql_backup/openchami.sql" register: pgdump_stat + # ── Create pg_dump backup lock after successful dump ──────────────── + - name: Create pg_dump backup lock to protect v2.1 backup + ansible.builtin.copy: + content: | + # PostgreSQL backup lock — do not delete manually. 
+ # This lock prevents upgrade re-runs from overwriting the + # original v2.1 pg_dump with a v2.2 schema dump. + # Cleared automatically on successful upgrade or rollback. + locked_at: "{{ ansible_facts.date_time.iso8601 }}" + backup_file: "{{ openchami_backup_dir }}/openchami/postgresql_backup/openchami.sql" + backup_size_bytes: {{ pgdump_stat.stat.size | default(0) }} + dest: "{{ openchami_backup_dir }}/{{ pgdump_backup_lock }}" + mode: "{{ file_permissions_644 }}" + when: + - not (pgdump_lock_stat.stat.exists | default(false)) + - pgdump_stat.stat.exists | default(false) + - pgdump_stat.stat.size | default(0) | int > 500 + - name: Display pg_dump backup status ansible.builtin.debug: verbosity: 1 msg: >- PostgreSQL backup: {{ 'created' if pgdump_stat.stat.exists | default(false) else 'MISSING' }} ({{ pgdump_stat.stat.size | default(0) }} bytes, - pg_dump rc={{ pgdump_result.rc | default('unknown') }}) + pg_dump rc={{ pgdump_result.rc | default('skipped (locked)') }}, + lock: {{ 'active' if pgdump_lock_stat.stat.exists | default(false) else ('created' if (pgdump_stat.stat.exists | default(false)) and (pgdump_stat.stat.size | default(0) | int > 500) else 'not created') }}) # ── Backup entire /etc/openchami directory ────────────────────────── # Back up the complete /etc/openchami directory to the backup folder. 
diff --git a/upgrade/roles/upgrade_openchami/tasks/upgrade_status.yml b/upgrade/roles/upgrade_openchami/tasks/upgrade_status.yml index 9beadc2572..3b6f3d540e 100644 --- a/upgrade/roles/upgrade_openchami/tasks/upgrade_status.yml +++ b/upgrade/roles/upgrade_openchami/tasks/upgrade_status.yml @@ -30,6 +30,15 @@ - not (openchami_upgrade_failed | default(false) | bool) - openchami_deployed | default(false) | bool + - name: Clear pg_dump backup lock after successful upgrade + ansible.builtin.file: + path: "{{ openchami_backup_dir | default(openchami_backup_dir_default) }}/{{ pgdump_backup_lock }}" + state: absent + when: + - not (openchami_upgrade_failed | default(false) | bool) + - openchami_deployed | default(false) | bool + failed_when: false + - name: Report upgrade skipped (not deployed) ansible.builtin.debug: msg: "{{ upgrade_messages.deployment.not_deployed }}" diff --git a/upgrade/roles/upgrade_openchami/vars/main.yml b/upgrade/roles/upgrade_openchami/vars/main.yml index 5f0b9bca95..a3b70aeb13 100644 --- a/upgrade/roles/upgrade_openchami/vars/main.yml +++ b/upgrade/roles/upgrade_openchami/vars/main.yml @@ -41,6 +41,14 @@ openchami_haproxy_fallback: "https://localhost:8443" # Default backup directory (used when manifest.backup_dir is not available) openchami_backup_dir_default: "/opt/omnia/backups/upgrade/version_2.1.0.0" +# PostgreSQL backup lock file — prevents pg_dump from being overwritten on +# upgrade re-runs. If the upgrade fails AFTER db migration (schema is now v2.2) +# and the user re-runs the upgrade, a second pg_dump would capture v2.2 schema +# and overwrite the original v2.1 backup, making rollback impossible. +# The lock is created after the first successful pg_dump and cleared only when +# the upgrade succeeds fully or a rollback completes. 
+pgdump_backup_lock: "openchami/postgresql_backup/pgdump_v21_backup.lock" + # PostgreSQL database settings # These are fallback defaults only — actual credentials are dynamically # retrieved from the postgres container's POSTGRES_MULTIPLE_DATABASES env var From eb2497a6e64bbf1da27d0334ba0f4e0228490d61 Mon Sep 17 00:00:00 2001 From: Jagadeesh N V Date: Tue, 9 Jun 2026 22:53:17 +0530 Subject: [PATCH 76/78] Fixed reading of build_stream_terminal ------------- increased reboot timeout for slurm login_compiler_* in rollback restoring slurm conf for rollback Signed-off-by: Jagadeesh N V --- rollback/playbooks/rollback_slurm.yml | 20 +++++++---- .../rollback_slurm/tasks/slurm_backup.yml | 36 ++++++++----------- rollback/rollback.yml | 5 +-- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/rollback/playbooks/rollback_slurm.yml b/rollback/playbooks/rollback_slurm.yml index 8d0bc9374c..9cb4b01833 100644 --- a/rollback/playbooks/rollback_slurm.yml +++ b/rollback/playbooks/rollback_slurm.yml @@ -54,11 +54,11 @@ (manifest.component_status[component_name] | default('pending') == 'completed') or (software_config.softwares | selectattr('name', 'equalto', 'slurm_custom') | list | length == 0) }} + cacheable: true - name: "Handle BuildStream terminal gate (C-24)" - when: - - hostvars['localhost']['build_stream_terminal'] | default(false) | bool - - manifest.component_status.build_stream | default('pending') == 'completed' + when: ((build_stream_terminal | default(false) | bool) or + (manifest.component_status.build_stream | default('pending') == 'completed')) block: - name: "Mark as skipped — BuildStream terminal gate active" ansible.builtin.copy: @@ -75,6 +75,7 @@ - name: "Set slurm_skip — BuildStream terminal gate active" ansible.builtin.set_fact: slurm_skip: true + cacheable: true - name: "Skip — BuildStream terminal gate active" ansible.builtin.meta: end_play @@ -220,8 +221,8 @@ [ROLLBACK] SLURM CLUSTER — PRE-ROLLBACK NOTICE 
============================================== 1. NODE REBOOT — All Slurm/login nodes will reboot. - 2. NFS MOUNTS — Omnia 2.1 mount points are preserved. Do not modify during rollback. - 3. ROLLBACK SCOPE — New NFS mounts(Ex: VAST) added during upgrade will NOT be retained on rollback. + 2. NFS MOUNTS — New NFS mounts(Ex: VAST) added during upgrade will NOT be retained on rollback. + 3. SLURM CONF - Slurm configuration files will be restored to their original state. - name: Read oim_metadata for oim_node_name (standalone fallback) ansible.builtin.include_vars: @@ -271,13 +272,20 @@ serial: 100% strategy: host_pinned vars: - reboot_timeout: 1200 + default_reboot_timeout: 1200 ssh_timeout: 60 + reboot_time_map: + login_compiler_node_x86_64: 1800 + login_compiler_node_aarch64: 1800 tasks: - name: Skip if slurm upgrade not needed ansible.builtin.meta: end_play when: hostvars['localhost']['slurm_skip'] | default(false) | bool + - name: Set reboot timeout based on node type + ansible.builtin.set_fact: + reboot_timeout: "{{ (group_names | select('in', reboot_time_map) | map('extract', reboot_time_map) | list + [default_reboot_timeout]) | max | int }}" + - name: Initialize state ansible.builtin.set_fact: node_status: diff --git a/rollback/roles/rollback_slurm/tasks/slurm_backup.yml b/rollback/roles/rollback_slurm/tasks/slurm_backup.yml index df9e96ed7f..1e04de6052 100644 --- a/rollback/roles/rollback_slurm/tasks/slurm_backup.yml +++ b/rollback/roles/rollback_slurm/tasks/slurm_backup.yml @@ -123,25 +123,17 @@ msg: "SLURM: MySQL datadir not found in nfs" when: not is_mysql_datadir -- name: Create backup directory - when: backup_slurm_nfs_contents - block: - - name: Create backup directory - ansible.builtin.file: - path: "{{ slurm_2_1_backup_dir | replace('/opt', oim_shared_path) }}/slurm_21_nfs_backups" - state: directory - owner: root - group: root - mode: '0755' - - - name: Copy slurm data - ansible.builtin.copy: - src: "{{ slurm_nfs_mounted_path }}" - dest: "{{ 
slurm_2_1_backup_dir | replace('/opt', oim_shared_path) 
}}/slurm_21_nfs_backups" # remove the /opt prefix with {{ oim_shared_path }} - remote_src: true - mode: preserve - - # - name: Synchronize slurm data - # ansible.builtin.synchronize: - # src: "{{ slurm_nfs[0].client_share_path }}/slurm" - # dest: "{{ slurm_2_1_backup_dir | replace('/opt', oim_shared_path) }}/slurm_21_nfs_backups" +- name: Set path for slurm backup + ansible.builtin.set_fact: + up_slurm_backup_path: "{{ slurm_nfs[0].client_share_path }}/slurm_backups/preupgrade_{{ rollback_manifest.target_version }}_backup" + +- name: Restore slurm etc/slurm directories + ansible.builtin.copy: + src: "{{ up_slurm_backup_path }}/{{ item }}/" + dest: "{{ ctld_dir_nfs }}/{{ item }}/" + remote_src: true + mode: preserve + register: conf_backup_result + loop: + - "etc/slurm" + ignore_errors: true diff --git a/rollback/rollback.yml b/rollback/rollback.yml index 9f75363ced..24c812f713 100644 --- a/rollback/rollback.yml +++ b/rollback/rollback.yml @@ -254,9 +254,10 @@ ansible.builtin.set_fact: build_stream_terminal: >- {{ (build_stream_config.enable_build_stream | default(false) | bool) - and (upgrade_manifest is defined) + or ((upgrade_manifest is defined) and (upgrade_manifest.component_status is defined) - and (upgrade_manifest.component_status.build_stream | default('pending') in ['completed', 'skipped']) }} + and (upgrade_manifest.component_status.build_stream | default('pending') in ['completed', 'skipped'])) }} + cacheable: true - name: Identify components skipped by BuildStream terminal gate ansible.builtin.set_fact: From 854746791b56d5bc9d313774a4706136f389e52c Mon Sep 17 00:00:00 2001 From: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> Date: Tue, 9 Jun 2026 23:04:54 +0530 Subject: [PATCH 77/78] Update main.yml Signed-off-by: Jagadeesh N V <39791839+jagadeeshnv@users.noreply.github.com> --- provision/roles/configure_ochami/vars/main.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/provision/roles/configure_ochami/vars/main.yml 
b/provision/roles/configure_ochami/vars/main.yml index 70f7b34bf2..982a164460 100644 --- a/provision/roles/configure_ochami/vars/main.yml +++ b/provision/roles/configure_ochami/vars/main.yml @@ -152,6 +152,3 @@ ochami_discovery_failed_msg: | 4. Check ochami CLI: /usr/bin/ochami smd service status 5. Restart openchami.target: systemctl restart openchami.target 6. Rerun provision.yml: ansible-playbook provision/provision.yml - -# templates/hpc_tools/install_cuda_driver.sh.j2 - add a hyphen so that empty will also work for latest -cuda_driver_version: "-3:580.159.04-1.el10" From 92f14aa402083b17736151be3ac5e7fc87fe9490 Mon Sep 17 00:00:00 2001 From: pullan1 Date: Wed, 10 Jun 2026 10:52:30 +0530 Subject: [PATCH 78/78] Ansible 2.20 fixes (#4717) * localrepo checkmarx fixes Signed-off-by: pullan1 * Checkmarx fixes Signed-off-by: pullan1 * vast metrics relabeling fix Signed-off-by: pullan1 * ansible 2.20 fixes Signed-off-by: pullan1 --------- Signed-off-by: pullan1 --- local_repo/pulp_cleanup.yml | 13 ++++++++++--- .../pulp/tasks/create_pulp_config_https.yml | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/local_repo/pulp_cleanup.yml b/local_repo/pulp_cleanup.yml index 0b2eff9346..04e4fda5ae 100644 --- a/local_repo/pulp_cleanup.yml +++ b/local_repo/pulp_cleanup.yml @@ -106,7 +106,8 @@ WARNING: This will permanently delete the specified artifacts. This action cannot be undone. - Type 'yes' to continue or press Ctrl+C to abort + Type 'yes' to continue or 'no' to abort + echo: true register: user_input when: not force_skip_confirmation @@ -114,9 +115,15 @@ ansible.builtin.set_fact: user_confirmed: "{{ (user_input.user_input | default('') | lower) == 'yes' }}" + - name: Display cancellation message + ansible.builtin.debug: + msg: "Cleanup cancelled by user. Exiting." 
+ when: + - not force_skip_confirmation + - not user_confirmed + - name: Abort if not confirmed - ansible.builtin.fail: - msg: "Cleanup cancelled by user" + ansible.builtin.meta: end_play when: - not force_skip_confirmation - not user_confirmed diff --git a/prepare_oim/roles/deploy_containers/pulp/tasks/create_pulp_config_https.yml b/prepare_oim/roles/deploy_containers/pulp/tasks/create_pulp_config_https.yml index f015f6ce74..a548bf6ed6 100644 --- a/prepare_oim/roles/deploy_containers/pulp/tasks/create_pulp_config_https.yml +++ b/prepare_oim/roles/deploy_containers/pulp/tasks/create_pulp_config_https.yml @@ -128,7 +128,7 @@ - name: Record current timestamp in track file ansible.builtin.copy: dest: "{{ track_file_path }}" - content: "Timestamp: {{ ansible_date_time.iso8601 }}" + content: "Timestamp: {{ ansible_facts.date_time.iso8601 }}" mode: "{{ logs_dir_permission }}" # CERT GENERATION USING community.crypto x509_certificate MODULE