From ef372d702c4c8044bfca36114e3ef2828d76abff Mon Sep 17 00:00:00 2001 From: Sujit Jadhav Date: Mon, 15 Jun 2026 15:05:38 +0530 Subject: [PATCH 1/2] fix: ADMIN_IP subnet consistency validation - remove HA duplicate and add file path in error (#4753) Fixes OMN01D-2552: Input validation missing for PXE mapping ADMIN_IP and network_spec additional_subnets consistency checks. Issues fixed: 1. Remove duplicate subnet consistency check from high_availability_validation.py that caused the same error to appear against both provision_config.yml and high_availability_config.yml. The check now runs only in provision_validation. 2. Include the PXE mapping file path in the error_key so the rendered message shows which file contains the misconfigured ADMIN_IP: [pxe_mapping_file_path]: ADMIN_IP subnet consistency Files changed: - high_availability_validation.py: removed validate_all_host_ips_same_subnet_as_vip call and unused import - provision_validation.py: prepend pxe_mapping_file_path to error_key - test_additional_subnets_validation.py: assert file path present in error_key Signed-off-by: Sujit Jadhav --- .../validation_flows/high_availability_validation.py | 4 ---- .../input_validation/validation_flows/provision_validation.py | 2 +- .../modules/tests/test_additional_subnets_validation.py | 3 +++ 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py index 1d635eaec3..5f5458cb65 100644 --- a/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/high_availability_validation.py @@ -23,7 +23,6 @@ from ansible.module_utils.input_validation.common_utils import en_us_validation_msg from ansible.module_utils.input_validation.validation_flows.vip_pxe_validation import ( validate_vip_vs_pxe_mapping_host_ips, - validate_all_host_ips_same_subnet_as_vip ) file_names = config.files @@ -401,9 +400,6 @@ def validate_vip_address( if pxe_mapping_file_path: # Check VIP doesn't conflict with any HOST_IP in PXE mapping validate_vip_vs_pxe_mapping_host_ips(errors, config_type, vip_address, pxe_mapping_file_path) - - # Check all HOST_IPs are in same subnet as VIP - validate_all_host_ips_same_subnet_as_vip(errors, vip_address, pxe_mapping_file_path, admin_netmaskbits, additional_subnets, oim_admin_ip) def validate_service_k8s_cluster_ha( errors, diff --git a/common/library/module_utils/input_validation/validation_flows/provision_validation.py b/common/library/module_utils/input_validation/validation_flows/provision_validation.py index 23cdd1efb4..b897ba6c63 100644 --- a/common/library/module_utils/input_validation/validation_flows/provision_validation.py +++ b/common/library/module_utils/input_validation/validation_flows/provision_validation.py @@ -913,7 +913,7 @@ def validate_pxe_admin_ips_subnet_consistency( if not in_additional: errors.append( create_error_msg( - "ADMIN_IP subnet consistency", + f"{pxe_mapping_file_path}: ADMIN_IP subnet consistency", host_ip, f"Node ADMIN_IP {host_ip} does not belong to the primary " f"admin subnet ({oim_admin_ip}/{admin_netmaskbits}) or any " diff --git a/common/library/modules/tests/test_additional_subnets_validation.py b/common/library/modules/tests/test_additional_subnets_validation.py index 62d60b112f..ddaf0dea85 100644 --- a/common/library/modules/tests/test_additional_subnets_validation.py +++ b/common/library/modules/tests/test_additional_subnets_validation.py @@ -314,6 +314,7 @@ def test_admin_ip_in_undefined_subnet(self): ) self.assertTrue(len(errors) > 0) self.assertTrue("192.168.100.10" in str(errors)) + self.assertIn(pxe_file, errors[0]["error_key"]) finally: os.unlink(pxe_file) @@ -333,6 +334,7 @@ def test_admin_ip_in_additional_subnet_undefined_in_network_spec(self): ) self.assertTrue(len(errors) > 0) self.assertTrue("10.40.2.10" in str(errors)) + self.assertIn(pxe_file, errors[0]["error_key"]) finally: os.unlink(pxe_file) @@ -347,6 +349,7 @@ def test_mixed_valid_and_invalid_admin_ips(self): self.assertEqual(len(errors), 1) self.assertTrue("192.168.100.10" in str(errors)) self.assertFalse("172.16.0.10" in str(errors)) + self.assertIn(pxe_file, errors[0]["error_key"]) finally: os.unlink(pxe_file) From 5b2d1fa48be017cfb7b1e4b138d03a67c89c82f2 Mon Sep 17 00:00:00 2001 From: Venu-p1 <236371043+Venu-p1@users.noreply.github.com> Date: Mon, 15 Jun 2026 18:50:18 +0530 Subject: [PATCH 2/2] Fix/rollback buildstream code (#4755) * fix BuildStream rollback source directory restoration - Remove existing 2.2 source directory completely before restoring from backup - Remove failed_when: false from source directory restoration task to catch errors Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> * fix BuildStream rollback watcher service restart and validation Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> * refactor BuildStream rollback watcher service validation to use systemd_service module Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> --------- Signed-off-by: venu <236371043+Venu-p1@users.noreply.github.com> --- .../tasks/buildstream.yml | 85 ++++++++++++++++++- .../roles/rollback_buildstream/tasks/main.yml | 10 +++ 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/rollback/roles/rollback_buildstream/tasks/buildstream.yml b/rollback/roles/rollback_buildstream/tasks/buildstream.yml index 80e24d8322..3d4e01ff74 100644 --- a/rollback/roles/rollback_buildstream/tasks/buildstream.yml +++ b/rollback/roles/rollback_buildstream/tasks/buildstream.yml @@ -16,14 +16,16 @@ # buildstream.yml — All BuildStream Tasks (Action Dispatch) # ============================================================================ # Available _action values: -# validate_backup — stat check backup quadlet exists, fail if missing -# stop — Restore: systemd stop; Uninstall: podman stop +# validate_backup — stat check backup quadlet + source dir exist, fail if missing +# stop — Restore: stop watcher THEN buildstream; Uninstall: podman stop # restore_quadlet — copy backup quadlet to /etc/containers/systemd/ # restore_config — copy backup build_stream_config.yml +# restore_source — delete 2.2 source dir, restore 2.1 from backup # remove_quadlet — file absent + set enable_build_stream: false # start — systemd start + URI health check retry loop +# restart_watcher — Restore: restart playbook_watcher.service + verify active # cleanup_full — Uninstall: stop/remove watcher, remove dirs, images, omnia.target entries, automation -# validate_health — Restore: URI health check; Uninstall: confirm no container +# validate_health — Restore: URI health check + watcher active; Uninstall: confirm no container # # All OIM host operations use delegate_to: oim + connection: ssh # ============================================================================ @@ -45,7 +47,37 @@ - _action == 'validate_backup' - not (_bs_quadlet_backup_stat.stat.exists | default(false)) +- name: "BuildStream | Validate source directory backup exists" + ansible.builtin.stat: + path: "{{ backup_path_from_oim }}/buildstream/build_stream" + register: _bs_source_backup_stat + delegate_to: oim + delegate_facts: true + connection: ssh + when: _action == 'validate_backup' + +- name: "BuildStream | Fail if source directory backup missing" + ansible.builtin.fail: + msg: "BuildStream source directory backup not found: {{ backup_path_from_oim }}/buildstream/build_stream" + when: + - _action == 'validate_backup' + - not (_bs_source_backup_stat.stat.exists | default(false)) + # ── stop ───────────────────────────────────────────────────────────────────── +# Restore path: stop watcher FIRST — it holds an open Python process inside +# the build_stream source directory that restore_source will delete. +- name: "BuildStream | Stop playbook_watcher service (Restore)" + ansible.builtin.systemd_service: + name: "{{ buildstream_orchestrator_watcher_service }}" + state: stopped + failed_when: false + delegate_to: oim + delegate_facts: true + connection: ssh + when: + - _action == 'stop' + - build_stream_upgrade_path == 'upgrade_existing' + - name: "BuildStream | Stop service (Restore)" ansible.builtin.systemd_service: name: "{{ buildstream_orchestrator_bs_service }}" @@ -96,6 +128,15 @@ when: _action == 'restore_config' # ── restore_source ─────────────────────────────────────────────────────────── +- name: "BuildStream | Remove existing 2.2 source directory completely" + ansible.builtin.file: + path: "{{ omnia_nfs_share }}/build_stream" + state: absent + delegate_to: oim + delegate_facts: true + connection: ssh + when: _action == 'restore_source' + - name: "BuildStream | Restore build_stream source directory from backup" ansible.builtin.copy: src: "{{ backup_path_from_oim }}/buildstream/build_stream" @@ -105,7 +146,6 @@ delegate_to: oim delegate_facts: true connection: ssh - failed_when: false when: _action == 'restore_source' # ── remove_quadlet ─────────────────────────────────────────────────────────── @@ -304,6 +344,30 @@ connection: ssh when: _action == 'cleanup_full' +# ── restart_watcher ────────────────────────────────────────────────────────── +# Runs AFTER daemon_reload (Phase 5) and after buildstream is started (Phase 6). +# The 2.1 Python script is now in place; a clean restart picks it up. +- name: "BuildStream | Restart playbook_watcher service (Restore)" + ansible.builtin.systemd_service: + name: "{{ buildstream_orchestrator_watcher_service }}" + state: restarted + enabled: true + delegate_to: oim + delegate_facts: true + connection: ssh + when: _action == 'restart_watcher' + +- name: "BuildStream | Verify playbook_watcher service is active" + ansible.builtin.systemd_service: + name: "{{ buildstream_orchestrator_watcher_service }}" + register: _watcher_restart_check + changed_when: false + failed_when: not (_watcher_restart_check.status.ActiveState | default('') == 'active') + delegate_to: oim + delegate_facts: true + connection: ssh + when: _action == 'restart_watcher' + # ── validate_health ────────────────────────────────────────────────────────── - name: "BuildStream | Validate API health (Restore)" ansible.builtin.uri: @@ -318,6 +382,19 @@ - _action == 'validate_health' - build_stream_upgrade_path == 'upgrade_existing' +- name: "BuildStream | Validate playbook_watcher service is active (Restore)" + ansible.builtin.systemd_service: + name: "{{ buildstream_orchestrator_watcher_service }}" + register: _watcher_health_check + changed_when: false + failed_when: not (_watcher_health_check.status.ActiveState | default('') == 'active') + delegate_to: oim + delegate_facts: true + connection: ssh + when: + - _action == 'validate_health' + - build_stream_upgrade_path == 'upgrade_existing' + - name: "BuildStream | Validate no container running (Uninstall)" ansible.builtin.command: "podman ps --filter name={{ buildstream_orchestrator_bs_container }} --format '{{ '{{' }}.Names{{ '}}' }}'" register: _bs_container_validate diff --git a/rollback/roles/rollback_buildstream/tasks/main.yml b/rollback/roles/rollback_buildstream/tasks/main.yml index 7c295ccb3b..2c5024b7af 100644 --- a/rollback/roles/rollback_buildstream/tasks/main.yml +++ b/rollback/roles/rollback_buildstream/tasks/main.yml @@ -160,6 +160,16 @@ - build_stream_rollback - start + # Restore: Restart watcher with 2.1 source now in place. + - name: "Phase 6 | Restart playbook_watcher (Restore)" + ansible.builtin.include_tasks: buildstream.yml + vars: + _action: restart_watcher + when: build_stream_upgrade_path == 'upgrade_existing' + tags: + - build_stream_rollback + - start + # Uninstall: Full cleanup (containers, dirs, images, watcher, automation, omnia.target) - name: "Phase 6 | Full BuildStream cleanup (Uninstall)" ansible.builtin.include_tasks: buildstream.yml