diff --git a/autobot-slm-backend/ansible/playbooks/provision-fleet-roles.yml b/autobot-slm-backend/ansible/playbooks/provision-fleet-roles.yml index 32f885976..8576b761b 100644 --- a/autobot-slm-backend/ansible/playbooks/provision-fleet-roles.yml +++ b/autobot-slm-backend/ansible/playbooks/provision-fleet-roles.yml @@ -201,41 +201,45 @@ gather_facts: false tasks: - - name: "SLM | Check if node has both frontend and slm_manager roles" + # Detect co-location on the SLM manager host (#3426): + # 1. Wizard sets slm_colocated_frontend=True on 00-SLM-Manager when any + # local node carries the 'frontend' role. + # 2. Standalone runs (slm-nodes.yml) fall back to the package.json stat. + # The stat/resolve tasks run only on hosts identified as the SLM manager (slm_server/slm group membership or an slm-backend/slm_manager role); they are skipped on all others. + - name: "SLM | Determine if this is the SLM manager host" ansible.builtin.set_fact: - _is_slm_frontend_colocated: >- + _is_slm_manager: >- {{ - ( - 'frontend' in (node_roles | default([])) or - 'autobot-frontend' in (node_roles | default([])) or - inventory_hostname in groups.get('frontend', []) or - inventory_hostname in groups.get('02-Frontend', []) - ) and ( - 'slm-backend' in (node_roles | default([])) or - 'slm_manager' in (node_roles | default([])) or - inventory_hostname in groups.get('slm', []) or - inventory_hostname in groups.get('slm_server', []) - ) + inventory_hostname in groups.get('slm_server', []) or + inventory_hostname in groups.get('slm', []) or + 'slm-backend' in (node_roles | default([])) or + 'slm_manager' in (node_roles | default([])) }} tags: ['frontend', 'slm-nginx', 'provision'] - - name: "SLM | Detect co-located user frontend after frontend deploy" + - name: "SLM | Stat user frontend package.json for fallback co-location detection" ansible.builtin.stat: path: "{{ frontend_dist_dir | default('/opt/autobot/autobot-frontend/dist') | dirname }}/package.json" register: _post_frontend_colocated_check - when: _is_slm_frontend_colocated | bool + when: _is_slm_manager | bool tags: ['frontend', 
'slm-nginx', 'provision'] - - name: "SLM | Set co-located frontend flag" + - name: "SLM | Resolve co-location flag (wizard var or package.json fallback)" ansible.builtin.set_fact: - slm_colocated_frontend: "{{ _post_frontend_colocated_check.stat.exists | default(false) }}" - when: _is_slm_frontend_colocated | bool + _is_slm_frontend_colocated: >- + {{ + _is_slm_manager | bool and ( + slm_colocated_frontend | default(false) | bool or + _post_frontend_colocated_check.stat.exists | default(false) | bool + ) + }} + when: _is_slm_manager | bool tags: ['frontend', 'slm-nginx', 'provision'] - name: "SLM | Load slm_manager defaults for nginx re-render" ansible.builtin.include_vars: file: "{{ playbook_dir }}/../roles/slm_manager/defaults/main.yml" - when: _is_slm_frontend_colocated | bool + when: _is_slm_frontend_colocated | default(false) | bool tags: ['frontend', 'slm-nginx', 'provision'] - name: "SLM | Re-render SLM nginx config for co-located mode (#3012)" @@ -244,13 +248,11 @@ dest: "/etc/nginx/sites-available/{{ slm_nginx_config | default('autobot-slm') }}" mode: "0644" backup: true - when: - - _is_slm_frontend_colocated | bool - - slm_colocated_frontend | default(false) | bool + when: _is_slm_frontend_colocated | default(false) | bool register: _slm_nginx_rerendered tags: ['frontend', 'slm-nginx', 'provision'] - # Re-build the SLM frontend so VITE_API_URL='/slm' is baked in (#3268). + # Re-build the SLM frontend so VITE_API_URL='/slm' is baked in (#3268, #3426). # Without this, the SLM login page calls /api/auth/login which nginx routes # to the user backend (port 8001) → 502 in co-located mode. # Must run BEFORE nginx test/reload so the new assets are served immediately. 
@@ -261,17 +263,14 @@ become_user: "{{ slm_user | default('autobot') }}" environment: VITE_API_URL: "/slm" - when: - - _is_slm_frontend_colocated | bool - - slm_colocated_frontend | default(false) | bool + when: _is_slm_frontend_colocated | default(false) | bool changed_when: true tags: ['frontend', 'slm-nginx', 'provision'] - name: "SLM | Test nginx config after co-location update" ansible.builtin.command: cmd: nginx -t - when: - - _slm_nginx_rerendered is changed + when: _slm_nginx_rerendered is defined and _slm_nginx_rerendered is changed changed_when: false tags: ['frontend', 'slm-nginx', 'provision'] @@ -279,8 +278,7 @@ ansible.builtin.systemd: name: nginx state: reloaded - when: - - _slm_nginx_rerendered is changed + when: _slm_nginx_rerendered is defined and _slm_nginx_rerendered is changed tags: ['frontend', 'slm-nginx', 'provision'] # ------------------------------------------------------------------- diff --git a/autobot-slm-backend/api/setup_wizard.py b/autobot-slm-backend/api/setup_wizard.py index c3b7f3f40..9f2e87574 100644 --- a/autobot-slm-backend/api/setup_wizard.py +++ b/autobot-slm-backend/api/setup_wizard.py @@ -136,8 +136,11 @@ def _build_inventory_children( # Role name -> (variable_name, port) for infrastructure service discovery. # Maps active roles to the Ansible vars that templates expect (#1431). +# Ports are INTERNAL service ports (uvicorn/service listen ports), not the +# external nginx TLS port (8443). Co-located nodes use 127.0.0.1 so uvicorn +# binds to loopback; nginx already holds 8443 on the same host (#3426). 
_ROLE_INFRA_VARS: dict[str, tuple[str, int]] = { - "backend": ("backend_host", 8443), + "backend": ("backend_host", 8001), # uvicorn internal port (#3426: was 8443) "redis": ("redis_host", 6379), "frontend": ("frontend_host", 5173), "ai-stack": ("ai_stack_host", 8080), @@ -149,8 +152,14 @@ def _build_inventory_children( def _build_infra_vars( node_roles: list, node_id_to_ip: dict[str, str], + local_ips: set | None = None, ) -> dict: - """Derive infrastructure discovery vars from active role assignments (#1431).""" + """Derive infrastructure discovery vars from active role assignments (#1431). + + For co-located services (node IP in local_ips), uses 127.0.0.1 so that + uvicorn and other daemons bind to loopback rather than an external + interface that nginx may already hold (#3426). + """ infra_vars: dict = {} for nr in node_roles: mapping = _ROLE_INFRA_VARS.get(nr.role_name) @@ -161,7 +170,9 @@ def _build_infra_vars( continue host_var, port = mapping if host_var not in infra_vars: - infra_vars[host_var] = ip + # Co-located: use loopback so services bind correctly on the SLM host. + resolved = "127.0.0.1" if (local_ips and ip in local_ips) else ip + infra_vars[host_var] = resolved infra_vars[host_var.replace("_host", "_port")] = port return infra_vars @@ -272,9 +283,14 @@ def _apply_colocation_vars( frontend at / and SLM at /slm/ (#2829). When backend is co-located too, sets frontend_backend_port=8001 and frontend_backend_protocol=http so templates proxy directly to uvicorn, eliminating the double-proxy. + + Also propagates slm_colocated_frontend=True to the 00-SLM-Manager host + entry so Phase 4c in provision-fleet-roles.yml can rebuild the SLM + frontend with VITE_API_URL=/slm (#3426). 
""" _frontend_roles = {"frontend", "autobot-frontend"} _backend_roles = {"backend", "autobot-backend"} + colocated_frontend_detected = False for node in db_nodes: inv_name = node.ansible_target if inv_name not in hosts: @@ -285,11 +301,20 @@ def _apply_colocation_vars( is_local = node.ip_address in local_ips or node.node_id == "00-SLM-Manager" if is_local and roles & _frontend_roles: hosts[inv_name]["slm_colocated_frontend"] = True + colocated_frontend_detected = True if roles & _backend_roles: hosts[inv_name]["frontend_backend_host"] = "127.0.0.1" hosts[inv_name]["frontend_backend_port"] = 8001 hosts[inv_name]["frontend_backend_protocol"] = "http" + # Propagate to 00-SLM-Manager so Phase 4c can rebuild the SLM frontend + # with VITE_API_URL=/slm after the user frontend has been deployed (#3426). + if colocated_frontend_detected: + for node in db_nodes: + if node.node_id == "00-SLM-Manager" and node.ansible_target in hosts: + hosts[node.ansible_target]["slm_colocated_frontend"] = True + break + def _build_inventory_dict( hosts: dict[str, dict], @@ -333,12 +358,13 @@ async def _fetch_inventory_data( list, list, dict[str, str], + set, ] ]: """Load all DB data needed to build the Ansible inventory (#2823). Returns (db_nodes, hosts, node_id_to_hostname, node_id_to_ip, - all_node_roles, all_active, all_ip_map) or None when no nodes match. + all_node_roles, all_active, all_ip_map, local_ips) or None when no nodes match. 
""" from sqlalchemy import select @@ -387,6 +413,7 @@ async def _fetch_inventory_data( all_node_roles, all_active, all_ip_map, + local_ips, ) @@ -411,11 +438,12 @@ async def _generate_dynamic_inventory( all_node_roles, all_active, all_ip_map, + local_ips, ) = result children, ansible_groups = _build_inventory_children( hosts, all_node_roles, node_id_to_hostname ) - infra_vars = _build_infra_vars(all_active, all_ip_map) + infra_vars = _build_infra_vars(all_active, all_ip_map, local_ips) inventory = _build_inventory_dict(hosts, children, infra_vars) fd, path = tempfile.mkstemp(suffix=".yml", prefix="wizard-inventory-")