diff --git a/.owlmind_prompt.txt b/.owlmind_prompt.txt new file mode 100644 index 000000000000..30a2d10d344d --- /dev/null +++ b/.owlmind_prompt.txt @@ -0,0 +1,69 @@ +Fix this bug: + +# OWLMIND — Worker Prompt (Claude Code) + +You are a Worker. Execute the instructions below and return a structured report. + +## RULES +- Return ONLY valid JSON matching the **WorkerReport** schema below. +- Do NOT include any text outside the JSON block. +- Do not force push, delete files without approval, or run destructive commands. +- Maximum 3 questions if blocked. + +## DIAGNOSTIC-FIRST APPROACH (MANDATORY) +Before writing ANY code: +1. Run the failing test FIRST to see the exact error output. +2. Read the test code to understand expected behavior. +3. Read the source code that the test exercises — find the root cause. +4. Only THEN write the minimal fix. +5. Run the test again to confirm it passes. +Do NOT skip steps 1-3. Understanding the bug before fixing it is critical. + +## CRITICAL RULES +- NEVER modify test files (files containing 'test' in path). Only fix SOURCE code. +- Change as FEW files as possible — ideal fix is 1 file, 1-10 lines changed. +- Do NOT do global search-and-replace across the repo. +- Do NOT replace 'unicode' with 'str' or similar mass replacements. +- Do NOT touch unrelated files (setup.cfg, docs/, conf.py, etc). + +## WorkerReport Schema +```json +{ + "run_id": "cycle-410e27dede8f", + "iteration": 3, + "status": "READY_FOR_VERIFY", + "done_summary": ["What was done..."], + "questions": [], + "commands_run": [{"cmd": "...", "exit_code": 0, "summary": "..."}] +} +``` + + +## Request +```json +{ + "run_id": "cycle-410e27dede8f", + "iteration": 3, + "worker_instructions": "The git diff shows only a comment was added ('# TODO: Add reconnection detection'), not an actual fix. You must implement proper reconnection detection in the Syndic class. Look for where the syndic maintains connection to the master of masters in salt/minion.py and salt/transport/zeromq.py. Add logic to detect when the underlying ZMQ socket reconnects and trigger a full re-auth. The fix should be minimal - likely in 1-2 files, not modifying test files.", + "constraints": [ + "Fix issues noted by Judge", + "Return valid JSON" + ], + "verify_commands": [ + "git status", + "git diff HEAD" + ], + "repo_map": "", + "relevant_files": [], + "report_schema_hint": "WorkerReport v1" +} +``` + +Return ONLY the JSON response. No markdown, no explanation. + + +Verify with: +- git status +- git diff HEAD + +Make the smallest possible fix. Edit source files directly. Do NOT modify test files. \ No newline at end of file diff --git a/salt/minion.py b/salt/minion.py index 45e326949398..838f60e30daf 100644 --- a/salt/minion.py +++ b/salt/minion.py @@ -4359,6 +4359,8 @@ async def _process_cmd_socket(self, payload): async def reconnect(self): if hasattr(self, "pub_channel"): self.pub_channel.on_recv(None) + if hasattr(self.pub_channel, "auth"): + self.pub_channel.auth.invalidate() if hasattr(self.pub_channel, "close"): self.pub_channel.close() del self.pub_channel diff --git a/salt/transport/zeromq.py b/salt/transport/zeromq.py index 65c165c897a2..69b328047236 100644 --- a/salt/transport/zeromq.py +++ b/salt/transport/zeromq.py @@ -280,6 +280,12 @@ async def connect( self._socket.connect(master_pub_uri) if connect_callback: await connect_callback(True) + # Start ZMQ socket monitor to detect reconnections and trigger re-auth + if HAS_ZMQ_MONITOR and not getattr(self, "_monitor", None): + self._monitor = ZeroMQSocketMonitor( + self._socket, reconnect_callback=connect_callback + ) + self._monitor.start_io_loop(self.io_loop) async def connect_uri(self, uri, connect_callback=None, disconnect_callback=None): self._connect_called = True @@ -874,7 +880,7 @@ def _send_recv(self, socket, _TimeoutError=tornado.gen.TimeoutError): class ZeroMQSocketMonitor: __EVENT_MAP = None - def __init__(self, socket): + def __init__(self, socket, reconnect_callback=None): """ Create ZMQ monitor sockets @@ -885,6 +891,8 @@ def __init__(self, socket): self._monitor_socket = self._socket.get_monitor_socket() self._monitor_task = None self._running = asyncio.Event() + self._reconnect_callback = reconnect_callback + self._initial_connect_done = False def start_io_loop(self, io_loop): log.trace("Event monitor start!") @@ -937,6 +945,11 @@ def monitor_callback(self, msg): log.debug("ZeroMQ event: %s", evt) if evt["event"] == zmq.EVENT_MONITOR_STOPPED: self.stop() + elif evt["event"] == zmq.EVENT_CONNECTED: + if self._initial_connect_done and self._reconnect_callback: + log.debug("ZMQ socket reconnected, triggering re-authentication") + asyncio.ensure_future(self._reconnect_callback(True)) + self._initial_connect_done = True def stop(self): if self._socket is None: