Skip to content

Commit 66dc75b

Browse files
committed
fix(yutori): treat wait/hold_key duration as seconds, not ms
Yutori's reference impl (frontend-visualqa actions.py:469, 506) interprets the model-supplied `duration` argument on `wait` and `hold_key` as seconds — passed straight to asyncio.sleep on the wait path, and clamped to 100s on the hold_key path. Our handlers were treating duration as milliseconds, so any model-supplied value was silently interpreted 1000× too short (`wait { duration: 2 }` slept 2ms instead of 2s; `hold_key { duration: 0.5 }` held 0.5ms instead of 500ms). Defaults were unaffected because they were pre-computed in ms. Convert seconds → ms before passing to Kernel's press_key / pressKey and before the TypeScript `sleep(durationMs)` call on the wait path; in Python, pass seconds directly to asyncio.sleep. Adds a `> 0` guard on the Python hold_key duration (the TypeScript handler already had one), which also resolves the bugbot nit about negative values reaching the SDK.
1 parent d2affba commit 66dc75b

2 files changed

Lines changed: 10 additions & 5 deletions

File tree

pkg/templates/python/yutori/tools/computer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,14 @@ async def _handle_hold_key(self, action: N15Action) -> ToolResult:
251251
raise ToolError("key is required for hold_key action")
252252

253253
mapped_key = self._map_key(key)
254-
duration = action.get("duration") or 1000
254+
# Yutori emits `duration` in seconds; Kernel SDK's press_key takes ms.
255+
duration_s = action.get("duration")
256+
duration_ms = int(duration_s * 1000) if duration_s and duration_s > 0 else 1000
255257

256258
self.kernel.browsers.computer.press_key(
257259
self.session_id,
258260
keys=[mapped_key],
259-
duration=duration,
261+
duration=duration_ms,
260262
)
261263

262264
await asyncio.sleep(SCREENSHOT_DELAY_S)
@@ -276,8 +278,9 @@ async def _handle_drag(self, action: N15Action) -> ToolResult:
276278
return await self.screenshot()
277279

278280
async def _handle_wait(self, action: N15Action) -> ToolResult:
281+
# Yutori emits `duration` in seconds (matches reference impl).
279282
duration = action.get("duration")
280-
seconds = (duration / 1000) if duration and duration > 0 else 2
283+
seconds = duration if duration and duration > 0 else 2
281284
await asyncio.sleep(seconds)
282285
return await self.screenshot()
283286

pkg/templates/typescript/yutori/tools/computer.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,8 @@ export class ComputerTool {
285285
}
286286

287287
const mappedKey = this.mapKey(key);
288-
const durationMs = action.duration && action.duration > 0 ? action.duration : 1000;
288+
// Yutori emits `duration` in seconds; Kernel SDK's pressKey takes ms.
289+
const durationMs = action.duration && action.duration > 0 ? Math.round(action.duration * 1000) : 1000;
289290

290291
await this.kernel.browsers.computer.pressKey(this.sessionId, {
291292
keys: [mappedKey],
@@ -310,7 +311,8 @@ export class ComputerTool {
310311
}
311312

312313
private async handleWait(action: N15Action): Promise<ToolResult> {
313-
const durationMs = action.duration && action.duration > 0 ? action.duration : 2000;
314+
// Yutori emits `duration` in seconds (matches reference impl).
315+
const durationMs = action.duration && action.duration > 0 ? Math.round(action.duration * 1000) : 2000;
314316
await this.sleep(durationMs);
315317
return this.screenshot();
316318
}

0 commit comments

Comments
 (0)