From b7989e6e35346766fd7967f213b913228d641eb2 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 15:38:43 +0000 Subject: [PATCH 1/4] fix: handle WebGL context loss and add stream auto-reconnect (#584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 3D scene could go white and lose all objects due to two issues: 1. No WebGL context loss handling — when the browser evicts a WebGL context (common with multiple 3D tabs), the scene goes blank with no recovery. Add a ContextLossHandler component that calls preventDefault() on context loss (allowing restoration) and invalidates the renderer when the context is restored. 2. No auto-reconnect for Connect-RPC streams — when streamEntityChanges or streamSceneChanges errors out, the stream dies permanently. Add a retryStream utility with exponential backoff (1s–30s) that automatically reconnects failed streams, preserving existing entities in the scene during brief disconnections. Co-Authored-By: Claude Opus 4.6 --- .changeset/fix-scene-drops-white.md | 5 + src/lib/__tests__/retry-stream.spec.ts | 137 +++++++++++++++++++ src/lib/components/App.svelte | 2 + src/lib/components/ContextLossHandler.svelte | 26 ++++ src/lib/hooks/useDrawService.svelte.ts | 52 ++++--- src/lib/retry-stream.ts | 44 ++++++ 6 files changed, 239 insertions(+), 27 deletions(-) create mode 100644 .changeset/fix-scene-drops-white.md create mode 100644 src/lib/__tests__/retry-stream.spec.ts create mode 100644 src/lib/components/ContextLossHandler.svelte create mode 100644 src/lib/retry-stream.ts diff --git a/.changeset/fix-scene-drops-white.md b/.changeset/fix-scene-drops-white.md new file mode 100644 index 00000000..777162aa --- /dev/null +++ b/.changeset/fix-scene-drops-white.md @@ -0,0 +1,5 @@ +--- +'@viamrobotics/motion-tools': patch +--- + +Fix 3D scene going white by handling WebGL context loss and adding auto-reconnect to draw service streams diff --git a/src/lib/__tests__/retry-stream.spec.ts b/src/lib/__tests__/retry-stream.spec.ts new file mode 100644 index 00000000..3a8fe427 --- /dev/null +++ b/src/lib/__tests__/retry-stream.spec.ts @@ -0,0 +1,137 @@ +import { describe, expect, it, vi } from 'vitest' + +import { retryStream } from '../retry-stream' + +describe('retryStream', () => { + it('calls run and resolves when run succeeds', async () => { + const run = vi.fn().mockResolvedValue(undefined) + const controller = new AbortController() + + // run resolves once, retryStream will call it again — abort after first call + run.mockImplementation(async () => { + controller.abort() + }) + + await retryStream(run, controller.signal) + + expect(run).toHaveBeenCalledTimes(1) + }) + + it('retries when run throws', async () => { + vi.useFakeTimers() + + const controller = new AbortController() + let callCount = 0 + + const run = vi.fn().mockImplementation(async () => { + callCount++ + if (callCount < 3) { + throw new Error('stream error') + } + controller.abort() + }) + + const promise = retryStream(run, controller.signal) + // Advance through the backoff delays + await vi.advanceTimersByTimeAsync(1_000) + await vi.advanceTimersByTimeAsync(2_000) + + await promise + + expect(run).toHaveBeenCalledTimes(3) + + vi.useRealTimers() + }) + + it('stops retrying when signal is aborted', async () => { + vi.useFakeTimers() + + const controller = new AbortController() + const run = vi.fn().mockRejectedValue(new Error('stream error')) + const onRetry = vi.fn() + + const promise = retryStream(run, controller.signal, onRetry) + + // First call fails immediately, then waits for backoff + await vi.advanceTimersByTimeAsync(0) + expect(run).toHaveBeenCalledTimes(1) + + // Abort during backoff wait + controller.abort() + await vi.advanceTimersByTimeAsync(1_000) + + await promise + + // Should have called onRetry once, but not retried run + expect(onRetry).toHaveBeenCalledTimes(1) + expect(run).toHaveBeenCalledTimes(1) + + vi.useRealTimers() + }) + + it('calls onRetry with the current delay', async () => { + vi.useFakeTimers() + + const controller = new AbortController() + let callCount = 0 + + const run = vi.fn().mockImplementation(async () => { + callCount++ + if (callCount < 3) { + throw new Error('stream error') + } + controller.abort() + }) + + const onRetry = vi.fn() + const promise = retryStream(run, controller.signal, onRetry) + + await vi.advanceTimersByTimeAsync(1_000) + await vi.advanceTimersByTimeAsync(2_000) + + await promise + + expect(onRetry).toHaveBeenCalledTimes(2) + expect(onRetry).toHaveBeenNthCalledWith(1, 1_000) + expect(onRetry).toHaveBeenNthCalledWith(2, 2_000) + + vi.useRealTimers() + }) + + it('resets delay after a successful run', async () => { + vi.useFakeTimers() + + const controller = new AbortController() + let callCount = 0 + + const run = vi.fn().mockImplementation(async () => { + callCount++ + // First call: fail + if (callCount === 1) throw new Error('fail') + // Second call: succeed (stream ended cleanly) + if (callCount === 2) return + // Third call: fail + if (callCount === 3) throw new Error('fail') + // Fourth call: abort + controller.abort() + }) + + const onRetry = vi.fn() + const promise = retryStream(run, controller.signal, onRetry) + + // First failure + 1s backoff + await vi.advanceTimersByTimeAsync(1_000) + // Second call succeeds, delay resets. Third call fails, should use 1s again + await vi.advanceTimersByTimeAsync(1_000) + // Fourth call - abort + await vi.advanceTimersByTimeAsync(2_000) + + await promise + + // Both retries should have used 1000ms (reset after success) + expect(onRetry).toHaveBeenNthCalledWith(1, 1_000) + expect(onRetry).toHaveBeenNthCalledWith(2, 1_000) + + vi.useRealTimers() + }) +}) diff --git a/src/lib/components/App.svelte b/src/lib/components/App.svelte index 5dbe5a3c..8320c4d3 100644 --- a/src/lib/components/App.svelte +++ b/src/lib/components/App.svelte @@ -28,6 +28,7 @@ import { provideWeblabs } from '$lib/hooks/useWeblabs.svelte' import { domPortal } from '$lib/portal' + import ContextLossHandler from './ContextLossHandler.svelte' import FileDrop from './FileDrop/FileDrop.svelte' import HoveredEntities from './hover/HoveredEntities.svelte' import AddFrames from './overlay/AddFrames.svelte' @@ -120,6 +121,7 @@ bind:this={root} > + {#snippet children({ focus })} diff --git a/src/lib/components/ContextLossHandler.svelte b/src/lib/components/ContextLossHandler.svelte new file mode 100644 index 00000000..5918c926 --- /dev/null +++ b/src/lib/components/ContextLossHandler.svelte @@ -0,0 +1,26 @@ + diff --git a/src/lib/hooks/useDrawService.svelte.ts b/src/lib/hooks/useDrawService.svelte.ts index d23705e1..31f92a2a 100644 --- a/src/lib/hooks/useDrawService.svelte.ts +++ b/src/lib/hooks/useDrawService.svelte.ts @@ -28,6 +28,7 @@ import { uuidStringToBytes, } from '$lib/draw' import { traits, useWorld } from '$lib/ecs' +import { retryStream } from '$lib/retry-stream' import { useCameraControls } from './useControls.svelte' import { useDrawConnectionConfig } from './useDrawConnectionConfig.svelte' @@ -320,33 +321,34 @@ export function provideDrawService() { } const streamEntityChanges = async (client: Client, signal: AbortSignal) => { - try { - for await (const response of client.streamEntityChanges({}, { signal })) { - connectionStatus = ConnectionStatus.CONNECTED - - const { entity } = response - if (!entity.case) continue - - const uuid = UuidTool.toString([...(entity.value.uuid ?? [])]) - pendingEvents.push({ - uuid, - changeType: response.changeType, - entity, - updatedFields: response.updatedFields, - }) - scheduleFlush() - } - } catch (error) { - if (!signal.aborted) { - console.error('Draw service entity stream error:', error) + await retryStream( + async (sig) => { + for await (const response of client.streamEntityChanges({}, { signal: sig })) { + connectionStatus = ConnectionStatus.CONNECTED + + const { entity } = response + if (!entity.case) continue + + const uuid = UuidTool.toString([...(entity.value.uuid ?? [])]) + pendingEvents.push({ + uuid, + changeType: response.changeType, + entity, + updatedFields: response.updatedFields, + }) + scheduleFlush() + } + }, + signal, + () => { connectionStatus = ConnectionStatus.DISCONNECTED } - } + ) } const streamSceneChanges = async (client: Client, signal: AbortSignal) => { - try { - for await (const response of client.streamSceneChanges({}, { signal })) { + await retryStream(async (sig) => { + for await (const response of client.streamSceneChanges({}, { signal: sig })) { const { sceneMetadata } = response if (!sceneMetadata) continue @@ -361,11 +363,7 @@ export function provideDrawService() { ) } } - } catch (error) { - if (!signal.aborted) { - console.error('Draw service scene stream error:', error) - } - } + }, signal) } const createRelationship = async ( diff --git a/src/lib/retry-stream.ts b/src/lib/retry-stream.ts new file mode 100644 index 00000000..aee87209 --- /dev/null +++ b/src/lib/retry-stream.ts @@ -0,0 +1,44 @@ +const INITIAL_DELAY_MS = 1_000 +const MAX_DELAY_MS = 30_000 + +/** + * Calls `run` in a loop, retrying with exponential backoff when it throws. + * Stops when the signal is aborted or `run` resolves without throwing. + */ +export const retryStream = async ( + run: (signal: AbortSignal) => Promise, + signal: AbortSignal, + onRetry?: (delay: number) => void +): Promise => { + let delay = INITIAL_DELAY_MS + + while (!signal.aborted) { + try { + await run(signal) + // Stream ended cleanly (server closed it) — restart from the beginning. + delay = INITIAL_DELAY_MS + } catch { + if (signal.aborted) return + } + + if (signal.aborted) return + + onRetry?.(delay) + await sleep(delay, signal) + delay = Math.min(delay * 2, MAX_DELAY_MS) + } +} + +const sleep = (ms: number, signal: AbortSignal): Promise => { + return new Promise((resolve) => { + const timer = setTimeout(resolve, ms) + signal.addEventListener( + 'abort', + () => { + clearTimeout(timer) + resolve() + }, + { once: true } + ) + }) +} From 700606bb049c4d544efef5bc563d42e6b6d2ead1 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 15:49:32 +0000 Subject: [PATCH 2/4] fix: only call onRetry on error, not on clean stream end When the server closes the stream cleanly, retryStream was incorrectly calling onRetry (which sets connectionStatus=DISCONNECTED) and sleeping 1s before reconnecting. This caused a spurious disconnect flash and unnecessary delay on normal stream restarts. Also restores error logging that was dropped when the original try/catch was replaced with retryStream. Adds a test verifying onRetry is not called on clean stream end. --- src/lib/__tests__/retry-stream.spec.ts | 17 +++++++++++++++++ src/lib/retry-stream.ts | 19 +++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/lib/__tests__/retry-stream.spec.ts b/src/lib/__tests__/retry-stream.spec.ts index 3a8fe427..dcaa7347 100644 --- a/src/lib/__tests__/retry-stream.spec.ts +++ b/src/lib/__tests__/retry-stream.spec.ts @@ -98,6 +98,23 @@ describe('retryStream', () => { vi.useRealTimers() }) + it('does not call onRetry and restarts immediately on clean stream end', async () => { + const controller = new AbortController() + let callCount = 0 + + const run = vi.fn().mockImplementation(async () => { + callCount++ + if (callCount === 1) return // clean end — server closed the stream + controller.abort() + }) + + const onRetry = vi.fn() + await retryStream(run, controller.signal, onRetry) + + expect(run).toHaveBeenCalledTimes(2) + expect(onRetry).not.toHaveBeenCalled() + }) + it('resets delay after a successful run', async () => { vi.useFakeTimers() diff --git a/src/lib/retry-stream.ts b/src/lib/retry-stream.ts index aee87209..14a6e900 100644 --- a/src/lib/retry-stream.ts +++ b/src/lib/retry-stream.ts @@ -3,7 +3,9 @@ const MAX_DELAY_MS = 30_000 /** * Calls `run` in a loop, retrying with exponential backoff when it throws. - * Stops when the signal is aborted or `run` resolves without throwing. + * - Clean stream end (server closed it): restarts immediately, delay resets. + * - Error: calls `onRetry`, waits with exponential backoff, then retries. + * Stops when the signal is aborted. */ export const retryStream = async ( run: (signal: AbortSignal) => Promise, @@ -13,19 +15,24 @@ export const retryStream = async ( let delay = INITIAL_DELAY_MS while (!signal.aborted) { + let errored = false try { await run(signal) - // Stream ended cleanly (server closed it) — restart from the beginning. + // Stream ended cleanly (server closed it) — restart immediately. delay = INITIAL_DELAY_MS - } catch { + } catch (error) { if (signal.aborted) return + errored = true + console.warn('Stream error, retrying in', delay, 'ms:', error) } if (signal.aborted) return - onRetry?.(delay) - await sleep(delay, signal) - delay = Math.min(delay * 2, MAX_DELAY_MS) + if (errored) { + onRetry?.(delay) + await sleep(delay, signal) + delay = Math.min(delay * 2, MAX_DELAY_MS) + } } } From b5b9b037e8ec77c3ba9c66a8ca97a59a904181a3 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 15:54:51 +0000 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20remove=20ContextLossHandler=20?= =?UTF-8?q?=E2=80=94=20context=20loss=20not=20the=20root=20cause?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user reported that non-network scene elements (like Grid) remained visible when entities disappeared, ruling out WebGL context loss as the cause. The stream reconnect fix in retryStream is sufficient. Blindly calling preventDefault() on webglcontextlost would also silently recover an app that is legitimately overusing WebGL contexts rather than failing explicitly. Co-authored-by: Micheal Parks --- src/lib/components/App.svelte | 2 -- src/lib/components/ContextLossHandler.svelte | 26 -------------------- 2 files changed, 28 deletions(-) delete mode 100644 src/lib/components/ContextLossHandler.svelte diff --git a/src/lib/components/App.svelte b/src/lib/components/App.svelte index 8320c4d3..5dbe5a3c 100644 --- a/src/lib/components/App.svelte +++ b/src/lib/components/App.svelte @@ -28,7 +28,6 @@ import { provideWeblabs } from '$lib/hooks/useWeblabs.svelte' import { domPortal } from '$lib/portal' - import ContextLossHandler from './ContextLossHandler.svelte' import FileDrop from './FileDrop/FileDrop.svelte' import HoveredEntities from './hover/HoveredEntities.svelte' import AddFrames from './overlay/AddFrames.svelte' @@ -121,7 +120,6 @@ bind:this={root} > - {#snippet children({ focus })} diff --git a/src/lib/components/ContextLossHandler.svelte b/src/lib/components/ContextLossHandler.svelte deleted file mode 100644 index 5918c926..00000000 --- a/src/lib/components/ContextLossHandler.svelte +++ /dev/null @@ -1,26 +0,0 @@ - From 841106840d9a7c3dd7cb2dc25609fac54dc1c891 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:10:20 +0000 Subject: [PATCH 4/4] fix: preserve geometry and pointcloud entities during machine disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When useResourceNames returns empty (machine connection dropped), useGeometries and usePointclouds were destroying all ECS entities. On reconnect the queries came back but there was a blank window. Guard the outer cleanup: if ALL queries are gone (activeQueryKeys empty), the machine is likely temporarily disconnected — skip entity destruction. Only destroy when the partID changed or other queries are still active (connected machine, resource legitimately removed). Co-authored-by: Micheal Parks --- src/lib/hooks/useGeometries.svelte.ts | 24 ++++++++++++++++-------- src/lib/hooks/usePointclouds.svelte.ts | 16 ++++++++++++---- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/lib/hooks/useGeometries.svelte.ts b/src/lib/hooks/useGeometries.svelte.ts index 81e929d3..58428ccb 100644 --- a/src/lib/hooks/useGeometries.svelte.ts +++ b/src/lib/hooks/useGeometries.svelte.ts @@ -195,19 +195,27 @@ export const provideGeometries = (partID: () => string) => { }) } - // Clean up owners whose queries disappeared entirely + // Clean up owners whose queries disappeared entirely. + // Guard: if ALL queries are gone (activeQueryKeys empty), the machine is likely + // temporarily disconnected — preserve entities so they reappear on reconnect. + // Only destroy when the partID changed (old-partID entities) or other queries + // are still active (connected machine, resource legitimately removed). + const anyQueriesActive = activeQueryKeys.size > 0 for (const [queryKey, keys] of queryEntityKeys) { if (!activeQueryKeys.has(queryKey)) { - for (const key of keys) { - const entity = entities.get(key) - if (entity && world.has(entity)) { - entity.destroy() + const queryPartID = queryKey.split(':')[0]! + if (queryPartID !== currentPartID || anyQueriesActive) { + for (const key of keys) { + const entity = entities.get(key) + if (entity && world.has(entity)) { + entity.destroy() + } + + entities.delete(key) } - entities.delete(key) + queryEntityKeys.delete(queryKey) } - - queryEntityKeys.delete(queryKey) } } }) diff --git a/src/lib/hooks/usePointclouds.svelte.ts b/src/lib/hooks/usePointclouds.svelte.ts index e266dd42..92b8cd78 100644 --- a/src/lib/hooks/usePointclouds.svelte.ts +++ b/src/lib/hooks/usePointclouds.svelte.ts @@ -187,13 +187,21 @@ export const providePointclouds = (partID: () => string) => { }) } - // clean up queries that disappeared entirely + // clean up queries that disappeared entirely. + // Guard: if ALL queries are gone (activeQueryKeys empty), the machine is likely + // temporarily disconnected — preserve entities so they reappear on reconnect. + // Only destroy when the partID changed (old-partID entities) or other queries + // are still active (connected machine, camera legitimately removed). + const anyQueriesActive = activeQueryKeys.size > 0 for (const [queryKey, entity] of entities) { if (!activeQueryKeys.has(queryKey)) { - if (world.has(entity)) { - entity.destroy() + const queryPartID = queryKey.split(':')[0]! + if (queryPartID !== currentPartID || anyQueriesActive) { + if (world.has(entity)) { + entity.destroy() + } + entities.delete(queryKey) } - entities.delete(queryKey) } } })