From 1b3fdfd8dd6b988fbcb009a00319c09672d9e9fd Mon Sep 17 00:00:00 2001 From: livepeer-tessa Date: Mon, 6 Apr 2026 06:18:01 +0000 Subject: [PATCH] fix: downgrade trickle publisher 404 logs to DEBUG during teardown When close() is called, set a _closing flag so that in-flight POST requests that receive a 404 from the orchestrator are logged at DEBUG level instead of ERROR. This avoids noisy false-alarm errors in session teardown sequences where the orchestrator closes trickle channels before the publisher has fully drained. A 404 during an active session is still logged at ERROR (channel missing unexpectedly). Only 404s that arrive after close() has been initiated are treated as expected and downgraded to DEBUG. Fixes daydreamlive/scope#846 Signed-off-by: livepeer-tessa --- src/livepeer_gateway/trickle_publisher.py | 31 +++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/livepeer_gateway/trickle_publisher.py b/src/livepeer_gateway/trickle_publisher.py index 179de50..2327e45 100644 --- a/src/livepeer_gateway/trickle_publisher.py +++ b/src/livepeer_gateway/trickle_publisher.py @@ -120,6 +120,10 @@ def __init__( # Preconnected writer state for the next segment. self._next_state: Optional[_SegmentPostState] = None + # Set to True when close() is called so that expected 404s during + # teardown are logged at DEBUG level instead of ERROR. + self._closing: bool = False + # Terminal failure for the whole publisher. Once set, no new segments # should be opened or written. 
self._terminal_error: Optional[TricklePublisherTerminalError] = None @@ -243,14 +247,27 @@ async def _run_post(self, url: str, seg_state: _SegmentPostState) -> None: continue break - if final_status is not None: - _LOG.error("Trickle POST failed url=%s status=%s body=%r", url, final_status, final_body) - else: - _LOG.error("Trickle POST exception url=%s error=%s", url, final_exc) assert final_exc is not None + # A 404 during teardown is expected — the orchestrator already closed the + # channel before the publisher drained. Downgrade to DEBUG to avoid + # noise in logs when this is part of a normal disconnect sequence. + is_teardown_404 = (final_status == 404 and self._closing) + if is_teardown_404: + if final_status is not None: + _LOG.debug("Trickle POST 404 during teardown (expected) url=%s", url) + else: + _LOG.debug("Trickle POST exception during teardown url=%s error=%s", url, final_exc) + else: + if final_status is not None: + _LOG.error("Trickle POST failed url=%s status=%s body=%r", url, final_status, final_body) + else: + _LOG.error("Trickle POST exception url=%s error=%s", url, final_exc) self._record_segment_failure(final_exc, seg_state) if final_status == 404 and self._terminal_error is None: - _LOG.error("Trickle publisher channel does not exist url=%s", self.url) + if is_teardown_404: + _LOG.debug("Trickle publisher channel gone during teardown url=%s", self.url) + else: + _LOG.error("Trickle publisher channel does not exist url=%s", self.url) terminal_exc = TricklePublisherTerminalError( "Trickle publisher channel does not exist", consecutive_failures=self._consecutive_failures, @@ -389,6 +406,10 @@ async def close(self) -> None: if self._session is None and self._lock is None and self._next_state is None: return + # Signal that we are in teardown so in-flight POSTs can downgrade 404 + # log levels — the orchestrator may have already closed the channel. 
+ self._closing = True + try: await self._ensure_runtime() # Close is best-effort; suppress cancellation/runtime-init failures.