From 7d6cf8afa33e243fe47e340bd54675fedaf03221 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 14 Jun 2026 22:06:38 -0400
Subject: [PATCH 1/2] fix(coordinator): don't raise lock_offline for a lock that was never reached

Closes the repair-issue flapping in #1257: after a Home Assistant
restart, LCM polls/writes before the lock's integration has finished
starting up (Matter surfaces this as `InvalidState: Not connected`).
Those failures feed the lock breaker, and at
POLL_FAILURE_ALERT_THRESHOLD (12) the coordinator raises the
`lock_offline` repair -- which is then auto-cleared the instant the
integration finishes loading. The repair is created and dismissed
entirely within the startup window.

A lock that has never been reached is not "offline" -- "offline"
presupposes it was once online. Track `_reached_once` (set on the first
successful poll/push via `_reset_backoff`) and only raise `lock_offline`
once the lock has actually been reached. A lock that is reached and then
drops still alerts normally; a lock still coming up at startup no longer
flaps a repair.

#1258 already routes the underlying transient Matter startup errors
(`unknown(133)`, `InvalidState: Not connected`) to the retry path, so
they no longer disable/suspend slots; this closes the remaining
startup-window repair they fed into via the connectivity breaker.

Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lock_code_manager/domain/coordinator.py | 18 +++++- tests/test_coordinator.py | 64 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/custom_components/lock_code_manager/domain/coordinator.py b/custom_components/lock_code_manager/domain/coordinator.py index 7b87b0782..03800fac3 100644 --- a/custom_components/lock_code_manager/domain/coordinator.py +++ b/custom_components/lock_code_manager/domain/coordinator.py @@ -66,6 +66,11 @@ def __init__(self, hass: HomeAssistant, lock: BaseLock, config_entry: Any) -> No backoff_max=timedelta(seconds=BACKOFF_MAX_SECONDS), ) self._original_update_interval: timedelta | None = update_interval + # Whether the lock has ever been reached successfully. A lock that has + # never been reached is "not ready yet" (e.g. its integration is still + # starting up after a Home Assistant restart), not "offline" -- so it + # must not raise the lock_offline repair during the startup window. + self._reached_once = False # Set up drift detection timer for locks with hard_refresh_interval if lock.hard_refresh_interval: @@ -163,7 +168,15 @@ def _apply_backoff(self) -> None: new_interval.total_seconds(), ) - if self._lock_breaker.failure_count == POLL_FAILURE_ALERT_THRESHOLD: + # Only a lock that was reached at least once can go "offline". A lock + # that has never been reached is still coming up (e.g. its integration + # is mid-startup after a HA restart, surfacing transient "not connected" + # errors); raising lock_offline there produces a repair that is created + # and then auto-cleared the moment the integration finishes loading. 
+ if ( + self._reached_once + and self._lock_breaker.failure_count == POLL_FAILURE_ALERT_THRESHOLD + ): async_create_issue( self.hass, DOMAIN, @@ -184,6 +197,9 @@ def unreachable(self) -> bool: def _reset_backoff(self) -> None: """Reset the lock breaker and restore the original update interval.""" + # A successful reach proves the lock is (now) reachable; from here on a + # later drop is a genuine outage that may raise lock_offline. + self._reached_once = True if self._lock_breaker.failure_count > 0: _LOGGER.info( "Lock %s recovered after %d consecutive failures", diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py index df638d044..074282911 100644 --- a/tests/test_coordinator.py +++ b/tests/test_coordinator.py @@ -726,6 +726,7 @@ async def test_poll_failure_alert_created_after_threshold( ) -> None: """Test that a repair issue is created after POLL_FAILURE_ALERT_THRESHOLD failures.""" poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get): @@ -748,6 +749,7 @@ async def test_poll_failure_alert_not_created_before_threshold( ) -> None: """Test that no repair issue exists before reaching the alert threshold.""" poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get): @@ -768,6 +770,7 @@ async def test_poll_failure_alert_dismissed_on_recovery( ) -> None: """Test that the repair issue is dismissed when the lock recovers.""" poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, 
"async_internal_get_usercodes", mock_get_fail): @@ -800,6 +803,7 @@ async def test_lock_offline_issue_persists_across_shutdown( The issue is persistent and only cleaned up on entry unload or recovery. """ poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_fail): @@ -816,6 +820,66 @@ async def test_lock_offline_issue_persists_across_shutdown( assert issue_registry.async_get_issue(DOMAIN, issue_id) is not None +async def test_lock_offline_not_created_when_never_reached( + poll_coordinator: LockUsercodeUpdateCoordinator, + poll_lock: MockLCMLock, + hass: HomeAssistant, +) -> None: + """ + A lock that has never been reached must not raise lock_offline. + + During the startup window (e.g. the lock's integration is still loading + after a HA restart) every poll fails with a transient "not connected" + error. Raising lock_offline there produces a repair that is created and + then auto-cleared the moment the integration finishes loading -- the flap + reported in issue #1257. ``_reached_once`` stays False until a real reach, + so the alert is suppressed. 
+ """ + assert poll_coordinator._reached_once is False + + mock_get = AsyncMock(side_effect=LockDisconnected("Not connected")) + with patch.object(poll_lock, "async_internal_get_usercodes", mock_get): + for _ in range(POLL_FAILURE_ALERT_THRESHOLD + 2): + with pytest.raises(UpdateFailed): + await poll_coordinator.async_get_usercodes() + + issue_registry = async_get_issue_registry(hass) + issue_id = f"lock_offline_{poll_lock.lock.entity_id}" + assert issue_registry.async_get_issue(DOMAIN, issue_id) is None + + +async def test_lock_offline_created_after_reach_then_drop( + poll_coordinator: LockUsercodeUpdateCoordinator, + poll_lock: MockLCMLock, + hass: HomeAssistant, +) -> None: + """Once reached, a later sustained outage raises lock_offline normally.""" + # A first successful poll proves the lock was online. + mock_get_ok = AsyncMock(return_value={1: "1234"}) + with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_ok): + await poll_coordinator.async_get_usercodes() + assert poll_coordinator._reached_once is True + + mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_fail): + for _ in range(POLL_FAILURE_ALERT_THRESHOLD): + with pytest.raises(UpdateFailed): + await poll_coordinator.async_get_usercodes() + + issue_registry = async_get_issue_registry(hass) + issue_id = f"lock_offline_{poll_lock.lock.entity_id}" + assert issue_registry.async_get_issue(DOMAIN, issue_id) is not None + + +async def test_push_update_marks_reached( + push_coordinator: LockUsercodeUpdateCoordinator, +) -> None: + """A push update proves the lock is reachable and marks it reached.""" + assert push_coordinator._reached_once is False + push_coordinator.push_update({1: SlotCredential.known("9999")}) + assert push_coordinator._reached_once is True + + async def test_unreachable_reflects_backoff_trip( poll_coordinator: LockUsercodeUpdateCoordinator, poll_lock: MockLCMLock, From 
e8330cc2678774c984ac935182d69d4d48fc8fbc Mon Sep 17 00:00:00 2001 From: raman325 <7243222+raman325@users.noreply.github.com> Date: Sun, 14 Jun 2026 22:26:34 -0400 Subject: [PATCH 2/2] fix(coordinator): treat a successful drift refresh as a reach Review follow-up: a successful drift-detection hard refresh is a genuine contact with the lock, but it did not flow through _reset_backoff, so it never set _reached_once. Set it on drift success too, so a lock whose only successful contact was via drift can still raise lock_offline on a later outage. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lock_code_manager/domain/coordinator.py | 5 +++++ tests/test_coordinator.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/custom_components/lock_code_manager/domain/coordinator.py b/custom_components/lock_code_manager/domain/coordinator.py index 03800fac3..545ba2bbf 100644 --- a/custom_components/lock_code_manager/domain/coordinator.py +++ b/custom_components/lock_code_manager/domain/coordinator.py @@ -266,6 +266,11 @@ async def _async_drift_check(self, now: datetime) -> None: ) return + # A successful hard refresh is a genuine reach -- mark it so a later + # outage can raise lock_offline even if the lock's only successful + # contact was via drift detection rather than a poll/push. + self._reached_once = True + # Push subscription retry is handled by the config entry state # listener and connection transition handler — no need to retry here. 
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py index 074282911..adfb65ec3 100644 --- a/tests/test_coordinator.py +++ b/tests/test_coordinator.py @@ -880,6 +880,24 @@ async def test_push_update_marks_reached( assert push_coordinator._reached_once is True +async def test_drift_check_success_marks_reached( + push_coordinator: LockUsercodeUpdateCoordinator, + push_lock: MockLCMPushLock, +) -> None: + """A successful drift hard refresh is a reach and marks the lock reached.""" + push_coordinator.last_update_success = True + assert push_coordinator._reached_once is False + + with patch.object( + push_lock, + "async_internal_hard_refresh_codes", + AsyncMock(return_value={1: SlotCredential.known("1234")}), + ): + await push_coordinator._async_drift_check(dt_util.utcnow()) + + assert push_coordinator._reached_once is True + + async def test_unreachable_reflects_backoff_trip( poll_coordinator: LockUsercodeUpdateCoordinator, poll_lock: MockLCMLock,