diff --git a/custom_components/lock_code_manager/domain/coordinator.py b/custom_components/lock_code_manager/domain/coordinator.py index 7b87b0782..545ba2bbf 100644 --- a/custom_components/lock_code_manager/domain/coordinator.py +++ b/custom_components/lock_code_manager/domain/coordinator.py @@ -66,6 +66,11 @@ def __init__(self, hass: HomeAssistant, lock: BaseLock, config_entry: Any) -> No backoff_max=timedelta(seconds=BACKOFF_MAX_SECONDS), ) self._original_update_interval: timedelta | None = update_interval + # Whether the lock has ever been reached successfully. A lock that has + # never been reached is "not ready yet" (e.g. its integration is still + # starting up after a Home Assistant restart), not "offline" -- so it + # must not raise the lock_offline repair during the startup window. + self._reached_once = False # Set up drift detection timer for locks with hard_refresh_interval if lock.hard_refresh_interval: @@ -163,7 +168,15 @@ def _apply_backoff(self) -> None: new_interval.total_seconds(), ) - if self._lock_breaker.failure_count == POLL_FAILURE_ALERT_THRESHOLD: + # Only a lock that was reached at least once can go "offline". A lock + # that has never been reached is still coming up (e.g. its integration + # is mid-startup after a HA restart, surfacing transient "not connected" + # errors); raising lock_offline there produces a repair that is created + # and then auto-cleared the moment the integration finishes loading. + if ( + self._reached_once + and self._lock_breaker.failure_count == POLL_FAILURE_ALERT_THRESHOLD + ): async_create_issue( self.hass, DOMAIN, @@ -184,6 +197,9 @@ def unreachable(self) -> bool: def _reset_backoff(self) -> None: """Reset the lock breaker and restore the original update interval.""" + # A successful reach proves the lock is (now) reachable; from here on a + # later drop is a genuine outage that may raise lock_offline. + self._reached_once = True if self._lock_breaker.failure_count > 0: _LOGGER.info( "Lock %s recovered after %d consecutive failures", @@ -250,6 +266,11 @@ async def _async_drift_check(self, now: datetime) -> None: ) return + # A successful hard refresh is a genuine reach -- mark it so a later + # outage can raise lock_offline even if the lock's only successful + # contact was via drift detection rather than a poll/push. + self._reached_once = True + # Push subscription retry is handled by the config entry state # listener and connection transition handler — no need to retry here. diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py index df638d044..adfb65ec3 100644 --- a/tests/test_coordinator.py +++ b/tests/test_coordinator.py @@ -726,6 +726,7 @@ async def test_poll_failure_alert_created_after_threshold( ) -> None: """Test that a repair issue is created after POLL_FAILURE_ALERT_THRESHOLD failures.""" poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get): @@ -748,6 +749,7 @@ async def test_poll_failure_alert_not_created_before_threshold( ) -> None: """Test that no repair issue exists before reaching the alert threshold.""" poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get): @@ -768,6 +770,7 @@ async def test_poll_failure_alert_dismissed_on_recovery( ) -> None: """Test that the repair issue is dismissed when the lock recovers.""" poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_fail): @@ -800,6 +803,7 @@ async def test_lock_offline_issue_persists_across_shutdown( The issue is persistent and only cleaned up on entry unload or recovery. """ poll_coordinator.last_update_success = True + poll_coordinator._reached_once = True # lock was online before going offline mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_fail): @@ -816,6 +820,84 @@ async def test_lock_offline_issue_persists_across_shutdown( assert issue_registry.async_get_issue(DOMAIN, issue_id) is not None +async def test_lock_offline_not_created_when_never_reached( + poll_coordinator: LockUsercodeUpdateCoordinator, + poll_lock: MockLCMLock, + hass: HomeAssistant, +) -> None: + """ + A lock that has never been reached must not raise lock_offline. + + During the startup window (e.g. the lock's integration is still loading + after a HA restart) every poll fails with a transient "not connected" + error. Raising lock_offline there produces a repair that is created and + then auto-cleared the moment the integration finishes loading -- the flap + reported in issue #1257. ``_reached_once`` stays False until a real reach, + so the alert is suppressed. + """ + assert poll_coordinator._reached_once is False + + mock_get = AsyncMock(side_effect=LockDisconnected("Not connected")) + with patch.object(poll_lock, "async_internal_get_usercodes", mock_get): + for _ in range(POLL_FAILURE_ALERT_THRESHOLD + 2): + with pytest.raises(UpdateFailed): + await poll_coordinator.async_get_usercodes() + + issue_registry = async_get_issue_registry(hass) + issue_id = f"lock_offline_{poll_lock.lock.entity_id}" + assert issue_registry.async_get_issue(DOMAIN, issue_id) is None + + +async def test_lock_offline_created_after_reach_then_drop( + poll_coordinator: LockUsercodeUpdateCoordinator, + poll_lock: MockLCMLock, + hass: HomeAssistant, +) -> None: + """Once reached, a later sustained outage raises lock_offline normally.""" + # A first successful poll proves the lock was online. + mock_get_ok = AsyncMock(return_value={1: "1234"}) + with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_ok): + await poll_coordinator.async_get_usercodes() + assert poll_coordinator._reached_once is True + + mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(poll_lock, "async_internal_get_usercodes", mock_get_fail): + for _ in range(POLL_FAILURE_ALERT_THRESHOLD): + with pytest.raises(UpdateFailed): + await poll_coordinator.async_get_usercodes() + + issue_registry = async_get_issue_registry(hass) + issue_id = f"lock_offline_{poll_lock.lock.entity_id}" + assert issue_registry.async_get_issue(DOMAIN, issue_id) is not None + + +async def test_push_update_marks_reached( + push_coordinator: LockUsercodeUpdateCoordinator, +) -> None: + """A push update proves the lock is reachable and marks it reached.""" + assert push_coordinator._reached_once is False + push_coordinator.push_update({1: SlotCredential.known("9999")}) + assert push_coordinator._reached_once is True + + +async def test_drift_check_success_marks_reached( + push_coordinator: LockUsercodeUpdateCoordinator, + push_lock: MockLCMPushLock, +) -> None: + """A successful drift hard refresh is a reach and marks the lock reached.""" + push_coordinator.last_update_success = True + assert push_coordinator._reached_once is False + + with patch.object( + push_lock, + "async_internal_hard_refresh_codes", + AsyncMock(return_value={1: SlotCredential.known("1234")}), + ): + await push_coordinator._async_drift_check(dt_util.utcnow()) + + assert push_coordinator._reached_once is True + + async def test_unreachable_reflects_backoff_trip( poll_coordinator: LockUsercodeUpdateCoordinator, poll_lock: MockLCMLock,