Skip to content
162 changes: 156 additions & 6 deletions custom_components/lock_code_manager/domain/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ def __init__(self, hass: HomeAssistant, lock: BaseLock, config_entry: Any) -> No
config_entry=config_entry,
)
self.data: dict[int, SlotCredential] = {}
# Per-slot "verified" flag, kept in lockstep with ``data``. A slot is
# unverified only while an optimistic (ambiguous-but-treated-as-completed)
# write awaits confirmation; every other source -- genuine push events,
# polls, hard refreshes, and authoritative writes -- is verified. Absent
# slots read as verified, so poll/cloud providers (which never push an
# optimistic update) are unaffected. See the Phase 2 push-as-commit spec.
self._verified: dict[int, bool] = {}
self._config_entry = config_entry
self._lock_breaker = CircuitBreaker(
BACKOFF_FAILURE_THRESHOLD,
Expand Down Expand Up @@ -112,14 +119,155 @@ def _normalize_keys(
"""Coerce slot keys to ``int``. Raises ValueError/TypeError if a key cannot be cast."""
return {int(k): v for k, v in data.items()}

def _apply_read(
self, observed: dict[int, SlotCredential]
) -> dict[int, SlotCredential]:
"""
Resolve a genuine read (poll or hard refresh) against pending writes.

A read is the dropped-push backstop for the verified-credential
lifecycle: for a slot with an outstanding optimistic write, observing
the slot present confirms our write -- keep the believed value and mark
it verified. The one exception (mirroring ``BaseLock._confirm_slot``) is
a *readable* observation of a different code: that is an external change
racing our write, so take the observation rather than masking it with
the believed value -- otherwise a drift refresh, whose whole purpose is
to surface out-of-band changes, would silently overwrite one. Observing
the slot still absent means the write has not landed yet, so keep waiting
(stay unverified, pending intact). Slots with no pending write are
genuine observations and are marked verified. See the Phase 2
push-as-commit spec.
"""
out: dict[int, SlotCredential] = {}
for slot, cred in observed.items():
pending = self._lock._pending_writes.get(slot)
if pending is not None and cred.is_present:
pin, _deadline = pending
del self._lock._pending_writes[slot]
if cred.is_readable and cred.readable_pin != pin:
out[slot] = cred
else:
out[slot] = SlotCredential.known(pin)
self._verified[slot] = True
elif pending is not None:
out[slot] = cred
self._verified[slot] = False
else:
out[slot] = cred
self._verified.pop(slot, None)
# Keep the verified map in lockstep with the read.
self._verified = {
slot: flag for slot, flag in self._verified.items() if slot in out
}
return out

def is_verified(self, slot: int) -> bool:
"""
Return whether the slot's credential is a confirmed observation.

Absent slots default to verified: a slot is only unverified while an
optimistic write awaits confirmation (push event or hard refresh).
"""
return self._verified.get(slot, True)

@callback
def mark_verified(self, slot: int) -> None:
"""
Clear a slot's unverified flag (it defaults back to verified).

Called when a write is confirmed by the lock (an authoritative
``WriteResult.CONFIRMED``), so a stale unverified flag from a prior
optimistic write on the same slot cannot strand it.
"""
self._verified.pop(slot, None)

async def async_confirm_pending_writes(self) -> None:
"""
Actively read the lock back to confirm outstanding optimistic writes.

An ambiguous write (``WriteResult.OPTIMISTIC``) gets no confirming push
on some stacks: node-zwave-js, for one, emits no ``credential
added/modified`` event when its own post-write verification fails on a
lock that reports codes back masked -- the event and the ``ERROR_UNKNOWN``
result are mutually exclusive. Waiting for the hourly drift refresh would
let the breaker suspend a slot whose code actually landed (~3 attempts in
the 5-minute window, long before the hourly backstop). So the seam calls
this immediately after recording an optimistic write.

This is the order-independent confirmation path: it does not depend on
receiving any event. A hard read observes the slot present-but-masked
(LCM projects masked codes to ``unreadable`` rather than repeating the
driver's ``userCode == codeData`` check) and ``_apply_read`` confirms it;
a genuinely-absent slot stays pending and re-syncs on the next tick.

A failed read is non-fatal and does not apply backoff: the slot stays
pending and the sync tick reconciles it within the TTL.
"""
if not self._lock._pending_writes:
return
try:
new_data = self._apply_read(
self._normalize_keys(
await self._lock.async_internal_hard_refresh_codes()
)
)
except LockCodeManagerError as err:
_LOGGER.debug(
"On-demand confirmation read failed for %s: %s; leaving pending "
"writes for the sync tick to reconcile",
self._lock.lock.entity_id,
err,
)
return
except Exception:
# The confirmation read is a best-effort backstop, never fatal: it
# must not escape into the set seam and suspend the slot. The pending
# write stays recorded and the sync tick reconciles it via the TTL.
_LOGGER.exception(
"Unexpected error during on-demand confirmation read for %s; "
"leaving pending writes for the sync tick to reconcile",
self._lock.lock.entity_id,
)
return
# _apply_read already cleared pending + flipped the verified flag in
# place, so the confirmation takes effect even when the data is
# unchanged; only the listener notification is gated on a real delta.
if new_data != self.data:
self.async_set_updated_data(new_data)

@callback
def push_update(self, updates: dict[int, SlotCredential]) -> None:
"""Push one or more slot updates and notify listening entities."""
def push_update(
self, updates: dict[int, SlotCredential], *, optimistic: bool = False
) -> None:
"""
Push one or more slot updates and notify listening entities.

``optimistic=True`` marks the pushed slots unverified (an ambiguous
write we are treating as completed but have not yet confirmed). The
default, ``False``, marks them verified -- every existing caller keeps
today's behavior.
"""
if not updates:
return

new_data = {**self.data, **self._normalize_keys(updates)}
normalized = self._normalize_keys(updates)
new_data = {**self.data, **normalized}
verified = not optimistic

# Record the verified flag for the pushed slots regardless of whether
# the value changed: an optimistic re-push of the same value still
# flips the slot to unverified.
for slot in normalized:
self._verified[slot] = verified
# Keep the verified map in lockstep with data.
self._verified = {
slot: flag for slot, flag in self._verified.items() if slot in new_data
}

if new_data == self.data:
# Verified-flag-only change: the sync layer reads ``is_verified``
# directly on its next tick, and entities don't render the flag, so
# there's nothing to notify and no reachability proof (no new data).
return

# A successful push update proves the lock is reachable, so reset
Expand Down Expand Up @@ -218,7 +366,7 @@ async def async_get_usercodes(self) -> dict[int, SlotCredential]:
raise UpdateFailed from err

self._reset_backoff()
return self._normalize_keys(data)
return self._apply_read(self._normalize_keys(data))

async def _async_drift_check(self, now: datetime) -> None:
"""Perform a hard refresh to detect out-of-band code changes."""
Expand All @@ -238,8 +386,10 @@ async def _async_drift_check(self, now: datetime) -> None:
self._lock.lock.entity_id,
)
try:
new_data = self._normalize_keys(
await self._lock.async_internal_hard_refresh_codes()
new_data = self._apply_read(
self._normalize_keys(
await self._lock.async_internal_hard_refresh_codes()
)
)
except LockCodeManagerError as err:
self._apply_backoff()
Expand Down
29 changes: 29 additions & 0 deletions custom_components/lock_code_manager/domain/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,35 @@ def credential_for(self, credential_type: CredentialType) -> Credential | None:
return next(iter(self.credentials_of_type(credential_type)), None)


class WriteResult(StrEnum):
"""
Outcome of a credential write (``async_set_credential``).

Replaces the old ``bool`` return, distinguishing three cases the seam
needs:

- ``NO_CHANGE`` -- the value was already set; nothing was written (the old
``False``). The coordinator is not refreshed.
- ``CONFIRMED`` -- the lock acknowledged the write (the old ``True``). The
slot is marked verified; non-push providers refresh to read it back.
- ``OPTIMISTIC`` -- the write returned an ambiguous result we are treating
as completed but have NOT confirmed (e.g. a Z-Wave driver
``ERROR_UNKNOWN`` from a masked read-back). The slot is marked unverified
and awaits confirmation via a push event or hard refresh; if none
arrives, it re-syncs rather than silently reporting success. See the
Phase 2 push-as-commit spec.
"""

NO_CHANGE = "no_change"
CONFIRMED = "confirmed"
OPTIMISTIC = "optimistic"

@property
def changed(self) -> bool:
"""Return whether a write actually occurred (CONFIRMED or OPTIMISTIC)."""
return self is not WriteResult.NO_CHANGE


@dataclass(frozen=True, slots=True)
class SetUserResult:
"""
Expand Down
5 changes: 5 additions & 0 deletions custom_components/lock_code_manager/domain/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ class SyncState(StrEnum):
IN_SYNC: desired state matches actual state on the lock.
OUT_OF_SYNC: mismatch detected, pending sync on next tick.
SYNCING: sync operation in progress.
PENDING_CONFIRMATION: an optimistic (ambiguous-but-treated-as-completed)
write was issued and we are waiting for the lock to confirm it (a push
event or hard-refresh read). The tick does not re-write while waiting;
confirmation -> IN_SYNC, timeout -> re-sync.
SUSPENDED: circuit breaker tripped or unexpected error; awaiting
coordinator recovery (suspended flag cleared).
"""
Expand All @@ -40,6 +44,7 @@ class SyncState(StrEnum):
IN_SYNC = "in_sync"
OUT_OF_SYNC = "out_of_sync"
SYNCING = "syncing"
PENDING_CONFIRMATION = "pending_confirmation"
SUSPENDED = "suspended"


Expand Down
62 changes: 62 additions & 0 deletions custom_components/lock_code_manager/domain/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from dataclasses import dataclass
from datetime import datetime
import logging
import time
from typing import TYPE_CHECKING, Any

from homeassistant.components.binary_sensor import DOMAIN as BINARY_SENSOR_DOMAIN
Expand Down Expand Up @@ -401,7 +402,16 @@
that PIN changes trigger a re-set even when the lock code is
unreadable, and that taking over a slot with an existing masked code
triggers a set.

An unverified slot (an optimistic write still awaiting confirmation,
or one whose confirmation never arrived) is never in sync: the
coordinator holds the believed value but the lock has not confirmed it,
so the tick must keep watching (PENDING_CONFIRMATION) or re-sync rather
than declare success. Slots with no recorded flag read as verified, so
this is a no-op for providers that never write optimistically.
"""
if not self._coordinator.is_verified(self._slot_num):
return False
credential = slot_state.coordinator_code
if slot_state.active_state == STATE_ON:
if credential is not None:
Expand Down Expand Up @@ -699,6 +709,48 @@
self._write_state()
return

# -- PENDING_CONFIRMATION: an optimistic write is awaiting confirmation.
# Don't re-write while waiting; a push event / hard refresh resolves it
# (clearing the pending entry). On timeout, count a breaker failure and
# fall through to re-sync -- so a write the lock never committed ends in
# a visible suspend, never a silent in-sync.
pending = self._lock._pending_writes.get(self._slot_num)
if pending is not None:
believed_pin, deadline = pending
# A pending write only gates reconciliation while it still reflects
# the desired state. If the user changed the PIN or disabled the slot
# while the write was outstanding, the entry is stale: drop it and
# reconcile the new target instead of holding PENDING_CONFIRMATION
# (and re-syncing the old value) until the deadline.
still_wanted = (
slot_state.active_state == STATE_ON
and slot_state.pin_state == believed_pin
)
if still_wanted and time.monotonic() < deadline:
if self._state is not SyncState.PENDING_CONFIRMATION:
self._state = SyncState.PENDING_CONFIRMATION
self._write_state()
return
# Drop the entry and re-sync on the NEXT tick. Returning here (rather
# than falling through to _perform_sync this tick) lets a confirming
# push that lands between ticks resolve the slot first, and avoids
# double-charging the breaker when the re-sync itself also fails.
del self._lock._pending_writes[self._slot_num]
if still_wanted:
# Genuine timeout: the write we still want never confirmed.
self._slot_breaker.record_failure()
_LOGGER.warning(
"%s: optimistic write not confirmed within the timeout; "
"re-syncing (attempt %s)",
self._log_prefix,
self._slot_breaker.failure_count,
)
# A stale entry (desired state changed) is not a sync failure, so it
# does not charge the breaker.
self._state = SyncState.OUT_OF_SYNC
self._write_state()
return

# -- OUT_OF_SYNC: check lock reachability, then attempt sync --
if self._coordinator.unreachable:
self._state = SyncState.SUSPENDED
Expand Down Expand Up @@ -822,6 +874,16 @@
self._state = SyncState.OUT_OF_SYNC
return

# An optimistic (ambiguous) set records a pending write: don't judge it
# now -- wait for the lock to confirm it (a push event or hard refresh)
# in PENDING_CONFIRMATION. The breaker is only charged when that wait
# times out (handled at the top of the tick), so a masked-but-accepted
# write is not penalised before its confirming event arrives.
if self._slot_num in self._lock._pending_writes:
self._state = SyncState.PENDING_CONFIRMATION
self._write_state()
return

Check warning on line 885 in custom_components/lock_code_manager/domain/sync.py

View check run for this annotation

Codecov / codecov/patch

custom_components/lock_code_manager/domain/sync.py#L883-L885

Added lines #L883 - L885 were not covered by tests

# Check if sync actually worked.
slot_state = self._resolve_slot_state()
if slot_state is not None and self.calculate_in_sync(slot_state):
Expand Down
Loading
Loading