From 41c399b5418461efce9a30aaac1f1cd497ddfbb8 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 4 Jun 2026 22:57:58 +0000 Subject: [PATCH 1/5] fix(telemetry): send distinct ids as UUID strings to preserve uniqueness distinct_id and distinct_app_id were reported as 128-bit integers. PostHog coerces large JSON numbers to float64, discarding all but ~16 significant digits, so two distinct users or apps could collapse onto the same truncated value and have their events incorrectly correlated. Encode both identifiers as canonical UUID hex strings before sending. A UUID holds the same 128 bits, so str(UUID(int=existing_id)) is a lossless re-encoding: the value is unchanged, only its wire form differs. Existing installs derive their UUID from the stored integer (never regenerated), so post-migration events stay linkable to their pre-migration history. New installs now generate a real uuid4, persisted as its integer form to keep the installation_id and reflex.json files readable by older Reflex versions. https://claude.ai/code/session_0162Wc1GmkskbgCRs7fjg9Cy --- reflex/utils/frontend_skeleton.py | 7 ++- reflex/utils/prerequisites.py | 7 ++- reflex/utils/telemetry.py | 32 ++++++++-- tests/units/test_telemetry.py | 98 ++++++++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 11 deletions(-) diff --git a/reflex/utils/frontend_skeleton.py b/reflex/utils/frontend_skeleton.py index 6212fb43121..3564740bf99 100644 --- a/reflex/utils/frontend_skeleton.py +++ b/reflex/utils/frontend_skeleton.py @@ -1,7 +1,7 @@ """This module provides utility functions to initialize the frontend skeleton.""" import json -import random +import uuid from pathlib import Path from reflex_base import constants @@ -498,8 +498,9 @@ def init_reflex_json(project_hash: int | None): if project_hash is not None: console.debug(f"Project hash is already set to {project_hash}.") else: - # Get a random project hash. - project_hash = random.getrandbits(128) + # Generate a uuid4 and persist its 128-bit integer form. Telemetry + # re-encodes it as the canonical UUID string before sending. + project_hash = uuid.uuid4().int console.debug(f"Setting project hash to {project_hash}.") # Write the hash and version to the reflex json file. diff --git a/reflex/utils/prerequisites.py b/reflex/utils/prerequisites.py index 29a2d75d924..05a74d44081 100644 --- a/reflex/utils/prerequisites.py +++ b/reflex/utils/prerequisites.py @@ -7,10 +7,10 @@ import importlib.metadata import inspect import json -import random import re import sys import typing +import uuid from datetime import datetime from os import getcwd from pathlib import Path @@ -603,7 +603,10 @@ def ensure_reflex_installation_id() -> int | None: # - content not parseable as an int if installation_id is None: - installation_id = random.getrandbits(128) + # Generate a uuid4 and persist its 128-bit integer form. Storing the + # int keeps the file readable by older Reflex versions; telemetry + # re-encodes it as the canonical UUID string before sending. + installation_id = uuid.uuid4().int installation_id_file.write_text(str(installation_id)) except Exception as e: console.debug(f"Failed to ensure reflex installation id: {e}") diff --git a/reflex/utils/telemetry.py b/reflex/utils/telemetry.py index c2866a127c0..9b31618797d 100644 --- a/reflex/utils/telemetry.py +++ b/reflex/utils/telemetry.py @@ -8,6 +8,7 @@ import os import platform import sys +import uuid import warnings from contextlib import suppress from datetime import datetime, timezone @@ -232,8 +233,8 @@ def _raise_on_missing_project_hash() -> bool: class _Properties(TypedDict): """Properties type for telemetry.""" - distinct_id: int - distinct_app_id: NotRequired[int] + distinct_id: str + distinct_app_id: NotRequired[str] user_os: str user_os_detail: str reflex_version: str @@ -259,6 +260,29 @@ class _Event(_DefaultEvent): timestamp: str +def _encode_distinct_id(value: int) -> str: + """Encode a 128-bit telemetry identifier as a canonical UUID string. + + Historically ``distinct_id`` and ``distinct_app_id`` were sent as raw + 128-bit integers. PostHog coerces large JSON numbers to floats, silently + discarding all but ~16 significant digits, so distinct installs or apps can + collapse onto the same truncated value and have their events correlated. + + A UUID carries the same 128 bits, so the hex string is sent losslessly while + remaining the *same value* as the legacy integer + (``uuid.UUID(int=value).int == value``). Deriving the UUID from the existing + identifier — rather than minting a fresh one — keeps an installation's new + events linkable to its pre-migration history. + + Args: + value: The stored 128-bit identifier. + + Returns: + The identifier encoded as a UUID hex string. + """ + return str(uuid.UUID(int=value)) + + def _get_event_defaults() -> _DefaultEvent | None: """Get the default event data. @@ -270,7 +294,7 @@ def _get_event_defaults() -> _DefaultEvent | None: return None cpuinfo = get_cpu_info() properties: _Properties = { - "distinct_id": installation_id, + "distinct_id": _encode_distinct_id(installation_id), "user_os": get_os(), "user_os_detail": get_detailed_platform_str(), "reflex_version": get_reflex_version(), @@ -288,7 +312,7 @@ def _get_event_defaults() -> _DefaultEvent | None: if ( project_hash := get_project_hash(raise_on_fail=_raise_on_missing_project_hash()) ) is not None: - properties["distinct_app_id"] = project_hash + properties["distinct_app_id"] = _encode_distinct_id(project_hash) return { "api_key": "phc_JoMo0fOyi0GQAooY3UyO9k0hebGkMyFJrrCw1Gt5SGb", diff --git a/tests/units/test_telemetry.py b/tests/units/test_telemetry.py index c43a285a192..c963b444001 100644 --- a/tests/units/test_telemetry.py +++ b/tests/units/test_telemetry.py @@ -1,3 +1,4 @@ +import uuid from types import SimpleNamespace import pytest @@ -18,8 +19,10 @@ def event_defaults(mocker: MockerFixture) -> dict: defaults = { "api_key": "test_api_key", "properties": { - "distinct_id": 12345, - "distinct_app_id": 78285505863498957834586115958872998605, + # Post-conversion defaults carry UUID-string identifiers (the hex + # forms of 12345 and 78285505863498957834586115958872998605). + "distinct_id": "00000000-0000-0000-0000-000000003039", + "distinct_app_id": "3ae53d70-56b0-b52a-f645-37040fb802cd", "user_os": "Test OS", "user_os_detail": "Mocked Platform", "reflex_version": "0.8.0", @@ -336,3 +339,94 @@ def test_prepare_event_properties_override_kwargs(event_defaults): assert event is not None props: dict = event["properties"] # pyright: ignore[reportAssignmentType] assert props["template"] == "from-properties" + + +def test_encode_distinct_id_round_trips_losslessly(): + """A legacy 128-bit integer id encodes to UUID without losing precision.""" + legacy_id = 78285505863498957834586115958872998605 + encoded = telemetry._encode_distinct_id(legacy_id) + + assert isinstance(encoded, str) + assert encoded == str(uuid.UUID(int=legacy_id)) + # Full 128-bit fidelity is preserved, unlike the old float-truncated int. + assert uuid.UUID(encoded).int == legacy_id + + +def test_encode_distinct_id_handles_uuid4_int_form(): + """A freshly generated uuid4 round-trips through its integer storage form.""" + generated = uuid.uuid4() + assert telemetry._encode_distinct_id(generated.int) == str(generated) + + +def test_encode_distinct_id_pads_small_values(): + """Small integers still encode to a valid, zero-padded UUID string.""" + encoded = telemetry._encode_distinct_id(12345) + assert encoded == "00000000-0000-0000-0000-000000003039" + assert uuid.UUID(encoded).int == 12345 + + +@pytest.fixture +def stub_event_default_sources(mocker: MockerFixture): + """Stub the slow/host-specific inputs of ``_get_event_defaults``. + + Returns: + A callable ``configure(installation_id, project_hash)`` that sets the + stored identifier values feeding the default event payload. + """ + mocker.patch.object(telemetry, "get_cpu_info", return_value=None) + mocker.patch.object(telemetry, "get_node_version", return_value=None) + mocker.patch.object(telemetry, "get_bun_version", return_value=None) + + def configure(*, installation_id: int | None, project_hash: int | None) -> None: + mocker.patch.object( + telemetry, "ensure_reflex_installation_id", return_value=installation_id + ) + mocker.patch.object(telemetry, "get_project_hash", return_value=project_hash) + + return configure + + +def test_get_event_defaults_encodes_ids_as_uuid_strings(stub_event_default_sources): + """distinct_id and distinct_app_id are sent as lossless UUID strings. + + Regression: previously these were raw 128-bit ints that PostHog truncated + to floats, collapsing distinct installs/apps onto one identifier. + """ + installation_id = 0xDEADBEEFDEADBEEFDEADBEEFDEADBEEF + project_hash = 78285505863498957834586115958872998605 + stub_event_default_sources( + installation_id=installation_id, project_hash=project_hash + ) + + defaults = telemetry._get_event_defaults() + + assert defaults is not None + props: dict = defaults["properties"] # pyright: ignore[reportAssignmentType] + assert isinstance(props["distinct_id"], str) + assert isinstance(props["distinct_app_id"], str) + assert props["distinct_id"] == str(uuid.UUID(int=installation_id)) + assert props["distinct_app_id"] == str(uuid.UUID(int=project_hash)) + # Continuity: each encoded id decodes back to the original integer value. + assert uuid.UUID(props["distinct_id"]).int == installation_id + assert uuid.UUID(props["distinct_app_id"]).int == project_hash + + +def test_get_event_defaults_omits_distinct_app_id_without_project_hash( + stub_event_default_sources, +): + """No distinct_app_id is emitted when the project hash is unavailable.""" + stub_event_default_sources(installation_id=12345, project_hash=None) + + defaults = telemetry._get_event_defaults() + + assert defaults is not None + assert "distinct_app_id" not in defaults["properties"] + assert defaults["properties"]["distinct_id"] == str(uuid.UUID(int=12345)) + + +def test_get_event_defaults_returns_none_without_installation_id( + stub_event_default_sources, +): + """A missing installation id short-circuits defaults (unchanged contract).""" + stub_event_default_sources(installation_id=None, project_hash=12345) + assert telemetry._get_event_defaults() is None From ce84aac4b9c3d264e75ce2d739ad8b09e3533f60 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 00:48:00 +0000 Subject: [PATCH 2/5] feat(telemetry): alias legacy numeric distinct_id to its UUID in PostHog Re-encoding distinct_id as a UUID string makes PostHog treat the new UUID identity and the old (float-truncated) numeric identity as separate persons, breaking continuity with pre-migration events. On the first telemetry send of a process, emit a one-time PostHog $create_alias event linking the new UUID distinct_id to the legacy numeric id. The legacy id is sent as a JSON number so PostHog coerces it to the same lossy float as the historic events, merging the two persons. The attempt is best-effort and runs exactly once: a flag in reflex.json records that it ran (set even when the alias does not match, since the lossy legacy id may not), and it is written with a merging update so it survives Reflex downgrades and upgrades. Brand-new UUID-native projects preset the flag during init since they have no legacy numeric telemetry to alias. https://claude.ai/code/session_0162Wc1GmkskbgCRs7fjg9Cy --- reflex/utils/frontend_skeleton.py | 10 ++-- reflex/utils/prerequisites.py | 31 ++++++++++ reflex/utils/telemetry.py | 61 +++++++++++++++++++- tests/units/test_prerequisites.py | 60 ++++++++++++++++++- tests/units/test_telemetry.py | 95 +++++++++++++++++++++++++++++++ 5 files changed, 251 insertions(+), 6 deletions(-) diff --git a/reflex/utils/frontend_skeleton.py b/reflex/utils/frontend_skeleton.py index 3564740bf99..b03a9943f56 100644 --- a/reflex/utils/frontend_skeleton.py +++ b/reflex/utils/frontend_skeleton.py @@ -495,6 +495,7 @@ def init_reflex_json(project_hash: int | None): Args: project_hash: The app hash. """ + reflex_json: dict[str, object] = {"version": constants.Reflex.VERSION} if project_hash is not None: console.debug(f"Project hash is already set to {project_hash}.") else: @@ -502,10 +503,11 @@ def init_reflex_json(project_hash: int | None): # re-encodes it as the canonical UUID string before sending. project_hash = uuid.uuid4().int console.debug(f"Setting project hash to {project_hash}.") + # A brand-new project is UUID-native, so it never reported a legacy + # numeric distinct_id to alias; mark the alias as handled up front. The + # reuse branch leaves the flag untouched (the merging write preserves it). + reflex_json["alias_created"] = True # Write the hash and version to the reflex json file. - reflex_json = { - "version": constants.Reflex.VERSION, - "project_hash": project_hash, - } + reflex_json["project_hash"] = project_hash path_ops.update_json_file(get_web_dir() / constants.Reflex.JSON, reflex_json) diff --git a/reflex/utils/prerequisites.py b/reflex/utils/prerequisites.py index 05a74d44081..b9a1ec6ec51 100644 --- a/reflex/utils/prerequisites.py +++ b/reflex/utils/prerequisites.py @@ -511,6 +511,37 @@ def get_project_hash(raise_on_fail: bool = False) -> int | None: return data.get("project_hash") +def get_alias_created() -> bool | None: + """Read the telemetry distinct_id alias flag from the reflex.json file. + + The flag records whether the one-time PostHog ``$create_alias`` event that + links an installation's legacy numeric ``distinct_id`` to its UUID form has + already been handled (either sent, or skipped for a UUID-native project). + + Returns: + The flag value, or None when reflex.json is missing or unreadable (i.e. + there is nowhere to persist the flag). + """ + json_file = get_web_dir() / constants.Reflex.JSON + if not json_file.exists(): + return None + with contextlib.suppress(Exception): + return bool(json.loads(json_file.read_text()).get("alias_created", False)) + return None + + +def set_alias_created(): + """Record in reflex.json that the telemetry distinct_id alias was handled. + + The write merges into the existing file, so ``project_hash``/``version`` are + preserved; likewise an older Reflex version rewriting reflex.json keeps this + flag, so it survives downgrades and upgrades. + """ + path_ops.update_json_file( + get_web_dir() / constants.Reflex.JSON, {"alias_created": True} + ) + + def check_running_mode(frontend: bool, backend: bool) -> RunningMode: """Check if the app is running in frontend or backend mode. diff --git a/reflex/utils/telemetry.py b/reflex/utils/telemetry.py index 9b31618797d..bc874d2a469 100644 --- a/reflex/utils/telemetry.py +++ b/reflex/utils/telemetry.py @@ -24,7 +24,12 @@ from reflex.utils import console, processes from reflex.utils.js_runtimes import get_bun_version, get_node_version -from reflex.utils.prerequisites import ensure_reflex_installation_id, get_project_hash +from reflex.utils.prerequisites import ( + ensure_reflex_installation_id, + get_alias_created, + get_project_hash, + set_alias_created, +) UTC = timezone.utc POSTHOG_API_URL: str = "https://app.posthog.com/capture/" @@ -426,6 +431,59 @@ def _send( background_tasks = set() +_legacy_alias_attempted = False + + +def _maybe_alias_legacy_distinct_id(telemetry_enabled: bool | None) -> None: + """Link the legacy numeric distinct_id to its UUID form, once per install. + + Older Reflex versions reported ``distinct_id`` as a 128-bit integer, which + PostHog stored as a lossy float. Now that the same value is sent as a UUID + string (see ``_encode_distinct_id``), the two PostHog identities must be + merged so an installation's history stays on a single person. PostHog does + this through a one-time ``$create_alias`` event. + + A flag in ``reflex.json`` records that the attempt was made. The flag is set + even when the alias does not match — the legacy id is lossy, so PostHog may + silently drop it — to avoid resending on every run. ``reflex.json`` merges + unknown keys on write, so the flag survives Reflex downgrades and upgrades. + + Args: + telemetry_enabled: Whether telemetry is enabled (resolved from the config + when None). + """ + global _legacy_alias_attempted + if _legacy_alias_attempted: + return + + with suppress(Exception): + if telemetry_enabled is None: + telemetry_enabled = get_config().telemetry_enabled + if not telemetry_enabled: + # Don't latch: a later enabled send in this process should retry. + return + + # Latch before the alias send below (which re-enters send()) so it cannot + # recurse and so the attempt happens at most once per process. + _legacy_alias_attempted = True + + alias_created = get_alias_created() + # None: no reflex.json, so nowhere to persist the flag -> skip. + # True: already handled (or a UUID-native project) -> skip. + if alias_created is None or alias_created: + return + + if (installation_id := ensure_reflex_installation_id()) is None: + return + + # distinct_id is the UUID form (set by get_event_defaults); send the + # legacy integer as ``alias`` so PostHog coerces it to the same float as + # the historic events and merges the two persons. + send("$create_alias", telemetry_enabled, properties={"alias": installation_id}) + + # Record the attempt regardless of outcome; we must not retry every run. + set_alias_created() + def send( event: str, @@ -443,6 +501,7 @@ def send( properties. Preferred over ``kwargs`` for new events. kwargs: Additional data to send with the event. """ + _maybe_alias_legacy_distinct_id(telemetry_enabled) async def async_send( # noqa: RUF029 event: str, diff --git a/tests/units/test_prerequisites.py b/tests/units/test_prerequisites.py index edd3140383e..f3911181557 100644 --- a/tests/units/test_prerequisites.py +++ b/tests/units/test_prerequisites.py @@ -14,7 +14,7 @@ from reflex.reflex import cli from reflex.testing import chdir -from reflex.utils import frontend_skeleton, js_runtimes +from reflex.utils import frontend_skeleton, js_runtimes, prerequisites from reflex.utils.frontend_skeleton import ( _compile_vite_config, _update_react_router_config, @@ -1372,3 +1372,61 @@ def index(): app.add_page(index) """ ) + + +def test_get_alias_created_missing_file(tmp_path, monkeypatch: pytest.MonkeyPatch): + """A missing reflex.json yields None (nowhere to persist the flag).""" + monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) + assert prerequisites.get_alias_created() is None + + +def test_get_alias_created_absent_flag(tmp_path, monkeypatch: pytest.MonkeyPatch): + """An existing reflex.json without the flag yields False.""" + monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) + (tmp_path / constants.Reflex.JSON).write_text('{"project_hash": 5}') + assert prerequisites.get_alias_created() is False + + +def test_get_alias_created_flag_set(tmp_path, monkeypatch: pytest.MonkeyPatch): + """A set flag yields True.""" + monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) + (tmp_path / constants.Reflex.JSON).write_text('{"alias_created": true}') + assert prerequisites.get_alias_created() is True + + +def test_set_alias_created_merges_and_preserves( + tmp_path, monkeypatch: pytest.MonkeyPatch +): + """Setting the flag preserves existing keys (downgrade/upgrade safety).""" + monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) + json_file = tmp_path / constants.Reflex.JSON + json_file.write_text('{"version": "0.1.0", "project_hash": 5}') + + prerequisites.set_alias_created() + + data = json.loads(json_file.read_text()) + assert data["alias_created"] is True + assert data["project_hash"] == 5 + assert data["version"] == "0.1.0" + + +def test_init_reflex_json_presets_alias_for_new_project( + tmp_path, monkeypatch: pytest.MonkeyPatch +): + """A brand-new project is marked alias-handled so no alias event is sent.""" + _patch_web_dir(monkeypatch, tmp_path) + frontend_skeleton.init_reflex_json(project_hash=None) + data = json.loads((tmp_path / constants.Reflex.JSON).read_text()) + assert data["alias_created"] is True + assert "project_hash" in data + + +def test_init_reflex_json_keeps_existing_project_unflagged( + tmp_path, monkeypatch: pytest.MonkeyPatch +): + """Reusing an existing hash does not preset the flag (legacy project).""" + _patch_web_dir(monkeypatch, tmp_path) + frontend_skeleton.init_reflex_json(project_hash=12345) + data = json.loads((tmp_path / constants.Reflex.JSON).read_text()) + assert "alias_created" not in data + assert data["project_hash"] == 12345 diff --git a/tests/units/test_telemetry.py b/tests/units/test_telemetry.py index c963b444001..f1e4fb1ff10 100644 --- a/tests/units/test_telemetry.py +++ b/tests/units/test_telemetry.py @@ -430,3 +430,98 @@ def test_get_event_defaults_returns_none_without_installation_id( """A missing installation id short-circuits defaults (unchanged contract).""" stub_event_default_sources(installation_id=None, project_hash=12345) assert telemetry._get_event_defaults() is None + + +@pytest.fixture(autouse=True) +def _reset_alias_guard(): + """Reset the per-process alias guard so each test starts fresh.""" + telemetry._legacy_alias_attempted = False + yield + telemetry._legacy_alias_attempted = False + + +def test_maybe_alias_sends_create_alias_when_unflagged(mocker: MockerFixture): + """An un-flagged reflex.json triggers a $create_alias and sets the flag.""" + mocker.patch.object(telemetry, "get_alias_created", return_value=False) + mocker.patch.object(telemetry, "ensure_reflex_installation_id", return_value=12345) + set_flag = mocker.patch.object(telemetry, "set_alias_created") + send_mock = mocker.patch.object(telemetry, "send") + + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) + + send_mock.assert_called_once_with( + "$create_alias", True, properties={"alias": 12345} + ) + set_flag.assert_called_once() + + +def test_maybe_alias_skips_when_already_created(mocker: MockerFixture): + """A flag already set means no alias event and no rewrite.""" + mocker.patch.object(telemetry, "get_alias_created", return_value=True) + send_mock = mocker.patch.object(telemetry, "send") + set_flag = mocker.patch.object(telemetry, "set_alias_created") + + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) + + send_mock.assert_not_called() + set_flag.assert_not_called() + + +def test_maybe_alias_skips_without_reflex_json(mocker: MockerFixture): + """No reflex.json (None) means nowhere to persist the flag, so skip.""" + mocker.patch.object(telemetry, "get_alias_created", return_value=None) + send_mock = mocker.patch.object(telemetry, "send") + set_flag = mocker.patch.object(telemetry, "set_alias_created") + + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) + + send_mock.assert_not_called() + set_flag.assert_not_called() + + +def test_maybe_alias_skips_when_telemetry_disabled(mocker: MockerFixture): + """Disabled telemetry sends nothing and leaves the persistent flag unset.""" + get_alias = mocker.patch.object(telemetry, "get_alias_created") + send_mock = mocker.patch.object(telemetry, "send") + + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=False) + + get_alias.assert_not_called() + send_mock.assert_not_called() + + +def test_maybe_alias_runs_at_most_once_per_process(mocker: MockerFixture): + """The guard prevents a second alias attempt within the same process.""" + mocker.patch.object(telemetry, "get_alias_created", return_value=False) + mocker.patch.object(telemetry, "ensure_reflex_installation_id", return_value=7) + mocker.patch.object(telemetry, "set_alias_created") + send_mock = mocker.patch.object(telemetry, "send") + + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) + + send_mock.assert_called_once() + + +def test_maybe_alias_create_alias_payload( + event_defaults, httpx_post, mocker: MockerFixture +): + """The posted $create_alias pairs the new UUID distinct_id with the legacy int.""" + mocker.patch.object(telemetry, "get_alias_created", return_value=False) + mocker.patch.object(telemetry, "set_alias_created") + legacy_id = 78285505863498957834586115958872998605 + mocker.patch.object( + telemetry, "ensure_reflex_installation_id", return_value=legacy_id + ) + + telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) + + httpx_post.assert_called_once() + payload = httpx_post.call_args.kwargs["json"] + assert payload["event"] == "$create_alias" + props = payload["properties"] + # The legacy integer is sent at full precision so PostHog re-coerces it to + # the same lossy float as the historic events and merges the two persons. + assert props["alias"] == legacy_id + # distinct_id is the new UUID-string identity (from the event defaults). + assert props["distinct_id"] == event_defaults["properties"]["distinct_id"] From 43f02b23e9264f88d23c189aa76203e4970fee77 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 01:08:15 +0000 Subject: [PATCH 3/5] refactor(telemetry): store distinct_id alias marker per-machine, not per-app The alias links the per-machine installation distinct_id, so gating it on a per-app reflex.json flag was the wrong scope. Replace that flag with a marker file next to the installation id in the Reflex dir, recording that the install uses v0.9.5 UUID distinct_id semantics. - New installs write the marker when the id is first generated (ensure_reflex_installation_id), so they never attempt a pointless alias. - Legacy installs (id present, marker absent) attempt the one-time $create_alias, then write the marker regardless of outcome so it is not retried on every run. The marker lives in the per-user Reflex dir, which no Reflex version clears, so it persists across downgrades and upgrades. reflex.json is no longer touched for telemetry. https://claude.ai/code/session_0162Wc1GmkskbgCRs7fjg9Cy --- reflex/utils/frontend_skeleton.py | 10 ++--- reflex/utils/prerequisites.py | 53 ++++++++++++++--------- reflex/utils/telemetry.py | 27 ++++++------ tests/units/test_prerequisites.py | 72 ++++++++++++++----------------- tests/units/test_telemetry.py | 47 ++++++++++---------- 5 files changed, 106 insertions(+), 103 deletions(-) diff --git a/reflex/utils/frontend_skeleton.py b/reflex/utils/frontend_skeleton.py index b03a9943f56..3564740bf99 100644 --- a/reflex/utils/frontend_skeleton.py +++ b/reflex/utils/frontend_skeleton.py @@ -495,7 +495,6 @@ def init_reflex_json(project_hash: int | None): Args: project_hash: The app hash. """ - reflex_json: dict[str, object] = {"version": constants.Reflex.VERSION} if project_hash is not None: console.debug(f"Project hash is already set to {project_hash}.") else: @@ -503,11 +502,10 @@ def init_reflex_json(project_hash: int | None): # re-encodes it as the canonical UUID string before sending. project_hash = uuid.uuid4().int console.debug(f"Setting project hash to {project_hash}.") - # A brand-new project is UUID-native, so it never reported a legacy - # numeric distinct_id to alias; mark the alias as handled up front. The - # reuse branch leaves the flag untouched (the merging write preserves it). - reflex_json["alias_created"] = True # Write the hash and version to the reflex json file. - reflex_json["project_hash"] = project_hash + reflex_json = { + "version": constants.Reflex.VERSION, + "project_hash": project_hash, + } path_ops.update_json_file(get_web_dir() / constants.Reflex.JSON, reflex_json) diff --git a/reflex/utils/prerequisites.py b/reflex/utils/prerequisites.py index b9a1ec6ec51..a8c84da6aaa 100644 --- a/reflex/utils/prerequisites.py +++ b/reflex/utils/prerequisites.py @@ -511,35 +511,43 @@ def get_project_hash(raise_on_fail: bool = False) -> int | None: return data.get("project_hash") -def get_alias_created() -> bool | None: - """Read the telemetry distinct_id alias flag from the reflex.json file. +_DISTINCT_ID_SEMANTICS_VERSION = "0.9.5" - The flag records whether the one-time PostHog ``$create_alias`` event that - links an installation's legacy numeric ``distinct_id`` to its UUID form has - already been handled (either sent, or skipped for a UUID-native project). + +def _installation_id_semantics_file() -> Path: + """Return the path of the telemetry distinct_id semantics marker file. Returns: - The flag value, or None when reflex.json is missing or unreadable (i.e. - there is nowhere to persist the flag). + The marker path, next to the installation id in the Reflex dir. """ - json_file = get_web_dir() / constants.Reflex.JSON - if not json_file.exists(): - return None - with contextlib.suppress(Exception): - return bool(json.loads(json_file.read_text()).get("alias_created", False)) - return None + return environment.REFLEX_DIR.get() / "installation_id_semantics" -def set_alias_created(): - """Record in reflex.json that the telemetry distinct_id alias was handled. +def has_uuid_distinct_id_semantics() -> bool: + """Return whether this installation uses UUID telemetry distinct_id semantics. - The write merges into the existing file, so ``project_hash``/``version`` are - preserved; likewise an older Reflex version rewriting reflex.json keeps this - flag, so it survives downgrades and upgrades. + The marker is written for brand-new installs (by + ``ensure_reflex_installation_id``) and after a legacy install attempts to + alias its numeric distinct_id to the UUID form, so its absence identifies an + as-yet-unmigrated legacy installation. + + Returns: + True if the per-installation semantics marker file exists. """ - path_ops.update_json_file( - get_web_dir() / constants.Reflex.JSON, {"alias_created": True} - ) + return _installation_id_semantics_file().exists() + + +def mark_uuid_distinct_id_semantics(): + """Record that this installation uses UUID telemetry distinct_id semantics. + + The marker lives next to the installation id in the Reflex dir, so it is + per-machine (like the id itself) rather than per-app. Failures are ignored: + the marker is best-effort and a missing one only triggers a later retry. + """ + with contextlib.suppress(Exception): + marker = _installation_id_semantics_file() + marker.parent.mkdir(parents=True, exist_ok=True) + marker.write_text(_DISTINCT_ID_SEMANTICS_VERSION) def check_running_mode(frontend: bool, backend: bool) -> RunningMode: @@ -639,6 +647,9 @@ def ensure_reflex_installation_id() -> int | None: # re-encodes it as the canonical UUID string before sending. installation_id = uuid.uuid4().int installation_id_file.write_text(str(installation_id)) + # A freshly generated id is UUID-native, so record the new semantics + # up front; there is no legacy numeric id for telemetry to alias. + mark_uuid_distinct_id_semantics() except Exception as e: console.debug(f"Failed to ensure reflex installation id: {e}") return None diff --git a/reflex/utils/telemetry.py b/reflex/utils/telemetry.py index bc874d2a469..8a11b44bec5 100644 --- a/reflex/utils/telemetry.py +++ b/reflex/utils/telemetry.py @@ -26,9 +26,9 @@ from reflex.utils.js_runtimes import get_bun_version, get_node_version from reflex.utils.prerequisites import ( ensure_reflex_installation_id, - get_alias_created, get_project_hash, - set_alias_created, + has_uuid_distinct_id_semantics, + mark_uuid_distinct_id_semantics, ) UTC = timezone.utc @@ -443,10 +443,10 @@ def _maybe_alias_legacy_distinct_id(telemetry_enabled: bool | None) -> None: merged so an installation's history stays on a single person. PostHog does this through a one-time ``$create_alias`` event. - A flag in ``reflex.json`` records that the attempt was made. The flag is set - even when the alias does not match — the legacy id is lossy, so PostHog may - silently drop it — to avoid resending on every run. ``reflex.json`` merges - unknown keys on write, so the flag survives Reflex downgrades and upgrades. + A per-machine marker file (next to the installation id) records that the + install uses the new semantics. The marker is written even when the alias + does not match — the legacy id is lossy, so PostHog may silently drop it — to + avoid resending on every run. Args: telemetry_enabled: Whether telemetry is enabled (resolved from the config @@ -467,22 +467,21 @@ def _maybe_alias_legacy_distinct_id(telemetry_enabled: bool | None) -> None: # recurse and so the attempt happens at most once per process. _legacy_alias_attempted = True - alias_created = get_alias_created() - # None: no reflex.json, so nowhere to persist the flag -> skip. - # True: already handled (or a UUID-native project) -> skip. - if alias_created is None or alias_created: - return - + # Resolve the installation id first: a brand-new install is created and + # marked UUID-native by this call, so the marker check then skips it. if (installation_id := ensure_reflex_installation_id()) is None: return + if has_uuid_distinct_id_semantics(): + return # distinct_id is the UUID form (set by get_event_defaults); send the # legacy integer as ``alias`` so PostHog coerces it to the same float as # the historic events and merges the two persons. send("$create_alias", telemetry_enabled, properties={"alias": installation_id}) - # Record the attempt regardless of outcome; we must not retry every run. - set_alias_created() + # Record the new semantics regardless of outcome; we must not retry every + # run even if the lossy legacy id failed to match. + mark_uuid_distinct_id_semantics() def send( diff --git a/tests/units/test_prerequisites.py b/tests/units/test_prerequisites.py index f3911181557..0079366ce97 100644 --- a/tests/units/test_prerequisites.py +++ b/tests/units/test_prerequisites.py @@ -1,6 +1,7 @@ import json import shutil import tempfile +import uuid from collections.abc import Callable, Generator from dataclasses import dataclass from pathlib import Path @@ -1374,59 +1375,50 @@ def index(): ) -def test_get_alias_created_missing_file(tmp_path, monkeypatch: pytest.MonkeyPatch): - """A missing reflex.json yields None (nowhere to persist the flag).""" - monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) - assert prerequisites.get_alias_created() is None +def test_has_uuid_distinct_id_semantics_absent( + tmp_path, monkeypatch: pytest.MonkeyPatch +): + """No marker file means the install has not adopted UUID semantics.""" + monkeypatch.setenv("REFLEX_DIR", str(tmp_path)) + assert prerequisites.has_uuid_distinct_id_semantics() is False -def test_get_alias_created_absent_flag(tmp_path, monkeypatch: pytest.MonkeyPatch): - """An existing reflex.json without the flag yields False.""" - monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) - (tmp_path / constants.Reflex.JSON).write_text('{"project_hash": 5}') - assert prerequisites.get_alias_created() is False +def test_mark_uuid_distinct_id_semantics_writes_marker( + tmp_path, monkeypatch: pytest.MonkeyPatch +): + """Marking creates the per-install marker file with the semantics version.""" + monkeypatch.setenv("REFLEX_DIR", str(tmp_path)) + prerequisites.mark_uuid_distinct_id_semantics() -def test_get_alias_created_flag_set(tmp_path, monkeypatch: pytest.MonkeyPatch): - """A set flag yields True.""" - monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) - (tmp_path / constants.Reflex.JSON).write_text('{"alias_created": true}') - assert prerequisites.get_alias_created() is True + assert prerequisites.has_uuid_distinct_id_semantics() is True + marker = tmp_path / "installation_id_semantics" + assert marker.read_text() == prerequisites._DISTINCT_ID_SEMANTICS_VERSION -def test_set_alias_created_merges_and_preserves( +def test_ensure_installation_id_marks_new_install( tmp_path, monkeypatch: pytest.MonkeyPatch ): - """Setting the flag preserves existing keys (downgrade/upgrade safety).""" - monkeypatch.setattr(prerequisites, "get_web_dir", lambda: tmp_path) - json_file = tmp_path / constants.Reflex.JSON - json_file.write_text('{"version": "0.1.0", "project_hash": 5}') + """A brand-new installation id is generated and marked UUID-native.""" + monkeypatch.setenv("REFLEX_DIR", str(tmp_path)) + assert prerequisites.has_uuid_distinct_id_semantics() is False - prerequisites.set_alias_created() + install_id = prerequisites.ensure_reflex_installation_id() - data = json.loads(json_file.read_text()) - assert data["alias_created"] is True - assert data["project_hash"] == 5 - assert data["version"] == "0.1.0" + assert install_id is not None + # The id is a uuid4 persisted as its integer form. + assert uuid.UUID(int=install_id).version == 4 + assert prerequisites.has_uuid_distinct_id_semantics() is True -def test_init_reflex_json_presets_alias_for_new_project( +def test_ensure_installation_id_keeps_legacy_install_unmarked( tmp_path, monkeypatch: pytest.MonkeyPatch ): - """A brand-new project is marked alias-handled so no alias event is sent.""" - _patch_web_dir(monkeypatch, tmp_path) - frontend_skeleton.init_reflex_json(project_hash=None) - data = json.loads((tmp_path / constants.Reflex.JSON).read_text()) - assert data["alias_created"] is True - assert "project_hash" in data + """An existing legacy id is read and left unmarked, so telemetry will alias it.""" + monkeypatch.setenv("REFLEX_DIR", str(tmp_path)) + (tmp_path / "installation_id").write_text("12345") + install_id = prerequisites.ensure_reflex_installation_id() -def test_init_reflex_json_keeps_existing_project_unflagged( - tmp_path, monkeypatch: pytest.MonkeyPatch -): - """Reusing an existing hash does not preset the flag (legacy project).""" - _patch_web_dir(monkeypatch, tmp_path) - frontend_skeleton.init_reflex_json(project_hash=12345) - data = json.loads((tmp_path / constants.Reflex.JSON).read_text()) - assert "alias_created" not in data - assert data["project_hash"] == 12345 + assert install_id == 12345 + assert prerequisites.has_uuid_distinct_id_semantics() is False diff --git a/tests/units/test_telemetry.py b/tests/units/test_telemetry.py index f1e4fb1ff10..0c21923b42e 100644 --- a/tests/units/test_telemetry.py +++ b/tests/units/test_telemetry.py @@ -440,11 +440,11 @@ def _reset_alias_guard(): telemetry._legacy_alias_attempted = False -def test_maybe_alias_sends_create_alias_when_unflagged(mocker: MockerFixture): - """An un-flagged reflex.json triggers a $create_alias and sets the flag.""" - mocker.patch.object(telemetry, "get_alias_created", return_value=False) +def test_maybe_alias_sends_create_alias_for_legacy_install(mocker: MockerFixture): + """A legacy install (no semantics marker) aliases and then marks itself.""" mocker.patch.object(telemetry, "ensure_reflex_installation_id", return_value=12345) - set_flag = mocker.patch.object(telemetry, "set_alias_created") + mocker.patch.object(telemetry, "has_uuid_distinct_id_semantics", return_value=False) + mark = mocker.patch.object(telemetry, "mark_uuid_distinct_id_semantics") send_mock = mocker.patch.object(telemetry, "send") telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) @@ -452,49 +452,52 @@ def test_maybe_alias_sends_create_alias_when_unflagged(mocker: MockerFixture): send_mock.assert_called_once_with( "$create_alias", True, properties={"alias": 12345} ) - set_flag.assert_called_once() + mark.assert_called_once() -def test_maybe_alias_skips_when_already_created(mocker: MockerFixture): - """A flag already set means no alias event and no rewrite.""" - mocker.patch.object(telemetry, "get_alias_created", return_value=True) +def test_maybe_alias_skips_for_uuid_native_install(mocker: MockerFixture): + """An install already on UUID semantics sends no alias and is not re-marked.""" + mocker.patch.object(telemetry, "ensure_reflex_installation_id", return_value=12345) + mocker.patch.object(telemetry, "has_uuid_distinct_id_semantics", return_value=True) + mark = mocker.patch.object(telemetry, "mark_uuid_distinct_id_semantics") send_mock = mocker.patch.object(telemetry, "send") - set_flag = mocker.patch.object(telemetry, "set_alias_created") telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) send_mock.assert_not_called() - set_flag.assert_not_called() + mark.assert_not_called() -def test_maybe_alias_skips_without_reflex_json(mocker: MockerFixture): - """No reflex.json (None) means nowhere to persist the flag, so skip.""" - mocker.patch.object(telemetry, "get_alias_created", return_value=None) +def test_maybe_alias_skips_without_installation_id(mocker: MockerFixture): + """No installation id means no alias, no marker, and no semantics check.""" + mocker.patch.object(telemetry, "ensure_reflex_installation_id", return_value=None) + has = mocker.patch.object(telemetry, "has_uuid_distinct_id_semantics") + mark = mocker.patch.object(telemetry, "mark_uuid_distinct_id_semantics") send_mock = mocker.patch.object(telemetry, "send") - set_flag = mocker.patch.object(telemetry, "set_alias_created") telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) send_mock.assert_not_called() - set_flag.assert_not_called() + mark.assert_not_called() + has.assert_not_called() def test_maybe_alias_skips_when_telemetry_disabled(mocker: MockerFixture): - """Disabled telemetry sends nothing and leaves the persistent flag unset.""" - get_alias = mocker.patch.object(telemetry, "get_alias_created") + """Disabled telemetry does no work and leaves the marker unwritten.""" + ensure = mocker.patch.object(telemetry, "ensure_reflex_installation_id") send_mock = mocker.patch.object(telemetry, "send") telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=False) - get_alias.assert_not_called() + ensure.assert_not_called() send_mock.assert_not_called() def test_maybe_alias_runs_at_most_once_per_process(mocker: MockerFixture): """The guard prevents a second alias attempt within the same process.""" - mocker.patch.object(telemetry, "get_alias_created", return_value=False) mocker.patch.object(telemetry, "ensure_reflex_installation_id", return_value=7) - mocker.patch.object(telemetry, "set_alias_created") + mocker.patch.object(telemetry, "has_uuid_distinct_id_semantics", return_value=False) + mocker.patch.object(telemetry, "mark_uuid_distinct_id_semantics") send_mock = mocker.patch.object(telemetry, "send") telemetry._maybe_alias_legacy_distinct_id(telemetry_enabled=True) @@ -507,8 +510,8 @@ def test_maybe_alias_create_alias_payload( event_defaults, httpx_post, mocker: MockerFixture ): """The posted $create_alias pairs the new UUID distinct_id with the legacy int.""" - mocker.patch.object(telemetry, "get_alias_created", return_value=False) - mocker.patch.object(telemetry, "set_alias_created") + mocker.patch.object(telemetry, "has_uuid_distinct_id_semantics", return_value=False) + mocker.patch.object(telemetry, "mark_uuid_distinct_id_semantics") legacy_id = 78285505863498957834586115958872998605 mocker.patch.object( telemetry, "ensure_reflex_installation_id", return_value=legacy_id From f21c21cffa1dfd26bfcade754c400cc637ab22a1 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 05:25:01 +0000 Subject: [PATCH 4/5] chore: add changelog news fragment for telemetry UUID distinct_id PR https://claude.ai/code/session_0162Wc1GmkskbgCRs7fjg9Cy --- news/6611.bugfix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/6611.bugfix.md diff --git a/news/6611.bugfix.md b/news/6611.bugfix.md new file mode 100644 index 00000000000..b74d18cff6d --- /dev/null +++ b/news/6611.bugfix.md @@ -0,0 +1 @@ +Anonymous telemetry now reports the installation and project identifiers as UUID strings rather than 128-bit integers. PostHog coerced the large integers to floats, discarding all but ~16 significant digits and risking distinct installs or apps being correlated as one. Each identifier is re-encoded to the same value (a UUID carries the same 128 bits), and a one-time PostHog `$create_alias` links an installation's pre-existing history to its new identifier so continuity is preserved. From 097f50686ea7ae231686d1db7e638e0301c0bbc8 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 19:04:22 +0000 Subject: [PATCH 5/5] style: drop stray blank line in test_telemetry imports after main merge The "Update branch" merge of main combined `import importlib.metadata` (#6610) with this branch's `import uuid` but left a blank line splitting the stdlib import group, which ruff's isort rejected in CI pre-commit. Remove it. https://claude.ai/code/session_0162Wc1GmkskbgCRs7fjg9Cy --- tests/units/test_telemetry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/units/test_telemetry.py b/tests/units/test_telemetry.py index bb8894db790..35780ccbf90 100644 --- a/tests/units/test_telemetry.py +++ b/tests/units/test_telemetry.py @@ -1,6 +1,5 @@ import importlib.metadata import uuid - from types import SimpleNamespace import pytest