Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/6611.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Anonymous telemetry now reports the installation and project identifiers as UUID strings rather than 128-bit integers. PostHog coerced the large integers to floats, discarding all but ~16 significant digits and risking distinct installs or apps being correlated as one. Each identifier is re-encoded to the same value (a UUID carries the same 128 bits), and a one-time PostHog `$create_alias` links an installation's pre-existing history to its new identifier so continuity is preserved.
7 changes: 4 additions & 3 deletions reflex/utils/frontend_skeleton.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""This module provides utility functions to initialize the frontend skeleton."""

import json
import random
import uuid
from pathlib import Path

from reflex_base import constants
Expand Down Expand Up @@ -498,8 +498,9 @@ def init_reflex_json(project_hash: int | None):
if project_hash is not None:
console.debug(f"Project hash is already set to {project_hash}.")
else:
# Get a random project hash.
project_hash = random.getrandbits(128)
# Generate a uuid4 and persist its 128-bit integer form. Telemetry
# re-encodes it as the canonical UUID string before sending.
project_hash = uuid.uuid4().int
console.debug(f"Setting project hash to {project_hash}.")

# Write the hash and version to the reflex json file.
Expand Down
49 changes: 47 additions & 2 deletions reflex/utils/prerequisites.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import importlib.metadata
import inspect
import json
import random
import re
import sys
import typing
import uuid
from datetime import datetime
from os import getcwd
from pathlib import Path
Expand Down Expand Up @@ -511,6 +511,45 @@ def get_project_hash(raise_on_fail: bool = False) -> int | None:
return data.get("project_hash")


# Text written into the "installation_id_semantics" marker file by
# mark_uuid_distinct_id_semantics(). Only the file's existence is checked by
# has_uuid_distinct_id_semantics(); the content is informational.
_DISTINCT_ID_SEMANTICS_VERSION = "0.9.5"


def _installation_id_semantics_file() -> Path:
    """Locate the marker file that records the telemetry distinct_id semantics.

    Returns:
        The path ``installation_id_semantics`` inside the configured Reflex dir.
    """
    reflex_dir = environment.REFLEX_DIR.get()
    return reflex_dir / "installation_id_semantics"


def has_uuid_distinct_id_semantics() -> bool:
    """Report whether the UUID distinct_id semantics marker is present.

    ``ensure_reflex_installation_id`` writes the marker when it generates a
    brand-new installation id, and telemetry writes it after a legacy install
    has attempted to alias its numeric distinct_id to the UUID form. A missing
    marker therefore means a legacy installation that has not migrated yet.

    Returns:
        True if the per-installation semantics marker file exists.
    """
    marker_path = _installation_id_semantics_file()
    return marker_path.exists()


def mark_uuid_distinct_id_semantics():
    """Write the marker recording UUID telemetry distinct_id semantics.

    The marker is stored in the Reflex dir alongside the installation id, so it
    applies per machine rather than per app. Writing is best-effort: any
    exception is suppressed, and a missing marker simply means the caller's
    check will not find it on a later run.
    """
    with contextlib.suppress(Exception):
        marker_path = _installation_id_semantics_file()
        # Create the Reflex dir if it does not exist yet.
        marker_dir = marker_path.parent
        marker_dir.mkdir(parents=True, exist_ok=True)
        marker_path.write_text(_DISTINCT_ID_SEMANTICS_VERSION)


def check_running_mode(frontend: bool, backend: bool) -> RunningMode:
"""Check if the app is running in frontend or backend mode.

Expand Down Expand Up @@ -603,8 +642,14 @@ def ensure_reflex_installation_id() -> int | None:
# - content not parseable as an int

if installation_id is None:
installation_id = random.getrandbits(128)
# Generate a uuid4 and persist its 128-bit integer form. Storing the
# int keeps the file readable by older Reflex versions; telemetry
# re-encodes it as the canonical UUID string before sending.
installation_id = uuid.uuid4().int
installation_id_file.write_text(str(installation_id))
# A freshly generated id is UUID-native, so record the new semantics
# up front; there is no legacy numeric id for telemetry to alias.
mark_uuid_distinct_id_semantics()
except Exception as e:
console.debug(f"Failed to ensure reflex installation id: {e}")
return None
Expand Down
92 changes: 87 additions & 5 deletions reflex/utils/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import platform
import sys
import uuid
import warnings
from contextlib import suppress
from datetime import datetime, timezone
Expand All @@ -25,7 +26,12 @@

from reflex.utils import console, processes
from reflex.utils.js_runtimes import get_bun_version, get_node_version
from reflex.utils.prerequisites import ensure_reflex_installation_id, get_project_hash
from reflex.utils.prerequisites import (
ensure_reflex_installation_id,
get_project_hash,
has_uuid_distinct_id_semantics,
mark_uuid_distinct_id_semantics,
)

UTC = timezone.utc
POSTHOG_API_URL: str = "https://app.posthog.com/capture/"
Expand Down Expand Up @@ -264,8 +270,8 @@ def _raise_on_missing_project_hash() -> bool:
class _Properties(TypedDict):
"""Properties type for telemetry."""

distinct_id: int
distinct_app_id: NotRequired[int]
distinct_id: str
distinct_app_id: NotRequired[str]
user_os: str
user_os_detail: str
reflex_version: str
Expand All @@ -292,6 +298,29 @@ class _Event(_DefaultEvent):
timestamp: str


def _encode_distinct_id(value: int) -> str:
"""Encode a 128-bit telemetry identifier as a canonical UUID string.

Historically ``distinct_id`` and ``distinct_app_id`` were sent as raw
128-bit integers. PostHog coerces large JSON numbers to floats, silently
discarding all but ~16 significant digits, so distinct installs or apps can
collapse onto the same truncated value and have their events correlated.

A UUID carries the same 128 bits, so the hex string is sent losslessly while
remaining the *same value* as the legacy integer
(``uuid.UUID(int=value).int == value``). Deriving the UUID from the existing
identifier — rather than minting a fresh one — keeps an installation's new
events linkable to its pre-migration history.

Args:
value: The stored 128-bit identifier.

Returns:
The identifier encoded as a UUID hex string.
"""
return str(uuid.UUID(int=value))


def _get_event_defaults() -> _DefaultEvent | None:
"""Get the default event data.

Expand All @@ -303,7 +332,7 @@ def _get_event_defaults() -> _DefaultEvent | None:
return None
cpuinfo = get_cpu_info()
properties: _Properties = {
"distinct_id": installation_id,
"distinct_id": _encode_distinct_id(installation_id),
"user_os": get_os(),
"user_os_detail": get_detailed_platform_str(),
"reflex_version": get_reflex_version(),
Expand All @@ -322,7 +351,7 @@ def _get_event_defaults() -> _DefaultEvent | None:
if (
project_hash := get_project_hash(raise_on_fail=_raise_on_missing_project_hash())
) is not None:
properties["distinct_app_id"] = project_hash
properties["distinct_app_id"] = _encode_distinct_id(project_hash)

return {
"api_key": "phc_JoMo0fOyi0GQAooY3UyO9k0hebGkMyFJrrCw1Gt5SGb",
Expand Down Expand Up @@ -436,6 +465,58 @@ def _send(

background_tasks = set()

# Per-process latch for _maybe_alias_legacy_distinct_id(): set to True just
# before the one-time "$create_alias" attempt so later send() calls (including
# the re-entrant one made by the alias itself) skip the migration logic.
_legacy_alias_attempted = False


def _maybe_alias_legacy_distinct_id(telemetry_enabled: bool | None) -> None:
    """Merge a legacy numeric distinct_id with its UUID form, once per install.

    Earlier Reflex versions sent ``distinct_id`` as a 128-bit integer, which
    PostHog stored as a lossy float. The same value is now sent as a UUID
    string (see ``_encode_distinct_id``), so PostHog would otherwise treat the
    two as separate persons. A single ``$create_alias`` event links them.

    Whether an install already uses the new semantics is tracked by a
    per-machine marker file kept next to the installation id. The marker is
    written after the alias attempt even if PostHog does not match the lossy
    legacy id, so the alias is not resent on every run.

    Any exception raised while resolving the config, the installation id, or
    sending the alias is suppressed.

    Args:
        telemetry_enabled: Whether telemetry is enabled (resolved from the config
            when None).
    """
    global _legacy_alias_attempted
    if _legacy_alias_attempted:
        return

    with suppress(Exception):
        enabled = telemetry_enabled
        if enabled is None:
            enabled = get_config().telemetry_enabled
        if not enabled:
            # Leave the latch unset so that a later send() in this process,
            # made with telemetry enabled, still gets its attempt.
            return

        # Set the latch before calling send() below: send() calls back into
        # this function, and the latch both stops that recursion and limits
        # the attempt to one per process.
        _legacy_alias_attempted = True

        # Fetch the installation id before checking the marker. For a
        # brand-new install this call creates the id and writes the marker,
        # so the marker check that follows skips the alias.
        legacy_id = ensure_reflex_installation_id()
        if legacy_id is None or has_uuid_distinct_id_semantics():
            return

        # The event's distinct_id is the UUID form (filled in by
        # _get_event_defaults). The raw integer goes in ``alias`` so PostHog
        # coerces it to the same float as the historic events and merges the
        # two persons.
        send("$create_alias", enabled, properties={"alias": legacy_id})

        # Write the marker whatever the outcome: the alias must not be
        # retried on every run even when the lossy legacy id did not match.
        mark_uuid_distinct_id_semantics()


def send(
event: str,
Expand All @@ -453,6 +534,7 @@ def send(
properties. Preferred over ``kwargs`` for new events.
kwargs: Additional data to send with the event.
"""
_maybe_alias_legacy_distinct_id(telemetry_enabled)

async def async_send( # noqa: RUF029
event: str,
Expand Down
52 changes: 51 additions & 1 deletion tests/units/test_prerequisites.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import shutil
import tempfile
import uuid
from collections.abc import Callable, Generator
from dataclasses import dataclass
from pathlib import Path
Expand All @@ -14,7 +15,7 @@

from reflex.reflex import cli
from reflex.testing import chdir
from reflex.utils import frontend_skeleton, js_runtimes
from reflex.utils import frontend_skeleton, js_runtimes, prerequisites
from reflex.utils.frontend_skeleton import (
_compile_vite_config,
_update_react_router_config,
Expand Down Expand Up @@ -1372,3 +1373,52 @@ def index():
app.add_page(index)
"""
)


def test_has_uuid_distinct_id_semantics_absent(
    tmp_path, monkeypatch: pytest.MonkeyPatch
):
    """An empty Reflex dir has no marker, so UUID semantics are not reported."""
    monkeypatch.setenv("REFLEX_DIR", str(tmp_path))

    marked = prerequisites.has_uuid_distinct_id_semantics()

    assert marked is False


def test_mark_uuid_distinct_id_semantics_writes_marker(
    tmp_path, monkeypatch: pytest.MonkeyPatch
):
    """Marking writes the marker file, whose content is the semantics version."""
    monkeypatch.setenv("REFLEX_DIR", str(tmp_path))

    prerequisites.mark_uuid_distinct_id_semantics()

    # The marker is now visible through the public check ...
    assert prerequisites.has_uuid_distinct_id_semantics() is True
    # ... and lives at the expected path with the expected content.
    written = (tmp_path / "installation_id_semantics").read_text()
    assert written == prerequisites._DISTINCT_ID_SEMANTICS_VERSION


def test_ensure_installation_id_marks_new_install(
    tmp_path, monkeypatch: pytest.MonkeyPatch
):
    """Generating a fresh installation id also writes the UUID semantics marker."""
    monkeypatch.setenv("REFLEX_DIR", str(tmp_path))
    # Precondition: nothing has been marked in the empty Reflex dir.
    assert prerequisites.has_uuid_distinct_id_semantics() is False

    generated = prerequisites.ensure_reflex_installation_id()

    assert generated is not None
    # The stored integer is the int form of a version-4 UUID.
    as_uuid = uuid.UUID(int=generated)
    assert as_uuid.version == 4
    assert prerequisites.has_uuid_distinct_id_semantics() is True


def test_ensure_installation_id_keeps_legacy_install_unmarked(
    tmp_path, monkeypatch: pytest.MonkeyPatch
):
    """A pre-existing id is returned as-is and no marker is written for it."""
    monkeypatch.setenv("REFLEX_DIR", str(tmp_path))
    # Simulate a legacy install: an id file exists but no semantics marker.
    legacy_id_file = tmp_path / "installation_id"
    legacy_id_file.write_text("12345")

    loaded = prerequisites.ensure_reflex_installation_id()

    assert loaded == 12345
    assert prerequisites.has_uuid_distinct_id_semantics() is False
Loading
Loading