From 1e4d8052446528f6c245fe658348477fa7b0665a Mon Sep 17 00:00:00 2001 From: noahho Date: Sun, 3 May 2026 20:13:04 +0200 Subject: [PATCH 1/3] telemetry: fail closed when remote config can't be fetched or parsed download_config previously defaulted to {"enabled": True} when the remote config endpoint was unreachable, returned a non-200, or returned an unparseable body. That meant a client behind a restrictive network (firewall, air-gapped environment, transient outage) would silently fall back to enabling telemetry, even when the operator had no way to reach the public configuration to opt out. Flip the default to {"enabled": False} for all three failure modes: - requests.get raises (timeout, DNS, ConnectionError, etc.) - response status != 200 - response body is not valid JSON Behaviour when the endpoint is reachable and the JSON parses is unchanged: telemetry follows the server-side configuration as before. Adds tests covering each failure mode plus the env-var URL override and the happy path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../telemetry/core/config.py | 19 ++++- tests/telemetry/core/test_config.py | 75 +++++++++++++++++++ 2 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 tests/telemetry/core/test_config.py diff --git a/src/tabpfn_common_utils/telemetry/core/config.py b/src/tabpfn_common_utils/telemetry/core/config.py index a680284..f428b35 100644 --- a/src/tabpfn_common_utils/telemetry/core/config.py +++ b/src/tabpfn_common_utils/telemetry/core/config.py @@ -23,11 +23,19 @@ def download_config() -> Dict[str, Any]: """Download the configuration from server. + If the configuration cannot be fetched or parsed (network error, non-200 + response, malformed JSON) the function returns a fail-closed default of + ``{"enabled": False}`` so that telemetry is not emitted from clients that + cannot reach the public configuration endpoint. + Returns: Dict[str, Any]: The configuration. """ - # The default configuration - default = {"enabled": True} + # Fail-closed default: when we can't reach or parse the remote + # configuration, treat telemetry as disabled. Respects users on restrictive + # networks (firewalls, air-gapped environments) and avoids emitting events + # under any state other than an explicit, server-confirmed enable. + default = {"enabled": False} # This is a public URL anyone can and should read from url = os.environ.get( @@ -41,9 +49,12 @@ def download_config() -> Dict[str, Any]: logger.debug(f"Failed to download telemetry config: {url}") return default - # Disable telemetry by default if resp.status_code != 200: logger.debug(f"Failed to download telemetry config: {resp.status_code}") return default - return resp.json() + try: + return resp.json() + except ValueError: + logger.debug(f"Failed to parse telemetry config JSON from: {url}") + return default diff --git a/tests/telemetry/core/test_config.py b/tests/telemetry/core/test_config.py new file mode 100644 index 0000000..a4b7936 --- /dev/null +++ b/tests/telemetry/core/test_config.py @@ -0,0 +1,75 @@ +"""Tests for the telemetry remote-config download behaviour.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest +import requests + +from tabpfn_common_utils.telemetry.core import config as config_module + + +@pytest.fixture(autouse=True) +def _clear_ttl_cache(): + """Clear the lru_cache on download_config between tests. + + download_config is wrapped by @ttl_cache (which itself uses functools.lru_cache). + functools.wraps preserves the underlying cached callable as __wrapped__, so we + can clear it directly to ensure each test sees a fresh fetch. + """ + config_module.download_config.__wrapped__.cache_clear() + yield + config_module.download_config.__wrapped__.cache_clear() + + +def _mock_response(status_code: int, json_payload=None, raise_on_json: bool = False): + resp = MagicMock(spec=requests.Response) + resp.status_code = status_code + if raise_on_json: + resp.json.side_effect = ValueError("not json") + else: + resp.json.return_value = json_payload + return resp + + +def test_download_config_returns_payload_on_200(): + payload = {"enabled": True, "sampling_rate": 0.5} + with patch.object(config_module.requests, "get", return_value=_mock_response(200, payload)): + assert config_module.download_config() == payload + + +def test_download_config_fails_closed_on_network_exception(): + """If requests.get raises (timeout, DNS, firewall, etc.), default to disabled.""" + with patch.object(config_module.requests, "get", side_effect=requests.exceptions.ConnectionError("boom")): + assert config_module.download_config() == {"enabled": False} + + +def test_download_config_fails_closed_on_timeout(): + with patch.object(config_module.requests, "get", side_effect=requests.exceptions.Timeout("slow")): + assert config_module.download_config() == {"enabled": False} + + +@pytest.mark.parametrize("status", [403, 404, 500, 503]) +def test_download_config_fails_closed_on_non_200(status: int): + with patch.object(config_module.requests, "get", return_value=_mock_response(status, {"enabled": True})): + assert config_module.download_config() == {"enabled": False} + + +def test_download_config_fails_closed_on_invalid_json(): + """If the body isn't parseable JSON, default to disabled.""" + with patch.object( + config_module.requests, "get", return_value=_mock_response(200, raise_on_json=True) + ): + assert config_module.download_config() == {"enabled": False} + + +def test_download_config_uses_env_override(monkeypatch): + """Respects TABPFN_TELEMETRY_CONFIG_URL when set.""" + monkeypatch.setenv("TABPFN_TELEMETRY_CONFIG_URL", "https://example.test/cfg.json") + with patch.object( + config_module.requests, "get", return_value=_mock_response(200, {"enabled": True}) + ) as mock_get: + config_module.download_config() + called_url = mock_get.call_args.args[0] if mock_get.call_args.args else mock_get.call_args.kwargs.get("url") + assert called_url == "https://example.test/cfg.json" From 8fd68b4a6d4db40f166b1baf60f3b556703153f3 Mon Sep 17 00:00:00 2001 From: noahho Date: Sun, 3 May 2026 20:17:12 +0200 Subject: [PATCH 2/3] test: silence pyright on __wrapped__ access The @ttl_cache wrapper exposes __wrapped__ at runtime via functools.wraps, but the type stubs for FunctionType don't include it. Add narrow type-ignore comments on the two test-fixture lines that need it. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/telemetry/core/test_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/telemetry/core/test_config.py b/tests/telemetry/core/test_config.py index a4b7936..0b56a3e 100644 --- a/tests/telemetry/core/test_config.py +++ b/tests/telemetry/core/test_config.py @@ -18,9 +18,9 @@ def _clear_ttl_cache(): functools.wraps preserves the underlying cached callable as __wrapped__, so we can clear it directly to ensure each test sees a fresh fetch. """ - config_module.download_config.__wrapped__.cache_clear() + config_module.download_config.__wrapped__.cache_clear() # type: ignore[attr-defined] yield - config_module.download_config.__wrapped__.cache_clear() + config_module.download_config.__wrapped__.cache_clear() # type: ignore[attr-defined] def _mock_response(status_code: int, json_payload=None, raise_on_json: bool = False): From 19c9facccf0f9471638ec0f421c6f47c2a8acaf9 Mon Sep 17 00:00:00 2001 From: noahho Date: Sun, 3 May 2026 21:13:16 +0200 Subject: [PATCH 3/3] telemetry: validate config shape, fail closed on unexpected JSON resp.json() can return any JSON value (list, string, number, null) and even a dict response may not contain the "enabled" key. With the fail-closed pattern in place, a malformed-shape response should fall back to disabled rather than letting the downstream ProductTelemetry.telemetry_enabled call raise TypeError or KeyError on the host process. Validate that the parsed JSON is a dict containing "enabled" before returning it; otherwise return the fail-closed default. Adds tests covering list, string, int, null, dict-without-enabled, and empty-dict shapes. Addresses the high-severity review comment on PR #72 from gemini-code-assist. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../telemetry/core/config.py | 13 ++++++++++++- tests/telemetry/core/test_config.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/tabpfn_common_utils/telemetry/core/config.py b/src/tabpfn_common_utils/telemetry/core/config.py index f428b35..08fc6b4 100644 --- a/src/tabpfn_common_utils/telemetry/core/config.py +++ b/src/tabpfn_common_utils/telemetry/core/config.py @@ -54,7 +54,18 @@ def download_config() -> Dict[str, Any]: return default try: - return resp.json() + config = resp.json() except ValueError: logger.debug(f"Failed to parse telemetry config JSON from: {url}") return default + + # Validate the shape: anything other than a dict containing "enabled" would + # cause a TypeError/KeyError in downstream `config["enabled"]` access. Fail + # closed so a malformed remote response cannot crash the host process. + if not isinstance(config, dict) or "enabled" not in config: + logger.debug( + f"Telemetry config from {url} is malformed or missing 'enabled' key" + ) + return default + + return config diff --git a/tests/telemetry/core/test_config.py b/tests/telemetry/core/test_config.py index 0b56a3e..0f463e5 100644 --- a/tests/telemetry/core/test_config.py +++ b/tests/telemetry/core/test_config.py @@ -64,6 +64,25 @@ def test_download_config_fails_closed_on_invalid_json(): assert config_module.download_config() == {"enabled": False} +@pytest.mark.parametrize( + "payload", + [ + [1, 2, 3], # list + "enabled", # string + 42, # int + None, # null + {"foo": "bar"}, # dict missing "enabled" + {}, # empty dict + ], +) +def test_download_config_fails_closed_on_malformed_shape(payload): + """If the JSON parses but isn't a dict with an 'enabled' key, default to disabled.""" + with patch.object( + config_module.requests, "get", return_value=_mock_response(200, payload) + ): + assert config_module.download_config() == {"enabled": False} + + def test_download_config_uses_env_override(monkeypatch): """Respects TABPFN_TELEMETRY_CONFIG_URL when set.""" monkeypatch.setenv("TABPFN_TELEMETRY_CONFIG_URL", "https://example.test/cfg.json")