From 6dc6b0713e0dd5653bd64bad539f6c6ab0523942 Mon Sep 17 00:00:00 2001 From: victor wang Date: Thu, 21 May 2026 10:20:05 -0700 Subject: [PATCH 1/3] fix(otlp-aws-exporter): avoid RecursionError with pip_system_certs on Python 3.12 When pip_system_certs is installed it injects truststore.SSLContext as the process-wide ssl.SSLContext via a .pth file. The injection runs in the finally block of a site.execsitecustomize wrapper - i.e. after sitecustomize.py returns. OpenTelemetry's auto-instrumentation runs from sitecustomize, which loads the ADOT distro and transitively imports requests (via the upstream OTLP HTTP exporters) and botocore. Both of those modules capture a reference to ssl.SSLContext at import time, before pip_system_certs's injection runs. On Python 3.12, ssl.SSLContext.options.__set__ resolves SSLContext from ssl module globals at call time. After pip_system_certs runs, that name resolves to truststore.SSLContext, and the super() chain in the options setter bounces between the original ssl.SSLContext and truststore.SSLContext until the recursion limit (~978 frames) is exceeded. The OTLP AWS exporter amplifies the issue by signing every export with SigV4, which constructs a urllib3 SSL context every 5 seconds. Fix: - Add patches/_pip_system_certs_patches.py with a one-shot helper that rebinds botocore.httpsession.SSLContext and urllib3.util.ssl_.SSLContext to the current ssl.SSLContext (truststore's wrapper). truststore's own SSLContext.options setter does not use the recursive super() pattern, so subsequent SSL context creations succeed. - Apply the patch on the first AwsAuthSession.request() call, which is the earliest point at which pip_system_certs's deferred injection is guaranteed to have run. - Cache the resolved AWS credentials on the same first request so the credential resolver chain (which also constructs an SSL context) is exercised once per exporter rather than once per export. RefreshableCredentials handles rotation internally on attribute access, so caching the reference is safe. Tests: - New test_pip_system_certs_patches.py covers the no-op, rebind, idempotent, and one-shot guard paths. - test_aws_auth_session.py adds assertions that get_credentials runs once across multiple requests and that the patch helper is invoked once. --- CHANGELOG.md | 1 + .../otlp/aws/common/aws_auth_session.py | 57 ++++++++++- .../patches/_pip_system_certs_patches.py | 98 ++++++++++++++++++ .../otlp/aws/common/test_aws_auth_session.py | 38 +++++++ .../patches/test_pip_system_certs_patches.py | 99 +++++++++++++++++++ 5 files changed, 289 insertions(+), 4 deletions(-) create mode 100644 aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py create mode 100644 aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 438822dcc..8cd09d34d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ If your change does not need a CHANGELOG entry, add the "skip changelog" label t ## Unreleased +- fix(otlp-aws-exporter): avoid `RecursionError` when `pip_system_certs` replaces `ssl.SSLContext` on Python 3.12 by rebinding stale `botocore`/`urllib3` SSL context references and caching credentials in `AwsAuthSession` - fix(genai-instrumentors): cleanup code, align with OTel GenAI semconv, add missing attributes and fix deprecated usage ([#706](https://github.com/aws-observability/aws-otel-python-instrumentation/pull/706)) - feat(genai-instrumentation): add oldest/latest dependency testing and scheduled instrumentation tests for GenAI libraries diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py index 564bfe9e2..5223ac9cf 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py @@ -2,12 +2,17 @@ # SPDX-License-Identifier: Apache-2.0 import logging +from threading import Lock import requests from botocore.auth import SigV4Auth from botocore.awsrequest import AWSRequest from botocore.session import Session +from amazon.opentelemetry.distro.patches._pip_system_certs_patches import ( + apply_pip_system_certs_compatibility_patch, +) + _logger = logging.getLogger(__name__) @@ -39,13 +44,57 @@ def __init__(self, aws_region: str, service: str, session: Session): self._service: str = service self._session: Session = session + # Cached credentials are resolved on the first ``request()`` call. The returned + # ``Credentials`` / ``RefreshableCredentials`` object handles its own expiry and + # rotation when its attributes are accessed, so caching the reference does not + # cache the underlying credential values. + self._credentials = None + self._credentials_resolved = False + self._credentials_lock = Lock() + super().__init__() + def _ensure_initialized(self) -> None: + """Apply one-time, deferred initialization on the first ``request()`` call. + + This runs after sitecustomize has fully completed (i.e., after any ``.pth`` + based ``ssl.SSLContext`` injection from packages such as ``pip_system_certs``), + which is the only point at which we can safely re-align stale ``SSLContext`` + references captured by ``botocore`` / ``urllib3`` during ADOT startup. + + Credentials are also resolved once here. ``RefreshableCredentials`` handles + rotation internally on attribute access, so caching the reference is safe. + """ + if self._credentials_resolved: + return + + with self._credentials_lock: + if self._credentials_resolved: + return + + # Realign stale ssl.SSLContext references in botocore / urllib3 before + # the first credential resolution constructs an SSL context. This is a + # no-op when pip_system_certs is not installed. + try: + apply_pip_system_certs_compatibility_patch() + except Exception as patch_error: # pylint: disable=broad-except + _logger.warning( + "Failed to apply pip_system_certs compatibility patch: %s", patch_error + ) + + try: + self._credentials = self._session.get_credentials() + except Exception as cred_error: # pylint: disable=broad-except + _logger.error("Failed to load AWS Credentials: %s", cred_error) + self._credentials = None + + self._credentials_resolved = True + def request(self, method, url, *args, data=None, headers=None, **kwargs): - credentials = self._session.get_credentials() + self._ensure_initialized() - if credentials: - signer = SigV4Auth(credentials, self._service, self._aws_region) + if self._credentials: + signer = SigV4Auth(self._credentials, self._service, self._aws_region) request = AWSRequest( method="POST", url=url, @@ -64,6 +113,6 @@ def request(self, method, url, *args, data=None, headers=None, **kwargs): except Exception as signing_error: # pylint: disable=broad-except _logger.error("Failed to sign request: %s", signing_error) else: - _logger.error("Failed to load AWS Credentials: %s") + _logger.error("Failed to load AWS Credentials") return super().request(method=method, url=url, *args, data=data, headers=headers, **kwargs) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py new file mode 100644 index 000000000..955c5ff4c --- /dev/null +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py @@ -0,0 +1,98 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from importlib.metadata import PackageNotFoundError, version +from logging import Logger, getLogger + +_logger: Logger = getLogger(__name__) + +# Module-level guard so the patch is applied at most once per process. +_patch_applied = False + + +def _is_pip_system_certs_installed() -> bool: + """Is the pip_system_certs package installed?""" + try: + dist_version = version("pip_system_certs") + _logger.debug("pip_system_certs is installed: %s", dist_version) + return True + except PackageNotFoundError as exc: + _logger.debug("pip_system_certs is not installed. %s", exc) + return False + + +def apply_pip_system_certs_compatibility_patch() -> None: + """Re-bind stale ``ssl.SSLContext`` references in botocore/urllib3. + + When ``pip_system_certs`` is installed, it injects ``truststore.SSLContext`` as the + process-wide ``ssl.SSLContext`` via a ``.pth`` file. The injection runs in the + ``finally`` block of a ``site.execsitecustomize`` wrapper, i.e. *after* + ``sitecustomize.py`` returns. + + OpenTelemetry's auto-instrumentation entry point (``opentelemetry-instrument``) + runs from ``sitecustomize.py``, which loads the ADOT distro and transitively imports + ``requests`` (via the upstream OTLP HTTP exporters) and ``botocore``. Both of those + modules capture a reference to ``ssl.SSLContext`` at import time. Because the import + happens before ``pip_system_certs``'s injection runs, the captured reference is the + original C-level ``ssl.SSLContext``, not the truststore-wrapped class. + + On Python 3.12, ``ssl.SSLContext.options.__set__`` is implemented as + ``super(SSLContext, SSLContext).options.__set__(self, value)`` where ``SSLContext`` + is resolved from ``ssl``'s module globals at call time. After ``pip_system_certs`` + runs, that name resolves to ``truststore.SSLContext``, and the ``super()`` chain + bounces between the original and truststore classes until the recursion limit + (~978 frames) is exceeded. + + This patch re-binds ``botocore.httpsession.SSLContext`` and + ``urllib3.util.ssl_.SSLContext`` to the *current* ``ssl.SSLContext`` + (i.e., truststore's wrapper). truststore's own ``SSLContext.options`` setter does + not use the recursive ``super()`` pattern, so subsequent SSL context creations + succeed. + + The patch is idempotent: a module-level guard ensures it only runs once per + process. It is a no-op when ``pip_system_certs`` is not installed or when the + references already match ``ssl.SSLContext``. + """ + global _patch_applied # pylint: disable=global-statement + if _patch_applied: + return + + # Only apply the patch when pip_system_certs is installed in user application space. + if not _is_pip_system_certs_installed(): + _patch_applied = True + return + + # pylint: disable=import-outside-toplevel + import ssl + + try: + # pylint: disable=import-outside-toplevel + import botocore.httpsession + + if botocore.httpsession.SSLContext is not ssl.SSLContext: + _logger.debug( + "Rebinding botocore.httpsession.SSLContext to current ssl.SSLContext (pip_system_certs detected)." + ) + botocore.httpsession.SSLContext = ssl.SSLContext + except ImportError: + # botocore not installed; nothing to rebind on the botocore side. + pass + except Exception as exc: # pylint: disable=broad-except + _logger.warning("Failed to rebind botocore.httpsession.SSLContext: %s", exc) + + try: + # pylint: disable=import-outside-toplevel + import urllib3.util.ssl_ + + if urllib3.util.ssl_.SSLContext is not ssl.SSLContext: + _logger.debug( + "Rebinding urllib3.util.ssl_.SSLContext to current ssl.SSLContext (pip_system_certs detected)." + ) + urllib3.util.ssl_.SSLContext = ssl.SSLContext + except ImportError: + # urllib3 not installed; nothing to rebind. + pass + except Exception as exc: # pylint: disable=broad-except + _logger.warning("Failed to rebind urllib3.util.ssl_.SSLContext: %s", exc) + + _patch_applied = True diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index 11babbb7b..2ec3f398e 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -47,3 +47,41 @@ def test_aws_auth_session(self, _, __): self.assertIn(AUTHORIZATION_HEADER, actual_headers) self.assertIn(X_AMZ_DATE_HEADER, actual_headers) self.assertIn(X_AMZ_SECURITY_TOKEN_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + def test_credentials_are_resolved_once(self, mock_get_credentials, _): + """Credentials must be resolved only once across multiple ``request()`` calls. + + This is the hot-path mitigation for the pip_system_certs RecursionError: each + ``get_credentials()`` call walks the credential resolver chain, which constructs + a urllib3 SSL context. Caching the returned object (``RefreshableCredentials`` + rotates internally on attribute access) ensures the SSL context is created at + most once per exporter, not once per export. + """ + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) + + for _ in range(5): + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers={}) + + self.assertEqual(mock_get_credentials.call_count, 1) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session" + ".apply_pip_system_certs_compatibility_patch" + ) + def test_pip_system_certs_patch_invoked_on_first_request(self, mock_apply_patch, _, __): + """The ssl.SSLContext rebind helper is invoked on the first ``request()`` call + and not re-invoked on subsequent calls. + + The patch itself is a no-op when pip_system_certs is not installed, so this + test only asserts the call site, not the patch behavior.""" + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers={}) + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers={}) + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers={}) + + self.assertEqual(mock_apply_patch.call_count, 1) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py new file mode 100644 index 000000000..43b8caf27 --- /dev/null +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py @@ -0,0 +1,99 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +from importlib.metadata import PackageNotFoundError +from unittest import TestCase +from unittest.mock import patch + +from amazon.opentelemetry.distro.patches import _pip_system_certs_patches +from amazon.opentelemetry.distro.patches._pip_system_certs_patches import ( + apply_pip_system_certs_compatibility_patch, +) + + +class TestPipSystemCertsPatches(TestCase): + def setUp(self) -> None: + # Reset the module-level guard before every test so each test exercises the + # full code path. + _pip_system_certs_patches._patch_applied = False + + def tearDown(self) -> None: + # Leave the guard in a clean state for tests that follow. + _pip_system_certs_patches._patch_applied = False + + @patch("amazon.opentelemetry.distro.patches._pip_system_certs_patches.version") + def test_no_op_when_pip_system_certs_not_installed(self, mock_version): + """When pip_system_certs is not installed, the patch is a no-op and does not + touch botocore/urllib3 module globals.""" + mock_version.side_effect = PackageNotFoundError("pip_system_certs") + + # pylint: disable=import-outside-toplevel + import botocore.httpsession + import urllib3.util.ssl_ + + sentinel_class = type("SentinelSSLContext", (), {}) + + with patch.object(botocore.httpsession, "SSLContext", sentinel_class): + with patch.object(urllib3.util.ssl_, "SSLContext", sentinel_class): + apply_pip_system_certs_compatibility_patch() + + # References must remain untouched when pip_system_certs is not present. + self.assertIs(botocore.httpsession.SSLContext, sentinel_class) + self.assertIs(urllib3.util.ssl_.SSLContext, sentinel_class) + + self.assertTrue(_pip_system_certs_patches._patch_applied) + + @patch("amazon.opentelemetry.distro.patches._pip_system_certs_patches.version") + def test_rebinds_stale_references_when_installed(self, mock_version): + """When pip_system_certs is installed and botocore/urllib3 hold stale + ``ssl.SSLContext`` references, the patch rebinds them to the current + ``ssl.SSLContext``.""" + mock_version.return_value = "5.3" + + # pylint: disable=import-outside-toplevel + import ssl + import botocore.httpsession + import urllib3.util.ssl_ + + # Simulate the post-injection state: ssl.SSLContext has been replaced with + # truststore's wrapper, but botocore/urllib3 still hold the original. + original_ssl_context = ssl.SSLContext + truststore_like = type("TruststoreSSLContext", (), {}) + + with patch.object(botocore.httpsession, "SSLContext", original_ssl_context): + with patch.object(urllib3.util.ssl_, "SSLContext", original_ssl_context): + with patch.object(ssl, "SSLContext", truststore_like): + apply_pip_system_certs_compatibility_patch() + + self.assertIs(botocore.httpsession.SSLContext, truststore_like) + self.assertIs(urllib3.util.ssl_.SSLContext, truststore_like) + + @patch("amazon.opentelemetry.distro.patches._pip_system_certs_patches.version") + def test_no_op_when_references_already_match(self, mock_version): + """When references already match the current ``ssl.SSLContext``, the patch + leaves them untouched (idempotent).""" + mock_version.return_value = "5.3" + + # pylint: disable=import-outside-toplevel + import ssl + import botocore.httpsession + import urllib3.util.ssl_ + + current = ssl.SSLContext + + with patch.object(botocore.httpsession, "SSLContext", current): + with patch.object(urllib3.util.ssl_, "SSLContext", current): + apply_pip_system_certs_compatibility_patch() + + self.assertIs(botocore.httpsession.SSLContext, current) + self.assertIs(urllib3.util.ssl_.SSLContext, current) + + @patch("amazon.opentelemetry.distro.patches._pip_system_certs_patches.version") + def test_runs_only_once(self, mock_version): + """The patch is guarded so the package detection only runs on the first call.""" + mock_version.side_effect = PackageNotFoundError("pip_system_certs") + + apply_pip_system_certs_compatibility_patch() + apply_pip_system_certs_compatibility_patch() + apply_pip_system_certs_compatibility_patch() + + self.assertEqual(mock_version.call_count, 1) From 492e834bb14d12bdbe5dcb4484547d04eb2add04 Mon Sep 17 00:00:00 2001 From: victor wang Date: Tue, 2 Jun 2026 15:59:11 -0700 Subject: [PATCH 2/3] fix: fix the CR build failure --- .../otlp/aws/common/aws_auth_session.py | 19 +++- .../patches/_pip_system_certs_patches.py | 12 +-- .../otlp/aws/common/test_aws_auth_session.py | 89 +++++++++++++++++++ .../patches/test_pip_system_certs_patches.py | 46 ++++++++++ 4 files changed, 158 insertions(+), 8 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py index 5223ac9cf..23186e445 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py @@ -64,6 +64,17 @@ def _ensure_initialized(self) -> None: Credentials are also resolved once here. ``RefreshableCredentials`` handles rotation internally on attribute access, so caching the reference is safe. + + On a transient credential resolution failure (e.g., IMDS timeout), the + ``_credentials_resolved`` flag is left ``False`` so the next ``request()`` call + will retry. Only a successful resolution latches the flag, matching the + original "retry every request" behavior for the failure path while keeping + the SSL-context-construction cost amortized to once on the success path. + + Note: the read of ``_credentials_resolved`` outside the lock is safe because + Python's GIL makes attribute reads/writes atomic. On free-threaded Python + builds (3.13t+) this would need a memory barrier; revisit if/when we + support those. """ if self._credentials_resolved: return @@ -78,15 +89,17 @@ def _ensure_initialized(self) -> None: try: apply_pip_system_certs_compatibility_patch() except Exception as patch_error: # pylint: disable=broad-except - _logger.warning( - "Failed to apply pip_system_certs compatibility patch: %s", patch_error - ) + _logger.warning("Failed to apply pip_system_certs compatibility patch: %s", patch_error) try: self._credentials = self._session.get_credentials() except Exception as cred_error: # pylint: disable=broad-except + # Don't latch _credentials_resolved on failure - leave it False so + # the next request retries credential resolution. This preserves + # self-healing behavior on transient errors (e.g., IMDS timeouts). _logger.error("Failed to load AWS Credentials: %s", cred_error) self._credentials = None + return self._credentials_resolved = True diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py index 955c5ff4c..eaad4fde2 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_pip_system_certs_patches.py @@ -7,6 +7,10 @@ _logger: Logger = getLogger(__name__) # Module-level guard so the patch is applied at most once per process. +# The plain bool is intentional: the patch body itself is idempotent +# (re-running it produces the same final state), so a benign race between two +# threads where both observe ``_patch_applied is False`` and both run the rebind +# costs an extra dict assignment and nothing more. We don't pay for a lock here. _patch_applied = False @@ -51,7 +55,9 @@ def apply_pip_system_certs_compatibility_patch() -> None: The patch is idempotent: a module-level guard ensures it only runs once per process. It is a no-op when ``pip_system_certs`` is not installed or when the - references already match ``ssl.SSLContext``. + references already match ``ssl.SSLContext``. ``ImportError`` is the only + expected failure (e.g., ``botocore`` or ``urllib3`` not installed in some + minimal environment) and is silently skipped per library. """ global _patch_applied # pylint: disable=global-statement if _patch_applied: @@ -77,8 +83,6 @@ def apply_pip_system_certs_compatibility_patch() -> None: except ImportError: # botocore not installed; nothing to rebind on the botocore side. pass - except Exception as exc: # pylint: disable=broad-except - _logger.warning("Failed to rebind botocore.httpsession.SSLContext: %s", exc) try: # pylint: disable=import-outside-toplevel @@ -92,7 +96,5 @@ def apply_pip_system_certs_compatibility_patch() -> None: except ImportError: # urllib3 not installed; nothing to rebind. pass - except Exception as exc: # pylint: disable=broad-except - _logger.warning("Failed to rebind urllib3.util.ssl_.SSLContext: %s", exc) _patch_applied = True diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py index 2ec3f398e..cbc84908f 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/exporter/otlp/aws/common/test_aws_auth_session.py @@ -66,6 +66,39 @@ def test_credentials_are_resolved_once(self, mock_get_credentials, _): self.assertEqual(mock_get_credentials.call_count, 1) + @patch("requests.Session.request", return_value=requests.Response()) + def test_credentials_retry_after_transient_failure(self, _): + """A transient ``get_credentials()`` failure must NOT latch the resolved + flag. The next ``request()`` call must retry resolution. This preserves + self-healing behavior on transient errors (e.g., IMDS timeouts) and matches + the pre-fix behavior on the failure path. + """ + # First call raises, subsequent calls succeed. + get_credentials_mock = patch( + "botocore.session.Session.get_credentials", + side_effect=[RuntimeError("transient"), mock_credentials, mock_credentials], + ) + with get_credentials_mock as mock_get_credentials: + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) + + # 1st request: get_credentials raises, no auth headers added. + headers_first = {} + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=headers_first) + self.assertNotIn(AUTHORIZATION_HEADER, headers_first) + + # 2nd request: get_credentials succeeds, auth headers must appear. + headers_second = {} + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=headers_second) + self.assertIn(AUTHORIZATION_HEADER, headers_second) + + # 3rd request: cached credentials reused, no further get_credentials calls. + headers_third = {} + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=headers_third) + self.assertIn(AUTHORIZATION_HEADER, headers_third) + + # Two resolution attempts: one failed, one succeeded; third request reuses cache. + self.assertEqual(mock_get_credentials.call_count, 2) + @patch("requests.Session.request", return_value=requests.Response()) @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) @patch( @@ -85,3 +118,59 @@ def test_pip_system_certs_patch_invoked_on_first_request(self, mock_apply_patch, session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers={}) self.assertEqual(mock_apply_patch.call_count, 1) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch( + "amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session" + ".apply_pip_system_certs_compatibility_patch", + side_effect=RuntimeError("simulated patch failure"), + ) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + def test_patch_failure_does_not_break_request(self, _, __, ___): + """If the SSL-context-rebind helper itself raises, the failure is logged + but ``request()`` still proceeds and signs successfully. The patch is + defensive infrastructure, not a hard precondition.""" + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) + actual_headers: dict = {} + + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + self.assertIn(AUTHORIZATION_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + def test_signing_failure_does_not_break_request(self, _, __): + """If SigV4 signing itself raises, ``request()`` still issues the + unauthenticated request rather than crashing the caller.""" + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) + + with patch("amazon.opentelemetry.distro.exporter.otlp.aws.common.aws_auth_session.SigV4Auth") as mock_sigv4: + mock_sigv4.return_value.add_auth.side_effect = RuntimeError("signing boom") + actual_headers: dict = {} + # Should not raise + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers=actual_headers) + + # No auth header because signing raised before headers could be merged. + self.assertNotIn(AUTHORIZATION_HEADER, actual_headers) + + @patch("requests.Session.request", return_value=requests.Response()) + @patch("botocore.session.Session.get_credentials", return_value=mock_credentials) + def test_concurrent_requests_resolve_credentials_once(self, mock_get_credentials, _): + """Two threads racing on the first request must both observe a single + credential resolution. The double-checked locking in ``_ensure_initialized`` + is what provides this guarantee.""" + # pylint: disable=import-outside-toplevel + from threading import Thread + + session = AwsAuthSession("us-east-1", "xray", get_aws_session()) + + def call(): + session.request("POST", AWS_OTLP_TRACES_ENDPOINT, data="", headers={}) + + threads = [Thread(target=call) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + + self.assertEqual(mock_get_credentials.call_count, 1) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py index 43b8caf27..e10bdc332 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py @@ -97,3 +97,49 @@ def test_runs_only_once(self, mock_version): apply_pip_system_certs_compatibility_patch() self.assertEqual(mock_version.call_count, 1) + + @patch("amazon.opentelemetry.distro.patches._pip_system_certs_patches.version") + def test_botocore_import_failure_does_not_crash(self, mock_version): + """If botocore.httpsession is absent the patch silently skips it and + still processes urllib3.""" + mock_version.return_value = "5.3" + + # pylint: disable=import-outside-toplevel + import sys + import urllib3.util.ssl_ + + saved = sys.modules.get("botocore.httpsession") + # Setting to None forces Python to raise ImportError on `import botocore.httpsession`. + sys.modules["botocore.httpsession"] = None + try: + apply_pip_system_certs_compatibility_patch() + finally: + if saved is None: + sys.modules.pop("botocore.httpsession", None) + else: + sys.modules["botocore.httpsession"] = saved + + # Patch should still mark itself as applied even when one library is missing. + self.assertTrue(_pip_system_certs_patches._patch_applied) + # urllib3 path should still have been considered. + self.assertTrue(hasattr(urllib3.util.ssl_, "SSLContext")) + + @patch("amazon.opentelemetry.distro.patches._pip_system_certs_patches.version") + def test_urllib3_import_failure_does_not_crash(self, mock_version): + """If urllib3.util.ssl_ is absent the patch silently skips it.""" + mock_version.return_value = "5.3" + + # pylint: disable=import-outside-toplevel + import sys + + saved = sys.modules.get("urllib3.util.ssl_") + sys.modules["urllib3.util.ssl_"] = None + try: + apply_pip_system_certs_compatibility_patch() + finally: + if saved is None: + sys.modules.pop("urllib3.util.ssl_", None) + else: + sys.modules["urllib3.util.ssl_"] = saved + + self.assertTrue(_pip_system_certs_patches._patch_applied) From cccf8de09587a2468afed0baf074b43bb4d73347 Mon Sep 17 00:00:00 2001 From: victor wang Date: Wed, 3 Jun 2026 12:33:42 -0700 Subject: [PATCH 3/3] fix: lint fix isort import ordering --- .../distro/exporter/otlp/aws/common/aws_auth_session.py | 4 +--- .../distro/patches/test_pip_system_certs_patches.py | 7 ++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py index 23186e445..336d09ea2 100644 --- a/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py +++ b/aws-opentelemetry-distro/src/amazon/opentelemetry/distro/exporter/otlp/aws/common/aws_auth_session.py @@ -9,9 +9,7 @@ from botocore.awsrequest import AWSRequest from botocore.session import Session -from amazon.opentelemetry.distro.patches._pip_system_certs_patches import ( - apply_pip_system_certs_compatibility_patch, -) +from amazon.opentelemetry.distro.patches._pip_system_certs_patches import apply_pip_system_certs_compatibility_patch _logger = logging.getLogger(__name__) diff --git a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py index e10bdc332..38e405ae2 100644 --- a/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py +++ b/aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/patches/test_pip_system_certs_patches.py @@ -5,9 +5,7 @@ from unittest.mock import patch from amazon.opentelemetry.distro.patches import _pip_system_certs_patches -from amazon.opentelemetry.distro.patches._pip_system_certs_patches import ( - apply_pip_system_certs_compatibility_patch, -) +from amazon.opentelemetry.distro.patches._pip_system_certs_patches import apply_pip_system_certs_compatibility_patch class TestPipSystemCertsPatches(TestCase): @@ -51,6 +49,7 @@ def test_rebinds_stale_references_when_installed(self, mock_version): # pylint: disable=import-outside-toplevel import ssl + import botocore.httpsession import urllib3.util.ssl_ @@ -75,6 +74,7 @@ def test_no_op_when_references_already_match(self, mock_version): # pylint: disable=import-outside-toplevel import ssl + import botocore.httpsession import urllib3.util.ssl_ @@ -106,6 +106,7 @@ def test_botocore_import_failure_does_not_crash(self, mock_version): # pylint: disable=import-outside-toplevel import sys + import urllib3.util.ssl_ saved = sys.modules.get("botocore.httpsession")