diff --git a/.changelog/5266.added b/.changelog/5266.added new file mode 100644 index 00000000000..66f88e9f5d0 --- /dev/null +++ b/.changelog/5266.added @@ -0,0 +1 @@ +`opentelemetry-api`, `opentelemetry-sdk`: add support for extended attribute values everywhere. \ No newline at end of file diff --git a/.changelog/5266.changed b/.changelog/5266.changed new file mode 100644 index 00000000000..0f0c52be216 --- /dev/null +++ b/.changelog/5266.changed @@ -0,0 +1,2 @@ +The public `opentelemetry.util.types.AttributeValue` type in package `opentelemetry-api` is being expanded to include `None`, heterogeneous sequences of primitive types (and nested sequences) as opposed to only homogeneous primitive sequences, and Mappings of strings to any primitive types or sequences/mappings (which themselves must only contain primitive types or sequences/mappings validated the same way). +If a `bytes` type is found as an AttributeValue, it will no longer be utf-8 decoded to a string, instead it will be passed along as is in accordance with the OTEL spec, since `bytes` is a valid type in the OTLP proto. \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 01b386beacc..b8a6a47bd21 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,14 +26,17 @@ settings.configure() -# Provide AnyValue in opentelemetry.attributes module's namespace so the -# "AnyValue" forward reference in opentelemetry.util.types._ExtendedAttributes -# resolves when sphinx_autodoc_typehints calls typing.get_type_hints() on -# BoundedAttributes (whose __globals__ is the attributes module). Docs-only. -import opentelemetry.attributes # noqa: E402 -from opentelemetry.util.types import AnyValue as _AnyValue # noqa: E402 +# Docs-only: resolves the "AnyValue" forward reference wherever +# get_type_hints() evaluates it. AnyValue is a recursive alias, so +# modules that import it only under TYPE_CHECKING hit a NameError. +# TODO: https://github.com/open-telemetry/opentelemetry-python/issues/5304 +# this can be removed by importing AnyValue at runtime in the +# modules that annotate with it +import builtins # noqa: E402 -opentelemetry.attributes.AnyValue = _AnyValue +from opentelemetry.util.types import AnyValue # noqa: E402 + +builtins.AnyValue = AnyValue source_dirs = [ @@ -186,10 +189,6 @@ "py:class", "AnyValue", ), - ( - "py:class", - "_ExtendedAttributes", - ), ("py:class", "Token"), # ``from os import PathLike`` renders as the bare name ``PathLike`` in the # file exporter type hints, which sphinx cannot resolve to os.PathLike. diff --git a/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/__init__.py b/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/__init__.py index a59ab85fe47..2bec5e9a605 100644 --- a/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/__init__.py @@ -44,7 +44,7 @@ ) from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.util.instrumentation import InstrumentationScope -from opentelemetry.util.types import _ExtendedAttributes +from opentelemetry.util.types import Attributes _logger = logging.getLogger(__name__) @@ -109,7 +109,7 @@ def _encode_trace_id(trace_id: int) -> bytes: def _encode_attributes( - attributes: _ExtendedAttributes | None, + attributes: Attributes, ) -> list[JSONKeyValue]: if not attributes: return [] diff --git a/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/_log_encoder/__init__.py b/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/_log_encoder/__init__.py index 104d17dbc8f..ccd7c173525 100644 --- a/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/_log_encoder/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-json-common/src/opentelemetry/exporter/otlp/json/common/_internal/_log_encoder/__init__.py @@ -53,7 +53,7 @@ def _encode_log(readable_log_record: ReadableLogRecord) -> JSONLogRecord: body=_encode_value(readable_log_record.log_record.body), severity_text=readable_log_record.log_record.severity_text, attributes=_encode_attributes( - cast(Attributes, readable_log_record.log_record.attributes), + cast(Attributes, readable_log_record.log_record.attributes) ), dropped_attributes_count=readable_log_record.dropped_attributes, severity_number=getattr( diff --git a/exporter/opentelemetry-exporter-otlp-proto-common/src/opentelemetry/exporter/otlp/proto/common/_internal/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-common/src/opentelemetry/exporter/otlp/proto/common/_internal/__init__.py index 7528b5994b8..03db470a42f 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-common/src/opentelemetry/exporter/otlp/proto/common/_internal/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-common/src/opentelemetry/exporter/otlp/proto/common/_internal/__init__.py @@ -27,7 +27,7 @@ ) from opentelemetry.sdk.trace import Resource from opentelemetry.sdk.util.instrumentation import InstrumentationScope -from opentelemetry.util.types import _ExtendedAttributes +from opentelemetry.util.types import Attributes _logger = logging.getLogger(__name__) @@ -89,9 +89,7 @@ def _encode_trace_id(trace_id: int) -> bytes: return trace_id.to_bytes(length=16, byteorder="big", signed=False) -def _encode_attributes( - attributes: _ExtendedAttributes | None, -) -> list[PB2KeyValue]: +def _encode_attributes(attributes: Attributes) -> list[PB2KeyValue]: if not attributes: return [] pb2_attributes = [] diff --git a/exporter/opentelemetry-exporter-otlp-proto-grpc/benchmarks/test_benchmark_trace_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-grpc/benchmarks/test_benchmark_trace_exporter.py index db583f69068..b8ae543ce45 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-grpc/benchmarks/test_benchmark_trace_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-grpc/benchmarks/test_benchmark_trace_exporter.py @@ -30,7 +30,7 @@ def __init__(self, channel): @patch( - "opentelemetry.exporter.otlp.proto.grpc.trace_exporter.OTLPSpanExporter._stub", + "opentelemetry.exporter.otlp.proto.grpc.trace_exporter.TraceServiceStub", new=MockTraceServiceStub, ) def test_simple_span_processor(benchmark): @@ -51,7 +51,7 @@ def create_spans_to_be_exported(): @patch( - "opentelemetry.exporter.otlp.proto.grpc.trace_exporter.OTLPSpanExporter._stub", + "opentelemetry.exporter.otlp.proto.grpc.trace_exporter.TraceServiceStub", new=MockTraceServiceStub, ) def test_batch_span_processor(benchmark): diff --git a/opentelemetry-api/src/opentelemetry/_events/__init__.py b/opentelemetry-api/src/opentelemetry/_events/__init__.py index e9e60f74f6d..29f5853fd5e 100644 --- a/opentelemetry-api/src/opentelemetry/_events/__init__.py +++ b/opentelemetry-api/src/opentelemetry/_events/__init__.py @@ -18,7 +18,7 @@ from opentelemetry.trace.span import TraceFlags from opentelemetry.util._once import Once from opentelemetry.util._providers import _load_provider -from opentelemetry.util.types import AnyValue, _ExtendedAttributes +from opentelemetry.util.types import AnyValue, Attributes _logger = getLogger(__name__) @@ -37,7 +37,7 @@ def __init__( trace_flags: TraceFlags | None = None, body: AnyValue | None = None, severity_number: SeverityNumber | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ): attributes = attributes or {} event_attributes = { @@ -66,7 +66,7 @@ def __init__( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ): self._name = name self._version = version @@ -97,7 +97,7 @@ def __init__( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ): super().__init__( name=name, @@ -138,7 +138,7 @@ def get_event_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> EventLogger: """Returns an EventLoggerProvider for use.""" @@ -153,7 +153,7 @@ def get_event_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> EventLogger: return NoOpEventLogger( name, version=version, schema_url=schema_url, attributes=attributes @@ -170,7 +170,7 @@ def get_event_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> EventLogger: if _EVENT_LOGGER_PROVIDER: return _EVENT_LOGGER_PROVIDER.get_event_logger( @@ -244,7 +244,7 @@ def get_event_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_logger_provider: EventLoggerProvider | None = None, ) -> EventLogger: if event_logger_provider is None: diff --git a/opentelemetry-api/src/opentelemetry/_logs/_internal/__init__.py b/opentelemetry-api/src/opentelemetry/_logs/_internal/__init__.py index 2319a461c9b..4976f1e531a 100644 --- a/opentelemetry-api/src/opentelemetry/_logs/_internal/__init__.py +++ b/opentelemetry-api/src/opentelemetry/_logs/_internal/__init__.py @@ -40,7 +40,7 @@ from opentelemetry.trace.span import TraceFlags from opentelemetry.util._once import Once from opentelemetry.util._providers import _load_provider -from opentelemetry.util.types import AnyValue, _ExtendedAttributes +from opentelemetry.util.types import AnyValue, Attributes _logger = getLogger(__name__) @@ -63,7 +63,7 @@ def __init__( severity_text: str | None = None, severity_number: SeverityNumber | None = None, body: AnyValue = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: ... @@ -83,7 +83,7 @@ def __init__( severity_text: str | None = None, severity_number: SeverityNumber | None = None, body: AnyValue = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> None: ... def __init__( @@ -98,7 +98,7 @@ def __init__( severity_text: str | None = None, severity_number: SeverityNumber | None = None, body: AnyValue = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: @@ -129,7 +129,7 @@ def __init__( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> None: super().__init__() self._name = name @@ -147,7 +147,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: ... @@ -169,7 +169,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: @@ -192,7 +192,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: ... @@ -213,7 +213,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: @@ -226,7 +226,7 @@ def __init__( # pylint: disable=super-init-not-called name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ): self._name = name self._version = version @@ -260,7 +260,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: ... @@ -281,7 +281,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: @@ -312,7 +312,7 @@ def get_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> Logger: """Returns a `Logger` for use by the given instrumentation library. @@ -351,7 +351,7 @@ def get_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> Logger: """Returns a NoOpLogger.""" return NoOpLogger( @@ -365,7 +365,7 @@ def get_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> Logger: if _LOGGER_PROVIDER: return _LOGGER_PROVIDER.get_logger( @@ -428,7 +428,7 @@ def get_logger( instrumenting_library_version: str = "", logger_provider: LoggerProvider | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> Logger: """Returns a `Logger` for use within a python process. diff --git a/opentelemetry-api/src/opentelemetry/attributes/__init__.py b/opentelemetry-api/src/opentelemetry/attributes/__init__.py index 15519ce0c2c..317e4b932f1 100644 --- a/opentelemetry-api/src/opentelemetry/attributes/__init__.py +++ b/opentelemetry-api/src/opentelemetry/attributes/__init__.py @@ -4,265 +4,140 @@ import copy import logging import threading -from collections import OrderedDict from collections.abc import Mapping, MutableMapping, Sequence +from types import MappingProxyType, NoneType +from typing import overload -from opentelemetry.util import types - -# bytes are accepted as a user supplied value for attributes but -# decoded to strings internally. -_VALID_ATTR_VALUE_TYPES = (bool, str, bytes, int, float) -# AnyValue possible values -_VALID_ANY_VALUE_TYPES = ( - type(None), - bool, - bytes, - int, - float, - str, - Sequence, - Mapping, -) - - -# TODO: Remove this workaround and revert to the simpler implementation -# once Python 3.9 support is dropped (planned around May 2026). -# This exists only to avoid issues caused by deprecated behavior in 3.9. -def _type_name(t): - return getattr(t, "__name__", getattr(t, "_name", repr(t))) +from typing_extensions import deprecated +from opentelemetry.util import types _logger = logging.getLogger(__name__) -# pylint: disable=too-many-return-statements -# pylint: disable=too-many-branches -def _clean_attribute( - key: str, value: types.AttributeValue, max_len: int | None -) -> types.AttributeValue | tuple[str | int | float, ...] | None: - """Checks if attribute value is valid and cleans it if required. - - The function returns the cleaned value or None if the value is not valid. +def _clean_attribute_value( + value: types.AttributeValue, + max_string_value_length: int | None, +) -> types.AttributeValue: + """Recursively checks if an attribute value is valid and cleans it if required. - An attribute value is valid if it is either: - - A primitive type: string, boolean, double precision floating - point (IEEE 754-1985) or integer. - - An array of primitive type values. The array MUST be homogeneous, - i.e. it MUST NOT contain values of different types. + String values are truncated to max_string_value_length if provided. + Anything that isn't of `types.AttributeValue`, we attempt to cast to `str`. + If this fails, the value is replaced with None. Sequence's are converted to tuples and mappings + are converted to MappingProxyType, these are immutable data structures, so if the sequence/map + is modified outside of this method, it will not affect the value in this container. - An attribute needs cleansing if: - - Its length is greater than the maximum allowed length. - - It needs to be encoded/decoded e.g, bytes to strings. + Returns: + The recursively cleaned AttributeValue. """ - - if not (key and isinstance(key, str)): - _logger.warning("invalid key `%s`. must be non-empty string.", key) - return None - - if isinstance(value, _VALID_ATTR_VALUE_TYPES): - if isinstance(value, bytes): - try: - value = value.decode() - except UnicodeDecodeError: - _logger.warning("Byte attribute could not be decoded.") - return None - if max_len is not None and isinstance(value, str): - value = value[:max_len] + if isinstance(value, (NoneType, bool, int, float, bytes)): + return value + if isinstance(value, str): + if ( + max_string_value_length is not None + and len(value) > max_string_value_length + ): + _logger.warning( + "String attribute value exceeds max length of %d, truncating.", + max_string_value_length, + ) + value = value[:max_string_value_length] return value - if isinstance(value, Sequence): - sequence_first_valid_type = None - cleaned_seq = [] - - for element in value: - if isinstance(element, bytes): - try: - element = element.decode() - except UnicodeDecodeError: - _logger.warning("Byte attribute could not be decoded.") - cleaned_seq.append(None) - continue - if max_len is not None and isinstance(element, str): - element = element[:max_len] - elif element is None: - cleaned_seq.append(None) - continue - - element_type = type(element) - # Reject attribute value if sequence contains a value with an incompatible type. - if element_type not in _VALID_ATTR_VALUE_TYPES: - _logger.warning( - "Invalid type %s in attribute '%s' value sequence. Expected one of " - "%s or None", - element_type.__name__, - key, - [ - valid_type.__name__ - for valid_type in _VALID_ATTR_VALUE_TYPES - ], - ) - return None - - # The type of the sequence must be homogeneous. The first non-None - # element determines the type of the sequence - if sequence_first_valid_type is None: - sequence_first_valid_type = element_type - # use equality instead of isinstance as isinstance(True, int) evaluates to True - elif element_type != sequence_first_valid_type: + return tuple( + _clean_attribute_value(v, max_string_value_length) for v in value + ) + if isinstance(value, Mapping): + cleaned_mapping = {} + for key, val in value.items(): + # Spec says to convert unknown types to strings if possible (here and below too). + if not isinstance(key, str): _logger.warning( - "Attribute %r mixes types %s and %s in attribute value sequence", + "Invalid type `%s` for attribute key `%s`, must be a str. Key's `__str__/__repr__` method will be called if it exists, otherwise the key/value pair will be dropped.", + type(key), key, - sequence_first_valid_type.__name__, - type(element).__name__, ) - return None - - cleaned_seq.append(element) - - # Freeze mutable sequences defensively - return tuple(cleaned_seq) - + # Calling str(x) will use an object's `__str__` method if it exists, otherwise it will use it's `__repr__` method. + # If neither is defined it uses the base class's `object.__repr__` method, which returns a string that is hard to understand. + # So in that case we drop the key/value pair. + if ( + type(key).__str__ is not object.__str__ + or type(key).__repr__ is not object.__repr__ + ): + key = str(key) + else: + continue + cleaned_mapping[key] = _clean_attribute_value( + val, max_string_value_length + ) + return MappingProxyType(cleaned_mapping) _logger.warning( - "Invalid type %s for attribute '%s' value. Expected one of %s or a " - "sequence of those types", - type(value).__name__, - key, - [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES], + "Invalid type `%s` for attribute value. Expected one of bool, str, None, bytes, int, float or a " + "Mapping or Sequence of those types. Value's __str__ method will be called if it exists, otherwise the value will be replaced with None.", + type(value), ) - return None - - -def _clean_extended_attribute_value( # pylint: disable=too-many-branches - value: types.AnyValue, max_len: int | None -) -> types.AnyValue: - # for primitive types just return the value and eventually shorten the string length - if value is None or isinstance(value, _VALID_ATTR_VALUE_TYPES): - if max_len is not None and isinstance(value, str): - value = value[:max_len] - return value - - if isinstance(value, Mapping): - cleaned_dict: dict[str, types.AnyValue] = {} - for key, element in value.items(): - # skip invalid keys - if not (key and isinstance(key, str)): - _logger.warning( - "invalid key `%s`. must be non-empty string.", key - ) - continue - - cleaned_dict[key] = _clean_extended_attribute( - key=key, value=element, max_len=max_len - ) - - return cleaned_dict - - if isinstance(value, Sequence): - sequence_first_valid_type = None - cleaned_seq: list[types.AnyValue] = [] - - for element in value: - if element is None: - cleaned_seq.append(element) - continue - - if max_len is not None and isinstance(element, str): - element = element[:max_len] - - element_type = type(element) - if element_type not in _VALID_ATTR_VALUE_TYPES: - element = _clean_extended_attribute_value( - element, max_len=max_len - ) - element_type = type(element) # type: ignore - - # The type of the sequence must be homogeneous. The first non-None - # element determines the type of the sequence - if sequence_first_valid_type is None: - sequence_first_valid_type = element_type - # use equality instead of isinstance as isinstance(True, int) evaluates to True - elif element_type != sequence_first_valid_type: - _logger.warning( - "Mixed types %s and %s in attribute value sequence", - sequence_first_valid_type.__name__, - type(element).__name__, - ) - return None - - cleaned_seq.append(element) - - # Freeze mutable sequences defensively - return tuple(cleaned_seq) - - # Some applications such as Django add values to log records whose types fall outside the - # primitive types and `_VALID_ANY_VALUE_TYPES`, i.e., they are not of type `AnyValue`. - # Rather than attempt to whitelist every possible instrumentation, we stringify those values here - # so they can still be represented as attributes, falling back to the original TypeError only if - # converting to string raises. - try: + if ( + type(value).__str__ is not object.__str__ + or type(value).__repr__ is not object.__repr__ + ): return str(value) - except Exception: - raise TypeError( - f"Invalid type {type(value).__name__} for attribute value. " - f"Expected one of {[_type_name(valid_type) for valid_type in _VALID_ANY_VALUE_TYPES]} or a " - "sequence of those types", - ) + return None -def _clean_extended_attribute( - key: str, value: types.AnyValue, max_len: int | None -) -> types.AnyValue: - """Checks if attribute value is valid and cleans it if required. +class BoundedAttributes(MutableMapping): + """A dict with a fixed max capacity which cleans and potentially drops values to ensure they are valid attribute values. - The function returns the cleaned value or None if the value is not valid. + Args: + maxlen: The maximum number of attributes to store, use None for no limit. + attributes: The initial attributes to store. + immutable: Defaults to true. Whether to allow adding/removing of attributes after the initialisation of the instance. + max_value_len: The maximum length of string values, use None for no limit. + extended_attributes: Deprecated. Kept for backwards compatibility. Extended attributes are now always used for attributes everywhere. - An attribute value is valid if it is an AnyValue. - An attribute needs cleansing if: - - Its length is greater than the maximum allowed length. + When the dict is full and a new element is added, the oldest element is dropped. Attributes are made to be immutable when set in this container. + So passing a mutable list as an attribute value, and then mutating it after will not change it's value in this container. """ - if not (key and isinstance(key, str)): - _logger.warning("invalid key `%s`. must be non-empty string.", key) - return None - - try: - return _clean_extended_attribute_value(value, max_len=max_len) - except TypeError as exception: - _logger.warning("Attribute %s: %s", key, exception) - return None - - -class BoundedAttributes(MutableMapping): # type: ignore - """An ordered dict with a fixed max capacity. + @overload + def __init__( + self, + maxlen: int | None = None, + attributes: types.Attributes = None, + immutable: bool = True, + max_value_len: int | None = None, + ) -> None: ... - Oldest elements are dropped when the dict is full and a new element is - added. - """ + @overload + @deprecated( + "Creating BoundedAttributes with `extended_attributes` set is deprecated. " + "The `extended_attributes` param is no longer used and will be removed " + "in a future release. Extended attributes are now always used for attributes everywhere." + ) + def __init__( + self, + maxlen: int | None = None, + attributes: types.Attributes = None, + immutable: bool = True, + max_value_len: int | None = None, + extended_attributes: bool = False, + ) -> None: ... def __init__( self, maxlen: int | None = None, - attributes: types._ExtendedAttributes | None = None, + attributes: types.Attributes = None, immutable: bool = True, max_value_len: int | None = None, extended_attributes: bool = False, - ): - if maxlen is not None: - if not isinstance(maxlen, int) or maxlen < 0: - raise ValueError( - "maxlen must be valid int greater or equal to 0" - ) + ) -> None: + if maxlen is not None and (not isinstance(maxlen, int) or maxlen < 0): + raise ValueError("maxlen must be valid int greater or equal to 0") + self._dict = {} self.maxlen = maxlen self.dropped = 0 self.max_value_len = max_value_len - self._extended_attributes = extended_attributes - # OrderedDict is not used until the maxlen is reached for efficiency. - - self._dict: ( - MutableMapping[str, types.AnyValue] - | OrderedDict[str, types.AnyValue] - ) = {} self._lock = threading.RLock() + self._immutable = False if attributes: for key, value in attributes.items(): self[key] = value @@ -271,61 +146,64 @@ def __init__( def __repr__(self) -> str: return f"{dict(self._dict)}" - def __getitem__(self, key: str) -> types.AnyValue: + def __getitem__(self, key: str) -> types.AttributeValue: return self._dict[key] def __setitem__(self, key: str, value: types.AnyValue) -> None: - if getattr(self, "_immutable", False): # type: ignore - raise TypeError + if self._immutable: + raise TypeError( + "Cannot mutate this instance, as it was created with immutable=True." + ) if self.maxlen is not None and self.maxlen == 0: with self._lock: self.dropped += 1 - return - if self._extended_attributes: - value = _clean_extended_attribute(key, value, self.max_value_len) - else: - value = _clean_attribute(key, value, self.max_value_len) # type: ignore - if value is None: return + if not key or not isinstance(key, str): + _logger.warning( + "invalid key `%s`. must be non-empty string. Dropping key from attributes.", + key, + ) + self.dropped += 1 + return with self._lock: - self._setitem_locked(key, value) + self._setitem_locked( + key, _clean_attribute_value(value, self.max_value_len) + ) - def _set_items(self, attributes: "types._ExtendedAttributes") -> None: - if getattr(self, "_immutable", False): # type: ignore - raise TypeError + def _set_items(self, attributes: Mapping[str, types.AnyValue]) -> None: + if self._immutable: + raise TypeError( + "Cannot mutate this instance, as it was created with immutable=True." + ) if self.maxlen is not None and self.maxlen == 0: with self._lock: self.dropped += len(attributes) return - cleaned = [] - for key, value in attributes.items(): - if self._extended_attributes: - cv = _clean_extended_attribute(key, value, self.max_value_len) - else: - cv = _clean_attribute(key, value, self.max_value_len) # type: ignore - if cv is None: - continue - cleaned.append((key, cv)) with self._lock: - for key, cv in cleaned: - self._setitem_locked(key, cv) + for key, value in attributes.items(): + self._setitem_locked( + key, _clean_attribute_value(value, self.max_value_len) + ) def _setitem_locked(self, key: str, value: types.AnyValue) -> None: if key in self._dict: del self._dict[key] - elif self.maxlen is not None and len(self._dict) == self.maxlen: - if not isinstance(self._dict, OrderedDict): - self._dict = OrderedDict(self._dict) - self._dict.popitem(last=False) # type: ignore + if self.maxlen is not None and len(self._dict) >= self.maxlen: + _logger.warning( + "Attributes dict is full. Dropping the oldest key-value pair from attributes to make space for the new key-value pair.", + ) + # Dictionaries are insertion ordered in Python, this is the recommended way to get the oldest value. + del self._dict[next(iter(self._dict.keys()))] self.dropped += 1 - self._dict[key] = value # type: ignore + self._dict[key] = value def __delitem__(self, key: str) -> None: - if getattr(self, "_immutable", False): # type: ignore - raise TypeError - with self._lock: - del self._dict[key] + if self._immutable: + raise TypeError( + "Cannot mutate this instance, as it was created with immutable=True." + ) + del self._dict[key] def __iter__(self): if self._immutable: @@ -341,7 +219,6 @@ def __deepcopy__(self, memo: dict) -> "BoundedAttributes": maxlen=self.maxlen, immutable=self._immutable, max_value_len=self.max_value_len, - extended_attributes=self._extended_attributes, ) memo[id(self)] = copy_ with self._lock: @@ -351,5 +228,5 @@ def __deepcopy__(self, memo: dict) -> "BoundedAttributes": copy_.dropped = self.dropped return copy_ - def copy(self): # type: ignore - return self._dict.copy() # type: ignore + def copy(self): + return self._dict.copy() diff --git a/opentelemetry-api/src/opentelemetry/util/types.py b/opentelemetry-api/src/opentelemetry/util/types.py index b8a753871bc..56d6296d11e 100644 --- a/opentelemetry-api/src/opentelemetry/util/types.py +++ b/opentelemetry-api/src/opentelemetry/util/types.py @@ -6,7 +6,7 @@ # This is the implementation of the "Any" type as specified by the specifications of OpenTelemetry data model for logs. # For more details, refer to the OTel specification: # https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#type-any -AnyValue = ( +AnyValue = AttributeValue = ( str | bool | int @@ -16,31 +16,4 @@ | Mapping[str, "AnyValue"] | None ) - -AttributeValue = ( - str - | bool - | int - | float - | Sequence[str] - | Sequence[bool] - | Sequence[int] - | Sequence[float] -) -Attributes = Mapping[str, AttributeValue] | None -AttributesAsKey = tuple[ - tuple[ - str, - str - | bool - | int - | float - | tuple[str | None, ...] - | tuple[bool | None, ...] - | tuple[int | None, ...] - | tuple[float | None, ...], - ], - ..., -] - -_ExtendedAttributes = Mapping[str, "AnyValue"] +Attributes = Mapping[str, AnyValue] | None diff --git a/opentelemetry-api/tests/attributes/test_attributes.py b/opentelemetry-api/tests/attributes/test_attributes.py index fb6259f8c47..dab8b06fc7e 100644 --- a/opentelemetry-api/tests/attributes/test_attributes.py +++ b/opentelemetry-api/tests/attributes/test_attributes.py @@ -6,173 +6,16 @@ import copy import unittest import unittest.mock -from collections.abc import MutableSequence from opentelemetry.attributes import ( BoundedAttributes, - _clean_attribute, - _clean_extended_attribute, - _clean_extended_attribute_value, + _clean_attribute_value, ) -class TestAttributes(unittest.TestCase): - # pylint: disable=invalid-name - def assertValid(self, value, key="k"): - expected = value - if isinstance(value, MutableSequence): - expected = tuple(value) - self.assertEqual(_clean_attribute(key, value, None), expected) - - def assertInvalid(self, value, key="k"): - self.assertIsNone(_clean_attribute(key, value, None)) - - def test_attribute_key_validation(self): - # only non-empty strings are valid keys - self.assertInvalid(1, "") - self.assertInvalid(1, 1) - self.assertInvalid(1, {}) - self.assertInvalid(1, []) - self.assertInvalid(1, b"1") - self.assertValid(1, "k") - self.assertValid(1, "1") - - def test_clean_attribute(self): - self.assertInvalid([1, 2, 3.4, "ss", 4]) - self.assertInvalid([{}, 1, 2, 3.4, 4]) - self.assertInvalid(["sw", "lf", 3.4, "ss"]) - self.assertInvalid([1, 2, 3.4, 5]) - self.assertInvalid({}) - self.assertInvalid([1, True]) - self.assertValid(True) - self.assertValid("hi") - self.assertValid(3.4) - self.assertValid(15) - self.assertValid([1, 2, 3, 5]) - self.assertValid([1.2, 2.3, 3.4, 4.5]) - self.assertValid([True, False]) - self.assertValid(["ss", "dw", "fw"]) - self.assertValid([]) - # None in sequences are valid - self.assertValid(["A", None, None]) - self.assertValid(["A", None, None, "B"]) - self.assertValid([None, None]) - self.assertInvalid(["A", None, 1]) - self.assertInvalid([None, "A", None, 1]) - - # test keys - self.assertValid("value", "key") - self.assertInvalid("value", "") - self.assertInvalid("value", None) - - def test_sequence_attr_decode(self): - seq = [ - None, - b"Content-Disposition", - b"Content-Type", - b"\x81", - b"Keep-Alive", - ] - expected = [ - None, - "Content-Disposition", - "Content-Type", - None, - "Keep-Alive", - ] - self.assertEqual( - _clean_attribute("headers", seq, None), tuple(expected) - ) - - -class TestExtendedAttributes(unittest.TestCase): - # pylint: disable=invalid-name - def assertValid(self, value, key="k"): - expected = value - if isinstance(value, MutableSequence): - expected = tuple(value) - self.assertEqual(_clean_extended_attribute(key, value, None), expected) - - def assertInvalid(self, value, key="k"): - self.assertIsNone(_clean_extended_attribute(key, value, None)) - - def test_attribute_key_validation(self): - # only non-empty strings are valid keys - self.assertInvalid(1, "") - self.assertInvalid(1, 1) - self.assertInvalid(1, {}) - self.assertInvalid(1, []) - self.assertInvalid(1, b"1") - self.assertValid(1, "k") - self.assertValid(1, "1") - - def test_clean_extended_attribute(self): - self.assertInvalid([1, 2, 3.4, "ss", 4]) - self.assertInvalid([{}, 1, 2, 3.4, 4]) - self.assertInvalid(["sw", "lf", 3.4, "ss"]) - self.assertInvalid([1, 2, 3.4, 5]) - self.assertInvalid([1, True]) - self.assertValid(None) - self.assertValid(True) - self.assertValid("hi") - self.assertValid(3.4) - self.assertValid(15) - self.assertValid([1, 2, 3, 5]) - self.assertValid([1.2, 2.3, 3.4, 4.5]) - self.assertValid([True, False]) - self.assertValid(["ss", "dw", "fw"]) - self.assertValid([]) - # None in sequences are valid - self.assertValid(["A", None, None]) - self.assertValid(["A", None, None, "B"]) - self.assertValid([None, None]) - self.assertInvalid(["A", None, 1]) - self.assertInvalid([None, "A", None, 1]) - # mappings - self.assertValid({}) - self.assertValid({"k": "v"}) - # mappings in sequences - self.assertValid([{"k": "v"}]) - - # test keys - self.assertValid("value", "key") - self.assertInvalid("value", "") - self.assertInvalid("value", None) - - def test_sequence_attr_decode(self): - seq = [ - None, - b"Content-Disposition", - b"Content-Type", - b"\x81", - b"Keep-Alive", - ] - self.assertEqual( - _clean_extended_attribute("headers", seq, None), tuple(seq) - ) - - def test_mapping(self): - mapping = { - "": "invalid", - b"bytes": "invalid", - "none": {"": "invalid"}, - "valid_primitive": "str", - "valid_sequence": ["str"], - "invalid_sequence": ["str", 1], - "valid_mapping": {"str": 1}, - "invalid_mapping": {"": 1}, - } - expected = { - "none": {}, - "valid_primitive": "str", - "valid_sequence": ("str",), - "invalid_sequence": None, - "valid_mapping": {"str": 1}, - "invalid_mapping": {}, - } - self.assertEqual( - _clean_extended_attribute("headers", mapping, None), expected - ) +class _NoStrNoReprObject: + def __init__(self): + pass class TestBoundedAttributes(unittest.TestCase): @@ -184,11 +27,106 @@ class TestBoundedAttributes(unittest.TestCase): "vaccinated": True, } - def test_negative_maxlen(self): + def test_clean_attribute_value_with_various_params(self): + # A python object that isn't a primitive and has no string/repr method is converted to None. + with self.assertLogs("opentelemetry", level="WARNING") as cm: + self.assertEqual( + _clean_attribute_value(_NoStrNoReprObject(), None), + None, + ) + self.assertEqual(len(cm.output), 1) + self.assertIn( + "Expected one of bool, str, None, bytes, int, float", cm.output[0] + ) + + valid_primitive_sequence = [1, 2.2, None, "cookie"] + self.assertEqual( + _clean_attribute_value(valid_primitive_sequence, None), + tuple(valid_primitive_sequence), + ) + for valid_primitive in valid_primitive_sequence: + self.assertEqual( + _clean_attribute_value(valid_primitive, None), valid_primitive + ) + + self.assertEqual(_clean_attribute_value(b"hello", 4), b"hello") + + with self.assertLogs("opentelemetry", level="WARNING") as cm: + # String is truncated. + self.assertEqual(_clean_attribute_value("a" * 1000, 5), "aaaaa") + self.assertEqual(len(cm.output), 1) + self.assertIn( + "String attribute value exceeds max length", cm.output[0] + ) + + # Sequence of different types of values. List converted to tuple. + self.assertEqual( + _clean_attribute_value( + ["a", 2, _NoStrNoReprObject(), None, b"\xff"], None + ), + ("a", 2, None, None, b"\xff"), + ) + + # non-str key in map... will be converted to string + with self.assertLogs("opentelemetry", level="WARNING") as cm: + self.assertEqual( + _clean_attribute_value({2.2: 4.4}, None), {"2.2": 4.4} + ) + self.assertEqual(len(cm.output), 1) + self.assertIn( + "Invalid type", + cm.output[0], + ) + + # Mapping of values.. + # Non string key without a string conversion method is dropped. + self.assertEqual( + _clean_attribute_value( + { + "a": 1, + _NoStrNoReprObject(): 2, + "c": 3, + "d": [2, 3], + "bytes": b"\xff", + }, + None, + ), + {"a": 1, "c": 3, "d": (2, 3), "bytes": b"\xff"}, + ) + + def test_same_key_value_overwritten(self): + bdict = BoundedAttributes(1, {"name": "Firulais"}, immutable=False) + self.assertEqual(bdict["name"], "Firulais") + self.assertEqual(bdict.dropped, 0) + bdict["name"] = "Bruno" + self.assertEqual(bdict["name"], "Bruno") + self.assertEqual(bdict.dropped, 0) + + def test_invalid_key_not_used(self): + bdict = BoundedAttributes(50, {}, immutable=False) + with self.assertLogs("opentelemetry", level="WARNING") as cm: + bdict[1] = 2 + self.assertEqual(len(cm.output), 1) + self.assertIn("invalid key", cm.output[0]) + self.assertNotIn(1, bdict) + self.assertEqual(bdict.dropped, 1) + + def test_maxlen_reached(self): + bdict = BoundedAttributes(1, {"first": "value"}, immutable=False) + with self.assertLogs("opentelemetry", level="WARNING") as cm: + bdict["second"] = "another" + self.assertIn( + "Attributes dict is full. Dropping the oldest", cm.output[0] + ) + self.assertNotIn("first", bdict) + self.assertEqual(bdict["second"], "another") + self.assertEqual(bdict.dropped, 1) + + def test_negative_maxlen_not_allowed(self): with self.assertRaises(ValueError): BoundedAttributes(-1) - def test_from_map(self): + def test_base_copy_isolated_and_len_works(self): dic_len = len(self.base) base_copy = self.base.copy() bdict = BoundedAttributes(dic_len, base_copy) @@ -205,13 +143,7 @@ def test_from_map(self): # test that iter yields the correct number of elements self.assertEqual(len(tuple(bdict)), dic_len) - # map too big - half_len = dic_len // 2 - bdict = BoundedAttributes(half_len, self.base) - self.assertEqual(len(tuple(bdict)), half_len) - self.assertEqual(bdict.dropped, dic_len - half_len) - - def test_bounded_dict(self): + def test_basic_insertion_update_iteration_and_deletion(self): # create empty dict dic_len = len(self.base) bdict = BoundedAttributes(dic_len, immutable=False) @@ -235,15 +167,12 @@ def test_bounded_dict(self): bdict["name"] = "Bruno" self.assertEqual(bdict.dropped, 0) - # try to append more elements + # try to append more elements, old elements should be dropped for key in self.base: bdict["new-" + key] = self.base[key] self.assertEqual(len(bdict), dic_len) self.assertEqual(bdict.dropped, dic_len) - # Invalid values shouldn't be considered for `dropped` - bdict["invalid-seq"] = [None, 1, "2"] - self.assertEqual(bdict.dropped, dic_len) # test that elements in the dict are the new ones for key in self.base: @@ -256,14 +185,6 @@ def test_bounded_dict(self): with self.assertRaises(KeyError): _ = bdict["new-name"] - def test_no_limit_code(self): - bdict = BoundedAttributes(maxlen=None, immutable=False) - for num in range(100): - bdict[str(num)] = num - - for num in range(100): - self.assertEqual(bdict[str(num)], num) - def test_immutable(self): bdict = BoundedAttributes() with self.assertRaises(TypeError): @@ -283,44 +204,18 @@ def test_locking(self): for num in range(100): self.assertEqual(bdict[str(num)], num) - # pylint: disable=no-self-use - def test_extended_attributes(self): - bdict = BoundedAttributes(extended_attributes=True, immutable=False) - with unittest.mock.patch( - "opentelemetry.attributes._clean_extended_attribute", - return_value="mock_value", - ) as clean_extended_attribute_mock: - bdict["key"] = "value" - - clean_extended_attribute_mock.assert_called_once() - def test_wsgi_request_conversion_to_string(self): - """Test that WSGI request objects are converted to strings when _clean_extended_attribute is called.""" + """Test that WSGI request objects are converted to strings when _clean_attribute_value is called.""" class DummyWSGIRequest: def __str__(self): return "" - wsgi_request = DummyWSGIRequest() - - cleaned_value = _clean_extended_attribute( - "request", wsgi_request, None - ) - - # Verify we get a string back from the cleaner - self.assertIsInstance(cleaned_value, str) self.assertEqual( - "", cleaned_value + "", + _clean_attribute_value(DummyWSGIRequest(), None), ) - def test_invalid_anyvalue_type_raises_typeerror(self): - class BadStr: - def __str__(self): - raise RuntimeError("boom") - - with self.assertRaises(TypeError): - _clean_extended_attribute_value(BadStr(), None) - def test_deepcopy(self): bdict = BoundedAttributes(4, self.base, immutable=False) bdict.dropped = 10 diff --git a/opentelemetry-api/tests/events/test_proxy_event.py b/opentelemetry-api/tests/events/test_proxy_event.py index 562fb28d319..a72f6a52d51 100644 --- a/opentelemetry-api/tests/events/test_proxy_event.py +++ b/opentelemetry-api/tests/events/test_proxy_event.py @@ -6,7 +6,7 @@ import opentelemetry._events as events from opentelemetry.test.globals_test import EventsGlobalsTest -from opentelemetry.util.types import _ExtendedAttributes +from opentelemetry.util.types import Attributes class TestProvider(events.NoOpEventLoggerProvider): @@ -15,7 +15,7 @@ def get_event_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> events.EventLogger: return LoggerTest(name) diff --git a/opentelemetry-api/tests/logs/test_proxy.py b/opentelemetry-api/tests/logs/test_proxy.py index 71772eb5a72..910c02cc777 100644 --- a/opentelemetry-api/tests/logs/test_proxy.py +++ b/opentelemetry-api/tests/logs/test_proxy.py @@ -8,7 +8,7 @@ import opentelemetry._logs._internal as _logs_internal from opentelemetry import _logs from opentelemetry.test.globals_test import LoggingGlobalsTest -from opentelemetry.util.types import _ExtendedAttributes +from opentelemetry.util.types import Attributes class TestProvider(_logs.NoOpLoggerProvider): @@ -17,7 +17,7 @@ def get_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> _logs.Logger: return LoggerTest(name) diff --git a/opentelemetry-sdk/benchmarks/metrics/test_benchmark_metrics_histogram.py b/opentelemetry-sdk/benchmarks/metrics/test_benchmark_metrics_histogram.py index 440a37fc2b8..1c2cb21738f 100644 --- a/opentelemetry-sdk/benchmarks/metrics/test_benchmark_metrics_histogram.py +++ b/opentelemetry-sdk/benchmarks/metrics/test_benchmark_metrics_histogram.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # pylint: disable=invalid-name +import json import random import pytest @@ -57,61 +58,100 @@ def _generate_bounds(bound_count): hist1000 = meter.create_histogram("test_histogram_1000_bound") -@pytest.mark.parametrize("num_labels", [0, 1, 3, 5, 7]) -def test_histogram_record(benchmark, num_labels): - labels = {} - for i in range(num_labels): - labels[f"Key{i}"] = "Value{i}" +@pytest.mark.parametrize("num_labels", [1, 3]) +def test_histogram_record_10(benchmark, num_labels): + def benchmark_histogram_record_10(): + hist10.record( + random.random() * MAX_BOUND_VALUE, + attributes={f"Key{i}": f"Value{i}" for i in range(num_labels)}, + ) - def benchmark_histogram_record(): - hist.record(random.random() * MAX_BOUND_VALUE) + benchmark(benchmark_histogram_record_10) - benchmark(benchmark_histogram_record) +@pytest.mark.parametrize("num_labels", [1, 3]) +def test_histogram_record_10_small_mapping_attrs(benchmark, num_labels): + def test_histogram_record_10_attrs(): + hist10.record( + random.random() * MAX_BOUND_VALUE, + attributes={f"Key{i}": {"k1": "v1"} for i in range(num_labels)}, + ) -@pytest.mark.parametrize("num_labels", [0, 1, 3, 5, 7]) -def test_histogram_record_10(benchmark, num_labels): - labels = {} - for i in range(num_labels): - labels[f"Key{i}"] = "Value{i}" + benchmark(test_histogram_record_10_attrs) - def benchmark_histogram_record_10(): - hist10.record(random.random() * MAX_BOUND_VALUE) - benchmark(benchmark_histogram_record_10) +@pytest.mark.parametrize("num_labels", [1, 3]) +def test_histogram_record_10_complex_attrs(benchmark, num_labels): + def test_histogram_record_10_attrs(): + hist10.record( + random.random() * MAX_BOUND_VALUE, + attributes={ + f"Key{i}": {"k1": "v1", "k2": {"k3": "v3", "k4": [1, 2, 3]}} + for i in range(num_labels) + }, + ) + + benchmark(test_histogram_record_10_attrs) + + +@pytest.mark.parametrize("num_labels", [1, 3]) +def test_histogram_record_10_array_attrs(benchmark, num_labels): + def test_histogram_record_10_attrs(): + hist10.record( + random.random() * MAX_BOUND_VALUE, + attributes={ + f"Key{i}": ["k1", "v1", "k2", "k3", "v3", "k4", "1", "2", "3"] + for i in range(num_labels) + }, + ) + + benchmark(test_histogram_record_10_attrs) + + +@pytest.mark.parametrize("num_labels", [1, 3]) +def test_histogram_record_10_json_string_attrs(benchmark, num_labels): + def test_histogram_record_10_attrs(): + hist10.record( + random.random() * MAX_BOUND_VALUE, + attributes={ + f"Key{i}": json.dumps( + {"k1": "v1", "k2": {"k3": "v3", "k4": [1, 2, 3]}} + ) + for i in range(num_labels) + }, + ) + + benchmark(test_histogram_record_10_attrs) @pytest.mark.parametrize("num_labels", [0, 1, 3, 5, 7]) def test_histogram_record_49(benchmark, num_labels): - labels = {} - for i in range(num_labels): - labels[f"Key{i}"] = "Value{i}" - def benchmark_histogram_record_49(): - hist49.record(random.random() * MAX_BOUND_VALUE) + hist49.record( + random.random() * MAX_BOUND_VALUE, + attributes={f"Key{i}": f"Value{i}" for i in range(num_labels)}, + ) benchmark(benchmark_histogram_record_49) @pytest.mark.parametrize("num_labels", [0, 1, 3, 5, 7]) def test_histogram_record_50(benchmark, num_labels): - labels = {} - for i in range(num_labels): - labels[f"Key{i}"] = "Value{i}" - def benchmark_histogram_record_50(): - hist50.record(random.random() * MAX_BOUND_VALUE) + hist50.record( + random.random() * MAX_BOUND_VALUE, + attributes={f"Key{i}": f"Value{i}" for i in range(num_labels)}, + ) benchmark(benchmark_histogram_record_50) @pytest.mark.parametrize("num_labels", [0, 1, 3, 5, 7]) def test_histogram_record_1000(benchmark, num_labels): - labels = {} - for i in range(num_labels): - labels[f"Key{i}"] = "Value{i}" - def benchmark_histogram_record_1000(): - hist1000.record(random.random() * MAX_BOUND_VALUE) + hist1000.record( + random.random() * MAX_BOUND_VALUE, + attributes={f"Key{i}": f"Value{i}" for i in range(num_labels)}, + ) benchmark(benchmark_histogram_record_1000) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_events/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/_events/__init__.py index 5ec8d864039..8b4a666f25c 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/_events/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/_events/__init__.py @@ -17,7 +17,7 @@ get_logger_provider, ) from opentelemetry.sdk._logs import Logger, LoggerProvider -from opentelemetry.util.types import _ExtendedAttributes +from opentelemetry.util.types import Attributes _logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ def __init__( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ): super().__init__( name=name, @@ -78,7 +78,7 @@ def get_event_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> EventLogger: if not name: _logger.warning("EventLogger created with invalid name: %s", name) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py index 74b759c328c..50908f0b65c 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py @@ -16,8 +16,13 @@ from os import environ from threading import Lock from time import time_ns +from types import NoneType from typing import ( # noqa Any, + Mapping, + Sequence, + Tuple, + Union, cast, overload, ) @@ -34,7 +39,7 @@ get_logger, get_logger_provider, ) -from opentelemetry.attributes import _VALID_ANY_VALUE_TYPES, BoundedAttributes +from opentelemetry.attributes import BoundedAttributes from opentelemetry.context import get_current from opentelemetry.context.context import Context from opentelemetry.metrics import MeterProvider, get_meter_provider @@ -68,7 +73,7 @@ format_span_id, format_trace_id, ) -from opentelemetry.util.types import AnyValue, _ExtendedAttributes +from opentelemetry.util.types import AnyValue, Attributes # pylint: disable=too-many-lines @@ -268,7 +273,6 @@ def __post_init__(self): else None, immutable=False, max_value_len=self.limits.max_attribute_length, - extended_attributes=True, ) if self.dropped_attributes > 0: warnings.warn( @@ -552,7 +556,7 @@ def __init__( ) @staticmethod - def _get_attributes(record: logging.LogRecord) -> _ExtendedAttributes: + def _get_attributes(record: logging.LogRecord) -> Attributes: attributes = { k: v for k, v in vars(record).items() if k not in _RESERVED_ATTRS } @@ -600,7 +604,21 @@ def _translate(self, record: logging.LogRecord) -> LogRecord: # For more background, see: https://github.com/open-telemetry/opentelemetry-python/pull/4216 if not record.args and not isinstance(record.msg, str): # if record.msg is not a value we can export, cast it to string - if not isinstance(record.msg, _VALID_ANY_VALUE_TYPES): + # TODO: https://github.com/open-telemetry/opentelemetry-python/issues/5304 - do something better + # than just casting to a string. + if not isinstance( + record.msg, + ( + NoneType, + bool, + bytes, + int, + float, + str, + Sequence, + Mapping, + ), + ): body = str(record.msg) else: body = record.msg @@ -709,7 +727,7 @@ def emit( severity_number: SeverityNumber | None = None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> None: @@ -819,7 +837,7 @@ def _get_logger_no_cache( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> Logger: scope = InstrumentationScope(name, version, schema_url, attributes) @@ -852,7 +870,7 @@ def get_logger( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> APILogger: if self._disabled: return NoOpLogger( diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/_exceptions.py b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/_exceptions.py index f4805c7528e..6c2699bd04a 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/_exceptions.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/_exceptions.py @@ -7,7 +7,7 @@ from opentelemetry._logs import LogRecord from opentelemetry.attributes import BoundedAttributes from opentelemetry.semconv.attributes import exception_attributes -from opentelemetry.util.types import AnyValue, _ExtendedAttributes +from opentelemetry.util.types import AnyValue, Attributes def _get_exception_attributes( @@ -31,15 +31,15 @@ def _get_exception_attributes( def _get_attributes_with_exception( - attributes: _ExtendedAttributes | None, + attributes: Attributes, exception: BaseException | None, -) -> _ExtendedAttributes | None: +) -> Attributes: if exception is None: return attributes exception_attributes_map = _get_exception_attributes(exception) if attributes is None: - attributes_map: _ExtendedAttributes = {} + attributes_map: Attributes = {} else: attributes_map = attributes @@ -50,7 +50,6 @@ def _get_attributes_with_exception( attributes=bounded_attributes, immutable=False, max_value_len=bounded_attributes.max_value_len, - extended_attributes=bounded_attributes._extended_attributes, # pylint: disable=protected-access ) merged.dropped = bounded_attributes.dropped for key, value in exception_attributes_map.items(): @@ -63,7 +62,7 @@ def _get_attributes_with_exception( def _copy_log_record( record: LogRecord, - attributes: _ExtendedAttributes | None, + attributes: Attributes, ) -> LogRecord: copied_record = LogRecord( timestamp=record.timestamp, @@ -104,7 +103,7 @@ def _create_log_record_with_exception( severity_number=None, severity_text: str | None = None, body: AnyValue | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, event_name: str | None = None, exception: BaseException | None = None, ) -> LogRecord: diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py index 94738ca4697..5907b45ae79 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py @@ -141,7 +141,7 @@ """ .. envvar:: OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT -The :envvar:`OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT` represents the maximum allowed attribute length. +The :envvar:`OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT` represents the maximum allowed attribute value length for string attribute values. """ OTEL_EVENT_ATTRIBUTE_COUNT_LIMIT = "OTEL_EVENT_ATTRIBUTE_COUNT_LIMIT" @@ -174,8 +174,7 @@ """ .. envvar:: OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT -The :envvar:`OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT` represents the maximum allowed length -span attribute values can have. This takes precedence over :envvar:`OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT`. +The :envvar:`OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT` represents the maximum allowed attribute value length for string span attribute values. This takes precedence over :envvar:`OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT`. """ OTEL_SPAN_EVENT_COUNT_LIMIT = "OTEL_SPAN_EVENT_COUNT_LIMIT" diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/_view_instrument_match.py b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/_view_instrument_match.py index d9ba05363b1..f9eb2a6e0fc 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/_view_instrument_match.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/metrics/_internal/_view_instrument_match.py @@ -1,11 +1,13 @@ # Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 - -from collections.abc import Sequence +import copy +import json +from collections.abc import Mapping, Sequence from logging import getLogger from threading import Lock from time import time_ns +from types import NoneType from typing import cast from opentelemetry.sdk.metrics._internal.aggregation import ( @@ -19,9 +21,30 @@ from opentelemetry.sdk.metrics._internal.measurement import Measurement from opentelemetry.sdk.metrics._internal.point import DataPointT from opentelemetry.sdk.metrics._internal.view import View +from opentelemetry.util.types import Attributes, AttributeValue _logger = getLogger(__name__) +_HashedAttributes = ( + str | bool | int | float | bytes | None | tuple["_HashedAttributes", ...] +) + + +def _hash_attributes(value: Attributes | AttributeValue) -> _HashedAttributes: + if isinstance(value, (NoneType, str, int, float, bool, bytes)): + return value + if isinstance(value, Sequence): + return tuple(_hash_attributes(v) for v in value) + if isinstance(value, Mapping): + return tuple( + (k, _hash_attributes(value[k])) + for k in sorted( + value, + key=lambda item: item if isinstance(item, str) else str(item), + ) + ) + raise TypeError(f"Invalid value type for attributes: {type(value)}") + class _ViewInstrumentMatch: def __init__( @@ -32,7 +55,9 @@ def __init__( ): self._view = view self._instrument = instrument - self._attributes_aggregation: dict[frozenset, _Aggregation] = {} + self._attributes_aggregation: dict[ + _HashedAttributes, _Aggregation + ] = {} self._lock = Lock() self._instrument_class_aggregation = instrument_class_aggregation self._name = self._view._name or self._instrument.name @@ -87,18 +112,27 @@ def conflicts(self, other: "_ViewInstrumentMatch") -> bool: def consume_measurement( self, measurement: Measurement, should_sample_exemplar: bool = True ) -> None: + attributes = {} + if measurement.attributes: + # Deepcopy to prevent the user from modifying the attributes after the + # measurement has been consumed. + attributes = copy.deepcopy(measurement.attributes) if self._view._attribute_keys is not None: - attributes = {} - - for key, value in (measurement.attributes or {}).items(): - if key in self._view._attribute_keys: - attributes[key] = value - elif measurement.attributes is not None: - attributes = dict(measurement.attributes) - else: - attributes = {} - - aggr_key = frozenset(attributes.items()) + attributes = { + k: v + for k, v in attributes.items() + if k in self._view._attribute_keys + } + # Use sort keys to get a stable key. + # use default=str to serialize non-default types, the OTLP proto/json encoder does the same thing. + # json.dumps is 2x-4x faster than _hash_attributes.. + try: + aggr_key = json.dumps(attributes, sort_keys=True, default=str) + # Raises a TypeError when dictionary keys are not all strings because sort_keys cannot compare + # values of different types. This should be a very rare case because dictionary keys are supposed + # to be strings (and are eventually coerced to strings by the OTLP json/proto encoders) according to the spec. + except TypeError: + aggr_key = _hash_attributes(attributes) if aggr_key not in self._attributes_aggregation: with self._lock: diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py index 09120f71498..6dc5b691f1b 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/resources/__init__.py @@ -74,7 +74,7 @@ __version__ as _OPENTELEMETRY_SDK_VERSION, ) from opentelemetry.semconv.resource import ResourceAttributes -from opentelemetry.util.types import AttributeValue +from opentelemetry.util.types import Attributes, AttributeValue psutil: ModuleType | None = None @@ -85,10 +85,8 @@ except ImportError: pass -LabelValue = AttributeValue -Attributes = Mapping[str, LabelValue] logger = logging.getLogger(__name__) - +LabelValue = AttributeValue CLOUD_PROVIDER = ResourceAttributes.CLOUD_PROVIDER CLOUD_ACCOUNT_ID = ResourceAttributes.CLOUD_ACCOUNT_ID CLOUD_REGION = ResourceAttributes.CLOUD_REGION @@ -156,14 +154,17 @@ class Resource: _schema_url: str def __init__(self, attributes: Attributes, schema_url: str | None = None): - self._attributes = BoundedAttributes(attributes=attributes) + # Immutable set to true so attributes cannot be added or removed after creation. + self._attributes = BoundedAttributes( + attributes=attributes, immutable=True + ) if schema_url is None: schema_url = "" self._schema_url = schema_url @staticmethod def create( - attributes: Attributes | None = None, + attributes: Attributes = None, schema_url: str | None = None, ) -> "Resource": """Creates a new `Resource` from attributes. @@ -203,7 +204,7 @@ def get_empty() -> "Resource": return _EMPTY_RESOURCE @property - def attributes(self) -> Attributes: + def attributes(self) -> Mapping[str, AttributeValue]: return self._attributes @property diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py index d292fed8e55..67c50fbe35d 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py @@ -846,10 +846,12 @@ def __init__( self._events = self._new_events() if events: for event in events: + # Immutable set to true so attributes cannot be added or removed after creation. event._attributes = BoundedAttributes( self._limits.max_event_attributes, event.attributes, max_value_len=self._limits.max_attribute_length, + immutable=True, ) self._events.append(event) @@ -870,11 +872,13 @@ def _new_links(self, links: Sequence[trace_api.Link]): valid_links = [] for link in links: if link and _is_valid_link(link.context, link.attributes): + # Immutable set to true so attributes cannot be added or removed after creation. # pylint: disable=protected-access link._attributes = BoundedAttributes( self._limits.max_link_attributes, link.attributes, max_value_len=self._limits.max_attribute_length, + immutable=True, ) valid_links.append(link) @@ -911,10 +915,12 @@ def add_event( attributes: types.Attributes = None, timestamp: int | None = None, ) -> None: + # Immutable set to true so attributes cannot be added or removed after creation. attributes = BoundedAttributes( self._limits.max_event_attributes, attributes, max_value_len=self._limits.max_attribute_length, + immutable=True, ) self._add_event( Event( @@ -935,11 +941,12 @@ def add_link( ) -> None: if not _is_valid_link(context, attributes): return - + # Immutable set to true so attributes cannot be added or removed after creation. attributes = BoundedAttributes( self._limits.max_link_attributes, attributes, max_value_len=self._limits.max_attribute_length, + immutable=True, ) self._add_link( trace_api.Link( diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/util/__init__.pyi b/opentelemetry-sdk/src/opentelemetry/sdk/util/__init__.pyi deleted file mode 100644 index 53d372afba0..00000000000 --- a/opentelemetry-sdk/src/opentelemetry/sdk/util/__init__.pyi +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright The OpenTelemetry Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections.abc import ( - Iterable, - Iterator, - Mapping, - MutableMapping, - Sequence, -) -from typing import ( - Any, - TypeVar, - overload, -) - -from opentelemetry.util.types import AttributesAsKey, AttributeValue - -_T = TypeVar("_T") -_KT = TypeVar("_KT") -_VT = TypeVar("_VT") - -def ns_to_iso_str(nanoseconds: int) -> str: ... -def get_dict_as_key( - labels: Mapping[str, AttributeValue], -) -> AttributesAsKey: ... - -# pylint: disable=no-self-use -class BoundedList(Sequence[_T]): - """An append only list with a fixed max size. - - Calls to `append` and `extend` will drop the oldest elements if there is - not enough room. - """ - - dropped: int - def __init__(self, maxlen: int | None): ... - def __deepcopy__(self, memo: dict[int, Any]) -> BoundedList[_T]: ... - def insert(self, index: int, value: _T) -> None: ... - @overload - def __getitem__(self, i: int) -> _T: ... - @overload - def __getitem__(self, s: slice) -> Sequence[_T]: ... - def __len__(self) -> int: ... - def append(self, item: _T) -> None: ... - def extend(self, seq: Sequence[_T]) -> None: ... - @classmethod - def from_seq( - cls, maxlen: int | None, seq: Iterable[_T] - ) -> BoundedList[_T]: ... # pylint: disable=undefined-variable - -class BoundedDict(MutableMapping[_KT, _VT]): - """An ordered dict with a fixed max capacity. - - Oldest elements are dropped when the dict is full and a new element is - added. - """ - - dropped: int - def __init__(self, maxlen: int): ... - def __getitem__(self, k: _KT) -> _VT: ... - def __setitem__(self, k: _KT, v: _VT) -> None: ... - def __delitem__(self, v: _KT) -> None: ... - def __iter__(self) -> Iterator[_KT]: ... - def __len__(self) -> int: ... - @classmethod - def from_map( - cls, maxlen: int, mapping: Mapping[_KT, _VT] - ) -> BoundedDict[_KT, _VT]: ... # pylint: disable=undefined-variable diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/util/instrumentation.py b/opentelemetry-sdk/src/opentelemetry/sdk/util/instrumentation.py index fc408e431e7..6d1aa17fe4b 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/util/instrumentation.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/util/instrumentation.py @@ -10,7 +10,6 @@ from opentelemetry.util.types import ( # TODO: see if we can remove F401 when using new sphinx version # noqa: F401 # pylint: disable=unused-import AnyValue, Attributes, - _ExtendedAttributes, ) @@ -88,14 +87,17 @@ def __init__( name: str, version: str | None = None, schema_url: str | None = None, - attributes: _ExtendedAttributes | None = None, + attributes: Attributes = None, ) -> None: self._name = name self._version = version if schema_url is None: schema_url = "" self._schema_url = schema_url - self._attributes = BoundedAttributes(attributes=attributes) + # Attributes cannot be added/removed after creation. + self._attributes = BoundedAttributes( + attributes=attributes, immutable=True + ) def __repr__(self) -> str: return f"{type(self).__name__}({self._name}, {self._version}, {self._schema_url}, {self._attributes})" diff --git a/opentelemetry-sdk/tests/logs/test_logs.py b/opentelemetry-sdk/tests/logs/test_logs.py index a9b171b6fd2..90870ca270d 100644 --- a/opentelemetry-sdk/tests/logs/test_logs.py +++ b/opentelemetry-sdk/tests/logs/test_logs.py @@ -408,7 +408,6 @@ def test_emit_logrecord_exception_with_immutable_attributes(self): original_attributes = BoundedAttributes( attributes={"custom": "value"}, immutable=True, - extended_attributes=True, ) log_record = LogRecord( observed_timestamp=0, diff --git a/opentelemetry-sdk/tests/metrics/test_view_instrument_match.py b/opentelemetry-sdk/tests/metrics/test_view_instrument_match.py index 73b0e6e24a5..22c496aaf16 100644 --- a/opentelemetry-sdk/tests/metrics/test_view_instrument_match.py +++ b/opentelemetry-sdk/tests/metrics/test_view_instrument_match.py @@ -4,6 +4,7 @@ # pylint: disable=protected-access from __future__ import annotations +import json from collections.abc import Callable, Sequence from time import time_ns from unittest import TestCase @@ -11,6 +12,7 @@ from opentelemetry.context import Context from opentelemetry.sdk.metrics._internal._view_instrument_match import ( + _hash_attributes, _ViewInstrumentMatch, ) from opentelemetry.sdk.metrics._internal.aggregation import ( @@ -101,7 +103,7 @@ def test_consume_measurement(self): ) self.assertEqual( view_instrument_match._attributes_aggregation, - {frozenset([("c", "d")]): self.mock_created_aggregation}, + {json.dumps({"c": "d"}): self.mock_created_aggregation}, ) view_instrument_match.consume_measurement( @@ -117,12 +119,13 @@ def test_consume_measurement(self): self.assertEqual( view_instrument_match._attributes_aggregation, { - frozenset(): self.mock_created_aggregation, - frozenset([("c", "d")]): self.mock_created_aggregation, + json.dumps({}): self.mock_created_aggregation, + json.dumps({"c": "d"}): self.mock_created_aggregation, }, ) - # None attribute_keys (default) will keep all attributes + # setup new instrument match without `attribute_keys` set, defaults to keeping + # all attributes. view_instrument_match = _ViewInstrumentMatch( view=View( instrument_name="instrument1", @@ -146,14 +149,10 @@ def test_consume_measurement(self): ) self.assertEqual( view_instrument_match._attributes_aggregation, - { - frozenset( - [("c", "d"), ("f", "g")] - ): self.mock_created_aggregation - }, + {json.dumps({"c": "d", "f": "g"}): self.mock_created_aggregation}, ) - # empty set attribute_keys will drop all labels and aggregate + # empty set attribute_keys will drop all attributes and aggregate # everything together view_instrument_match = _ViewInstrumentMatch( view=View( @@ -173,12 +172,12 @@ def test_consume_measurement(self): time_unix_nano=time_ns(), instrument=instrument1, context=Context(), - attributes=None, + attributes={"a": 1, "b": 2}, ) ) self.assertEqual( view_instrument_match._attributes_aggregation, - {frozenset({}): self.mock_created_aggregation}, + {json.dumps({}): self.mock_created_aggregation}, ) # Test that a drop aggregation is handled in the same way as any @@ -207,7 +206,7 @@ def test_consume_measurement(self): ) ) self.assertIsInstance( - view_instrument_match._attributes_aggregation[frozenset({})], + view_instrument_match._attributes_aggregation[json.dumps({})], _DropAggregation, ) @@ -276,7 +275,7 @@ def test_consume_measurement_attributes_are_copied(self): ), ) - attributes = {"key": "original"} + attributes = {"key": "original", "mutable_value": [1, 2, 3]} view_instrument_match.consume_measurement( Measurement( value=1, @@ -289,13 +288,16 @@ def test_consume_measurement_attributes_are_copied(self): # Mutate the original dict after recording attributes["key"] = "mutated" - + attributes["mutable_value"].append(4) number_data_points = view_instrument_match.collect( AggregationTemporality.CUMULATIVE, 0 ) number_data_points = list(number_data_points) self.assertEqual(len(number_data_points), 1) - self.assertEqual(number_data_points[0].attributes, {"key": "original"}) + self.assertEqual( + number_data_points[0].attributes, + {"key": "original", "mutable_value": [1, 2, 3]}, + ) @patch( "opentelemetry.sdk.metrics._internal._view_instrument_match.time_ns", @@ -515,11 +517,66 @@ def test_setting_aggregation(self): self.assertIsInstance( view_instrument_match._attributes_aggregation[ - frozenset({("c", "d")}) + json.dumps({"c": "d"}) ], _LastValueAggregation, ) + def test_attributes_hash_fallsback_to_hash_attributes_value_function(self): + instrument1 = _Counter( + name="instrument1", + instrumentation_scope=Mock(), + measurement_consumer=Mock(), + description="description", + unit="unit", + ) + instrument1.instrumentation_scope = self.mock_instrumentation_scope + view_instrument_match = _ViewInstrumentMatch( + view=View( + instrument_name="instrument1", + name="name", + aggregation=DefaultAggregation(), + ), + instrument=instrument1, + instrument_class_aggregation={_Counter: LastValueAggregation()}, + ) + + # this will fail with json.dumps because dictionary keys are not all strings + attributes = {"c": 1, 22: 3, (1,): 2} + + view_instrument_match.consume_measurement( + Measurement( + value=0, + time_unix_nano=time_ns(), + instrument=Mock(name="instrument1"), + context=Context(), + attributes=attributes, + ) + ) + self.assertIn( + _hash_attributes(attributes), + view_instrument_match._attributes_aggregation, + ) + + def test_json_dumps_works_as_stable_hash_key(self): + attributes = { + "a": [1, 2], + "b": [2, 1], + "c": b"1234asf", + "d": 1.2324124, + "e": -2.32323124, + "f": {1: 2, 2: (1, 2, 3), 3: "a", 4: "bc"}, + } + self.assertEqual( + json.dumps(attributes, sort_keys=True, default=str), + json.dumps(attributes, sort_keys=True, default=str), + ) + + self.assertNotEqual( + json.dumps({"1": (1, "2", 3, "4")}, sort_keys=True, default=str), + json.dumps({"1": ("1", 2, "3", 4)}, sort_keys=True, default=str), + ) + class TestSimpleFixedSizeExemplarReservoir(TestCase): def test_consume_measurement_with_custom_reservoir_factory(self): diff --git a/opentelemetry-sdk/tests/resources/test_resources.py b/opentelemetry-sdk/tests/resources/test_resources.py index 2b945ea9fdd..0f8df361123 100644 --- a/opentelemetry-sdk/tests/resources/test_resources.py +++ b/opentelemetry-sdk/tests/resources/test_resources.py @@ -235,26 +235,28 @@ def test_service_name_using_process_name(self): ) def test_invalid_resource_attribute_values(self): + # This class has no __str__ or __repr__ method, so BoundedAttributes does + # not attempt to convert it to a string and defaults to returning None. + class NoStrNoRepr: + def __init__(self): + pass + with self.assertLogs(level=WARNING): resource = Resource( { SERVICE_NAME: "test", - "non-primitive-data-type": {}, - "invalid-byte-type-attribute": ( - b"\xd8\xe1\xb7\xeb\xa8\xe5 \xd2\xb7\xe1" - ), + "bad-type": NoStrNoRepr(), "": "empty-key-value", - None: "null-key-value", - "another-non-primitive": uuid.uuid4(), } ) self.assertEqual( resource.attributes, { SERVICE_NAME: "test", + "bad-type": None, }, ) - self.assertEqual(len(resource.attributes), 1) + self.assertEqual(len(resource.attributes), 2) def test_aggregated_resources_no_detectors(self): aggregated_resources = get_aggregated_resources([]) diff --git a/opentelemetry-sdk/tests/trace/test_trace.py b/opentelemetry-sdk/tests/trace/test_trace.py index 0b07908ec5b..c638bcba04d 100644 --- a/opentelemetry-sdk/tests/trace/test_trace.py +++ b/opentelemetry-sdk/tests/trace/test_trace.py @@ -842,7 +842,12 @@ def test_attributes(self): root.set_attribute("http.response.status_code", 200) root.set_attribute("http.status_text", "OK") root.set_attribute("misc.pi", 3.14) - + root.set_attribute("mapping-key", {"key": "value"}) + root.set_attribute("array-key", [1, 2, 3]) + root.set_attribute( + "complex-key", + [{"key1": "value1"}, {"key2": 42}, "key3", [1, 2, 3]], + ) # Setting an attribute with the same key as an existing attribute # SHOULD overwrite the existing attribute's value. root.set_attribute("attr-key", "attr-value1") @@ -854,7 +859,7 @@ def test_attributes(self): list_of_numerics = [123, 314, 0] root.set_attribute("list-of-numerics", list_of_numerics) - self.assertEqual(len(root.attributes), 9) + self.assertEqual(len(root.attributes), 12) self.assertEqual(root.attributes["http.request.method"], "GET") self.assertEqual( root.attributes["url.full"], @@ -894,53 +899,30 @@ def test_attributes(self): self.assertEqual(root.attributes["attr-in-both"], "span-attr") def test_invalid_attribute_values(self): + # This class has no __str__ or __repr__ method, so BoundedAttributes does + # not attempt to convert it to a string and defaults to returning None. + class NoStrNoRepr: + def __init__(self): + pass + with self.tracer.start_as_current_span("root") as root: with self.assertLogs(level=WARNING): root.set_attributes( - {"correct-value": "foo", "non-primitive-data-type": {}} + { + "correct-value": "foo", + "bad-type": NoStrNoRepr(), + } ) - with self.assertLogs(level=WARNING): - root.set_attribute("non-primitive-data-type", {}) - with self.assertLogs(level=WARNING): - root.set_attribute( - "list-of-mixed-data-types-numeric-first", - [123, False, "string"], - ) - with self.assertLogs(level=WARNING): - root.set_attribute( - "list-of-mixed-data-types-non-numeric-first", - [False, 123, "string"], - ) - with self.assertLogs(level=WARNING): - root.set_attribute( - "list-with-non-primitive-data-type", [{}, 123] - ) - with self.assertLogs(level=WARNING): - root.set_attribute("list-with-numeric-and-bool", [1, True]) - - with self.assertLogs(level=WARNING): - root.set_attribute("", 123) - with self.assertLogs(level=WARNING): - root.set_attribute(None, 123) - - self.assertEqual(len(root.attributes), 1) + self.assertEqual(len(root.attributes), 2) self.assertEqual(root.attributes["correct-value"], "foo") + self.assertIsNone(root.attributes["bad-type"]) def test_byte_type_attribute_value(self): with self.tracer.start_as_current_span("root") as root: - with self.assertLogs(level=WARNING): - root.set_attribute( - "invalid-byte-type-attribute", - b"\xd8\xe1\xb7\xeb\xa8\xe5 \xd2\xb7\xe1", - ) - self.assertFalse( - "invalid-byte-type-attribute" in root.attributes - ) - root.set_attribute("valid-byte-type-attribute", b"valid byte") self.assertTrue( - isinstance(root.attributes["valid-byte-type-attribute"], str) + isinstance(root.attributes["valid-byte-type-attribute"], bytes) ) def test_sampling_attributes(self): @@ -1031,26 +1013,25 @@ def test_event_attributes_are_immutable(self): with self.assertRaises(TypeError): event.attributes["name"] = "hello" - def test_invalid_event_attributes(self): + def test_setting_event_attributes(self): self.assertEqual(trace_api.get_current_span(), trace_api.INVALID_SPAN) with self.tracer.start_as_current_span("root") as root: - with self.assertLogs(level=WARNING): - root.add_event( - "event0", {"attr1": True, "attr2": ["hi", False]} - ) - with self.assertLogs(level=WARNING): - root.add_event("event0", {"attr1": {}}) - with self.assertLogs(level=WARNING): - root.add_event("event0", {"attr1": [[True]]}) - with self.assertLogs(level=WARNING): - root.add_event("event0", {"attr1": [{}], "attr2": [1, 2]}) + root.add_event("event0", {"attr1": True, "attr2": ["hi", False]}) + root.add_event("event0", {"attr1": {}}) + root.add_event("event0", {"attr1": [[True]]}) + root.add_event("event0", {"attr1": [{}], "attr2": [1, 2]}) self.assertEqual(len(root.events), 4) - self.assertEqual(root.events[0].attributes, {"attr1": True}) - self.assertEqual(root.events[1].attributes, {}) - self.assertEqual(root.events[2].attributes, {}) - self.assertEqual(root.events[3].attributes, {"attr2": (1, 2)}) + self.assertEqual( + root.events[0].attributes, + {"attr1": True, "attr2": ("hi", False)}, + ) + self.assertEqual(root.events[1].attributes, {"attr1": {}}) + self.assertEqual(root.events[2].attributes, {"attr1": ((True,),)}) + self.assertEqual( + root.events[3].attributes, {"attr2": (1, 2), "attr1": ({},)} + ) def test_links(self): id_generator = RandomIdGenerator()