From 8bfe25824b2b9d63626be0f87e0f7c175fb47a71 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Fri, 9 May 2025 15:23:29 -0700 Subject: [PATCH 1/6] Add log object support for custom data types: Image, Video, Table, Audio, and 3D Objects --- dreadnode/__init__.py | 6 + dreadnode/data_types/__init__.py | 7 + dreadnode/data_types/audio.py | 186 ++++++++++++++ dreadnode/data_types/base_data_type.py | 17 ++ dreadnode/data_types/image.py | 294 +++++++++++++++++++++ dreadnode/data_types/object_3d.py | 101 ++++++++ dreadnode/data_types/py.typed | 0 dreadnode/data_types/table.py | 157 ++++++++++++ dreadnode/data_types/video.py | 230 +++++++++++++++++ dreadnode/serialization.py | 130 ++-------- examples/log_object/audio.ipynb | 241 ++++++++++++++++++ examples/log_object/image.ipynb | 322 +++++++++++++++++++++++ examples/log_object/object3d.ipynb | 117 +++++++++ examples/log_object/table.ipynb | 165 ++++++++++++ examples/log_object/video.ipynb | 340 +++++++++++++++++++++++++ poetry.lock | 133 +++++++++- pyproject.toml | 2 + 17 files changed, 2326 insertions(+), 122 deletions(-) create mode 100644 dreadnode/data_types/__init__.py create mode 100644 dreadnode/data_types/audio.py create mode 100644 dreadnode/data_types/base_data_type.py create mode 100644 dreadnode/data_types/image.py create mode 100644 dreadnode/data_types/object_3d.py create mode 100644 dreadnode/data_types/py.typed create mode 100644 dreadnode/data_types/table.py create mode 100644 dreadnode/data_types/video.py create mode 100644 examples/log_object/audio.ipynb create mode 100644 examples/log_object/image.ipynb create mode 100644 examples/log_object/object3d.ipynb create mode 100644 examples/log_object/table.ipynb create mode 100644 examples/log_object/video.ipynb diff --git a/dreadnode/__init__.py b/dreadnode/__init__.py index a7bbd9cd..7991aa89 100644 --- a/dreadnode/__init__.py +++ b/dreadnode/__init__.py @@ -1,3 +1,4 @@ +from dreadnode.data_types import Audio, Image, Object3D, Table, Video 
class Audio(BaseDataType):
    """
    Audio media type for Dreadnode logging.

    Supports:
    - Local file paths (str or Path)
    - Numpy arrays with sample rate
    - Raw bytes
    - Pydub AudioSegment object
    """

    def __init__(
        self,
        data: AudioDataType,
        sample_rate: int | None = None,
        caption: str | None = None,
        format: str | None = None,
    ):
        """
        Initialize an Audio object.

        Args:
            data: The audio data, which can be:
                - A path to a local audio file (str or Path)
                - A numpy array (requires sample_rate)
                - Raw bytes
                - A pydub AudioSegment
            sample_rate: Required when using numpy arrays
            caption: Optional caption for the audio
            format: Optional format to use (default is wav for numpy arrays)
        """
        self._data = data
        self._sample_rate = sample_rate
        self._caption = caption
        self._format = format

    def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
        """
        Serialize the audio data to bytes and return with metadata.

        Returns:
            A tuple of (audio_bytes, metadata_dict)

        Raises:
            TypeError: If the data is of an unsupported type, or is a
                str/Path that does not point to an existing file.
            ValueError: If a numpy array is given without ``sample_rate``.
        """
        audio_bytes, format_name, sample_rate, duration = self._process_audio_data()
        metadata = self._generate_metadata(format_name, sample_rate, duration)
        return audio_bytes, metadata

    def _process_audio_data(self) -> tuple[bytes, str, int | None, float | None]:
        """
        Dispatch on the input type and return the encoded audio.

        Returns:
            A tuple of (audio_bytes, format_name, sample_rate, duration)

        Raises:
            TypeError: For unsupported inputs or non-existent paths.
        """
        data = self._data
        if isinstance(data, (str, Path)):
            if Path(data).exists():
                return self._process_file_path()
            # str/Path is a supported *type*; the actual problem is the path.
            # TypeError is kept so existing callers' handlers still apply.
            raise TypeError(f"Unsupported audio data: path does not exist: {data}")
        if isinstance(data, np.ndarray):
            return self._process_numpy_array()
        if isinstance(data, bytes):
            return self._process_raw_bytes()
        if isinstance(data, AudioSegment):
            return self._process_pydub_audio_segment()
        raise TypeError(f"Unsupported audio data type: {type(data)}")

    def _process_file_path(self) -> tuple[bytes, str, int | None, float | None]:
        """
        Read an audio file verbatim and probe it with soundfile.

        The file bytes are returned unchanged; soundfile is only used to
        obtain the sample rate (unless one was supplied) and the duration.

        Returns:
            A tuple of (audio_bytes, format_name, sample_rate, duration)
        """
        path = Path(str(self._data))
        audio_bytes = path.read_bytes()
        format_name = self._format or path.suffix.lstrip(".").lower() or "wav"
        with sf.SoundFile(str(path)) as sound_file:
            sample_rate = self._sample_rate or sound_file.samplerate
            duration = sound_file.frames / sound_file.samplerate
        return audio_bytes, format_name, sample_rate, duration

    def _process_numpy_array(self) -> tuple[bytes, str, int | None, float | None]:
        """
        Encode a numpy array with soundfile (WAV unless ``format`` is given).

        Returns:
            A tuple of (audio_bytes, format_name, sample_rate, duration)

        Raises:
            TypeError: If the stored data is not a numpy array.
            ValueError: If ``sample_rate`` was not provided.
        """
        # Validate before touching the data rather than after encoding it.
        if not isinstance(self._data, np.ndarray):
            raise TypeError("Invalid data type for numpy array processing.")
        if self._sample_rate is None:
            raise ValueError('Argument "sample_rate" is required when using numpy arrays.')

        format_name = self._format or "wav"
        buffer = io.BytesIO()
        sf.write(buffer, self._data, self._sample_rate, format=format_name)
        audio_bytes = buffer.getvalue()

        # The first axis is the frame axis, so its length is the frame count.
        duration = len(self._data) / float(self._sample_rate)
        return audio_bytes, format_name, self._sample_rate, duration

    def _process_raw_bytes(self) -> tuple[bytes, str, int | None, float | None]:
        """
        Pass raw bytes through unchanged.

        The bytes are not inspected: the format is whatever the caller
        declared (``wav`` by default) and the duration is unknown.

        Returns:
            A tuple of (audio_bytes, format_name, sample_rate, duration)

        Raises:
            TypeError: If the stored data is not ``bytes``.
        """
        if not isinstance(self._data, bytes):
            raise TypeError("Raw bytes are expected for this processing method.")
        return self._data, self._format or "wav", self._sample_rate, None

    def _process_pydub_audio_segment(self) -> tuple[bytes, str, int | None, float | None]:
        """
        Export a pydub AudioSegment to bytes.

        Returns:
            A tuple of (audio_bytes, format_name, sample_rate, duration)

        Raises:
            TypeError: If the stored data is not an AudioSegment.
        """
        if not isinstance(self._data, AudioSegment):
            raise TypeError("AudioSegment is expected for this processing method.")

        format_name = self._format or "wav"
        buffer = io.BytesIO()
        self._data.export(buffer, format=format_name)
        audio_bytes = buffer.getvalue()

        # pydub reports length in milliseconds; convert to seconds for consistency.
        duration = len(self._data) / 1000.0
        return audio_bytes, format_name, self._data.frame_rate, duration

    def _generate_metadata(
        self, format_name: str, sample_rate: int | None, duration: float | None
    ) -> dict[str, str | int | float | None]:
        """
        Generate metadata for the audio data.

        Args:
            format_name: Format/extension of the encoded audio.
            sample_rate: Sample rate in Hz, if known.
            duration: Duration in seconds, if known.

        Returns:
            A dictionary of metadata
        """
        metadata: dict[str, str | int | float | None] = {
            "extension": format_name.lower(),
            "x-python-datatype": "dreadnode.Audio.bytes",
        }

        # Only resolved when the input really is an AudioSegment, so the
        # other input kinds never need to evaluate the pydub type.
        segment = None
        if isinstance(self._data, (str, Path)):
            metadata["source-type"] = "file"
            metadata["source-path"] = str(self._data)
        elif isinstance(self._data, np.ndarray):
            metadata["source-type"] = "numpy.ndarray"
        elif isinstance(self._data, bytes):
            metadata["source-type"] = "bytes"
        elif isinstance(self._data, AudioSegment):
            metadata["source-type"] = "pydub.AudioSegment"
            segment = self._data

        if sample_rate is not None:
            metadata["sample-rate"] = sample_rate

        if duration is not None:
            metadata["duration"] = duration

        # Add pydub-specific metadata if available
        if segment is not None:
            metadata["channels"] = segment.channels
            metadata["sample-width"] = segment.sample_width

        if self._caption:
            metadata["caption"] = self._caption

        return metadata
class Image(BaseDataType):
    """
    Image media type for Dreadnode logging.

    Supports:
    - Local file paths (str or Path)
    - PIL Image objects
    - Numpy arrays
    - Raw bytes
    - Base64 data URLs (``data:image/<format>;base64,...``)
    """

    def __init__(
        self,
        data: ImageDataOrPathType,
        mode: str | None = None,
        caption: str | None = None,
        format: str | None = None,
    ):
        """
        Initialize an Image object.

        Args:
            data: The image data, which can be:
                - A path to a local image file (str or Path)
                - A PIL Image object
                - A numpy array
                - A base64 data URL string
                - Raw bytes
            mode: Optional mode for the image (RGB, L, etc.)
            caption: Optional caption for the image
            format: Optional format to use when saving (png, jpg, etc.)
        """
        self._data = data
        self._mode = mode
        self._caption = caption
        self._format = format

    def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
        """
        Convert the image to bytes and return with metadata.

        Returns:
            A tuple of (image_bytes, metadata_dict)

        Raises:
            TypeError: If the data is of an unsupported type, or is a string
                that is neither an existing path nor a ``data:image/`` URL.
            ValueError: If a numpy array has an unsupported shape.
        """
        image_bytes, image_format, mode, width, height = self._process_image_data()
        metadata = self._generate_metadata(image_format, mode, width, height)
        return image_bytes, metadata

    def _is_data_url(self) -> bool:
        """Return True when the data is a ``data:image/...`` URL string."""
        return isinstance(self._data, str) and self._data.startswith("data:image/")

    def _is_existing_file(self) -> bool:
        """
        Return True when the data is a str/Path naming an existing file.

        Data URLs are excluded up front: probing a multi-kilobyte base64
        string as a filesystem path can raise OSError (name too long).
        """
        if self._is_data_url() or not isinstance(self._data, (str, Path)):
            return False
        try:
            return Path(self._data).exists()
        except (OSError, ValueError):
            # Not a usable path (too long, embedded NUL, ...).
            return False

    @staticmethod
    def _encode(img: PILImage.Image, image_format: str) -> tuple[bytes, str]:
        """
        Encode a PIL image and return (bytes, format actually used).

        JPEG cannot store an alpha channel, so alpha images requested as
        JPEG are written as PNG instead. Pillow registers the encoder as
        "JPEG", so the common "jpg" spelling is mapped to it for saving
        while the returned format keeps the caller's spelling.
        """
        if img.mode in ("RGBA", "LA") and image_format.lower() in ("jpg", "jpeg"):
            image_format = "png"
        save_format = "jpeg" if image_format.lower() == "jpg" else image_format
        buffer = io.BytesIO()
        img.save(buffer, format=save_format)
        return buffer.getvalue(), image_format

    def _process_image_data(self) -> tuple[bytes, str, str | None, int | None, int | None]:
        """
        Dispatch on the input type and return the encoded image.

        Returns:
            A tuple of (image_bytes, image_format, mode, width, height)

        Raises:
            TypeError: For unsupported inputs.
        """
        # Data URLs are checked before any filesystem probing.
        if self._is_data_url():
            return self._process_base64_string()
        if self._is_existing_file():
            return self._process_file_path()
        if isinstance(self._data, (str, Path)):
            raise TypeError(
                "Unsupported image data: expected an existing file path or a "
                f"'data:image/' URL, got {type(self._data).__name__}: "
                f"{str(self._data)[:80]!r}"
            )
        if isinstance(self._data, PILImage.Image):
            return self._process_pil_image()
        if isinstance(self._data, np.ndarray):
            return self._process_numpy_array()
        if isinstance(self._data, bytes):
            return self._process_raw_bytes()
        raise TypeError(f"Unsupported image data type: {type(self._data)}")

    def _process_file_path(self) -> tuple[bytes, str, str | None, int | None, int | None]:
        """
        Read an image file verbatim and probe it with PIL.

        The file bytes are returned unchanged; PIL is only used to obtain
        the size and (unless ``mode`` was given) the mode.

        Returns:
            A tuple of (image_bytes, image_format, mode, width, height)
        """
        path = Path(str(self._data))
        image_bytes = path.read_bytes()
        image_format = self._format or path.suffix.lstrip(".") or "png"
        with PILImage.open(str(path)) as img:
            width, height = img.size
            mode = self._mode or img.mode
        return image_bytes, image_format, mode, width, height

    def _process_pil_image(self) -> tuple[bytes, str, str | None, int | None, int | None]:
        """
        Encode a PIL Image, converting to the requested mode if needed.

        Returns:
            A tuple of (image_bytes, image_format, mode, width, height)

        Raises:
            TypeError: If the stored data is not a PIL image.
        """
        if not isinstance(self._data, PILImage.Image):
            raise TypeError(f"Expected PILImage.Image, got {type(self._data)}")

        pil_image = self._data
        mode = self._mode or pil_image.mode
        image_format = self._format or (pil_image.format.lower() if pil_image.format else "png")

        img_to_save = pil_image if pil_image.mode == mode else pil_image.convert(mode)
        image_bytes, image_format = self._encode(img_to_save, image_format)
        width, height = pil_image.size
        return image_bytes, image_format, mode, width, height

    def _process_numpy_array(self) -> tuple[bytes, str, str | None, int | None, int | None]:
        """
        Encode a numpy array as an image.

        Float arrays whose maximum is <= 1.0 are treated as [0, 1] and
        scaled to [0, 255]; every other non-uint8 array is clipped to
        [0, 255].

        Returns:
            A tuple of (image_bytes, image_format, mode, width, height)

        Raises:
            TypeError: If the stored data is not a numpy array.
            ValueError: If the array shape cannot be read as an image.
        """
        if not isinstance(self._data, np.ndarray):
            raise TypeError(f"Expected numpy.ndarray, got {type(self._data)}")

        image_format = self._format or "png"
        mode = self._mode or self._guess_mode(self._data)
        pixels = self._ensure_valid_image_array(self._data)

        if pixels.dtype.kind == "f" and pixels.max() <= 1.0:
            # Clip first so negative values do not wrap around in uint8.
            pixels = (np.clip(pixels, 0.0, 1.0) * 255).astype(np.uint8)
        elif pixels.dtype != np.uint8:
            pixels = np.clip(pixels, 0, 255).astype(np.uint8)

        # Let PIL infer the layout from the array, then convert. Passing a
        # mismatching mode to fromarray reinterprets the raw buffer rather
        # than converting the pixels.
        img = PILImage.fromarray(pixels)
        if img.mode != mode:
            img = img.convert(mode)

        image_bytes, image_format = self._encode(img, image_format)
        width, height = img.size
        return image_bytes, image_format, mode, width, height

    def _process_raw_bytes(self) -> tuple[bytes, str, str | None, int | None, int | None]:
        """
        Process encoded image bytes.

        The bytes are passed through unchanged unless a different mode was
        requested, in which case the image is converted and re-encoded.
        When ``format`` is not given, the format detected by PIL is used.

        Returns:
            A tuple of (image_bytes, image_format, mode, width, height)

        Raises:
            TypeError: If the stored data is not ``bytes``.
        """
        if not isinstance(self._data, bytes):
            raise TypeError(f"Expected bytes, got {type(self._data)}")

        image_bytes = self._data
        with PILImage.open(io.BytesIO(image_bytes)) as img:
            width, height = img.size
            image_format = self._format or (img.format.lower() if img.format else "png")
            mode = self._mode or img.mode
            # Convert while the image is still open.
            if img.mode != mode:
                image_bytes, image_format = self._encode(img.convert(mode), image_format)

        return image_bytes, image_format, mode, width, height

    def _process_base64_string(self) -> tuple[bytes, str, str | None, int | None, int | None]:
        """
        Process a base64 data URL (``data:image/png;base64,...``).

        A string without a comma is treated as bare base64 and assumed to
        be PNG.

        Returns:
            A tuple of (image_bytes, image_format, mode, width, height)

        Raises:
            TypeError: If the stored data is not a string.
        """
        if not isinstance(self._data, str):
            raise TypeError(f"Expected str, got {type(self._data)}")

        if "," in self._data:
            header, encoded = self._data.split(",", 1)
            # "data:image/png;base64" -> "png"
            format_part = header.split("/")[1].split(";")[0] if "/" in header else "png"
        else:
            encoded = self._data
            format_part = "png"  # Default for raw base64

        image_format = self._format or format_part

        # TODO(@raja): See if we could optimize this  # noqa: TD003
        image_bytes = base64.b64decode(encoded)

        with PILImage.open(io.BytesIO(image_bytes)) as img:
            width, height = img.size
            mode = self._mode or img.mode
            # Convert while the image is still open.
            if img.mode != mode:
                image_bytes, image_format = self._encode(img.convert(mode), image_format)

        return image_bytes, image_format, mode, width, height

    def _generate_metadata(
        self, image_format: str, mode: str | None, width: int | None, height: int | None
    ) -> dict[str, str | int | None]:
        """
        Generate metadata for the image.

        Args:
            image_format: Format/extension of the encoded image.
            mode: PIL mode of the encoded image, if known.
            width: Width in pixels, if known.
            height: Height in pixels, if known.

        Returns:
            A dictionary of metadata
        """
        metadata: dict[str, str | int | None] = {
            "extension": image_format.lower(),
            "x-python-datatype": "dreadnode.Image.bytes",
        }

        # Same precedence as _process_image_data: data URL before path.
        if self._is_data_url():
            metadata["source-type"] = "base64"
        elif self._is_existing_file():
            metadata["source-type"] = "file"
            metadata["source-path"] = str(self._data)
        elif isinstance(self._data, PILImage.Image):
            metadata["source-type"] = "PIL.Image"
        elif isinstance(self._data, np.ndarray):
            metadata["source-type"] = "numpy.ndarray"
            metadata["array-shape"] = str(self._data.shape)
            metadata["array-dtype"] = str(self._data.dtype)
        elif isinstance(self._data, bytes):
            metadata["source-type"] = "bytes"

        if mode:
            metadata["mode"] = mode

        if width is not None and height is not None:
            metadata["width"] = width
            metadata["height"] = height

        if self._caption:
            metadata["caption"] = self._caption

        return metadata

    def _guess_mode(self, data: np.ndarray[t.Any, np.dtype[t.Any]]) -> str:
        """
        Guess the PIL mode an array represents from its shape.

        2-D arrays are "L". For 3-D arrays the last axis is tried first
        (channels-last), then the first axis (channels-first).

        Raises:
            ValueError: If the shape matches no supported layout.
        """
        if data.ndim == 2:
            return "L"

        if data.ndim == 3:
            channels_to_mode = {1: "L", 3: "RGB", 4: "RGBA"}
            for channels in (data.shape[2], data.shape[0]):
                if channels in channels_to_mode:
                    return channels_to_mode[channels]

        raise ValueError(f"Unsupported array shape for image: {data.shape}")

    def _ensure_valid_image_array(
        self, array: np.ndarray[t.Any, np.dtype[t.Any]]
    ) -> np.ndarray[t.Any, np.dtype[t.Any]]:
        """
        Rearrange an array into a layout PIL accepts.

        Returns a 2-D array for single-channel input and an (H, W, C) array
        otherwise; channels-first input is transposed.

        Raises:
            ValueError: If the shape matches no supported layout.
        """
        if array.ndim == 2:
            return array

        if array.ndim == 3:
            if array.shape[2] in (1, 3, 4):
                hwc = array
            elif array.shape[0] in (1, 3, 4):
                hwc = np.transpose(array, (1, 2, 0))
            else:
                raise ValueError(f"Unsupported numpy array shape: {array.shape}")
            # PIL rejects 3-D single-channel arrays, so drop the channel axis.
            return hwc[:, :, 0] if hwc.shape[2] == 1 else hwc

        raise ValueError(f"Unsupported numpy array shape: {array.shape}")
+ - Raw bytes with metadata + """ + + SUPPORTED_FORMATS: ClassVar[list[str]] = [ + "obj", + "glb", + "gltf", + "stl", + "fbx", + "ply", + "dae", + "usdz", + ] + + def __init__( + self, + data: Object3DDataType, + caption: str | None = None, + format: str | None = None, + ): + """ + Initialize a 3D Object. + + Args: + data: The 3D object data, which can be: + - A path to a local 3D model file (str or Path) + - Raw bytes of a 3D model file + caption: Optional caption for the 3D object + format: Optional format override (obj, glb, etc.) + """ + self._data = data + self._caption = caption + self._format = format + + def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]: + """ + Convert the 3D object to bytes and return with metadata. + + Returns: + A tuple of (object_bytes, metadata_dict) + """ + if isinstance(self._data, (str, Path)) and Path(self._data).exists(): + return self._process_file_path() + if isinstance(self._data, bytes): + format_name = self._format or "glb" + return self._data, self._generate_metadata(format_name) + raise TypeError(f"Unsupported 3D object data type: {type(self._data)}") + + def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]: + """ + Process a 3D object from a file path. + Returns: + A tuple of (object_bytes, metadata_dict) + """ + if not isinstance(self._data, (str, Path)): + raise TypeError(f"Expected str or Path for file path, got {type(self._data)}") + path = Path(self._data) + object_bytes = path.read_bytes() + format_name = self._format or path.suffix.lstrip(".") + + metadata = self._generate_metadata(format_name) + return object_bytes, metadata + + def _generate_metadata(self, format_name: str) -> dict[str, t.Any]: + """ + Generate metadata for the 3D object. + Args: + format_name: The format of the 3D object (obj, glb, etc.) 
class Table(BaseDataType):
    """
    Table data type for Dreadnode logging.

    Supports:
    - Pandas DataFrames
    - CSV/Parquet/JSON/JSON Lines files
    - Dict or list data structures
    - NumPy arrays
    """

    SUPPORTED_FORMATS: ClassVar[list[str]] = ["csv", "parquet", "json"]

    def __init__(
        self,
        data: TableDataType,
        caption: str | None = None,
        format: str | None = None,
        *,
        index: bool = False,
    ):
        """
        Initialize a Table object.

        Args:
            data: The table data, which can be:
                - A pandas DataFrame
                - A path to a CSV/JSON/JSONL/Parquet file
                - A dict or list of dicts
                - A NumPy array
            caption: Optional caption for the table
            format: Optional output format (csv, parquet, json); matched
                case-insensitively and defaulting to csv
            index: Whether to include index in the output

        Raises:
            ValueError: If ``format`` is not one of SUPPORTED_FORMATS.
        """
        self._data = data
        self._caption = caption
        # Normalise case so "CSV" or "Json" are accepted as well.
        self._format = (format or "csv").lower()
        if self._format not in self.SUPPORTED_FORMATS:
            raise ValueError(
                f"Unsupported format: {self._format}. "
                f"Supported formats are: {', '.join(self.SUPPORTED_FORMATS)}"
            )
        self._index = index

    def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
        """
        Convert the table to bytes and return with metadata.

        Returns:
            A tuple of (table_bytes, metadata_dict)

        Raises:
            ValueError: If the data cannot be turned into a DataFrame.
        """
        data_frame = self._to_dataframe()
        return self._dataframe_to_bytes(data_frame), self._generate_metadata(data_frame)

    def _to_dataframe(self) -> pd.DataFrame:
        """
        Convert the input data to a pandas DataFrame.

        Returns:
            A pandas DataFrame representation of the input data

        Raises:
            ValueError: If the path does not exist, the file extension is
                not supported, or the data type is not supported.
        """
        data = self._data
        if isinstance(data, pd.DataFrame):
            return data

        if isinstance(data, (str, Path)):
            path = Path(data)
            if not path.exists():
                # str/Path is a supported type; report the real problem.
                raise ValueError(f"Table file path does not exist: {path}")

            suffix = path.suffix.lower()
            if suffix == ".csv":
                return pd.read_csv(path)
            if suffix == ".parquet":
                return pd.read_parquet(path)
            if suffix in (".json", ".jsonl"):
                # JSON Lines holds one object per line and needs lines=True.
                return pd.read_json(path, lines=suffix == ".jsonl")
            raise ValueError(f"Unsupported file format: {suffix}")

        if isinstance(data, dict):
            return pd.DataFrame.from_dict(data)

        if isinstance(data, (list, np.ndarray)):
            return pd.DataFrame(data)

        raise ValueError(f"Unsupported table data type: {type(data)}")

    def _dataframe_to_bytes(self, data_frame: pd.DataFrame) -> bytes:
        """
        Convert the DataFrame to bytes based on the configured format.

        Args:
            data_frame: The pandas DataFrame to convert

        Returns:
            Bytes representation of the DataFrame
        """
        # self._format was validated in __init__, so these cases are exhaustive.
        if self._format == "json":
            # orient="records" emits a list of row objects; the index is not written.
            return data_frame.to_json(orient="records").encode()

        buffer = io.BytesIO()
        if self._format == "parquet":
            data_frame.to_parquet(buffer, index=self._index)
        else:
            data_frame.to_csv(buffer, index=self._index)
        return buffer.getvalue()

    def _generate_metadata(self, data_frame: pd.DataFrame) -> dict[str, t.Any]:
        """
        Generate metadata for the table.

        Args:
            data_frame: The pandas DataFrame to generate metadata for

        Returns:
            A dictionary of metadata
        """
        metadata: dict[str, t.Any] = {
            "extension": self._format,
            "x-python-datatype": "dreadnode.Table.bytes",
            "rows": len(data_frame),
            "columns": len(data_frame.columns),
            "column-names": data_frame.columns.tolist(),
        }

        if self._caption:
            metadata["caption"] = self._caption

        if isinstance(self._data, pd.DataFrame):
            metadata["source-type"] = "pandas.DataFrame"
        elif isinstance(self._data, (str, Path)):
            metadata["source-type"] = "file"
            metadata["source-path"] = str(self._data)
        elif isinstance(self._data, dict):
            metadata["source-type"] = "dict"
        elif isinstance(self._data, list):
            metadata["source-type"] = "list"
        elif isinstance(self._data, np.ndarray):
            metadata["source-type"] = "numpy.ndarray"

        return metadata
class Video(BaseDataType):
    """
    Video media type for Dreadnode logging.

    Supports:
    - Local file paths (str or Path)
    - Numpy array sequences with frame rate
    - Raw bytes with metadata
    - MoviePy VideoClip objects
    """

    def __init__(
        self,
        data: VideoDataType,
        fps: float | None = None,
        caption: str | None = None,
        format: str | None = None,
        width: int | None = None,
        height: int | None = None,
    ):
        """
        Initialize a Video object.

        Args:
            data: The video data, which can be:
                - A path to a local video file (str or Path)
                - A numpy array of frames (requires fps)
                - A list of numpy arrays for individual frames (requires fps)
                - Raw bytes
                - A MoviePy VideoClip object
            fps: Frames per second, required for numpy array input
            caption: Optional caption for the video
            format: Optional format override (mp4, avi, etc.); defaults to
                mp4, or to the file extension for file paths
            width: Optional width in pixels
            height: Optional height in pixels
        """
        self._data = data
        self._fps = fps
        self._caption = caption
        self._format = format or "mp4"
        # Remember whether the caller chose the format, so an explicit "mp4"
        # can be told apart from the default when handling file paths.
        self._format_explicit = bool(format)
        self._width = width
        self._height = height

    def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
        """
        Convert the video to bytes and return with metadata.

        Returns:
            A tuple of (video_bytes, metadata_dict)

        Raises:
            TypeError: If the data is of an unsupported type, or is a
                str/Path that does not point to an existing file.
            ValueError: If frame data is given without ``fps``, is empty,
                or has an unsupported number of dimensions.
        """
        data = self._data
        if isinstance(data, (str, Path)):
            if Path(data).exists():
                return self._process_file_path()
            # str/Path is a supported *type*; the actual problem is the path.
            # TypeError is kept so existing callers' handlers still apply.
            raise TypeError(f"Unsupported video data: path does not exist: {data}")
        if isinstance(data, bytes):
            return self._process_bytes()
        if isinstance(data, (np.ndarray, list)):
            return self._process_numpy_array()
        if isinstance(data, VideoClip):
            return self._process_moviepy_clip()
        raise TypeError(f"Unsupported video data type: {type(data)}")

    def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]:
        """
        Read a video file verbatim.

        The format is the explicit ``format`` argument when one was given,
        otherwise the file extension, otherwise mp4.

        Returns:
            A tuple of (video_bytes, metadata_dict)

        Raises:
            TypeError: If the stored data is not a str or Path.
        """
        if not isinstance(self._data, (str, Path)):
            raise TypeError(f"Expected str or Path for file path, got {type(self._data)}")

        path = Path(self._data)
        video_bytes = path.read_bytes()

        format_name = self._format
        if not self._format_explicit:
            format_name = path.suffix.lstrip(".") or format_name

        metadata = self._generate_metadata(format_name)
        metadata["source-type"] = "file"
        metadata["source-path"] = str(self._data)
        return video_bytes, metadata

    def _process_bytes(self) -> tuple[bytes, dict[str, t.Any]]:
        """
        Pass raw video bytes through unchanged.

        Returns:
            A tuple of (video_bytes, metadata_dict)

        Raises:
            TypeError: If the stored data is not ``bytes``.
        """
        if not isinstance(self._data, bytes):
            raise TypeError(f"Expected bytes, got {type(self._data)}")
        metadata = self._generate_metadata(self._format)
        metadata["source-type"] = "bytes"
        return self._data, metadata

    def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
        """
        Encode numpy frames to a video file using MoviePy.

        A 3-D array is treated as a single frame and a 4-D array as a stack
        of frames along the first axis; a list is used as the frames as-is.

        Returns:
            A tuple of (video_bytes, metadata_dict)

        Raises:
            ValueError: If ``fps`` is missing, no frames are present, or the
                array is neither 3-D nor 4-D.
            TypeError: If the stored data is not an array or list.
        """
        if not self._fps:
            raise ValueError("fps is required for numpy array video frames")

        if isinstance(self._data, np.ndarray):
            if self._data.ndim == 3:  # Single frame
                frames = [self._data]
            elif self._data.ndim == 4:  # Multiple frames
                frames = list(self._data)
            else:
                raise ValueError(f"Unsupported numpy array shape: {self._data.shape}")
        elif isinstance(self._data, list):
            frames = self._data
        else:
            raise TypeError("data must be a numpy array or list of numpy arrays")

        if not frames:
            raise ValueError("No frames found in input data")

        frame_height, frame_width = frames[0].shape[:2]

        # MoviePy writes to a path, so encode through a temporary file.
        temp_fd, temp_path = tempfile.mkstemp(suffix=f".{self._format}")
        os.close(temp_fd)
        try:
            clip = ImageSequenceClip(frames, fps=self._fps)
            clip.write_videofile(temp_path, fps=self._fps)
            video_bytes = Path(temp_path).read_bytes()
        finally:
            Path(temp_path).unlink(missing_ok=True)

        metadata = self._generate_metadata(self._format)
        metadata.update(
            {
                "frame-count": len(frames),
                "width": self._width or frame_width,
                "height": self._height or frame_height,
            }
        )
        if isinstance(self._data, np.ndarray):
            metadata["source-type"] = "numpy.ndarray"
            metadata["array-shape"] = str(self._data.shape)
            metadata["array-dtype"] = str(self._data.dtype)
        else:
            metadata["source-type"] = "list[numpy.ndarray]"
            # Duplicates "frame-count"; kept so existing consumers of this
            # key keep working.
            metadata["frames-count"] = len(frames)
        return video_bytes, metadata

    def _process_moviepy_clip(self) -> tuple[bytes, dict[str, t.Any]]:
        """
        Encode a MoviePy VideoClip object to a video file.

        Returns:
            A tuple of (video_bytes, metadata_dict)

        Raises:
            TypeError: If the stored data is not a VideoClip.
        """
        if not isinstance(self._data, VideoClip):
            raise TypeError("data must be a MoviePy VideoClip object")

        # Explicit fps wins, then the clip's own, then 24. The trailing
        # "or 24" also covers a clip whose fps attribute exists but is None.
        fps = self._fps or getattr(self._data, "fps", None) or 24

        temp_fd, temp_path = tempfile.mkstemp(suffix=f".{self._format}")
        os.close(temp_fd)
        try:
            self._data.write_videofile(temp_path, fps=fps)
            video_bytes = Path(temp_path).read_bytes()
        finally:
            Path(temp_path).unlink(missing_ok=True)

        metadata = self._generate_metadata(self._format)
        metadata["source-type"] = "moviepy.VideoClip"

        # Add clip metadata if available
        for attr in ["duration", "fps", "size", "rotation", "w", "h", "aspect_ratio"]:
            value = getattr(self._data, attr, None)
            if value is not None:
                metadata[attr] = value

        return video_bytes, metadata

    def _generate_metadata(self, format_name: str) -> dict[str, t.Any]:
        """
        Generate the metadata common to every input kind.

        Args:
            format_name: The format of the video (mp4, avi, etc.)

        Returns:
            A dictionary of metadata
        """
        metadata: dict[str, t.Any] = {
            "extension": format_name.lower(),
            "x-python-datatype": "dreadnode.Video.bytes",
        }

        if self._fps:
            metadata["fps"] = self._fps

        if self._width:
            metadata["width"] = self._width

        if self._height:
            metadata["height"] = self._height

        if self._caption:
            metadata["caption"] = self._caption

        return metadata
- export_format = "wav" - # Raw audio data from AudioSegment class is in bytes format. - raw_bytes_data = obj.raw_data - schema = { - "x-python-datatype": "pydub.AudioSegment", - "format": export_format, - "x-audio-sample-rate": obj.frame_rate, - "x-audio-channels": obj.channels, - "x-audio-sample-width": obj.sample_width, - } - - return _handle_bytes(raw_bytes_data, _seen, schema) - - -def _handle_moviepy_video_clip( - obj: t.Any, - _seen: set[int], -) -> tuple[JsonValue, JsonDict]: - import tempfile - from pathlib import Path - - from moviepy import ( # type: ignore[import-untyped, unused-ignore, import-not-found] - VideoFileClip, - ) - - if not isinstance(obj, VideoFileClip): - return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA - - # Infer format from filename if available - export_format = "mp4" - if getattr(obj, "filename", None): - ext = Path(obj.filename).suffix.lstrip(".").lower() - if ext: - export_format = ext - - # Export video to temp file - with tempfile.NamedTemporaryFile(suffix=f".{export_format}") as temp_file: - obj.write_videofile( - temp_file.name, - ) - raw_bytes_data = Path(temp_file.name).read_bytes() - - schema = { - "x-python-datatype": "moviepy.VideoFileClip", - "format": export_format, - "start": obj.start, - "end": obj.end, - "duration": obj.duration, - "fps": obj.fps, - "size": obj.size, - "rotation": obj.rotation, - "aspect_ratio": obj.aspect_ratio, - "w": obj.w, - "h": obj.h, - "n_frames": obj.n_frames, - } - - return _handle_bytes(raw_bytes_data, _seen, schema) - - def _handle_dataset(obj: t.Any, _seen: set[int]) -> tuple[JsonValue, JsonDict]: import datasets # type: ignore[import-untyped] @@ -506,6 +408,22 @@ def _handle_dataset(obj: t.Any, _seen: set[int]) -> tuple[JsonValue, JsonDict]: ) +def _handle_custom_data_type(obj: BaseDataType, _seen: set[int]) -> tuple[JsonValue, JsonDict]: + """Handler for Dreadnode custom data types.""" + if not isinstance(obj, BaseDataType): + return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA + + # Get the 
serialized data and metadata from the media type + data, metadata = obj.to_serializable() + + if isinstance(data, bytes): + return _handle_bytes(data, _seen, metadata) + serialized, schema = _serialize(data, _seen) + schema.update(metadata) + + return serialized, schema + + @lru_cache(maxsize=1) def _get_handlers() -> dict[type, HandlerFunc]: handlers: dict[type, HandlerFunc] = { @@ -587,25 +505,13 @@ def _get_handlers() -> dict[type, HandlerFunc]: handlers[pd.DataFrame] = _handle_pandas_dataframe handlers[pd.Series] = _handle_pandas_series - with contextlib.suppress(Exception): - import PIL.Image - - handlers[PIL.Image.Image] = _handle_pil_image - with contextlib.suppress(Exception): import datasets handlers[datasets.Dataset] = _handle_dataset with contextlib.suppress(Exception): - from pydub import AudioSegment - - handlers[AudioSegment] = _handle_pydub_audio_segment - - with contextlib.suppress(Exception): - from moviepy import VideoFileClip - - handlers[VideoFileClip] = _handle_moviepy_video_clip + handlers[BaseDataType] = _handle_custom_data_type return handlers diff --git a/examples/log_object/audio.ipynb b/examples/log_object/audio.ipynb new file mode 100644 index 00000000..102a9ee0 --- /dev/null +++ b/examples/log_object/audio.ipynb @@ -0,0 +1,241 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dreadnode Audio Logging\n", + "\n", + "This notebook demonstrates how to log audio data using Dreadnode's `Audio` data type. 
The examples cover various audio formats and sources including file paths, numpy arrays, and pydub AudioSegment objects.\n", + "\n", + "## Features\n", + "\n", + "- Log audio files directly from disk (WAV, MP3, etc.)\n", + "- Convert and log numpy arrays as audio\n", + "- Process and log pydub AudioSegment objects\n", + "- Add captions and metadata to audio logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import dreadnode as dn\n", + "\n", + "dn.configure(\n", + " server=\"Your Dreadnode API\", # Replace with your server address\n", + " token=\"Your Dreadnode API Key\", # Replace with your token\n", + " project=\"audio-examples\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. File Path Examples\n", + "\n", + "The simplest way to log audio is directly from file paths. Dreadnode supports common audio formats like WAV, MP3, OGG, and more." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created test audio file at: /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpzkbdqvce.wav\n", + "21:24:25.708 audio_file_examples\n" + ] + } + ], + "source": [ + "import dreadnode as dn\n", + "import numpy as np\n", + "from dreadnode import Audio\n", + "import tempfile\n", + "import os\n", + "import soundfile as sf\n", + "\n", + "# Create a test audio file - a simple sine wave\n", + "sample_rate = 44100\n", + "duration = 2.0 \n", + "frequency = 440\n", + "\n", + "# Generate the sine wave\n", + "t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n", + "sine_wave = np.sin(2 * np.pi * frequency * t) * 0.5\n", + "\n", + "# Create a temporary WAV file\n", + "temp_wav = tempfile.NamedTemporaryFile(suffix=\".wav\", delete=False)\n", + "temp_wav.close()\n", + "audio_file_path = temp_wav.name\n", + "\n", + "sf.write(audio_file_path, sine_wave, 
sample_rate)\n", + "print(f\"Created test audio file at: {audio_file_path}\")\n", + "\n", + "# Log examples from file paths\n", + "with dn.run(\"audio_file_examples\") as r:\n", + " # Basic file logging\n", + " dn.log_input(\"audio_file\", Audio(audio_file_path, caption=\"440 Hz sine wave - WAV format\"))\n", + " \n", + " # With explicit format override\n", + " dn.log_input(\"format_override\", Audio(audio_file_path, format=\"wav\", caption=\"With explicit format\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Numpy Array Examples\n", + "\n", + "Numpy arrays are commonly used for audio processing in Python. When logging numpy arrays, a sample rate must be provided." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21:24:25.738 audio_numpy_examples\n" + ] + } + ], + "source": [ + "with dn.run(\"audio_numpy_examples\") as r:\n", + " sample_rate = 44100\n", + " \n", + " # Single frequency (A4 - 440 Hz) for 3 seconds\n", + " duration = 3.0\n", + " t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n", + " sine_440hz = np.sin(2 * np.pi * 440 * t) * 0.5\n", + " dn.log_input(\"sine_wave_440hz\", Audio(sine_440hz, sample_rate=sample_rate, caption=\"A4 note (440 Hz)\"))\n", + " \n", + " # Frequency sweep (100 Hz to 10000 Hz)\n", + " duration = 5.0\n", + " t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n", + " frequency = np.logspace(np.log10(100), np.log10(10000), len(t))\n", + " sweep = np.sin(2 * np.pi * frequency * t / sample_rate * frequency) * 0.5\n", + " dn.log_input(\"freq_sweep\", Audio(sweep, sample_rate=sample_rate, caption=\"Frequency sweep (100 Hz to 10 kHz)\"))\n", + " \n", + " # Chord - multiple frequencies combined\n", + " duration = 2.0\n", + " t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n", + " c4 = np.sin(2 * np.pi * 261.63 * t) 
* 0.3\n", + " e4 = np.sin(2 * np.pi * 329.63 * t) * 0.3\n", + " g4 = np.sin(2 * np.pi * 392.00 * t) * 0.3\n", + " chord = c4 + e4 + g4\n", + " dn.log_input(\"chord\", Audio(chord, sample_rate=sample_rate, caption=\"C major chord\"))\n", + " \n", + " # Stereo audio (two channels)\n", + " duration = 3.0\n", + " t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n", + " left = np.sin(2 * np.pi * 440 * t) * 0.5\n", + " right = np.sin(2 * np.pi * 880 * t) * 0.5\n", + " stereo = np.column_stack((left, right))\n", + " dn.log_input(\"stereo_sine\", Audio(stereo, sample_rate=sample_rate, caption=\"Stereo audio (440 Hz left, 880 Hz right)\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Pydub AudioSegment Examples\n", + "\n", + "Pydub is a popular library for audio manipulation in Python. Dreadnode supports logging AudioSegment objects directly, which enables powerful audio processing before logging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21:24:25.862 audio_pydub_examples\n" + ] + } + ], + "source": [ + "from pydub import AudioSegment\n", + "from pydub.generators import Sine\n", + "\n", + "with dn.run(\"audio_pydub_examples\") as r:\n", + " # Load the file with pydub\n", + " audio_segment = AudioSegment.from_file(audio_file_path)\n", + " \n", + " # Log the original AudioSegment\n", + " dn.log_input(\"pydub_original\", Audio(audio_segment, caption=\"Original audio with pydub\"))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean it up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.unlink(audio_file_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, we demonstrated how to log audio in Dreadnode from 
various sources:\n", + "\n", + "1. Audio files (e.g., WAV, MP3 files)\n", + "2. Numpy arrays with sample rate\n", + "3. Pydub AudioSegment objects\n", + "4. Audio with custom metadata and captions\n", + "\n", + "We also showed more advanced audio processing techniques including:\n", + "- Generating various audio signals (sine waves, chords, sweeps)\n", + "- Audio manipulations (volume changes, fades, reversing)\n", + "\n", + "The `Audio` data type in Dreadnode provides a flexible way to track and analyze audio data throughout your projects and workflows." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/log_object/image.ipynb b/examples/log_object/image.ipynb new file mode 100644 index 00000000..ded95f2c --- /dev/null +++ b/examples/log_object/image.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dreadnode Image Logging\n", + "\n", + "This notebook demonstrates how to log images using Dreadnode's `Image` data type. 
The examples cover various image formats and sources including file paths, PIL images, numpy arrays, and base64 encoded strings.\n", + "\n", + "## Features\n", + "\n", + "- Log images from file paths (JPG, PNG, etc.)\n", + "- Convert and log PIL Image objects\n", + "- Transform numpy arrays into images\n", + "- Handle raw bytes and base64 encoded images\n", + "- Convert between image modes (RGB, RGBA, grayscale)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import dreadnode as dn\n", + "\n", + "dn.configure(\n", + " server=\"Your Dreadnode API\", # Replace with your server address\n", + " token=\"Your Dreadnode API Key\", # Replace with your token\n", + " project=\"image-examples\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. File Path Examples\n", + "\n", + "Let's first look at logging images directly from file paths. We'll create a temporary image file to use in our examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created test image at: /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpu5xo56lz.png\n", + "21:05:36.657 file_path_example\n" + ] + } + ], + "source": [ + "from dreadnode import Image\n", + "from PIL import Image as PILImage\n", + "import numpy as np\n", + "import tempfile\n", + "import os\n", + "\n", + "temp_file = tempfile.NamedTemporaryFile(suffix=\".png\", delete=False)\n", + "temp_file.close()\n", + "image_file_path = temp_file.name\n", + "\n", + "# Generate a simple gradient test image\n", + "width, height = 300, 200\n", + "img_array = np.zeros((height, width, 3), dtype=np.uint8)\n", + "for x in range(width):\n", + " for y in range(height):\n", + " r = int(255 * x / width)\n", + " g = int(255 * y / height)\n", + " b = int(255 * (x + y) / (width + height))\n", + " img_array[y, x] = [r, g, b]\n", + " \n", + "# Save the generated image\n", + "test_img = PILImage.fromarray(img_array)\n", + "test_img.save(image_file_path)\n", + "\n", + "print(f\"Created test image at: {image_file_path}\")\n", + "\n", + "with dn.run(\"file_path_example\") as r:\n", + " # Basic file path logging\n", + " dn.log_input(\"image_file\", Image(image_file_path, caption=\"RGB Gradient Example\"))\n", + " \n", + " # Load and convert to grayscale\n", + " dn.log_input(\"grayscale_image\", Image(image_file_path, mode=\"L\", caption=\"Grayscale Conversion\"))\n", + " \n", + " # Explicit format override\n", + " dn.log_input(\"format_override\", Image(image_file_path, format=\"jpg\", caption=\"Format override to JPG\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. PIL Image Objects\n", + "\n", + "Dreadnode can directly log PIL Image objects, which allows you to perform image processing before logging." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21:06:38.295 pil_image_example\n" + ] + } + ], + "source": [ + "with dn.run(\"pil_image_example\") as r:\n", + " \n", + " pil_img = PILImage.open(image_file_path)\n", + " \n", + " # Log the original PIL image\n", + " dn.log_input(\"original_pil\", Image(pil_img, caption=\"Original PIL Image\"))\n", + " \n", + " # Convert to grayscale\n", + " grayscale = pil_img.convert(\"L\")\n", + " dn.log_input(\"pil_grayscale\", Image(grayscale, caption=\"Grayscale PIL Image\"))\n", + " \n", + " # Apply rotation\n", + " rotated = pil_img.rotate(45, expand=True)\n", + " dn.log_input(\"pil_rotated\", Image(rotated, caption=\"Rotated 45 degrees\"))\n", + " \n", + " # Resize\n", + " resized = pil_img.resize((150, 100))\n", + " dn.log_input(\"pil_resized\", Image(resized, caption=\"Resized to 150×100\"))\n", + " \n", + " # Convert to RGBA (with transparency)\n", + " rgba = pil_img.convert(\"RGBA\")\n", + " # Add transparency to the top half\n", + " data = np.array(rgba)\n", + " data[:data.shape[0]//2, :, 3] = 128 # 50% transparency to top half\n", + " rgba_modified = PILImage.fromarray(data)\n", + " dn.log_input(\"pil_rgba\", Image(rgba_modified, format=\"png\", caption=\"RGBA with transparency\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Numpy Arrays\n", + "\n", + "Numpy arrays are commonly used for image representation in machine learning. Dreadnode can log numpy arrays as images in various formats." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21:07:14.133 image_numpy_test\n", + "\n", + "--- Testing Numpy Arrays ---\n" + ] + } + ], + "source": [ + "with dn.run(\"image_numpy_test\") as r:\n", + " \n", + " # 3.1 RGB Array (channels last - HWC format)\n", + " rgb_array = np.random.randint(0, 255, (200, 200, 3), dtype=np.uint8)\n", + " dn.log_input(\"numpy_rgb\", Image(rgb_array, caption=\"Random RGB Array (200×200×3)\"))\n", + " \n", + " # 3.2 Grayscale Array (2D)\n", + " gray_array = np.random.randint(0, 255, (200, 200), dtype=np.uint8)\n", + " dn.log_input(\"numpy_gray\", Image(gray_array, caption=\"Random Grayscale Array (200×200)\"))\n", + " \n", + " # 3.3 RGBA Array with transparency\n", + " rgba_array = np.zeros((200, 200, 4), dtype=np.uint8)\n", + " # Create a red square with 50% transparency\n", + " rgba_array[50:150, 50:150, 0] = 255 # Red channel\n", + " rgba_array[50:150, 50:150, 3] = 128 # Alpha channel (50%)\n", + " dn.log_input(\"numpy_rgba\", Image(rgba_array, format=\"png\", caption=\"RGBA with transparent red square\"))\n", + " \n", + " # 3.4 Float array [0-1] range\n", + " float_array = np.zeros((200, 200, 3), dtype=np.float32)\n", + " # Make a color gradient\n", + " for i in range(200):\n", + " float_array[:, i, 0] = i / 200.0 # Red increases from left to right\n", + " float_array[i, :, 1] = i / 200.0 # Green increases from top to bottom\n", + " dn.log_input(\"numpy_float\", Image(float_array, caption=\"Float gradient (0-1 range)\"))\n", + " \n", + " # 3.5 Channels first format (PyTorch style - CHW)\n", + " chw_array = np.random.randint(0, 255, (3, 200, 200), dtype=np.uint8)\n", + " dn.log_input(\"numpy_chw\", Image(chw_array, caption=\"Channels-first array (3×200×200)\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Raw Bytes and Binary Data\n", + "\n", + "Images can also be logged from raw bytes, which is useful when working with image data from APIs or databases." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21:08:25.913 bytes_test\n" + ] + } + ], + "source": [ + "import io\n", + "\n", + "with dn.run(\"bytes_test\") as r:\n", + " # Create a simple test image to use for our examples\n", + " pil_img = PILImage.new(\"RGB\", (100, 100), color=\"red\")\n", + " \n", + " # 4.1 Basic PNG bytes\n", + " buffer = io.BytesIO()\n", + " pil_img.save(buffer, format=\"PNG\")\n", + " png_bytes = buffer.getvalue()\n", + " dn.log_input(\"bytes_png\", Image(png_bytes, format=\"png\", caption=\"PNG bytes (red square)\"))\n", + " \n", + " # 4.2 JPEG bytes\n", + " buffer = io.BytesIO()\n", + " pil_img.save(buffer, format=\"JPEG\", quality=90)\n", + " jpeg_bytes = buffer.getvalue()\n", + " dn.log_input(\"bytes_jpeg\", Image(jpeg_bytes, format=\"jpeg\", caption=\"JPEG bytes (red square)\"))\n", + " \n", + " # 4.3 Bytes with mode conversion\n", + " dn.log_input(\"bytes_grayscale\", Image(png_bytes, format=\"png\", mode=\"L\", caption=\"PNG bytes converted to grayscale\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Base64 Encoded Images\n", + "\n", + "Base64 encoded images are common in web applications and APIs. Dreadnode supports Data URLs and raw base64 strings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "with dn.run(\"base64_test\") as r:\n", + " pil_img = PILImage.new(\"RGB\", (100, 100), color=\"blue\")\n", + " \n", + " # Save as PNG\n", + " buffer = io.BytesIO()\n", + " pil_img.save(buffer, format=\"PNG\")\n", + " png_bytes = buffer.getvalue()\n", + " \n", + " # Create base64 string with data URL\n", + " png_base64 = base64.b64encode(png_bytes).decode('utf-8')\n", + " png_data_url = f\"data:image/png;base64,{png_base64}\"\n", + " \n", + " # 5.1 Log with data URL format\n", + " dn.log_input(\"base64_dataurl\", Image(png_data_url, caption=\"Data URL format (blue square)\"))\n", + " \n", + " # 5.2 Log with grayscale conversion\n", + " dn.log_input(\"base64_grayscale\", Image(png_data_url, mode=\"L\", caption=\"Data URL converted to grayscale\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, we've demonstrated how to log images in Dreadnode from various sources:\n", + "\n", + "1. File paths (e.g., JPG, PNG files)\n", + "2. PIL Image objects\n", + "3. Numpy arrays in different formats\n", + "4. Raw bytes and binary data\n", + "5. 
Base64 encoded strings\n", + "\n", + "We've also shown how to apply transformations like format conversion, grayscale conversion, and resizing before logging.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/log_object/object3d.ipynb b/examples/log_object/object3d.ipynb new file mode 100644 index 00000000..a0e0b3d0 --- /dev/null +++ b/examples/log_object/object3d.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dreadnode 3D Object Logging\n", + "\n", + "This notebook demonstrates how to log 3D object data using Dreadnode's `Object3D` data type." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import dreadnode as dn\n", + "\n", + "dn.configure(\n", + " server=\"Your Dreadnode API\", # Replace with your server address\n", + " token=\"Your Dreadnode API Key\", # Replace with your token\n", + " project=\"object3d-examples\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "22:15:13.441 object3d_test\n" + ] + } + ], + "source": [ + "import dreadnode as dn\n", + "from dreadnode.data_types import Object3D\n", + "import numpy as np\n", + "import tempfile\n", + "import os\n", + "import struct\n", + "\n", + "\n", + "def create_sample_glb():\n", + " temp_glb = tempfile.NamedTemporaryFile(suffix=\".glb\", delete=False)\n", + " temp_glb.close()\n", + " glb_path = temp_glb.name\n", + " magic = b'glTF'\n", + " version = struct.pack('=2023.1.0,<=2025.3.0", extras = ["s3"]} # Pinned for datasets compatibility transformers = { version = "^4.41.0", optional = true } +soundfile = "^0.13.1" +moviepy = "^2.1.2" [tool.poetry.extras] training = ["transformers"] From e1c48b4994d961dd732e54677f94c5d7fcd53a98 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Fri, 9 May 2025 15:38:19 -0700 Subject: [PATCH 2/6] Fix ruff errors --- dreadnode/data_types/image.py | 13 ++++++++----- dreadnode/data_types/video.py | 6 ++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dreadnode/data_types/image.py b/dreadnode/data_types/image.py index 130bf68b..ee886b2e 100644 --- a/dreadnode/data_types/image.py +++ b/dreadnode/data_types/image.py @@ -255,11 +255,12 @@ def _generate_metadata( def _guess_mode(self, data: np.ndarray[t.Any, np.dtype[t.Any]]) -> str: """Guess what type of image the np.array is representing.""" ndims = data.ndim - - if ndims == 2: + grayscale_dim = 2 + rgb_dim = 3 + if ndims == grayscale_dim: return "L" - if 
ndims == 3: + if ndims == rgb_dim: # Map shape to mode for channels-last (HWC) and channels-first (CHW) shape_to_mode = { (1,): "L", @@ -277,12 +278,14 @@ def _ensure_valid_image_array( self, array: np.ndarray[t.Any, np.dtype[t.Any]] ) -> np.ndarray[t.Any, np.dtype[t.Any]]: """Convert numpy array to a format suitable for PIL.""" + grayscale_dim = 2 + rgb_dim = 3 # Handle grayscale (2D arrays) - if array.ndim == 2: + if array.ndim == grayscale_dim: return array # Handle standard 3D arrays - if array.ndim == 3: + if array.ndim == rgb_dim: # Channels-last format (HWC) - standard for PIL if array.shape[2] in (1, 3, 4): return array diff --git a/dreadnode/data_types/video.py b/dreadnode/data_types/video.py index caf00bd4..f47a2ec2 100644 --- a/dreadnode/data_types/video.py +++ b/dreadnode/data_types/video.py @@ -112,10 +112,12 @@ def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]: if not isinstance(self._data, (np.ndarray, list)): raise TypeError("data must be a numpy array or list of numpy arrays") frames = [] + rgb_dim = 3 + rgba_dim = 4 if isinstance(self._data, np.ndarray): - if self._data.ndim == 3: # Single frame + if self._data.ndim == rgb_dim: # Single frame frames = [self._data] - elif self._data.ndim == 4: # Multiple frames + elif self._data.ndim == rgba_dim: # Multiple frames frames = [self._data[i] for i in range(self._data.shape[0])] else: raise ValueError(f"Unsupported numpy array shape: {self._data.ndim}") From b40fefc018f9b8c9de660a096156e846cfe8b57f Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Fri, 9 May 2025 15:52:58 -0700 Subject: [PATCH 3/6] Fix mypy errors --- dreadnode/data_types/audio.py | 6 +++--- dreadnode/data_types/video.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/dreadnode/data_types/audio.py b/dreadnode/data_types/audio.py index 3271647a..cbb78ce1 100644 --- a/dreadnode/data_types/audio.py +++ b/dreadnode/data_types/audio.py @@ -3,12 +3,12 @@ from pathlib import Path 
import numpy as np -import soundfile as sf -from pydub import AudioSegment +import soundfile as sf # type: ignore # noqa: PGH003 +from pydub import AudioSegment # type: ignore # noqa: PGH003 from dreadnode.data_types.base_data_type import BaseDataType -AudioDataType = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment +AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment class Audio(BaseDataType): diff --git a/dreadnode/data_types/video.py b/dreadnode/data_types/video.py index f47a2ec2..430ec3e1 100644 --- a/dreadnode/data_types/video.py +++ b/dreadnode/data_types/video.py @@ -4,14 +4,13 @@ from pathlib import Path import numpy as np -from moviepy.video.io.ImageSequenceClip import ImageSequenceClip -from moviepy.video.VideoClip import VideoClip +from moviepy.video.io.ImageSequenceClip import ImageSequenceClip # type: ignore # noqa: PGH003 +from moviepy.video.VideoClip import VideoClip # type: ignore # noqa: PGH003 +from numpy.typing import NDArray from dreadnode.data_types.base_data_type import BaseDataType -VideoDataType = ( - str | Path | np.ndarray[t.Any, t.Any] | bytes | list[np.ndarray[t.Any, t.Any]] | VideoClip -) +VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | VideoClip class Video(BaseDataType): @@ -81,6 +80,8 @@ def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]: Returns: A tuple of (video_bytes, metadata_dict) """ + if not isinstance(self._data, (str, Path)): + raise TypeError("Expected file path as str or Path") video_bytes = Path(self._data).read_bytes() format_name = self._format @@ -98,6 +99,8 @@ def _process_bytes(self) -> tuple[bytes, dict[str, t.Any]]: Returns: A tuple of (video_bytes, metadata_dict) """ + if not isinstance(self._data, bytes): + raise TypeError("Expected bytes for video data") metadata = self._generate_metadata(self._format) return self._data, metadata From 432b56ae8a9d769b60487cf823f3eda8720d23a9 Mon Sep 17 00:00:00 2001 From: 
Raja Sekhar Rao Dheekonda Date: Tue, 13 May 2025 14:07:27 -0700 Subject: [PATCH 4/6] Updated poetry lock --- poetry.lock | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 82964f2a..91d4bcb8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3619,6 +3619,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"}, + {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"}, @@ -3627,6 +3628,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"}, + {file = 
"ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"}, @@ -3635,6 +3637,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"}, + {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"}, @@ -3643,6 +3646,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"}, {file = 
"ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"}, + {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"}, @@ -3651,6 +3655,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"}, + {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"}, {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, @@ 
-4590,4 +4595,4 @@ training = ["transformers"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "41eca5e8e88105efc70fe88fc0a8259511439b329da854c463d1e13df445165e" +content-hash = "5de28adea02b5d3763e9fd942b8d5077b71eaa158bc1ebf05aa7ce7befc88d92" From b1efbc1a0012b5dac555cc315a2146afe0f785b5 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Wed, 14 May 2025 18:43:08 -0700 Subject: [PATCH 5/6] Add missing dependency --- poetry.lock | 4 ++-- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 91d4bcb8..6ad50a3e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3080,7 +3080,7 @@ version = "0.25.1" description = "Manipulate audio with an simple and easy high level interface" optional = false python-versions = "*" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"}, {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, @@ -4595,4 +4595,4 @@ training = ["transformers"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "5de28adea02b5d3763e9fd942b8d5077b71eaa158bc1ebf05aa7ce7befc88d92" +content-hash = "5385eea978274d5d19adaa7fc49a3325016b0391e942c3881adc42cba8146f7e" diff --git a/pyproject.toml b/pyproject.toml index 67dbd396..c8c41200 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ fsspec = {version = ">=2023.1.0,<=2025.3.0", extras = ["s3"]} # Pinned for datas transformers = { version = "^4.41.0", optional = true } soundfile = "^0.13.1" moviepy = "^2.1.2" +pydub = "^0.25.1" [tool.poetry.extras] training = ["transformers"] From 1652f436461cc12ed79bd9a7e99d5bb01b03fbfe Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Thu, 15 May 2025 10:13:45 -0700 Subject: [PATCH 6/6] Fix object identification to maintain schema 
uniqueness while preserving storage efficiency --- dreadnode/tracing/span.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/dreadnode/tracing/span.py b/dreadnode/tracing/span.py index a14b2125..92f14c78 100644 --- a/dreadnode/tracing/span.py +++ b/dreadnode/tracing/span.py @@ -1,3 +1,4 @@ +import hashlib import logging import re import types @@ -368,20 +369,28 @@ def log_object( data_hash = serialized.data_hash schema_hash = serialized.schema_hash - # Store object if we haven't already - if data_hash not in self._objects: - self._objects[data_hash] = self._create_object(serialized) - - object_ = self._objects[data_hash] + # Create a composite key that represents both data and schema + hash_input = f"{data_hash}:{schema_hash}" + composite_hash = hashlib.sha1(hash_input.encode()).hexdigest()[:16] # noqa: S324 # Store schema if new if schema_hash not in self._object_schemas: self._object_schemas[schema_hash] = serialized.schema - # Build event attributes + # Check if we already have this exact composite hash + if composite_hash not in self._objects: + # Create a new object, but use the data_hash for deduplication of storage + obj = self._create_object_by_hash(serialized, composite_hash) + + # Store with composite hash so we can look it up by the combination + self._objects[composite_hash] = obj + + object_ = self._objects[composite_hash] + + # Build event attributes, use composite hash in events event_attributes = { **attributes, - EVENT_ATTRIBUTE_OBJECT_HASH: object_.hash, + EVENT_ATTRIBUTE_OBJECT_HASH: composite_hash, EVENT_ATTRIBUTE_ORIGIN_SPAN_ID: trace_api.format_span_id( trace_api.get_current_span().get_span_context().span_id, ), @@ -410,8 +419,8 @@ def _store_file_by_hash(self, data: bytes, full_path: str) -> str: return str(self._file_system.unstrip_protocol(full_path)) - def _create_object(self, serialized: Serialized) -> Object: - """Create an ObjectVal or ObjectUri depending on size.""" + def 
_create_object_by_hash(self, serialized: Serialized, object_hash: str) -> Object: + """Create an ObjectVal or ObjectUri depending on size with a specific hash.""" data = serialized.data data_bytes = serialized.data_bytes data_len = serialized.data_len @@ -420,17 +429,19 @@ def _create_object(self, serialized: Serialized) -> Object: if data is None or data_bytes is None or data_len <= MAX_INLINE_OBJECT_BYTES: return ObjectVal( - hash=data_hash, + hash=object_hash, value=data, schema_hash=schema_hash, ) # Offload to file system (e.g., S3) + # For storage efficiency, still use just the data_hash for the file path + # This ensures we don't duplicate storage for the same data full_path = f"{self._prefix_path.rstrip('/')}/{data_hash}" object_uri = self._store_file_by_hash(data_bytes, full_path) return ObjectUri( - hash=data_hash, + hash=object_hash, uri=object_uri, schema_hash=schema_hash, size=data_len,