From 8bfe25824b2b9d63626be0f87e0f7c175fb47a71 Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda <raja@dreadnode.io>
Date: Fri, 9 May 2025 15:23:29 -0700
Subject: [PATCH 1/6] Add log object support for custom data types: Image,
 Video, Table, Audio, and 3D Objects

---
 dreadnode/__init__.py                  |   6 +
 dreadnode/data_types/__init__.py       |   7 +
 dreadnode/data_types/audio.py          | 186 ++++++++++++++
 dreadnode/data_types/base_data_type.py |  17 ++
 dreadnode/data_types/image.py          | 294 +++++++++++++++++++++
 dreadnode/data_types/object_3d.py      | 101 ++++++++
 dreadnode/data_types/py.typed          |   0
 dreadnode/data_types/table.py          | 157 ++++++++++++
 dreadnode/data_types/video.py          | 230 +++++++++++++++++
 dreadnode/serialization.py             | 130 ++--------
 examples/log_object/audio.ipynb        | 241 ++++++++++++++++++
 examples/log_object/image.ipynb        | 322 +++++++++++++++++++++++
 examples/log_object/object3d.ipynb     | 117 +++++++++
 examples/log_object/table.ipynb        | 165 ++++++++++++
 examples/log_object/video.ipynb        | 340 +++++++++++++++++++++++++
 poetry.lock                            | 133 +++++++++-
 pyproject.toml                         |   2 +
 17 files changed, 2326 insertions(+), 122 deletions(-)
 create mode 100644 dreadnode/data_types/__init__.py
 create mode 100644 dreadnode/data_types/audio.py
 create mode 100644 dreadnode/data_types/base_data_type.py
 create mode 100644 dreadnode/data_types/image.py
 create mode 100644 dreadnode/data_types/object_3d.py
 create mode 100644 dreadnode/data_types/py.typed
 create mode 100644 dreadnode/data_types/table.py
 create mode 100644 dreadnode/data_types/video.py
 create mode 100644 examples/log_object/audio.ipynb
 create mode 100644 examples/log_object/image.ipynb
 create mode 100644 examples/log_object/object3d.ipynb
 create mode 100644 examples/log_object/table.ipynb
 create mode 100644 examples/log_object/video.ipynb

diff --git a/dreadnode/__init__.py b/dreadnode/__init__.py
index a7bbd9cd..7991aa89 100644
--- a/dreadnode/__init__.py
+++ b/dreadnode/__init__.py
@@ -1,3 +1,4 @@
+from dreadnode.data_types import Audio, Image, Object3D, Table, Video
 from dreadnode.main import DEFAULT_INSTANCE, Dreadnode
 from dreadnode.metric import Metric, MetricDict, Scorer
 from dreadnode.object import Object
@@ -29,17 +30,22 @@
 __version__ = VERSION
 
 __all__ = [
+    "Audio",
     "Dreadnode",
+    "Image",
     "Metric",
     "MetricDict",
     "Object",
+    "Object3D",
     "Run",
     "RunSpan",
     "Score",
     "Scorer",
     "Span",
+    "Table",
     "Task",
     "TaskSpan",
+    "Video",
     "__version__",
     "configure",
     "log_metric",
diff --git a/dreadnode/data_types/__init__.py b/dreadnode/data_types/__init__.py
new file mode 100644
index 00000000..7506d88f
--- /dev/null
+++ b/dreadnode/data_types/__init__.py
@@ -0,0 +1,7 @@
+from .audio import Audio
+from .image import Image
+from .object_3d import Object3D
+from .table import Table
+from .video import Video
+
+__all__ = ["Audio", "Image", "Object3D", "Table", "Video"]
diff --git a/dreadnode/data_types/audio.py b/dreadnode/data_types/audio.py
new file mode 100644
index 00000000..3271647a
--- /dev/null
+++ b/dreadnode/data_types/audio.py
@@ -0,0 +1,186 @@
+import io
+import typing as t
+from pathlib import Path
+
+import numpy as np
+import soundfile as sf
+from pydub import AudioSegment
+
+from dreadnode.data_types.base_data_type import BaseDataType
+
+AudioDataType = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment  # Audio inputs
+
+
+class Audio(BaseDataType):
+    """
+    Audio media type for Dreadnode logging.
+
+    Supports:
+    - Local file paths (str or Path)
+    - Numpy arrays with sample rate
+    - Raw bytes
+    - Pydub AudioSegment object
+    """
+
+    def __init__(
+        self,
+        data: AudioDataType,
+        sample_rate: int | None = None,
+        caption: str | None = None,
+        format: str | None = None,
+    ):
+        """
+        Wrap audio data so it can be logged.
+
+        Args:
+            data: The audio to wrap, one of:
+                - A path to a local audio file (str or Path)
+                - A numpy array (requires sample_rate)
+                - Raw bytes
+                - A pydub AudioSegment
+            sample_rate: Samples per second; required for numpy arrays
+            caption: Optional caption for the audio
+            format: Optional format name; files fall back to their extension,
+                every other input falls back to wav
+        """
+        # Inputs are only stored here; all processing happens in to_serializable()
+        self._data, self._sample_rate = data, sample_rate
+        self._caption, self._format = caption, format
+
+    def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
+        """
+        Encode the wrapped audio as bytes and describe it with metadata.
+        Returns:
+            A tuple of (audio_bytes, metadata_dict)
+        """
+        encoded, fmt, rate, seconds = self._process_audio_data()
+        # The probe results describe the payload in the metadata dict
+        return encoded, self._generate_metadata(fmt, rate, seconds)
+
+    def _process_audio_data(self) -> tuple[bytes, str, int | None, float | None]:
+        """Return (audio_bytes, format_name, sample_rate, duration) for the wrapped data."""
+        source = self._data
+        # Strings and Paths only count as audio when they name an existing file
+        if isinstance(source, (str, Path)) and Path(source).exists():
+            return self._process_file_path()
+        # Remaining inputs are dispatched on type; the first match wins
+        for kind, handler in (
+            (np.ndarray, self._process_numpy_array),
+            (bytes, self._process_raw_bytes),
+            (AudioSegment, self._process_pydub_audio_segment),
+        ):
+            if isinstance(source, kind):
+                return handler()
+        raise TypeError(f"Unsupported audio data type: {type(self._data)}")
+
+    def _process_file_path(self) -> tuple[bytes, str, int | None, float | None]:
+        """
+        Read an audio file from disk and probe it with soundfile.
+        Returns:
+            A tuple of (audio_bytes, format_name, sample_rate, duration)
+        """
+        location = str(self._data)
+        source = Path(location)
+        raw = source.read_bytes()
+        # Explicit format wins, then the file extension, then "wav"
+        extension = self._format or source.suffix.lstrip(".").lower() or "wav"
+        with sf.SoundFile(location) as probe:
+            # An explicit sample rate takes precedence over the detected one
+            rate = self._sample_rate or probe.samplerate
+            seconds = probe.frames / probe.samplerate
+        return raw, extension, rate, seconds
+
+    def _process_numpy_array(self) -> tuple[bytes, str, int | None, float | None]:
+        """
+        Encode a numpy array with soundfile (WAV unless a format was given).
+        Returns:
+            A tuple of (audio_bytes, format_name, sample_rate, duration)
+        Raises:
+            ValueError: If no sample rate was provided
+            TypeError: If the wrapped data is not a numpy array
+        """
+        # Validate up front so bad input fails before anything is encoded
+        if self._sample_rate is None:
+            raise ValueError('Argument "sample_rate" is required when using numpy arrays.')
+        if not isinstance(self._data, np.ndarray):
+            raise TypeError("Invalid data type for numpy array processing.")
+
+        buffer = io.BytesIO()
+        format_name = self._format or "wav"
+        sf.write(buffer, self._data, self._sample_rate, format=format_name)
+        # soundfile treats axis 0 as frames, so len() is the frame count
+        duration = len(self._data) / float(self._sample_rate)
+        return buffer.getvalue(), format_name, self._sample_rate, duration
+
+    def _process_raw_bytes(self) -> tuple[bytes, str, int | None, float | None]:
+        """
+        Pass raw bytes through untouched; the format comes from the format argument.
+        Returns:
+            A tuple of (audio_bytes, format_name, sample_rate, duration)
+        """
+        if isinstance(self._data, bytes):
+            # Duration is unknown because the bytes are never decoded
+            return self._data, self._format or "wav", self._sample_rate, None
+        raise TypeError("Raw bytes are expected for this processing method.")
+
+    def _process_pydub_audio_segment(self) -> tuple[bytes, str, int | None, float | None]:
+        """
+        Export a pydub AudioSegment into an in-memory buffer.
+        Returns:
+            A tuple of (audio_bytes, format_name, sample_rate, duration)
+        Raises:
+            TypeError: If the wrapped data is not an AudioSegment
+        """
+        segment = self._data
+        if not isinstance(segment, AudioSegment):
+            raise TypeError("AudioSegment is expected for this processing method.")
+
+        format_name = self._format or "wav"
+        sink = io.BytesIO()
+        # export() writes the encoded audio into the buffer
+        segment.export(sink, format=format_name)
+        encoded = sink.getvalue()
+
+        # len() of an AudioSegment is its length in milliseconds
+        seconds = len(segment) / 1000.0
+        # The segment's own frame rate is reported; self._sample_rate is not consulted
+        return encoded, format_name, segment.frame_rate, seconds
+
+    def _generate_metadata(
+        self, format_name: str, sample_rate: int | None, duration: float | None
+    ) -> dict[str, str | int | float | None]:
+        """
+        Build the metadata dictionary that accompanies the audio bytes.
+        Returns:
+            A dictionary of metadata
+        """
+        source = self._data
+        info: dict[str, str | int | float | None] = {
+            "extension": format_name.lower(),
+            "x-python-datatype": "dreadnode.Audio.bytes",
+        }
+        # Record where the audio came from; the first matching type wins
+        if isinstance(source, (str, Path)):
+            info.update({"source-type": "file", "source-path": str(source)})
+        else:
+            labels = (
+                (np.ndarray, "numpy.ndarray"),
+                (bytes, "bytes"),
+                (AudioSegment, "pydub.AudioSegment"),
+            )
+            for kind, label in labels:
+                if isinstance(source, kind):
+                    info["source-type"] = label
+                    break
+        # Optional fields are only emitted when a value is known
+        if sample_rate is not None:
+            info["sample-rate"] = sample_rate
+        if duration is not None:
+            info["duration"] = duration
+        if isinstance(source, AudioSegment):
+            # Channel count and sample width are only read from pydub input
+            info["channels"] = source.channels
+            info["sample-width"] = source.sample_width
+        if self._caption:
+            info["caption"] = self._caption
+        return info
diff --git a/dreadnode/data_types/base_data_type.py b/dreadnode/data_types/base_data_type.py
new file mode 100644
index 00000000..74c38119
--- /dev/null
+++ b/dreadnode/data_types/base_data_type.py
@@ -0,0 +1,17 @@
+import typing as t
+from abc import ABC, abstractmethod
+
+
+class BaseDataType(ABC):
+    """Abstract interface implemented by every loggable Dreadnode data type."""
+
+    @abstractmethod
+    def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
+        """
+        Convert this object into a serializable payload plus metadata.
+
+        Returns:
+            Tuple of (data, metadata) where:
+                - data: The serialized payload
+                - metadata: A dict describing the payload for this data type
+        """
diff --git a/dreadnode/data_types/image.py b/dreadnode/data_types/image.py
new file mode 100644
index 00000000..130bf68b
--- /dev/null
+++ b/dreadnode/data_types/image.py
@@ -0,0 +1,294 @@
+import base64
+import io
+import typing as t
+from pathlib import Path
+
+import numpy as np
+from PIL import Image as PILImage
+
+from dreadnode.data_types.base_data_type import BaseDataType
+
+ImageDataType = PILImage.Image | np.ndarray[t.Any, t.Any]  # in-memory image objects
+ImageDataOrPathType = str | Path | bytes | ImageDataType  # every input Image accepts
+
+
+class Image(BaseDataType):
+    """
+    Image media type for Dreadnode logging.
+
+    Supports:
+    - Local file paths (str or Path)
+    - PIL Image objects
+    - Numpy arrays
+    - Base64 encoded strings
+    """
+
+    def __init__(
+        self,
+        data: ImageDataOrPathType,
+        mode: str | None = None,
+        caption: str | None = None,
+        format: str | None = None,
+    ):
+        """
+        Wrap image data so it can be logged.
+
+        Args:
+            data: The image to wrap, one of:
+                - A path to a local image file (str or Path)
+                - A PIL Image object
+                - A numpy array
+                - A "data:image/..." URL holding base64 data
+                - Raw bytes
+            mode: Optional mode for the image (RGB, L, etc.)
+            caption: Optional caption for the image
+            format: Optional format to use when saving (png, jpg, etc.)
+        """
+        # Inputs are only stored here; all processing happens in to_serializable()
+        self._format = format
+        self._caption = caption
+        self._mode, self._data = mode, data
+
+    def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
+        """
+        Encode the wrapped image as bytes and describe it with metadata.
+        Returns:
+            A tuple of (image_bytes, metadata_dict)
+        """
+        encoded, fmt, mode, width, height = self._process_image_data()
+        # The probe results describe the payload in the metadata dict
+        return encoded, self._generate_metadata(fmt, mode, width, height)
+
+    def _process_image_data(self) -> tuple[bytes, str, str | None, int | None, int | None]:
+        """
+        Return (image_bytes, image_format, mode, width, height) for the wrapped data.
+        """
+        # Test for a data URL first: probing a long base64 string as a file
+        # path can make Path.exists() raise OSError (file name too long)
+        if isinstance(self._data, str) and self._data.startswith("data:image/"):
+            return self._process_base64_string()
+        if isinstance(self._data, (str, Path)) and Path(self._data).exists():
+            return self._process_file_path()
+        if isinstance(self._data, PILImage.Image):
+            return self._process_pil_image()
+        if isinstance(self._data, np.ndarray):
+            return self._process_numpy_array()
+        if isinstance(self._data, bytes):
+            return self._process_raw_bytes()
+        raise TypeError(f"Unsupported image data type: {type(self._data)}")
+
+    def _process_file_path(self) -> tuple[bytes, str, str | None, int | None, int | None]:
+        """
+        Read an image file from disk and inspect it with PIL.
+        Returns:
+            A tuple of (image_bytes, image_format, mode, width, height)
+        """
+        location = str(self._data)
+        source = Path(location)
+        raw = source.read_bytes()
+        # Explicit format wins, then the file extension, then "png"
+        extension = self._format or source.suffix.lstrip(".") or "png"
+        with PILImage.open(location) as opened:
+            # The file is only inspected here; its bytes are returned unmodified
+            (width, height), mode = opened.size, self._mode or opened.mode
+        return raw, extension, mode, width, height
+
+    def _process_pil_image(self) -> tuple[bytes, str, str | None, int | None, int | None]:
+        """
+        Encode a PIL Image into bytes, converting its mode when requested.
+
+        The output format is the explicit format, else the image's own format,
+        else png. RGBA output requested as JPEG is saved as PNG instead.
+
+        Returns:
+            A tuple of (image_bytes, image_format, mode, width, height)
+        Raises:
+            TypeError: If the wrapped data is not a PIL Image
+        """
+        if not isinstance(self._data, PILImage.Image):
+            raise TypeError(f"Expected PILImage.Image, got {type(self._data)}")
+
+        pil_image = self._data
+        mode = self._mode or pil_image.mode
+        image_format = self._format or (pil_image.format.lower() if pil_image.format else "png")
+
+        # convert() handles every target mode, including RGB/L -> RGBA
+        img_to_save = pil_image if pil_image.mode == mode else pil_image.convert(mode)
+
+        # JPEG doesn't support transparency, switch to PNG
+        if mode == "RGBA" and image_format.lower() in ("jpg", "jpeg"):
+            image_format = "png"
+
+        # PIL registers the codec as "JPEG" and rejects the "jpg" alias
+        save_format = "jpeg" if image_format.lower() == "jpg" else image_format
+
+        buffer = io.BytesIO()
+        img_to_save.save(buffer, format=save_format)
+        image_bytes = buffer.getvalue()
+        # Mode conversion never changes the pixel dimensions
+        width, height = pil_image.size
+        return image_bytes, image_format, mode, width, height
+
+    def _process_numpy_array(self) -> tuple[bytes, str, str | None, int | None, int | None]:
+        """
+        Encode a numpy array as an image.
+
+        Float arrays whose maximum is <= 1.0 are scaled to 0-255; every other
+        non-uint8 array is clipped to 0-255 and cast to uint8.
+
+        Returns:
+            A tuple of (image_bytes, image_format, mode, width, height)
+        """
+        if not isinstance(self._data, np.ndarray):
+            raise TypeError(f"Expected numpy.ndarray, got {type(self._data)}")
+        image_format = self._format or "png"
+        mode = self._mode or self._guess_mode(self._data)
+        valid_array = self._ensure_valid_image_array(self._data)
+
+        if valid_array.dtype.kind == "f" and valid_array.max() <= 1.0:
+            # Clip first so negative values cannot wrap around in the uint8 cast
+            valid_array = (np.clip(valid_array, 0.0, 1.0) * 255).astype(np.uint8)
+        elif valid_array.dtype != np.uint8:
+            valid_array = np.clip(valid_array, 0, 255).astype(np.uint8)
+        img = PILImage.fromarray(valid_array, mode=mode)
+        buffer = io.BytesIO()
+        # PIL registers the codec as "JPEG" and rejects the "jpg" alias
+        img.save(buffer, format="jpeg" if image_format.lower() == "jpg" else image_format)
+        width, height = img.size
+        return buffer.getvalue(), image_format, mode, width, height
+
+    def _process_raw_bytes(self) -> tuple[bytes, str, str | None, int | None, int | None]:
+        """
+        Inspect raw image bytes, re-encoding only when a different mode is requested.
+        Returns a tuple of (image_bytes, image_format, mode, width, height).
+        """
+        if not isinstance(self._data, bytes):
+            raise TypeError(f"Expected bytes, got {type(self._data)}")
+        image_bytes = self._data
+        image_format = self._format or "png"
+        with PILImage.open(io.BytesIO(image_bytes)) as img:
+            width, height = img.size
+            mode = self._mode or img.mode
+            if img.mode != mode:
+                buffer = io.BytesIO()
+                # PIL registers the codec as "JPEG" and rejects the "jpg" alias
+                save_format = "jpeg" if image_format.lower() == "jpg" else image_format
+                img.convert(mode).save(buffer, format=save_format)
+                image_bytes = buffer.getvalue()
+            elif not self._format and img.format:
+                # Bytes pass through untouched, so label them with the detected format
+                image_format = img.format.lower()
+        return image_bytes, image_format, mode, width, height
+
+    def _process_base64_string(self) -> tuple[bytes, str, str | None, int | None, int | None]:
+        """
+        Decode a "data:image/..." URL (or bare base64 text) into image bytes.
+
+        Returns:
+            A tuple of (image_bytes, image_format, mode, width, height)
+        Raises:
+            TypeError: If the wrapped data is not a string
+        """
+        if not isinstance(self._data, str):
+            raise TypeError(f"Expected str, got {type(self._data)}")
+
+        # Handle data URL format (data:image/png;base64,...)
+        if "," in self._data:
+            header, encoded = self._data.split(",", 1)
+            format_part = header.split("/")[1].split(";")[0] if "/" in header else "png"
+        else:
+            encoded = self._data
+            format_part = "png"  # Default for raw base64
+
+        image_format = self._format or format_part
+        # TODO(@raja): See if we could optimize this  # noqa: TD003
+        image_bytes = base64.b64decode(encoded)
+        # Open with PIL to get properties
+        with PILImage.open(io.BytesIO(image_bytes)) as img:
+            width, height = img.size
+            mode = self._mode or img.mode
+            # Re-encode only when a different mode was requested
+            if img.mode != mode:
+                buffer = io.BytesIO()
+                # PIL registers the codec as "JPEG" and rejects the "jpg" alias
+                save_format = "jpeg" if image_format.lower() == "jpg" else image_format
+                img.convert(mode).save(buffer, format=save_format)
+                image_bytes = buffer.getvalue()
+
+        return image_bytes, image_format, mode, width, height
+
+    def _generate_metadata(
+        self, image_format: str, mode: str | None, width: int | None, height: int | None
+    ) -> dict[str, str | int | None]:
+        """Generate metadata for the image."""
+        metadata: dict[str, str | int | None] = {
+            "extension": image_format.lower(),
+            "x-python-datatype": "dreadnode.Image.bytes",
+        }
+
+        # Test for a data URL first: probing a long base64 string as a file
+        # path can make Path.exists() raise OSError (file name too long)
+        if isinstance(self._data, str) and self._data.startswith("data:image/"):
+            metadata["source-type"] = "base64"
+        elif isinstance(self._data, (str, Path)) and Path(self._data).exists():
+            metadata["source-type"] = "file"
+            metadata["source-path"] = str(self._data)
+        elif isinstance(self._data, PILImage.Image):
+            metadata["source-type"] = "PIL.Image"
+        elif isinstance(self._data, np.ndarray):
+            metadata["source-type"] = "numpy.ndarray"
+            metadata["array-shape"] = str(self._data.shape)
+            metadata["array-dtype"] = str(self._data.dtype)
+        elif isinstance(self._data, bytes):
+            metadata["source-type"] = "bytes"
+
+        if mode:
+            metadata["mode"] = mode
+        if width is not None and height is not None:
+            metadata["width"] = width
+            metadata["height"] = height
+        if self._caption:
+            metadata["caption"] = self._caption
+
+        return metadata
+
+    def _guess_mode(self, data: np.ndarray[t.Any, np.dtype[t.Any]]) -> str:
+        """Infer the PIL mode ("L", "RGB" or "RGBA") from the array's shape.
+
+        2D arrays are grayscale. For 3D arrays the last axis is tried as the
+        channel axis first (HWC), then the first axis (CHW).
+
+        Raises:
+            ValueError: If the shape matches no supported layout
+        """
+        channel_modes = {1: "L", 3: "RGB", 4: "RGBA"}
+        if data.ndim == 2:
+            return "L"
+        if data.ndim == 3:
+            first, _, last = data.shape
+            for channels in (last, first):
+                guessed = channel_modes.get(channels)
+                if guessed is not None:
+                    return guessed
+
+        raise ValueError(f"Unsupported array shape for image: {data.shape}")
+
+    def _ensure_valid_image_array(
+        self, array: np.ndarray[t.Any, np.dtype[t.Any]]
+    ) -> np.ndarray[t.Any, np.dtype[t.Any]]:
+        """Return the array in a layout PIL accepts: HW, or HWC with 3/4 channels.
+
+        Channels-first (CHW) input is transposed to channels-last, and a
+        single-channel axis is dropped because PIL rejects 3D "L" arrays.
+
+        Raises:
+            ValueError: If the array is not 2D or 3D with 1, 3 or 4 channels
+        """
+        if array.ndim == 2:
+            return array
+        if array.ndim == 3:
+            if array.shape[2] not in (1, 3, 4) and array.shape[0] in (1, 3, 4):
+                array = np.transpose(array, (1, 2, 0))  # CHW -> HWC
+            if array.shape[2] in (1, 3, 4):
+                return array[:, :, 0] if array.shape[2] == 1 else array
+        raise ValueError(f"Unsupported numpy array shape: {array.shape}")
diff --git a/dreadnode/data_types/object_3d.py b/dreadnode/data_types/object_3d.py
new file mode 100644
index 00000000..6f64450b
--- /dev/null
+++ b/dreadnode/data_types/object_3d.py
@@ -0,0 +1,101 @@
+import typing as t
+from pathlib import Path
+from typing import ClassVar
+
+from dreadnode.data_types.base_data_type import BaseDataType
+
+Object3DDataType = str | Path | bytes  # inputs accepted by Object3D
+
+
+class Object3D(BaseDataType):
+    """
+    3D object media type for Dreadnode logging.
+
+    Supports:
+    - Local file paths to 3D models (.obj, .glb, .gltf, etc.)
+    - Raw bytes with metadata
+    """
+
+    SUPPORTED_FORMATS: ClassVar[list[str]] = [
+        "obj",
+        "glb",
+        "gltf",
+        "stl",
+        "fbx",
+        "ply",
+        "dae",
+        "usdz",
+    ]
+
+    def __init__(
+        self,
+        data: Object3DDataType,
+        caption: str | None = None,
+        format: str | None = None,
+    ):
+        """
+        Wrap 3D model data so it can be logged.
+
+        Args:
+            data: The 3D model to wrap, one of:
+                - A path to a local 3D model file (str or Path)
+                - Raw bytes of a 3D model file
+            caption: Optional caption for the 3D object
+            format: Optional format override (obj, glb, etc.)
+        """
+        # Inputs are only stored here; all processing happens in to_serializable()
+        self._format, self._caption = format, caption
+        self._data = data
+
+    def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Produce the raw 3D model bytes together with their metadata.
+        Returns:
+            A tuple of (object_bytes, metadata_dict)
+        """
+        payload = self._data
+        if isinstance(payload, bytes):
+            # Raw bytes carry no extension, so default to glb
+            return payload, self._generate_metadata(self._format or "glb")
+        if isinstance(payload, (str, Path)) and Path(payload).exists():
+            return self._process_file_path()
+        raise TypeError(f"Unsupported 3D object data type: {type(self._data)}")
+
+    def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Load a 3D model from disk; the format defaults to the file extension.
+        Returns:
+            A tuple of (object_bytes, metadata_dict)
+        Raises:
+            TypeError: If the wrapped data is not a str or Path
+        """
+        source = self._data
+        if isinstance(source, (str, Path)):
+            model_file = Path(source)
+            extension = self._format or model_file.suffix.lstrip(".")
+            return model_file.read_bytes(), self._generate_metadata(extension)
+        raise TypeError(f"Expected str or Path for file path, got {type(self._data)}")
+
+    def _generate_metadata(self, format_name: str) -> dict[str, t.Any]:
+        """
+        Describe the 3D object: format, optional caption and where it came from.
+        Args:
+            format_name: The format of the 3D object (obj, glb, etc.)
+        Returns:
+            A dictionary of metadata
+        """
+        source = self._data
+        info = {
+            "extension": format_name.lower(),
+            "x-python-datatype": "dreadnode.Object3D.bytes",
+        }
+        # The caption is only recorded when one was given
+        if self._caption:
+            info["caption"] = self._caption
+
+        if isinstance(source, bytes):
+            info["source-type"] = "bytes"
+        elif isinstance(source, (str, Path)):
+            info.update({"source-type": "file", "source-path": str(source)})
+
+        return info
diff --git a/dreadnode/data_types/py.typed b/dreadnode/data_types/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/dreadnode/data_types/table.py b/dreadnode/data_types/table.py
new file mode 100644
index 00000000..797946f3
--- /dev/null
+++ b/dreadnode/data_types/table.py
@@ -0,0 +1,157 @@
+import io
+import typing as t
+from pathlib import Path
+from typing import ClassVar
+
+import numpy as np
+import pandas as pd
+
+from dreadnode.data_types.base_data_type import BaseDataType
+
+TableDataType = (  # every input type Table accepts
+    pd.DataFrame | dict[t.Any, t.Any] | list[t.Any] | str | Path | np.ndarray[t.Any, t.Any]
+)
+
+
+class Table(BaseDataType):
+    """
+    Table data type for Dreadnode logging.
+
+    Supports:
+    - Pandas DataFrames
+    - CSV/Parquet/JSON files
+    - Dict or list data structures
+    - NumPy arrays
+    """
+
+    SUPPORTED_FORMATS: ClassVar[list[str]] = ["csv", "parquet", "json"]
+
+    def __init__(
+        self,
+        data: TableDataType,
+        caption: str | None = None,
+        format: str | None = None,
+        *,
+        index: bool = False,
+    ):
+        """
+        Initialize a Table object.
+        Args:
+            data: The table data, which can be:
+                - A pandas DataFrame
+                - A path to a CSV/JSON/Parquet file
+                - A dict or list of dicts
+                - A NumPy array
+            caption: Optional caption for the table
+            format: Optional output format (csv, parquet, json), case-insensitive
+            index: Whether to include index in the output
+        """
+        self._data = data
+        self._caption = caption
+        self._index = index
+        # Normalize case so "CSV" selects the same writer as "csv"
+        self._format = (format or "csv").lower()
+        if self._format not in self.SUPPORTED_FORMATS:
+            raise ValueError(
+                f"Unsupported format: {self._format}. "
+                f"Supported formats are: {', '.join(self.SUPPORTED_FORMATS)}"
+            )
+
+    def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Render the table to bytes in the configured format, with metadata.
+
+        Returns:
+            A tuple of (table_bytes, metadata_dict)
+        """
+        frame = self._to_dataframe()
+        # The same DataFrame feeds both the encoder and the metadata builder
+        encoded = self._dataframe_to_bytes(frame)
+        # Encoding runs first, so an encoding error surfaces before metadata is built
+        description = self._generate_metadata(frame)
+        return encoded, description
+
+    def _to_dataframe(self) -> pd.DataFrame:
+        """
+        Convert the input data to a pandas DataFrame.
+        Returns:
+            A pandas DataFrame representation of the input data
+        Raises:
+            ValueError: If the file extension or data type is not supported
+        """
+        if isinstance(self._data, pd.DataFrame):
+            return self._data
+        if isinstance(self._data, (str, Path)) and Path(self._data).exists():
+            path = Path(self._data)
+            suffix = path.suffix.lower()
+            if suffix == ".csv":
+                return pd.read_csv(path)
+            if suffix == ".parquet":
+                return pd.read_parquet(path)
+            if suffix in (".json", ".jsonl"):
+                # .jsonl holds one JSON document per line, which needs lines=True
+                return pd.read_json(path, lines=suffix == ".jsonl")
+            raise ValueError(f"Unsupported file format: {suffix}")
+
+        if isinstance(self._data, dict):
+            return pd.DataFrame.from_dict(self._data)
+        if isinstance(self._data, (list, np.ndarray)):
+            return pd.DataFrame(self._data)
+        raise ValueError(f"Unsupported table data type: {type(self._data)}")
+
+    def _dataframe_to_bytes(self, data_frame: pd.DataFrame) -> bytes:
+        """
+        Encode the DataFrame as bytes in the configured format.
+        Args:
+            data_frame: The pandas DataFrame to convert
+        Returns:
+            Bytes representation of the DataFrame
+        """
+        chosen = self._format
+        if chosen == "json":
+            # JSON is produced as text (one record per row) and then encoded
+            return data_frame.to_json(orient="records").encode()
+
+        # The remaining formats are written straight into an in-memory buffer
+        sink = io.BytesIO()
+        if chosen == "parquet":
+            data_frame.to_parquet(sink, index=self._index)
+        else:
+            # "csv" and any unrecognised format are both written as CSV
+            data_frame.to_csv(sink, index=self._index)
+
+        return sink.getvalue()
+
+    def _generate_metadata(self, data_frame: pd.DataFrame) -> dict[str, t.Any]:
+        """
+        Summarise the table: format, dimensions, column names and data source.
+        Args:
+            data_frame: The pandas DataFrame to generate metadata for
+        Returns:
+            A dictionary of metadata
+        """
+        source = self._data
+        info = {
+            "extension": self._format,
+            "x-python-datatype": "dreadnode.Table.bytes",
+            "rows": len(data_frame),
+            "columns": len(data_frame.columns),
+            "column-names": data_frame.columns.tolist(),
+        }
+        if self._caption:
+            info["caption"] = self._caption
+        # Record where the table came from; the first matching type wins
+        if isinstance(source, (str, Path)):
+            info.update({"source-type": "file", "source-path": str(source)})
+        else:
+            labels = (
+                (pd.DataFrame, "pandas.DataFrame"),
+                (dict, "dict"),
+                (list, "list"),
+                (np.ndarray, "numpy.ndarray"),
+            )
+            for kind, label in labels:
+                if isinstance(source, kind):
+                    info["source-type"] = label
+                    break
+        return info
diff --git a/dreadnode/data_types/video.py b/dreadnode/data_types/video.py
new file mode 100644
index 00000000..caf00bd4
--- /dev/null
+++ b/dreadnode/data_types/video.py
@@ -0,0 +1,233 @@
+import os
+import tempfile
+import typing as t
+from pathlib import Path
+
+import numpy as np
+from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
+from moviepy.video.VideoClip import VideoClip
+
+from dreadnode.data_types.base_data_type import BaseDataType
+
+VideoDataType = (
+    str | Path | np.ndarray[t.Any, t.Any] | bytes | list[np.ndarray[t.Any, t.Any]] | VideoClip
+)
+
+
+class Video(BaseDataType):
+    """
+    Video media type for Dreadnode logging.
+
+    Supports:
+    - Local file paths (str or Path)
+    - Numpy array sequences with frame rate
+    - Raw bytes with metadata
+    - MoviePy VideoClip objects
+    """
+
+    def __init__(
+        self,
+        data: VideoDataType,
+        fps: float | None = None,
+        caption: str | None = None,
+        format: str | None = None,
+        width: int | None = None,
+        height: int | None = None,
+    ):
+        """
+        Initialize a Video object.
+
+        Args:
+            data: The video data, which can be:
+                - A path to a local video file (str or Path)
+                - A numpy array of frames (requires fps)
+                - A list of numpy arrays for individual frames (requires fps)
+                - Raw bytes
+                - A MoviePy VideoClip object
+            fps: Frames per second. Required for numpy array/list input and an
+                 optional override when encoding a VideoClip. File paths and raw
+                 bytes are not re-encoded, so there it is only stored as metadata.
+            caption: Optional caption for the video
+            format: Optional format override (mp4, avi, etc.); defaults to "mp4"
+            width: Optional width in pixels
+            height: Optional height in pixels
+        """
+        self._data = data
+        self._fps = fps
+        self._caption = caption
+        self._format = format or "mp4"
+        self._width = width
+        self._height = height
+
+    def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Convert the video to bytes and return with metadata.
+
+        Returns:
+            A tuple of (video_bytes, metadata_dict)
+        Raises:
+            FileNotFoundError: If data is a str/Path that does not exist
+            TypeError: If data is not one of the supported input types
+            ValueError: If array input lacks fps, has no frames or a bad shape
+        """
+        if isinstance(self._data, (str, Path)):
+            # A str can only be a path here, so report a missing file as such
+            # instead of falling through to the "unsupported type" error.
+            if not Path(self._data).exists():
+                raise FileNotFoundError(f"Video file not found: {self._data}")
+            return self._process_file_path()
+        if isinstance(self._data, bytes):
+            return self._process_bytes()
+        if isinstance(self._data, (np.ndarray, list)):
+            return self._process_numpy_array()
+        if isinstance(self._data, VideoClip):
+            return self._process_moviepy_clip()
+        raise TypeError(f"Unsupported video data type: {type(self._data)}")
+
+    def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Process a video file from a file path.
+        Returns:
+            A tuple of (video_bytes, metadata_dict)
+        """
+        if not isinstance(self._data, (str, Path)):
+            raise TypeError("data must be a str or Path to a video file")
+        path = Path(self._data)
+        video_bytes = path.read_bytes()
+
+        # "mp4" is also the value __init__ falls back to when no format is given,
+        # so it is treated as unspecified and the file's own suffix wins.
+        format_name = self._format
+        if not format_name or format_name == "mp4":
+            format_name = path.suffix.lstrip(".") or format_name
+
+        return video_bytes, self._generate_metadata(format_name)
+
+    def _process_bytes(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Process raw bytes of video data.
+        Returns:
+            A tuple of (video_bytes, metadata_dict)
+        """
+        if not isinstance(self._data, bytes):
+            raise TypeError("data must be raw video bytes")
+        return self._data, self._generate_metadata(self._format)
+
+    def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Process numpy array frames using MoviePy.
+        Returns:
+            A tuple of (video_bytes, metadata_dict)
+        """
+        if not self._fps:
+            raise ValueError("fps is required for numpy array video frames")
+        if not isinstance(self._data, (np.ndarray, list)):
+            raise TypeError("data must be a numpy array or list of numpy arrays")
+
+        if isinstance(self._data, list):
+            frames = list(self._data)
+        elif self._data.ndim == 3:  # Single frame
+            frames = [self._data]
+        elif self._data.ndim == 4:  # Multiple frames, split along the first axis
+            frames = list(self._data)
+        else:
+            raise ValueError(
+                f"Unsupported numpy array shape: {self._data.shape} (expected 3 or 4 dimensions)"
+            )
+
+        if not frames:
+            raise ValueError("No frames found in input data")
+
+        frame_height, frame_width = frames[0].shape[:2]
+        video_bytes = self._encode_clip(ImageSequenceClip(frames, fps=self._fps), self._fps)
+
+        metadata = self._generate_metadata(self._format)
+        metadata.update(
+            {
+                "frame-count": len(frames),
+                "width": self._width or frame_width,
+                "height": self._height or frame_height,
+            }
+        )
+        if isinstance(self._data, np.ndarray):
+            metadata["source-type"] = "numpy.ndarray"
+            metadata["array-shape"] = str(self._data.shape)
+            metadata["array-dtype"] = str(self._data.dtype)
+        else:
+            metadata["source-type"] = "list[numpy.ndarray]"
+            # Duplicates "frame-count"; kept so existing consumers keep working.
+            metadata["frames-count"] = len(frames)
+        return video_bytes, metadata
+
+    def _process_moviepy_clip(self) -> tuple[bytes, dict[str, t.Any]]:
+        """
+        Process a MoviePy VideoClip object.
+        Returns:
+            A tuple of (video_bytes, metadata_dict)
+        """
+        if not isinstance(self._data, VideoClip):
+            raise TypeError("data must be a MoviePy VideoClip object")
+
+        # getattr's default only covers a missing attribute; a clip whose fps is
+        # present but None must fall back to 24 as well.
+        fps = self._fps or getattr(self._data, "fps", None) or 24
+        video_bytes = self._encode_clip(self._data, fps)
+
+        metadata = self._generate_metadata(self._format)
+        metadata["source-type"] = "moviepy.VideoClip"
+
+        # Add clip metadata if available
+        for attr in ["duration", "size", "rotation", "w", "h", "aspect_ratio"]:
+            value = getattr(self._data, attr, None)
+            if value is not None:
+                metadata[attr] = value
+
+        # Report the rate the file was actually encoded with, which may differ
+        # from the clip's own fps when an override was passed to __init__.
+        metadata["fps"] = fps
+        return video_bytes, metadata
+
+    def _encode_clip(self, clip: VideoClip, fps: float) -> bytes:
+        """
+        Encode a clip through a temporary file and return the file's bytes.
+        Args:
+            clip: The MoviePy clip to write
+            fps: Frame rate passed to write_videofile
+        Returns:
+            The encoded video bytes
+        """
+        # The temp file is named with the requested format as its suffix.
+        temp_fd, temp_path = tempfile.mkstemp(suffix=f".{self._format}")
+        os.close(temp_fd)
+        try:
+            clip.write_videofile(temp_path, fps=fps)
+            return Path(temp_path).read_bytes()
+        finally:
+            Path(temp_path).unlink(missing_ok=True)
+
+    def _generate_metadata(self, format_name: str) -> dict[str, t.Any]:
+        """
+        Generate metadata for the video.
+        Args:
+            format_name: The format of the video (mp4, avi, etc.)
+        Returns:
+            A dictionary of metadata
+        """
+        metadata: dict[str, t.Any] = {
+            "extension": format_name.lower(),
+            "x-python-datatype": "dreadnode.Video.bytes",
+        }
+
+        if self._fps:
+            metadata["fps"] = self._fps
+
+        if self._width:
+            metadata["width"] = self._width
+
+        if self._height:
+            metadata["height"] = self._height
+
+        if self._caption:
+            metadata["caption"] = self._caption
+
+        return metadata
diff --git a/dreadnode/serialization.py b/dreadnode/serialization.py
index 8964ac8c..afc9dc23 100644
--- a/dreadnode/serialization.py
+++ b/dreadnode/serialization.py
@@ -23,6 +23,7 @@
 from re import Pattern
 from uuid import UUID
 
+from dreadnode.data_types.base_data_type import BaseDataType
 from dreadnode.types import JsonDict, JsonValue
 from dreadnode.util import safe_repr
 
@@ -388,105 +389,6 @@ def _handle_pandas_series(
     return serialized, schema
 
 
-def _handle_pil_image(
-    obj: t.Any,
-    _seen: set[int],
-) -> tuple[JsonValue, JsonDict]:
-    import PIL.Image
-
-    if not isinstance(obj, PIL.Image.Image):
-        return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA
-
-    buffer = io.BytesIO()
-    export_format = "PNG"
-
-    if hasattr(obj, "format") and isinstance(obj.format, str):
-        export_format = obj.format.lower()
-
-    obj.save(buffer, format=export_format)
-
-    return _handle_bytes(
-        buffer.getvalue(),
-        _seen,
-        {
-            "x-python-datatype": "PIL.Image",
-            "format": export_format.lower(),
-        },
-    )
-
-
-def _handle_pydub_audio_segment(
-    obj: t.Any,
-    _seen: set[int],
-) -> tuple[JsonValue, JsonDict]:
-    from pydub import AudioSegment  # type: ignore[import-untyped, unused-ignore, import-not-found]
-
-    if not isinstance(obj, AudioSegment):
-        return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA
-
-    # AudioSegment can be in different formats, but we will use WAV as a default
-    # Since there is no way to get the format from the AudioSegment object, we will use WAV
-    # as a default format for export. TODO: Add a way to get the format from the user via tags may be.
-    export_format = "wav"
-    # Raw audio data from AudioSegment class is in bytes format.
-    raw_bytes_data = obj.raw_data
-    schema = {
-        "x-python-datatype": "pydub.AudioSegment",
-        "format": export_format,
-        "x-audio-sample-rate": obj.frame_rate,
-        "x-audio-channels": obj.channels,
-        "x-audio-sample-width": obj.sample_width,
-    }
-
-    return _handle_bytes(raw_bytes_data, _seen, schema)
-
-
-def _handle_moviepy_video_clip(
-    obj: t.Any,
-    _seen: set[int],
-) -> tuple[JsonValue, JsonDict]:
-    import tempfile
-    from pathlib import Path
-
-    from moviepy import (  # type: ignore[import-untyped, unused-ignore, import-not-found]
-        VideoFileClip,
-    )
-
-    if not isinstance(obj, VideoFileClip):
-        return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA
-
-    # Infer format from filename if available
-    export_format = "mp4"
-    if getattr(obj, "filename", None):
-        ext = Path(obj.filename).suffix.lstrip(".").lower()
-        if ext:
-            export_format = ext
-
-    # Export video to temp file
-    with tempfile.NamedTemporaryFile(suffix=f".{export_format}") as temp_file:
-        obj.write_videofile(
-            temp_file.name,
-        )
-        raw_bytes_data = Path(temp_file.name).read_bytes()
-
-    schema = {
-        "x-python-datatype": "moviepy.VideoFileClip",
-        "format": export_format,
-        "start": obj.start,
-        "end": obj.end,
-        "duration": obj.duration,
-        "fps": obj.fps,
-        "size": obj.size,
-        "rotation": obj.rotation,
-        "aspect_ratio": obj.aspect_ratio,
-        "w": obj.w,
-        "h": obj.h,
-        "n_frames": obj.n_frames,
-    }
-
-    return _handle_bytes(raw_bytes_data, _seen, schema)
-
-
 def _handle_dataset(obj: t.Any, _seen: set[int]) -> tuple[JsonValue, JsonDict]:
     import datasets  # type: ignore[import-untyped]
 
@@ -506,6 +408,22 @@ def _handle_dataset(obj: t.Any, _seen: set[int]) -> tuple[JsonValue, JsonDict]:
     )
 
 
+def _handle_custom_data_type(obj: BaseDataType, _seen: set[int]) -> tuple[JsonValue, JsonDict]:
+    """Serialize a Dreadnode data type through its `to_serializable()` hook.
+
+    Bytes payloads go to `_handle_bytes` together with the object's metadata; any
+    other payload is serialized recursively and the metadata is layered on top.
+    """
+    if not isinstance(obj, BaseDataType):
+        return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA
+    data, metadata = obj.to_serializable()
+    if isinstance(data, bytes):
+        return _handle_bytes(data, _seen, metadata)
+    serialized, schema = _serialize(data, _seen)
+    # Merge into a new dict: handlers may return shared schema constants.
+    return serialized, {**schema, **metadata}
+
+
 @lru_cache(maxsize=1)
 def _get_handlers() -> dict[type, HandlerFunc]:
     handlers: dict[type, HandlerFunc] = {
@@ -587,25 +505,13 @@ def _get_handlers() -> dict[type, HandlerFunc]:
         handlers[pd.DataFrame] = _handle_pandas_dataframe
         handlers[pd.Series] = _handle_pandas_series
 
-    with contextlib.suppress(Exception):
-        import PIL.Image
-
-        handlers[PIL.Image.Image] = _handle_pil_image
-
     with contextlib.suppress(Exception):
         import datasets
 
         handlers[datasets.Dataset] = _handle_dataset
 
     with contextlib.suppress(Exception):
-        from pydub import AudioSegment
-
-        handlers[AudioSegment] = _handle_pydub_audio_segment
-
-    with contextlib.suppress(Exception):
-        from moviepy import VideoFileClip
-
-        handlers[VideoFileClip] = _handle_moviepy_video_clip
+        handlers[BaseDataType] = _handle_custom_data_type
 
     return handlers
 
diff --git a/examples/log_object/audio.ipynb b/examples/log_object/audio.ipynb
new file mode 100644
index 00000000..102a9ee0
--- /dev/null
+++ b/examples/log_object/audio.ipynb
@@ -0,0 +1,241 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dreadnode Audio Logging\n",
+    "\n",
+    "This notebook demonstrates how to log audio data using Dreadnode's `Audio` data type. The examples cover various audio formats and sources including file paths, numpy arrays, and pydub AudioSegment objects.\n",
+    "\n",
+    "## Features\n",
+    "\n",
+    "- Log audio files directly from disk (WAV, MP3, etc.)\n",
+    "- Convert and log numpy arrays as audio\n",
+    "- Process and log pydub AudioSegment objects\n",
+    "- Add captions and metadata to audio logs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import dreadnode as dn\n",
+    "\n",
+    "dn.configure(\n",
+    "    server=\"Your Dreadnode API\", # Replace with your server address\n",
+    "    token=\"Your Dreadnode API Key\", # Replace with your token\n",
+    "    project=\"audio-examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. File Path Examples\n",
+    "\n",
+    "The simplest way to log audio is directly from file paths. Dreadnode supports common audio formats like WAV, MP3, OGG, and more."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Created test audio file at: /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpzkbdqvce.wav\n",
+      "21:24:25.708 audio_file_examples\n"
+     ]
+    }
+   ],
+   "source": [
+    "import dreadnode as dn\n",
+    "import numpy as np\n",
+    "from dreadnode import Audio\n",
+    "import tempfile\n",
+    "import os\n",
+    "import soundfile as sf\n",
+    "\n",
+    "# Create a test audio file - a simple sine wave\n",
+    "sample_rate = 44100\n",
+    "duration = 2.0     \n",
+    "frequency = 440\n",
+    "\n",
+    "# Generate the sine wave\n",
+    "t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n",
+    "sine_wave = np.sin(2 * np.pi * frequency * t) * 0.5\n",
+    "\n",
+    "# Create a temporary WAV file\n",
+    "temp_wav = tempfile.NamedTemporaryFile(suffix=\".wav\", delete=False)\n",
+    "temp_wav.close()\n",
+    "audio_file_path = temp_wav.name\n",
+    "\n",
+    "sf.write(audio_file_path, sine_wave, sample_rate)\n",
+    "print(f\"Created test audio file at: {audio_file_path}\")\n",
+    "\n",
+    "# Log examples from file paths\n",
+    "with dn.run(\"audio_file_examples\") as r:\n",
+    "    # Basic file logging\n",
+    "    dn.log_input(\"audio_file\", Audio(audio_file_path, caption=\"440 Hz sine wave - WAV format\"))\n",
+    "    \n",
+    "    # With explicit format override\n",
+    "    dn.log_input(\"format_override\", Audio(audio_file_path, format=\"wav\", caption=\"With explicit format\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Numpy Array Examples\n",
+    "\n",
+    "Numpy arrays are commonly used for audio processing in Python. When logging numpy arrays, a sample rate must be provided."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:24:25.738 audio_numpy_examples\n"
+     ]
+    }
+   ],
+   "source": [
+    "with dn.run(\"audio_numpy_examples\") as r:\n",
+    "    sample_rate = 44100\n",
+    "    \n",
+    "    # Single frequency (A4 - 440 Hz) for 3 seconds\n",
+    "    duration = 3.0\n",
+    "    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n",
+    "    sine_440hz = np.sin(2 * np.pi * 440 * t) * 0.5\n",
+    "    dn.log_input(\"sine_wave_440hz\", Audio(sine_440hz, sample_rate=sample_rate, caption=\"A4 note (440 Hz)\"))\n",
+    "    \n",
+    "    # Frequency sweep (100 Hz to 10000 Hz)\n",
+    "    duration = 5.0\n",
+    "    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n",
+    "    frequency = np.logspace(np.log10(100), np.log10(10000), len(t))\n",
+    "    sweep = np.sin(2 * np.pi * frequency * t / sample_rate * frequency) * 0.5\n",
+    "    dn.log_input(\"freq_sweep\", Audio(sweep, sample_rate=sample_rate, caption=\"Frequency sweep (100 Hz to 10 kHz)\"))\n",
+    "    \n",
+    "    # Chord - multiple frequencies combined\n",
+    "    duration = 2.0\n",
+    "    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n",
+    "    c4 = np.sin(2 * np.pi * 261.63 * t) * 0.3\n",
+    "    e4 = np.sin(2 * np.pi * 329.63 * t) * 0.3\n",
+    "    g4 = np.sin(2 * np.pi * 392.00 * t) * 0.3\n",
+    "    chord = c4 + e4 + g4\n",
+    "    dn.log_input(\"chord\", Audio(chord, sample_rate=sample_rate, caption=\"C major chord\"))\n",
+    "    \n",
+    "    # Stereo audio (two channels)\n",
+    "    duration = 3.0\n",
+    "    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)\n",
+    "    left = np.sin(2 * np.pi * 440 * t) * 0.5\n",
+    "    right = np.sin(2 * np.pi * 880 * t) * 0.5\n",
+    "    stereo = np.column_stack((left, right))\n",
+    "    dn.log_input(\"stereo_sine\", Audio(stereo, sample_rate=sample_rate, caption=\"Stereo audio (440 Hz left, 880 Hz right)\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Pydub AudioSegment Examples\n",
+    "\n",
+    "Pydub is a popular library for audio manipulation in Python. Dreadnode supports logging AudioSegment objects directly, which enables powerful audio processing before logging."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:24:25.862 audio_pydub_examples\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pydub import AudioSegment\n",
+    "from pydub.generators import Sine\n",
+    "\n",
+    "with dn.run(\"audio_pydub_examples\") as r:\n",
+    "    # Load the file with pydub\n",
+    "    audio_segment = AudioSegment.from_file(audio_file_path)\n",
+    "    \n",
+    "    # Log the original AudioSegment\n",
+    "    dn.log_input(\"pydub_original\", Audio(audio_segment, caption=\"Original audio with pydub\"))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Clean it up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.unlink(audio_file_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "In this notebook, we demonstrated how to log audio in Dreadnode from various sources:\n",
+    "\n",
+    "1. Audio files (e.g., WAV, MP3 files)\n",
+    "2. Numpy arrays with sample rate\n",
+    "3. Pydub AudioSegment objects\n",
+    "4. Audio with custom metadata and captions\n",
+    "\n",
+    "We also showed more advanced audio processing techniques including:\n",
+    "- Generating various audio signals (sine waves, chords, sweeps)\n",
+    "- Audio manipulations (volume changes, fades, reversing)\n",
+    "\n",
+    "The `Audio` data type in Dreadnode provides a flexible way to track and analyze audio data throughout your projects and workflows."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/log_object/image.ipynb b/examples/log_object/image.ipynb
new file mode 100644
index 00000000..ded95f2c
--- /dev/null
+++ b/examples/log_object/image.ipynb
@@ -0,0 +1,322 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dreadnode Image Logging\n",
+    "\n",
+    "This notebook demonstrates how to log images using Dreadnode's `Image` data type. The examples cover various image formats and sources including file paths, PIL images, numpy arrays, and base64 encoded strings.\n",
+    "\n",
+    "## Features\n",
+    "\n",
+    "- Log images from file paths (JPG, PNG, etc.)\n",
+    "- Convert and log PIL Image objects\n",
+    "- Transform numpy arrays into images\n",
+    "- Handle raw bytes and base64 encoded images\n",
+    "- Convert between image modes (RGB, RGBA, grayscale)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import dreadnode as dn\n",
+    "\n",
+    "dn.configure(\n",
+    "    server=\"Your Dreadnode API\", # Replace with your server address\n",
+    "    token=\"Your Dreadnode API Key\", # Replace with your token\n",
+    "    project=\"image-examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. File Path Examples\n",
+    "\n",
+    "Let's first look at logging images directly from file paths. We'll create a temporary image file to use in our examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Created test image at: /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpu5xo56lz.png\n",
+      "21:05:36.657 file_path_example\n"
+     ]
+    }
+   ],
+   "source": [
+    "from dreadnode import Image\n",
+    "from PIL import Image as PILImage\n",
+    "import numpy as np\n",
+    "import tempfile\n",
+    "import os\n",
+    "\n",
+    "temp_file = tempfile.NamedTemporaryFile(suffix=\".png\", delete=False)\n",
+    "temp_file.close()\n",
+    "image_file_path = temp_file.name\n",
+    "\n",
+    "# Generate a simple gradient test image\n",
+    "width, height = 300, 200\n",
+    "img_array = np.zeros((height, width, 3), dtype=np.uint8)\n",
+    "for x in range(width):\n",
+    "    for y in range(height):\n",
+    "        r = int(255 * x / width)\n",
+    "        g = int(255 * y / height)\n",
+    "        b = int(255 * (x + y) / (width + height))\n",
+    "        img_array[y, x] = [r, g, b]\n",
+    "        \n",
+    "# Save the generated image\n",
+    "test_img = PILImage.fromarray(img_array)\n",
+    "test_img.save(image_file_path)\n",
+    "\n",
+    "print(f\"Created test image at: {image_file_path}\")\n",
+    "\n",
+    "with dn.run(\"file_path_example\") as r:\n",
+    "    # Basic file path logging\n",
+    "    dn.log_input(\"image_file\", Image(image_file_path, caption=\"RGB Gradient Example\"))\n",
+    "    \n",
+    "    # Load and convert to grayscale\n",
+    "    dn.log_input(\"grayscale_image\", Image(image_file_path, mode=\"L\", caption=\"Grayscale Conversion\"))\n",
+    "    \n",
+    "    # Explicit format override\n",
+    "    dn.log_input(\"format_override\", Image(image_file_path, format=\"jpg\", caption=\"Format override to JPG\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. PIL Image Objects\n",
+    "\n",
+    "Dreadnode can directly log PIL Image objects, which allows you to perform image processing before logging."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:06:38.295 pil_image_example\n"
+     ]
+    }
+   ],
+   "source": [
+    "with dn.run(\"pil_image_example\") as r:\n",
+    "    \n",
+    "    pil_img = PILImage.open(image_file_path)\n",
+    "    \n",
+    "    # Log the original PIL image\n",
+    "    dn.log_input(\"original_pil\", Image(pil_img, caption=\"Original PIL Image\"))\n",
+    "    \n",
+    "    # Convert to grayscale\n",
+    "    grayscale = pil_img.convert(\"L\")\n",
+    "    dn.log_input(\"pil_grayscale\", Image(grayscale, caption=\"Grayscale PIL Image\"))\n",
+    "    \n",
+    "    # Apply rotation\n",
+    "    rotated = pil_img.rotate(45, expand=True)\n",
+    "    dn.log_input(\"pil_rotated\", Image(rotated, caption=\"Rotated 45 degrees\"))\n",
+    "    \n",
+    "    # Resize\n",
+    "    resized = pil_img.resize((150, 100))\n",
+    "    dn.log_input(\"pil_resized\", Image(resized, caption=\"Resized to 150×100\"))\n",
+    "    \n",
+    "    # Convert to RGBA (with transparency)\n",
+    "    rgba = pil_img.convert(\"RGBA\")\n",
+    "    # Add transparency to the top half\n",
+    "    data = np.array(rgba)\n",
+    "    data[:data.shape[0]//2, :, 3] = 128  # 50% transparency to top half\n",
+    "    rgba_modified = PILImage.fromarray(data)\n",
+    "    dn.log_input(\"pil_rgba\", Image(rgba_modified, format=\"png\", caption=\"RGBA with transparency\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Numpy Arrays\n",
+    "\n",
+    "Numpy arrays are commonly used for image representation in machine learning. Dreadnode can log numpy arrays as images in various formats."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:07:14.133 image_numpy_test\n",
+      "\n",
+      "--- Testing Numpy Arrays ---\n"
+     ]
+    }
+   ],
+   "source": [
+    "with dn.run(\"image_numpy_test\") as r:\n",
+    "    \n",
+    "    # 3.1 RGB Array (channels last - HWC format)\n",
+    "    rgb_array = np.random.randint(0, 255, (200, 200, 3), dtype=np.uint8)\n",
+    "    dn.log_input(\"numpy_rgb\", Image(rgb_array, caption=\"Random RGB Array (200×200×3)\"))\n",
+    "    \n",
+    "    # 3.2 Grayscale Array (2D)\n",
+    "    gray_array = np.random.randint(0, 255, (200, 200), dtype=np.uint8)\n",
+    "    dn.log_input(\"numpy_gray\", Image(gray_array, caption=\"Random Grayscale Array (200×200)\"))\n",
+    "    \n",
+    "    # 3.3 RGBA Array with transparency\n",
+    "    rgba_array = np.zeros((200, 200, 4), dtype=np.uint8)\n",
+    "    # Create a red square with 50% transparency\n",
+    "    rgba_array[50:150, 50:150, 0] = 255  # Red channel\n",
+    "    rgba_array[50:150, 50:150, 3] = 128  # Alpha channel (50%)\n",
+    "    dn.log_input(\"numpy_rgba\", Image(rgba_array, format=\"png\", caption=\"RGBA with transparent red square\"))\n",
+    "    \n",
+    "    # 3.4 Float array [0-1] range\n",
+    "    float_array = np.zeros((200, 200, 3), dtype=np.float32)\n",
+    "    # Make a color gradient\n",
+    "    for i in range(200):\n",
+    "        float_array[:, i, 0] = i / 200.0  # Red increases from left to right\n",
+    "        float_array[i, :, 1] = i / 200.0  # Green increases from top to bottom\n",
+    "    dn.log_input(\"numpy_float\", Image(float_array, caption=\"Float gradient (0-1 range)\"))\n",
+    "    \n",
+    "    # 3.5 Channels first format (PyTorch style - CHW)\n",
+    "    chw_array = np.random.randint(0, 255, (3, 200, 200), dtype=np.uint8)\n",
+    "    dn.log_input(\"numpy_chw\", Image(chw_array, caption=\"Channels-first array (3×200×200)\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Raw Bytes and Binary Data\n",
+    "\n",
+    "Images can also be logged from raw bytes, which is useful when working with image data from APIs or databases."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:08:25.913 bytes_test\n"
+     ]
+    }
+   ],
+   "source": [
+    "import io\n",
+    "\n",
+    "with dn.run(\"bytes_test\") as r:\n",
+    "    # Create a simple test image to use for our examples\n",
+    "    pil_img = PILImage.new(\"RGB\", (100, 100), color=\"red\")\n",
+    "    \n",
+    "    # 4.1 Basic PNG bytes\n",
+    "    buffer = io.BytesIO()\n",
+    "    pil_img.save(buffer, format=\"PNG\")\n",
+    "    png_bytes = buffer.getvalue()\n",
+    "    dn.log_input(\"bytes_png\", Image(png_bytes, format=\"png\", caption=\"PNG bytes (red square)\"))\n",
+    "    \n",
+    "    # 4.2 JPEG bytes\n",
+    "    buffer = io.BytesIO()\n",
+    "    pil_img.save(buffer, format=\"JPEG\", quality=90)\n",
+    "    jpeg_bytes = buffer.getvalue()\n",
+    "    dn.log_input(\"bytes_jpeg\", Image(jpeg_bytes, format=\"jpeg\", caption=\"JPEG bytes (red square)\"))\n",
+    "    \n",
+    "    # 4.3 Bytes with mode conversion\n",
+    "    dn.log_input(\"bytes_grayscale\", Image(png_bytes, format=\"png\", mode=\"L\", caption=\"PNG bytes converted to grayscale\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Base64 Encoded Images\n",
+    "\n",
+    "Base64 encoded images are common in web applications and APIs. Dreadnode supports Data URLs and raw base64 strings."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import base64\n",
+    "\n",
+    "with dn.run(\"base64_test\") as r:\n",
+    "    pil_img = PILImage.new(\"RGB\", (100, 100), color=\"blue\")\n",
+    "    \n",
+    "    # Save as PNG\n",
+    "    buffer = io.BytesIO()\n",
+    "    pil_img.save(buffer, format=\"PNG\")\n",
+    "    png_bytes = buffer.getvalue()\n",
+    "    \n",
+    "    # Create base64 string with data URL\n",
+    "    png_base64 = base64.b64encode(png_bytes).decode('utf-8')\n",
+    "    png_data_url = f\"data:image/png;base64,{png_base64}\"\n",
+    "    \n",
+    "    # 5.1 Log with data URL format\n",
+    "    dn.log_input(\"base64_dataurl\", Image(png_data_url, caption=\"Data URL format (blue square)\"))\n",
+    "    \n",
+    "    # 5.2 Log with grayscale conversion\n",
+    "    dn.log_input(\"base64_grayscale\", Image(png_data_url, mode=\"L\", caption=\"Data URL converted to grayscale\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "In this notebook, we've demonstrated how to log images in Dreadnode from various sources:\n",
+    "\n",
+    "1. File paths (e.g., JPG, PNG files)\n",
+    "2. PIL Image objects\n",
+    "3. Numpy arrays in different formats\n",
+    "4. Raw bytes and binary data\n",
+    "5. Base64 encoded strings\n",
+    "\n",
+    "We've also shown how to apply transformations like format conversion, grayscale conversion, and resizing before logging.\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/log_object/object3d.ipynb b/examples/log_object/object3d.ipynb
new file mode 100644
index 00000000..a0e0b3d0
--- /dev/null
+++ b/examples/log_object/object3d.ipynb
@@ -0,0 +1,117 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dreadnode 3D Object Logging\n",
+    "\n",
+    "This notebook demonstrates how to log 3D object data using Dreadnode's `Object3D` data type."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import dreadnode as dn\n",
+    "\n",
+    "dn.configure(\n",
+    "    server=\"Your Dreadnode API\", # Replace with your server address\n",
+    "    token=\"Your Dreadnode API Key\", # Replace with your token\n",
+    "    project=\"object3d-examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "22:15:13.441 object3d_test\n"
+     ]
+    }
+   ],
+   "source": [
+    "import dreadnode as dn\n",
+    "from dreadnode.data_types import Object3D\n",
+    "import numpy as np\n",
+    "import tempfile\n",
+    "import os\n",
+    "import struct\n",
+    "\n",
+    "\n",
+    "def create_sample_glb():\n",
+    "    temp_glb = tempfile.NamedTemporaryFile(suffix=\".glb\", delete=False)\n",
+    "    temp_glb.close()\n",
+    "    glb_path = temp_glb.name\n",
+    "    magic = b'glTF'\n",
+    "    version = struct.pack('<I', 2)\n",
+    "    \n",
+    "    json_data = b'{\"asset\":{\"version\":\"2.0\"}}'\n",
+    "    json_length = struct.pack('<I', len(json_data))\n",
+    "    json_type = b'JSON'\n",
+    "    \n",
+    "    total_length = struct.pack('<I', 12 + 8 + len(json_data))\n",
+    "    \n",
+    "    with open(glb_path, 'wb') as f:\n",
+    "        f.write(magic)\n",
+    "        f.write(version)\n",
+    "        f.write(total_length)\n",
+    "        \n",
+    "        f.write(json_length)\n",
+    "        f.write(json_type)\n",
+    "        \n",
+    "        f.write(json_data)\n",
+    "    \n",
+    "    return glb_path\n",
+    "\n",
+    "model_path = create_sample_glb()\n",
+    "\n",
+    "with dn.run(\"object3d_test\") as r:\n",
+    "    # Log from file path\n",
+    "    dn.log_input(\"file_model\", Object3D(\n",
+    "        model_path,\n",
+    "        caption=\"3D Model from generated file\"\n",
+    "    ))\n",
+    "    \n",
+    "    # Also test with raw bytes\n",
+    "    model_bytes = open(model_path, 'rb').read()\n",
+    "    dn.log_input(\"bytes_model\", Object3D(\n",
+    "        model_bytes,\n",
+    "        format=\"glb\",\n",
+    "        caption=\"3D Model from raw bytes\"\n",
+    "    ))\n",
+    "\n",
+    "\n",
+    "os.unlink(model_path)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/log_object/table.ipynb b/examples/log_object/table.ipynb
new file mode 100644
index 00000000..91280596
--- /dev/null
+++ b/examples/log_object/table.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dreadnode Table Logging\n",
+    "\n",
+    "This notebook demonstrates how to log table data using Dreadnode's `Table` data type. The examples cover various table formats and sources including pandas DataFrames, dictionaries, lists, NumPy arrays, and files."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import dreadnode as dn\n",
+    "\n",
+    "dn.configure(\n",
+    "    server=\"Your Dreadnode API\", # Replace with your server address\n",
+    "    token=\"Your Dreadnode API Key\", # Replace with your token\n",
+    "    project=\"table-examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "22:10:39.910 table_test\n"
+     ]
+    }
+   ],
+   "source": [
+    "import dreadnode as dn\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "import tempfile\n",
+    "import json\n",
+    "from dreadnode import Table\n",
+    "\n",
+    "\n",
+    "with dn.run(\"table_test\") as r:\n",
+    "    # 1. Simple DataFrame\n",
+    "    df = pd.DataFrame({\n",
+    "        'Name': ['Alice', 'Bob', 'Charlie', 'David'],\n",
+    "        'Age': [25, 30, 35, 40],\n",
+    "        'City': ['New York', 'San Francisco', 'Los Angeles', 'Chicago']\n",
+    "    })\n",
+    "    \n",
+    "    dn.log_input(\"pandas_df\", Table(\n",
+    "        df,\n",
+    "        caption=\"Simple DataFrame as CSV\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 2. Same DataFrame as JSON\n",
+    "    dn.log_input(\"df_as_json\", Table(\n",
+    "        df,\n",
+    "        caption=\"DataFrame as JSON\",\n",
+    "        format=\"json\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 3. Dictionary\n",
+    "    data_dict = {\n",
+    "        'products': ['Laptop', 'Phone', 'Tablet', 'Monitor'],\n",
+    "        'prices': [1200, 800, 500, 300],\n",
+    "        'stock': [10, 25, 15, 5]\n",
+    "    }\n",
+    "    \n",
+    "    dn.log_input(\"dict_data\", Table(\n",
+    "        data_dict,\n",
+    "        caption=\"Dictionary converted to table\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 4. List of dictionaries\n",
+    "    list_data = [\n",
+    "        {'id': 1, 'name': 'Product A', 'price': 100},\n",
+    "        {'id': 2, 'name': 'Product B', 'price': 200},\n",
+    "        {'id': 3, 'name': 'Product C', 'price': 300},\n",
+    "    ]\n",
+    "    \n",
+    "    dn.log_input(\"list_of_dicts\", Table(\n",
+    "        list_data,\n",
+    "        caption=\"List of dictionaries\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 5. NumPy array\n",
+    "    array_data = np.array([\n",
+    "        [1, 2, 3, 4],\n",
+    "        [5, 6, 7, 8],\n",
+    "        [9, 10, 11, 12]\n",
+    "    ])\n",
+    "    \n",
+    "    dn.log_input(\"numpy_array\", Table(\n",
+    "        array_data,\n",
+    "        caption=\"NumPy array\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 6. Create and use a CSV file\n",
+    "    with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as temp_file:\n",
+    "        csv_path = temp_file.name\n",
+    "        df.to_csv(csv_path, index=False)\n",
+    "    \n",
+    "    dn.log_input(\"csv_file\", Table(\n",
+    "        csv_path,\n",
+    "        caption=\"Table from CSV file\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 7. Create and use a JSON file\n",
+    "    with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:\n",
+    "        json_path = temp_file.name\n",
+    "        with open(json_path, 'w') as f:\n",
+    "            json.dump(list_data, f)\n",
+    "    \n",
+    "    dn.log_input(\"json_file\", Table(\n",
+    "        json_path,\n",
+    "        caption=\"Table from JSON file\"\n",
+    "    ))\n",
+    "    \n",
+    "    # 8. Large table (should be stored as URI)\n",
+    "    large_df = pd.DataFrame(\n",
+    "        np.random.rand(500, 20),  # 500 rows, 20 columns\n",
+    "        columns=[f'Column_{i}' for i in range(20)]\n",
+    "    )\n",
+    "    \n",
+    "    dn.log_input(\"large_table\", Table(\n",
+    "        large_df,\n",
+    "        caption=\"Large table (500 rows, 20 columns)\"\n",
+    "    ))\n",
+    "    \n",
+    "    # Clean up temporary files\n",
+    "    Path(csv_path).unlink()\n",
+    "    Path(json_path).unlink()\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/log_object/video.ipynb b/examples/log_object/video.ipynb
new file mode 100644
index 00000000..4d29a6f4
--- /dev/null
+++ b/examples/log_object/video.ipynb
@@ -0,0 +1,340 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dreadnode Video Logging\n",
+    "\n",
+    "This notebook demonstrates how to log video data using Dreadnode's `Video` data type. The examples cover various video sources including file paths, NumPy arrays for frame sequences, and MoviePy video clips.\n",
+    "\n",
+    "## Features\n",
+    "\n",
+    "- Log videos from file paths (MP4, MOV, etc.)\n",
+    "- Create and log videos from sequences of NumPy array frames\n",
+    "- Log MoviePy video clip objects\n",
+    "- Control video properties like frame rate, dimensions and format"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/raja/Desktop/dreadnode/projects/sdk/.venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\n",
+      "  warn(\"Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\", RuntimeWarning)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import dreadnode as dn\n",
+    "\n",
+    "dn.configure(\n",
+    "    server=\"Your Dreadnode API\", # Replace with your server address\n",
+    "    token=\"Your Dreadnode API Key\", # Replace with your token\n",
+    "    project=\"video-examples\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. File Path Examples\n",
+    "\n",
+    "The simplest way to log video is directly from file paths. Dreadnode supports common video formats like MP4, MOV, and others."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sample MOV created at: /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpa5yfwmh2.mov\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "from PIL import Image, ImageDraw\n",
+    "import tempfile\n",
+    "import imageio\n",
+    "\n",
+    "def create_sample_mov(duration=3, fps=30, width=320, height=240):\n",
+    "    \"\"\"Creates a simple .mov test video (3 seconds by default) with a moving rectangle.\"\"\"\n",
+    "    \n",
+    "    frames = []\n",
+    "    total_frames = int(duration * fps)\n",
+    "    \n",
+    "    for i in range(total_frames):\n",
+    "        img = Image.new('RGB', (width, height), color=(0, 0, 0))\n",
+    "        draw = ImageDraw.Draw(img)\n",
+    "        \n",
+    "        x_pos = int((i / total_frames) * (width - 60))\n",
+    "        y_pos = int((i / total_frames) * (height - 60))\n",
+    "        r = int(255 * (1 - i / total_frames))\n",
+    "        b = int(255 * (i / total_frames))\n",
+    "        \n",
+    "        draw.rectangle([x_pos, y_pos, x_pos + 60, y_pos + 60], fill=(r, 100, b))\n",
+    "        draw.text((10, 10), f\"Frame {i}\", fill=(255, 255, 255))\n",
+    "        \n",
+    "        frames.append(np.array(img))\n",
+    "    \n",
+    "    temp_file = tempfile.NamedTemporaryFile(suffix='.mov', delete=False)\n",
+    "    temp_file.close()\n",
+    "    \n",
+    "    imageio.mimwrite(temp_file.name, frames, fps=fps, codec='libx264', \n",
+    "                    quality=7, pixelformat='yuv420p')\n",
+    "    \n",
+    "    return temp_file.name\n",
+    "\n",
+    "\n",
+    "video_file_path = create_sample_mov()\n",
+    "print(f\"Sample MOV created at: {video_file_path}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:41:38.956 video_file_examples\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/raja/Desktop/dreadnode/projects/sdk/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import dreadnode as dn\n",
+    "from dreadnode import Video\n",
+    "\n",
+    "with dn.run(\"video_file_examples\") as r:\n",
+    "    dn.log_input(\"video_file\", Video(\n",
+    "        video_file_path,\n",
+    "        caption=\"Basic video file example\"\n",
+    "    ))\n",
+    "    \n",
+    "    dn.log_input(\"format_override\", Video(\n",
+    "        video_file_path,\n",
+    "        format=\"mp4\",\n",
+    "        caption=\"MOV file with MP4 format override\"\n",
+    "    ))\n",
+    "    \n",
+    "    dn.log_input(\"dimension_override\", Video(\n",
+    "        video_file_path,\n",
+    "        width=1280,\n",
+    "        height=720,\n",
+    "        caption=\"Video with dimension specifications\"\n",
+    "    ))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. NumPy Array Frames\n",
+    "\n",
+    "You can create videos from NumPy arrays representing individual frames. This is useful for visualizing generated content, simulations, or processing results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:41:39.210 video_numpy_examples\n",
+      "MoviePy - Building video /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmp26q9pf3i.mp4.\n",
+      "MoviePy - Writing video /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmp26q9pf3i.mp4\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "                                                            "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MoviePy - Done !\n",
+      "MoviePy - video ready /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmp26q9pf3i.mp4\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "with dn.run(\"video_numpy_examples\") as r:\n",
+    "    frame = np.ones((120, 160, 3), dtype=np.uint8) * 200  # Gray background\n",
+    "    \n",
+    "    frame[40:80, 60:100] = [255, 0, 0]  # Red rectangle\n",
+    "    \n",
+    "    static_frames = [frame.copy() for _ in range(3)]\n",
+    "    \n",
+    "    dn.log_input(\"static_video\", Video(\n",
+    "        static_frames,\n",
+    "        fps=5,  # 5 frames per second\n",
+    "        format=\"mp4\",\n",
+    "        caption=\"Static video with 3 identical frames\"\n",
+    "    ))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. MoviePy Video Clips\n",
+    "\n",
+    "MoviePy is a popular library for video editing in Python. Dreadnode supports logging VideoClip objects directly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21:41:39.235 video_moviepy_examples\n",
+      "{'video_found': True, 'audio_found': False, 'metadata': {'major_brand': 'qt', 'minor_version': '512', 'compatible_brands': 'qt', 'encoder': 'Lavf61.7.100'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [320, 240], 'bitrate': 46, 'fps': 30.0, 'codec_name': 'h264', 'profile': '(High)', 'metadata': {'Metadata': '', 'handler_name': 'VideoHandler', 'vendor_id': 'FFMP', 'encoder': 'Lavc61.19.100 libx264'}}], 'input_number': 0}], 'duration': 3.0, 'bitrate': 51, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'h264', 'video_profile': '(High)', 'video_size': [320, 240], 'video_bitrate': 46, 'video_fps': 30.0, 'video_duration': 3.0, 'video_n_frames': 90}\n",
+      "/Users/raja/Desktop/dreadnode/projects/sdk/.venv/lib/python3.12/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -i /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpa5yfwmh2.mov -loglevel error -f image2pipe -vf scale=320:240 -sws_flags bicubic -pix_fmt rgb24 -vcodec rawvideo -\n",
+      "MoviePy - Building video /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpx47gkl1c.mp4.\n",
+      "MoviePy - Writing video /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpx47gkl1c.mp4\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "                                                             "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MoviePy - Done !\n",
+      "MoviePy - video ready /var/folders/ln/f1c_19n537310md15785b6d80000gn/T/tmpx47gkl1c.mp4\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "from moviepy.video.io.VideoFileClip import VideoFileClip\n",
+    "\n",
+    "with dn.run(\"video_moviepy_examples\") as r:\n",
+    "    video_clip = VideoFileClip(video_file_path)\n",
+    "    \n",
+    "    dn.log_input(\"moviepy_full\", Video(\n",
+    "        video_clip,\n",
+    "        caption=\"Full video loaded with MoviePy\"\n",
+    "    ))\n",
+    "    \n",
+    "    try:\n",
+    "        if hasattr(video_clip, 'subclip'):\n",
+    "            first_5_seconds = video_clip.subclip(0, 5)\n",
+    "            dn.log_input(\"moviepy_clip\", Video(\n",
+    "                first_5_seconds,\n",
+    "                caption=\"First 5 seconds of video\"\n",
+    "            ))\n",
+    "    except Exception as e:\n",
+    "        print(f\"Subclip not supported in this MoviePy version: {e}\")\n",
+    "    \n",
+    "    # Clean up resources\n",
+    "    video_clip.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleanup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.unlink(video_file_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "This notebook demonstrated how to log videos in Dreadnode from various sources:\n",
+    "\n",
+    "1. Direct file paths (MP4, MOV, etc.)\n",
+    "2. NumPy arrays representing frame sequences\n",
+    "3. MoviePy VideoClip objects\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/poetry.lock b/poetry.lock
index aaf1b720..17775833 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -697,6 +697,86 @@ files = [
     {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
 ]
 
+[[package]]
+name = "cffi"
+version = "1.17.1"
+description = "Foreign Function Interface for Python calling C code."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
+    {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"},
+    {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"},
+    {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"},
+    {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"},
+    {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"},
+    {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"},
+    {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"},
+    {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"},
+    {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"},
+    {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"},
+    {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"},
+    {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"},
+    {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"},
+    {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"},
+    {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
+]
+
+[package.dependencies]
+pycparser = "*"
+
 [[package]]
 name = "cfgv"
 version = "3.4.0"
@@ -837,7 +917,7 @@ files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
-markers = {main = "extra == \"training\" and platform_system == \"Windows\""}
+markers = {main = "platform_system == \"Windows\""}
 
 [[package]]
 name = "coolname"
@@ -901,7 +981,7 @@ version = "5.2.1"
 description = "Decorators for Humans"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"},
     {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"},
@@ -1398,7 +1478,7 @@ version = "2.37.0"
 description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats."
 optional = false
 python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "imageio-2.37.0-py3-none-any.whl", hash = "sha256:11efa15b87bc7871b61590326b2d635439acc321cf7f8ce996f812543ce10eed"},
     {file = "imageio-2.37.0.tar.gz", hash = "sha256:71b57b3669666272c818497aebba2b4c5f20d5b37c81720e5e1a56d59c492996"},
@@ -1432,7 +1512,7 @@ version = "0.6.0"
 description = "FFMPEG wrapper for Python"
 optional = false
 python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "imageio_ffmpeg-0.6.0-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.whl", hash = "sha256:9d2baaf867088508d4a3458e61eeb30e945c4ad8016025545f66c4b5aaef0a61"},
     {file = "imageio_ffmpeg-0.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b1ae3173414b5fc5f538a726c4e48ea97edc0d2cdc11f103afee655c463fa742"},
@@ -1878,7 +1958,7 @@ version = "2.1.2"
 description = "Video editing with Python"
 optional = false
 python-versions = "*"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "moviepy-2.1.2-py3-none-any.whl", hash = "sha256:6cdc0d739110c8f347a224d72bd59eebaec010720d01eff290d37111bf545a73"},
     {file = "moviepy-2.1.2.tar.gz", hash = "sha256:22c57a7472f607eaad9fe80791df67c05082e1060fb74817c4eaac68e138ee77"},
@@ -2464,7 +2544,7 @@ version = "10.4.0"
 description = "Python Imaging Library (Fork)"
 optional = false
 python-versions = ">=3.8"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"},
     {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"},
@@ -2626,7 +2706,7 @@ version = "0.1.11"
 description = "Log and progress bar manager for console, notebooks, web..."
 optional = false
 python-versions = "*"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "proglog-0.1.11-py3-none-any.whl", hash = "sha256:1729b829e1e609a3f340d6659fbde401cace9e2feab65647ceaf52ecfccf362d"},
     {file = "proglog-0.1.11.tar.gz", hash = "sha256:ce35a0f9d1153e69d0063cdae6e6f2d8708fa0a588fc4e089501b77005e72884"},
@@ -2819,6 +2899,18 @@ files = [
 [package.extras]
 test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
 
+[[package]]
+name = "pycparser"
+version = "2.22"
+description = "C parser in Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
+    {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
+]
+
 [[package]]
 name = "pydantic"
 version = "2.11.3"
@@ -3063,7 +3155,7 @@ version = "1.1.0"
 description = "Read key-value pairs from a .env file and set them as environment variables"
 optional = false
 python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"},
     {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"},
@@ -3697,6 +3789,28 @@ files = [
     {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
 ]
 
+[[package]]
+name = "soundfile"
+version = "0.13.1"
+description = "An audio library based on libsndfile, CFFI and NumPy"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "soundfile-0.13.1-py2.py3-none-any.whl", hash = "sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445"},
+    {file = "soundfile-0.13.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33"},
+    {file = "soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593"},
+    {file = "soundfile-0.13.1-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb"},
+    {file = "soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618"},
+    {file = "soundfile-0.13.1-py2.py3-none-win32.whl", hash = "sha256:c734564fab7c5ddf8e9be5bf70bab68042cd17e9c214c06e365e20d64f9a69d5"},
+    {file = "soundfile-0.13.1-py2.py3-none-win_amd64.whl", hash = "sha256:1e70a05a0626524a69e9f0f4dd2ec174b4e9567f4d8b6c11d38b5c289be36ee9"},
+    {file = "soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b"},
+]
+
+[package.dependencies]
+cffi = ">=1.0"
+numpy = "*"
+
 [[package]]
 name = "tiktoken"
 version = "0.9.0"
@@ -3833,7 +3947,6 @@ files = [
     {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
     {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
 ]
-markers = {main = "extra == \"training\""}
 
 [package.dependencies]
 colorama = {version = "*", markers = "platform_system == \"Windows\""}
@@ -4473,4 +4586,4 @@ training = ["transformers"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.14"
-content-hash = "e229acba61403826d1e4478753c7294fcceba369ba4ced1fa72855cea246f2c2"
+content-hash = "a95de9e5f9000b30c2bab83bc400455dafab543159ddebfce37965407118bcd6"
diff --git a/pyproject.toml b/pyproject.toml
index 882f15e2..08c6b939 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,8 @@ pandas = "^2.2.3"
 fsspec = {version = ">=2023.1.0,<=2025.3.0", extras = ["s3"]} # Pinned for datasets compatibility
 
 transformers = { version = "^4.41.0", optional = true }
+soundfile = "^0.13.1"
+moviepy = "^2.1.2"
 
 [tool.poetry.extras]
 training = ["transformers"]

From e1c48b4994d961dd732e54677f94c5d7fcd53a98 Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda <raja@dreadnode.io>
Date: Fri, 9 May 2025 15:38:19 -0700
Subject: [PATCH 2/6] Fix ruff errors

---
 dreadnode/data_types/image.py | 13 ++++++++-----
 dreadnode/data_types/video.py |  6 ++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/dreadnode/data_types/image.py b/dreadnode/data_types/image.py
index 130bf68b..ee886b2e 100644
--- a/dreadnode/data_types/image.py
+++ b/dreadnode/data_types/image.py
@@ -255,11 +255,12 @@ def _generate_metadata(
     def _guess_mode(self, data: np.ndarray[t.Any, np.dtype[t.Any]]) -> str:
         """Guess what type of image the np.array is representing."""
         ndims = data.ndim
-
-        if ndims == 2:
+        grayscale_dim = 2
+        rgb_dim = 3
+        if ndims == grayscale_dim:
             return "L"
 
-        if ndims == 3:
+        if ndims == rgb_dim:
             # Map shape to mode for channels-last (HWC) and channels-first (CHW)
             shape_to_mode = {
                 (1,): "L",
@@ -277,12 +278,14 @@ def _ensure_valid_image_array(
         self, array: np.ndarray[t.Any, np.dtype[t.Any]]
     ) -> np.ndarray[t.Any, np.dtype[t.Any]]:
         """Convert numpy array to a format suitable for PIL."""
+        grayscale_dim = 2
+        rgb_dim = 3
         # Handle grayscale (2D arrays)
-        if array.ndim == 2:
+        if array.ndim == grayscale_dim:
             return array
 
         # Handle standard 3D arrays
-        if array.ndim == 3:
+        if array.ndim == rgb_dim:
             # Channels-last format (HWC) - standard for PIL
             if array.shape[2] in (1, 3, 4):
                 return array
diff --git a/dreadnode/data_types/video.py b/dreadnode/data_types/video.py
index caf00bd4..f47a2ec2 100644
--- a/dreadnode/data_types/video.py
+++ b/dreadnode/data_types/video.py
@@ -112,10 +112,12 @@ def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
         if not isinstance(self._data, (np.ndarray, list)):
             raise TypeError("data must be a numpy array or list of numpy arrays")
         frames = []
+        rgb_dim = 3
+        rgba_dim = 4
         if isinstance(self._data, np.ndarray):
-            if self._data.ndim == 3:  # Single frame
+            if self._data.ndim == rgb_dim:  # Single frame
                 frames = [self._data]
-            elif self._data.ndim == 4:  # Multiple frames
+            elif self._data.ndim == rgba_dim:  # Multiple frames
                 frames = [self._data[i] for i in range(self._data.shape[0])]
             else:
                 raise ValueError(f"Unsupported numpy array shape: {self._data.ndim}")

From b40fefc018f9b8c9de660a096156e846cfe8b57f Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda <raja@dreadnode.io>
Date: Fri, 9 May 2025 15:52:58 -0700
Subject: [PATCH 3/6] Fix mypy errors

---
 dreadnode/data_types/audio.py |  6 +++---
 dreadnode/data_types/video.py | 13 ++++++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/dreadnode/data_types/audio.py b/dreadnode/data_types/audio.py
index 3271647a..cbb78ce1 100644
--- a/dreadnode/data_types/audio.py
+++ b/dreadnode/data_types/audio.py
@@ -3,12 +3,12 @@
 from pathlib import Path
 
 import numpy as np
-import soundfile as sf
-from pydub import AudioSegment
+import soundfile as sf  # type: ignore  # noqa: PGH003
+from pydub import AudioSegment  # type: ignore  # noqa: PGH003
 
 from dreadnode.data_types.base_data_type import BaseDataType
 
-AudioDataType = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment
+AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment
 
 
 class Audio(BaseDataType):
diff --git a/dreadnode/data_types/video.py b/dreadnode/data_types/video.py
index f47a2ec2..430ec3e1 100644
--- a/dreadnode/data_types/video.py
+++ b/dreadnode/data_types/video.py
@@ -4,14 +4,13 @@
 from pathlib import Path
 
 import numpy as np
-from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
-from moviepy.video.VideoClip import VideoClip
+from moviepy.video.io.ImageSequenceClip import ImageSequenceClip  # type: ignore  # noqa: PGH003
+from moviepy.video.VideoClip import VideoClip  # type: ignore  # noqa: PGH003
+from numpy.typing import NDArray
 
 from dreadnode.data_types.base_data_type import BaseDataType
 
-VideoDataType = (
-    str | Path | np.ndarray[t.Any, t.Any] | bytes | list[np.ndarray[t.Any, t.Any]] | VideoClip
-)
+VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | VideoClip
 
 
 class Video(BaseDataType):
@@ -81,6 +80,8 @@ def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]:
         Returns:
             A tuple of (video_bytes, metadata_dict)
         """
+        if not isinstance(self._data, (str, Path)):
+            raise TypeError("Expected file path as str or Path")
         video_bytes = Path(self._data).read_bytes()
         format_name = self._format
 
@@ -98,6 +99,8 @@ def _process_bytes(self) -> tuple[bytes, dict[str, t.Any]]:
         Returns:
             A tuple of (video_bytes, metadata_dict)
         """
+        if not isinstance(self._data, bytes):
+            raise TypeError("Expected bytes for video data")
         metadata = self._generate_metadata(self._format)
         return self._data, metadata
 

From 432b56ae8a9d769b60487cf823f3eda8720d23a9 Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda <raja@dreadnode.io>
Date: Tue, 13 May 2025 14:07:27 -0700
Subject: [PATCH 4/6] Updated poetry lock

---
 poetry.lock | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/poetry.lock b/poetry.lock
index 82964f2a..91d4bcb8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3619,6 +3619,7 @@ files = [
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"},
+    {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"},
     {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"},
@@ -3627,6 +3628,7 @@ files = [
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"},
+    {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"},
     {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"},
@@ -3635,6 +3637,7 @@ files = [
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"},
+    {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"},
     {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"},
@@ -3643,6 +3646,7 @@ files = [
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"},
+    {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"},
     {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"},
@@ -3651,6 +3655,7 @@ files = [
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"},
+    {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2c59aa6170b990d8d2719323e628aaf36f3bfbc1c26279c0eeeb24d05d2d11c7"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"},
     {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"},
     {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"},
@@ -4590,4 +4595,4 @@ training = ["transformers"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.14"
-content-hash = "41eca5e8e88105efc70fe88fc0a8259511439b329da854c463d1e13df445165e"
+content-hash = "5de28adea02b5d3763e9fd942b8d5077b71eaa158bc1ebf05aa7ce7befc88d92"

From b1efbc1a0012b5dac555cc315a2146afe0f785b5 Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda <raja@dreadnode.io>
Date: Wed, 14 May 2025 18:43:08 -0700
Subject: [PATCH 5/6] Add missing dependency

---
 poetry.lock    | 4 ++--
 pyproject.toml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 91d4bcb8..6ad50a3e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3080,7 +3080,7 @@ version = "0.25.1"
 description = "Manipulate audio with an simple and easy high level interface"
 optional = false
 python-versions = "*"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"},
     {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
@@ -4595,4 +4595,4 @@ training = ["transformers"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.14"
-content-hash = "5de28adea02b5d3763e9fd942b8d5077b71eaa158bc1ebf05aa7ce7befc88d92"
+content-hash = "5385eea978274d5d19adaa7fc49a3325016b0391e942c3881adc42cba8146f7e"
diff --git a/pyproject.toml b/pyproject.toml
index 67dbd396..c8c41200 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ fsspec = {version = ">=2023.1.0,<=2025.3.0", extras = ["s3"]} # Pinned for datas
 transformers = { version = "^4.41.0", optional = true }
 soundfile = "^0.13.1"
 moviepy = "^2.1.2"
+pydub = "^0.25.1"
 
 [tool.poetry.extras]
 training = ["transformers"]

From 1652f436461cc12ed79bd9a7e99d5bb01b03fbfe Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda <raja@dreadnode.io>
Date: Thu, 15 May 2025 10:13:45 -0700
Subject: [PATCH 6/6] Fix object identification to maintain schema uniqueness
 while preserving storage efficiency

---
 dreadnode/tracing/span.py | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/dreadnode/tracing/span.py b/dreadnode/tracing/span.py
index a14b2125..92f14c78 100644
--- a/dreadnode/tracing/span.py
+++ b/dreadnode/tracing/span.py
@@ -1,3 +1,4 @@
+import hashlib
 import logging
 import re
 import types
@@ -368,20 +369,28 @@ def log_object(
         data_hash = serialized.data_hash
         schema_hash = serialized.schema_hash
 
-        # Store object if we haven't already
-        if data_hash not in self._objects:
-            self._objects[data_hash] = self._create_object(serialized)
-
-        object_ = self._objects[data_hash]
+        # Create a composite key that represents both data and schema
+        hash_input = f"{data_hash}:{schema_hash}"
+        composite_hash = hashlib.sha1(hash_input.encode()).hexdigest()[:16]  # noqa: S324
 
         # Store schema if new
         if schema_hash not in self._object_schemas:
             self._object_schemas[schema_hash] = serialized.schema
 
-        # Build event attributes
+        # Check if we already have this exact composite hash
+        if composite_hash not in self._objects:
+            # Create a new object, but use the data_hash for deduplication of storage
+            obj = self._create_object_by_hash(serialized, composite_hash)
+
+            # Store with composite hash so we can look it up by the combination
+            self._objects[composite_hash] = obj
+
+        object_ = self._objects[composite_hash]
+
+        # Build event attributes, use composite hash in events
         event_attributes = {
             **attributes,
-            EVENT_ATTRIBUTE_OBJECT_HASH: object_.hash,
+            EVENT_ATTRIBUTE_OBJECT_HASH: composite_hash,
             EVENT_ATTRIBUTE_ORIGIN_SPAN_ID: trace_api.format_span_id(
                 trace_api.get_current_span().get_span_context().span_id,
             ),
@@ -410,8 +419,8 @@ def _store_file_by_hash(self, data: bytes, full_path: str) -> str:
 
         return str(self._file_system.unstrip_protocol(full_path))
 
-    def _create_object(self, serialized: Serialized) -> Object:
-        """Create an ObjectVal or ObjectUri depending on size."""
+    def _create_object_by_hash(self, serialized: Serialized, object_hash: str) -> Object:
+        """Create an ObjectVal or ObjectUri depending on size with a specific hash."""
         data = serialized.data
         data_bytes = serialized.data_bytes
         data_len = serialized.data_len
@@ -420,17 +429,19 @@ def _create_object(self, serialized: Serialized) -> Object:
 
         if data is None or data_bytes is None or data_len <= MAX_INLINE_OBJECT_BYTES:
             return ObjectVal(
-                hash=data_hash,
+                hash=object_hash,
                 value=data,
                 schema_hash=schema_hash,
             )
 
         # Offload to file system (e.g., S3)
+        # For storage efficiency, still use just the data_hash for the file path
+        # This ensures we don't duplicate storage for the same data
         full_path = f"{self._prefix_path.rstrip('/')}/{data_hash}"
         object_uri = self._store_file_by_hash(data_bytes, full_path)
 
         return ObjectUri(
-            hash=data_hash,
+            hash=object_hash,
             uri=object_uri,
             schema_hash=schema_hash,
             size=data_len,