Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,33 @@ pip install -U dreadnode
```

If you want to build from source:

```bash

poetry install
# Install with multimodal extras
poetry install --extras multimodal

# Install with training extras
poetry install --extras training

# Install with all extras
poetry install --all-extras
```

## Installation from PyPI with Optional Features

Optional features are packaged as extras. For advanced media processing capabilities (audio, video, images), install the multimodal extras; for ML model integration, install the training extras:

```bash
# Multimodal support (audio, video, and image processing)
pip install -U dreadnode[multimodal]

# Training support (ML model integration)
pip install -U dreadnode[training]

# All optional features
pip install -U dreadnode[all]
```

See our **[installation guide](https://docs.dreadnode.io/strikes/install)** for more options.
Expand Down
3 changes: 0 additions & 3 deletions docs/sdk/data_types.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ Supports:
- Local file paths (str or Path)
- Numpy arrays with sample rate
- Raw bytes
- Pydub AudioSegment object

Initialize an Audio object.

Expand All @@ -36,7 +35,6 @@ Initialize an Audio object.
- A path to a local audio file (str or Path)
- A numpy array (requires sample\_rate)
- Raw bytes
- A pydub AudioSegment
* **`sample_rate`**
(`int | None`, default:
`None`
Expand Down Expand Up @@ -70,7 +68,6 @@ def __init__(
- A path to a local audio file (str or Path)
- A numpy array (requires sample_rate)
- Raw bytes
- A pydub AudioSegment
sample_rate: Required when using numpy arrays
caption: Optional caption for the audio
format: Optional format to use (default is wav for numpy arrays)
Expand Down
11 changes: 0 additions & 11 deletions docs/usage/rich-objects.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,6 @@ with dn.run("audio-example-numpy"):
dn.log_input("my-audio", dn.Audio(audio_data, sample_rate=sample_rate))
```

```python AudioSegment
import dreadnode as dn
from pydub import AudioSegment

# Load audio with pydub
audio_segment = AudioSegment.from_file("path/to/audio.mp3")

with dn.run("audio-example-segment"):
dn.log_input("my-audio", dn.Audio(audio_segment))
```

```python Raw Bytes
import dreadnode as dn

Expand Down
48 changes: 11 additions & 37 deletions dreadnode/data_types/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
from pathlib import Path

import numpy as np
import soundfile as sf # type: ignore # noqa: PGH003
from pydub import AudioSegment # type: ignore # noqa: PGH003

try:
import soundfile as sf # type: ignore # noqa: PGH003
except ImportError:
sf = None

from dreadnode.data_types.base_data_type import BaseDataType

AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment
AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes


class Audio(BaseDataType):
Expand All @@ -19,7 +22,6 @@ class Audio(BaseDataType):
- Local file paths (str or Path)
- Numpy arrays with sample rate
- Raw bytes
- Pydub AudioSegment object
"""

def __init__(
Expand All @@ -37,11 +39,15 @@ def __init__(
- A path to a local audio file (str or Path)
- A numpy array (requires sample_rate)
- Raw bytes
- A pydub AudioSegment
sample_rate: Required when using numpy arrays
caption: Optional caption for the audio
format: Optional format to use (default is wav for numpy arrays)
"""
if sf is None:
raise ImportError(
"Audio processing requires optional dependencies. "
"Install with: pip install dreadnode[multimodal]"
)
self._data = data
self._sample_rate = sample_rate
self._caption = caption
Expand Down Expand Up @@ -69,8 +75,6 @@ def _process_audio_data(self) -> tuple[bytes, str, int | None, float | None]:
return self._process_numpy_array()
if isinstance(self._data, bytes):
return self._process_raw_bytes()
if isinstance(self._data, AudioSegment):
return self._process_pydub_audio_segment()
raise TypeError(f"Unsupported audio data type: {type(self._data)}")

def _process_file_path(self) -> tuple[bytes, str, int | None, float | None]:
Expand Down Expand Up @@ -123,29 +127,6 @@ def _process_raw_bytes(self) -> tuple[bytes, str, int | None, float | None]:
raise TypeError("Raw bytes are expected for this processing method.")
return self._data, format_name, self._sample_rate, None

def _process_pydub_audio_segment(self) -> tuple[bytes, str, int | None, float | None]:
"""
Process pydub AudioSegment to bytes.
Returns:
A tuple of (audio_bytes, format_name, sample_rate, duration)
"""

if not isinstance(self._data, AudioSegment):
raise TypeError("AudioSegment is expected for this processing method.")

sample_rate = self._data.frame_rate

buffer = io.BytesIO()
format_name = self._format or "wav"
self._data.export(buffer, format=format_name)
buffer.seek(0)
audio_bytes = buffer.read()

# PyDUB provides duration in milliseconds, convert to seconds for consistency
duration = len(self._data) / 1000.0

return audio_bytes, format_name, sample_rate, duration

def _generate_metadata(
self, format_name: str, sample_rate: int | None, duration: float | None
) -> dict[str, str | int | float | None]:
Expand All @@ -166,20 +147,13 @@ def _generate_metadata(
metadata["source-type"] = "numpy.ndarray"
elif isinstance(self._data, bytes):
metadata["source-type"] = "bytes"
elif isinstance(self._data, AudioSegment):
metadata["source-type"] = "pydub.AudioSegment"

if sample_rate is not None:
metadata["sample-rate"] = sample_rate

if duration is not None:
metadata["duration"] = duration

# Add pydub-specific metadata if available
if isinstance(self._data, AudioSegment):
metadata["channels"] = self._data.channels
metadata["sample-width"] = self._data.sample_width

if self._caption:
metadata["caption"] = self._caption

Expand Down
12 changes: 10 additions & 2 deletions dreadnode/data_types/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
from pathlib import Path

import numpy as np
from PIL import Image as PILImage

from dreadnode.data_types.base_data_type import BaseDataType

ImageDataType = PILImage.Image | np.ndarray[t.Any, t.Any]
try:
from PIL import Image as PILImage
except ImportError:
PILImage = None # type: ignore[assignment]

ImageDataType = t.Any | np.ndarray[t.Any, t.Any]
ImageDataOrPathType = str | Path | bytes | ImageDataType


Expand Down Expand Up @@ -44,6 +48,10 @@ def __init__(
caption: Optional caption for the image
format: Optional format to use when saving (png, jpg, etc.)
"""
if PILImage is None:
raise ImportError(
"Image processing requires PIL (Pillow). Install with: pip install dreadnode[multimodal]"
)
self._data = data
self._mode = mode
self._caption = caption
Expand Down
47 changes: 38 additions & 9 deletions dreadnode/data_types/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,19 @@
from pathlib import Path

import numpy as np
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip # type: ignore # noqa: PGH003
from moviepy.video.VideoClip import VideoClip # type: ignore # noqa: PGH003
from numpy.typing import NDArray

from dreadnode.data_types.base_data_type import BaseDataType

VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | VideoClip
try:
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip # type: ignore # noqa: PGH003
from moviepy.video.VideoClip import VideoClip # type: ignore # noqa: PGH003
except ImportError:
ImageSequenceClip = None
VideoClip = None


VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | t.Any


class Video(BaseDataType):
Expand Down Expand Up @@ -70,8 +76,13 @@ def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
return self._process_bytes()
if isinstance(self._data, (np.ndarray, list)):
return self._process_numpy_array()
if isinstance(self._data, VideoClip):
if VideoClip is not None and isinstance(self._data, VideoClip):
return self._process_moviepy_clip()
if VideoClip is None and hasattr(self._data, "write_videofile"):
raise ImportError(
"MoviePy VideoClip detected but moviepy not installed. "
"Install with: pip install dreadnode[multimodal]"
)
raise TypeError(f"Unsupported video data type: {type(self._data)}")

def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]:
Expand Down Expand Up @@ -110,13 +121,31 @@ def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
Returns:
A tuple of (video_bytes, metadata_dict)
"""
if ImageSequenceClip is None:
raise ImportError(
"Video processing from numpy arrays requires moviepy. "
"Install with: pip install dreadnode[multimodal]"
)
if not self._fps:
raise ValueError("fps is required for numpy array video frames")
if not isinstance(self._data, (np.ndarray, list)):
raise TypeError("data must be a numpy array or list of numpy arrays")

# Type guard for mypy
assert ImageSequenceClip is not None # noqa: S101

frames = self._extract_frames_from_data()
if not frames:
raise ValueError("No frames found in input data")

return self._create_video_from_frames_data(frames)

def _extract_frames_from_data(self) -> list[NDArray[t.Any]]:
"""Extract frames from numpy array or list data."""
frames = []
rgb_dim = 3
rgba_dim = 4

if isinstance(self._data, np.ndarray):
if self._data.ndim == rgb_dim: # Single frame
frames = [self._data]
Expand All @@ -127,23 +156,23 @@ def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
elif isinstance(self._data, list):
frames = self._data

if not frames:
raise ValueError("No frames found in input data")
return frames

def _create_video_from_frames_data(
self, frames: list[NDArray[t.Any]]
) -> tuple[bytes, dict[str, t.Any]]:
"""Create video file from frames."""
frame_height, frame_width = frames[0].shape[:2]

temp_fd, temp_path = tempfile.mkstemp(suffix=f".{self._format}")
os.close(temp_fd)

try:
# Create clip and write to file
clip = ImageSequenceClip(frames, fps=self._fps)

clip.write_videofile(
temp_path,
fps=self._fps,
)

video_bytes = Path(temp_path).read_bytes()

metadata = self._generate_metadata(self._format)
Expand Down
37 changes: 6 additions & 31 deletions examples/log_object/audio.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@
"source": [
"# Dreadnode Audio Logging\n",
"\n",
"This notebook demonstrates how to log audio data using Dreadnode's `Audio` data type. The examples cover various audio formats and sources including file paths, numpy arrays, and pydub AudioSegment objects.\n",
"This notebook demonstrates how to log audio data using Dreadnode's `Audio` data type. The examples cover various audio formats and sources, including file paths and numpy arrays.\n",
"\n",
"## Features\n",
"\n",
"- Log audio files directly from disk (WAV, MP3, etc.)\n",
"- Convert and log numpy arrays as audio\n",
"- Process and log pydub AudioSegment objects\n",
"- Add captions and metadata to audio logs"
"- Add captions and metadata to audio logs\n",
"\n",
"⚠️ Note: Ensure you have installed the multimodal extras to use the Audio data type:\n",
"`pip install dreadnode[multimodal]`"
]
},
{
Expand Down Expand Up @@ -128,32 +130,6 @@
" dn.log_input(\"stereo_sine\", Audio(stereo, sample_rate=sample_rate, caption=\"Stereo audio (440 Hz left, 880 Hz right)\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Pydub AudioSegment Examples\n",
"\n",
"Pydub is a popular library for audio manipulation in Python. Dreadnode supports logging AudioSegment objects directly, which enables powerful audio processing before logging."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pydub import AudioSegment\n",
"from pydub.generators import Sine\n",
"\n",
"with dn.run(\"audio_pydub_examples\") as r:\n",
" # Load the file with pydub\n",
" audio_segment = AudioSegment.from_file(audio_file_path)\n",
" \n",
" # Log the original AudioSegment\n",
" dn.log_input(\"pydub_original\", Audio(audio_segment, caption=\"Original audio with pydub\"))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -181,7 +157,6 @@
"\n",
"1. Audio files (e.g., WAV, MP3 files)\n",
"2. Numpy arrays with sample rate\n",
"3. Pydub AudioSegment objects\n",
"4. Audio with custom metadata and captions\n",
"\n",
"We also showed more advanced audio processing techniques including:\n",
Expand All @@ -194,7 +169,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "dreadnode-py3.12",
"language": "python",
"name": "python3"
},
Expand Down
7 changes: 5 additions & 2 deletions examples/log_object/image.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
"- Convert and log PIL Image objects\n",
"- Transform numpy arrays into images\n",
"- Handle raw bytes and base64 encoded images\n",
"- Convert between image modes (RGB, RGBA, grayscale)"
"- Convert between image modes (RGB, RGBA, grayscale)\n",
"\n",
"⚠️ Note: Ensure you have installed the multimodal extras to use the Image data type:\n",
"`pip install dreadnode[multimodal]`"
]
},
{
Expand Down Expand Up @@ -265,7 +268,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand Down
Loading