diff --git a/docs/sdk/data_types.mdx b/docs/sdk/data_types.mdx
index c8d1aaae..46c03f3a 100644
--- a/docs/sdk/data_types.mdx
+++ b/docs/sdk/data_types.mdx
@@ -110,6 +110,34 @@ def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
```
+
+
+Code
+----
+
+```python
+Code(text: str, language: str = '')
+```
+
+Hint type for code-formatted text.
+
+This is a subclass of Text with format set to "code".
+
+Example
+
+```python
+log_output("code_snippet", Code("print('Hello, World!')", language="python"))
+```
+
+
+
+```python
+def __init__(self, text: str, language: str = ""):
+ super().__init__(text, format="code")
+ self._language = language
+```
+
+
Image
@@ -220,6 +248,33 @@ def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
```
+
+
+Markdown
+--------
+
+```python
+Markdown(text: str)
+```
+
+Hint type for markdown-formatted text.
+
+This is a subclass of Text with format set to "markdown".
+
+Example
+
+```python
+log_output("report", Markdown("..."))
+```
+
+
+
+```python
+def __init__(self, text: str):
+ super().__init__(text, format="markdown")
+```
+
+
Object3D
@@ -434,6 +489,43 @@ def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
```
+
+
+Text
+----
+
+```python
+Text(text: str, format: str)
+```
+
+Text data type for Dreadnode logging.
+
+Initialize a Text object.
+
+**Parameters:**
+
+* **`text`**
+ (`str`)
+ –The text content to log
+* **`format`**
+ (`str`)
+ –The format hint of the text
+
+
+```python
+def __init__(self, text: str, format: str):
+ """
+ Initialize a Text object.
+
+ Args:
+ text: The text content to log
+ format: The format hint of the text
+ """
+ self._text = text
+ self._format = format
+```
+
+
Video
@@ -575,4 +667,75 @@ def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
```
+
+
+WithMeta
+--------
+
+```python
+WithMeta(obj: Any, metadata: dict[str, Any])
+```
+
+Helper data type to add additional metadata to the schema for logged data.
+
+Example
+
+```python
+log_output("my_data", WithMeta(data, {"format": "custom-data"}))
+```
+
+Initialize a data type with associated metadata.
+
+**Parameters:**
+
+* **`metadata`**
+ (`dict[str, Any]`)
+ –The metadata for this data type
+
+
+```python
+def __init__(self, obj: t.Any, metadata: dict[str, t.Any]):
+ """
+ Initialize a data type with associated metadata.
+
+ Args:
+ metadata: The metadata for this data type
+ """
+ self._obj = obj
+ self._metadata = metadata
+```
+
+
+
+
+### to\_serializable
+
+```python
+to_serializable() -> tuple[t.Any, dict[str, t.Any]]
+```
+
+Convert the media type to a serializable format.
+
+**Returns:**
+
+* `tuple[Any, dict[str, Any]]`
+ –Tuple of (data, metadata) where:
+ - data: The serialized data
+ - metadata: Additional metadata for this data type
+
+
+```python
+def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
+ """
+ Convert the media type to a serializable format.
+
+ Returns:
+ Tuple of (data, metadata) where:
+ - data: The serialized data
+ - metadata: Additional metadata for this data type
+ """
+ return self._obj, self._metadata
+```
+
+
\ No newline at end of file
diff --git a/docs/sdk/serialization.mdx b/docs/sdk/serialization.mdx
index 19aba1f2..6d4d04b1 100644
--- a/docs/sdk/serialization.mdx
+++ b/docs/sdk/serialization.mdx
@@ -57,7 +57,9 @@ serialize
---------
```python
-serialize(obj: Any) -> Serialized
+serialize(
+ obj: Any, *, schema_extras: JsonDict | None = None
+) -> Serialized
```
Serializes a Python object into a JSON-compatible structure and
@@ -69,6 +71,11 @@ the serialization format and the schema.
* **`obj`**
(`Any`)
–The Python object to process.
+* **`schema_extras`**
+ (`JsonDict | None`, default:
+ `None`
+ )
+ –Additional JSON Schema properties to include.
**Returns:**
@@ -77,7 +84,7 @@ the serialization format and the schema.
```python
-def serialize(obj: t.Any) -> Serialized:
+def serialize(obj: t.Any, *, schema_extras: JsonDict | None = None) -> Serialized:
"""
Serializes a Python object into a JSON-compatible structure and
generates a corresponding JSON Schema, ensuring consistency between
@@ -85,6 +92,7 @@ def serialize(obj: t.Any) -> Serialized:
Args:
obj: The Python object to process.
+ schema_extras: Additional JSON Schema properties to include.
Returns:
An object containing the serialized data, schema, and their hashes.
@@ -96,6 +104,9 @@ def serialize(obj: t.Any) -> Serialized:
else:
serialized_bytes = json.dumps(serialized, separators=(",", ":")).encode()
+ if schema_extras:
+ schema = {**schema, **schema_extras}
+
schema_str = json.dumps(schema, separators=(",", ":"))
data_hash = EMPTY_HASH
diff --git a/docs/usage/data-tracking.mdx b/docs/usage/data-tracking.mdx
index 489ac3a7..fff51406 100644
--- a/docs/usage/data-tracking.mdx
+++ b/docs/usage/data-tracking.mdx
@@ -67,6 +67,8 @@ with dn.run("text-generation"):
Strikes maintains a rich serialization layer to support many different kinds of Python objects:
- Dictionaries, lists, and other JSON-serializable objects
- NumPy arrays and Pandas DataFrames
+- Rich media types (images, audio, video, 3D objects, tables)
+- Formatted text (markdown, code with syntax highlighting)
- Custom objects (serialized with pickle)
- Large datasets (automatically stored efficiently)
diff --git a/docs/usage/rich-objects.mdx b/docs/usage/rich-objects.mdx
index 899c1a2e..d4bb342e 100644
--- a/docs/usage/rich-objects.mdx
+++ b/docs/usage/rich-objects.mdx
@@ -1,10 +1,10 @@
---
title: 'Rich Objects'
-description: 'Store data types like images, audio, video, and 3D objects in your runs.'
+description: 'Store data types like images, audio, video, formatted text, and 3D objects in your runs.'
public: true
---
-Strikes extends its data tracking capabilities to handle complex, non-JSON serializable data types. This allows you to store rich media and other complex objects directly within your runs, making it easy to track and analyze all aspects of your data-driven workflows.
+Strikes extends its data tracking capabilities to handle complex, non-JSON serializable data types. This allows you to store rich media, formatted text, and other complex objects directly within your runs, making it easy to track and analyze all aspects of your data-driven workflows.
## Images
@@ -173,6 +173,57 @@ with dn.run("3d-formats-example"):
```
+## Text with Formatting Hints
+
+For text that needs special rendering in the UI, wrap it in a text hint type: `dn.Markdown` for Markdown, `dn.Code` for source code (with an optional `language`), or `dn.Text` with a custom `format` string.
+
+
+```python Markdown Text
+import dreadnode as dn
+
+markdown_content = """
+# Results Summary
+
+## Model Performance
+- **Accuracy**: 94.2%
+- **Loss**: 0.156
+
+### Key Findings
+The model shows excellent performance on validation data.
+"""
+
+with dn.run("markdown-example"):
+ dn.log_output("report", dn.Markdown(markdown_content))
+```
+
+```python Code Snippets
+import dreadnode as dn
+
+python_code = """
+def fibonacci(n):
+ if n <= 1:
+ return n
+ return fibonacci(n-1) + fibonacci(n-2)
+
+result = fibonacci(10)
+print(f"Fibonacci(10) = {result}")
+"""
+
+with dn.run("code-example"):
+ dn.log_output("generated_code", dn.Code(python_code, language="python"))
+```
+
+```python Generic Text with Format
+import dreadnode as dn
+
+# Use dn.Text directly to supply your own format hint
+formatted_text = "This is custom formatted text"
+
+with dn.run("text-example"):
+ dn.log_output("custom", dn.Text(formatted_text, format="custom"))
+```
+
+
## Tables
For structured data, you can use the `dn.Table` data type. It can be created from various data formats and provides flexible data organization.
diff --git a/dreadnode/data_types/__init__.py b/dreadnode/data_types/__init__.py
index 7506d88f..04a95f21 100644
--- a/dreadnode/data_types/__init__.py
+++ b/dreadnode/data_types/__init__.py
@@ -1,7 +1,9 @@
from .audio import Audio
+from .base import WithMeta
from .image import Image
from .object_3d import Object3D
from .table import Table
+from .text import Code, Markdown, Text
from .video import Video
-__all__ = ["Audio", "Image", "Object3D", "Table", "Video"]
+__all__ = ["Audio", "Code", "Image", "Markdown", "Object3D", "Table", "Text", "Video", "WithMeta"]
diff --git a/dreadnode/data_types/audio.py b/dreadnode/data_types/audio.py
index c4bd3964..e3b6e0c7 100644
--- a/dreadnode/data_types/audio.py
+++ b/dreadnode/data_types/audio.py
@@ -9,12 +9,12 @@
except ImportError:
sf = None
-from dreadnode.data_types.base_data_type import BaseDataType
+from dreadnode.data_types.base import DataType
AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes
-class Audio(BaseDataType):
+class Audio(DataType):
"""
Audio media type for Dreadnode logging.
diff --git a/dreadnode/data_types/base.py b/dreadnode/data_types/base.py
new file mode 100644
index 00000000..9d566b16
--- /dev/null
+++ b/dreadnode/data_types/base.py
@@ -0,0 +1,49 @@
+import typing as t
+from abc import ABC, abstractmethod
+
+
+class DataType(ABC):
+ """Base class for dedicated data types that can be logged with Dreadnode."""
+
+ @abstractmethod
+ def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
+ """
+ Convert the data type to a serializable format.
+
+ Returns:
+ Tuple of (data, metadata) where:
+ - data: The serialized data
+ - metadata: Additional metadata for this data type
+ """
+
+
+class WithMeta(DataType):
+ """
+ Helper data type to add additional metadata to the schema for logged data.
+
+ Example:
+ ```
+ log_output("my_data", WithMeta(data, {"format": "custom-data"}))
+ ```
+ """
+
+ def __init__(self, obj: t.Any, metadata: dict[str, t.Any]):
+ """
+ Wrap `obj` together with the metadata to report for it.
+
+ Args:
+ metadata: The metadata returned alongside `obj` by `to_serializable`
+ """
+ self._obj = obj
+ self._metadata = metadata
+
+ def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
+ """
+ Return the wrapped object and its metadata.
+
+ Returns:
+ Tuple of (data, metadata) where:
+ - data: The wrapped object, as passed to the constructor
+ - metadata: The metadata dict passed to the constructor
+ """
+ return self._obj, self._metadata
diff --git a/dreadnode/data_types/base_data_type.py b/dreadnode/data_types/base_data_type.py
deleted file mode 100644
index 74c38119..00000000
--- a/dreadnode/data_types/base_data_type.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import typing as t
-from abc import ABC, abstractmethod
-
-
-class BaseDataType(ABC):
- """Base class for all data types that can be logged with Dreadnode."""
-
- @abstractmethod
- def to_serializable(self) -> tuple[t.Any, dict[str, t.Any]]:
- """
- Convert the media type to a serializable format.
-
- Returns:
- Tuple of (data, metadata) where:
- - data: The serialized data
- - metadata: Additional metadata for this data type
- """
diff --git a/dreadnode/data_types/image.py b/dreadnode/data_types/image.py
index 301e7448..0a4c7a22 100644
--- a/dreadnode/data_types/image.py
+++ b/dreadnode/data_types/image.py
@@ -5,7 +5,7 @@
import numpy as np
-from dreadnode.data_types.base_data_type import BaseDataType
+from dreadnode.data_types.base import DataType
try:
from PIL import Image as PILImage
@@ -16,7 +16,7 @@
ImageDataOrPathType = str | Path | bytes | ImageDataType
-class Image(BaseDataType):
+class Image(DataType):
"""
Image media type for Dreadnode logging.
diff --git a/dreadnode/data_types/object_3d.py b/dreadnode/data_types/object_3d.py
index 6f64450b..d4000983 100644
--- a/dreadnode/data_types/object_3d.py
+++ b/dreadnode/data_types/object_3d.py
@@ -2,12 +2,12 @@
from pathlib import Path
from typing import ClassVar
-from dreadnode.data_types.base_data_type import BaseDataType
+from dreadnode.data_types.base import DataType
Object3DDataType = str | Path | bytes
-class Object3D(BaseDataType):
+class Object3D(DataType):
"""
3D object media type for Dreadnode logging.
diff --git a/dreadnode/data_types/table.py b/dreadnode/data_types/table.py
index 797946f3..37a09372 100644
--- a/dreadnode/data_types/table.py
+++ b/dreadnode/data_types/table.py
@@ -6,14 +6,14 @@
import numpy as np
import pandas as pd
-from dreadnode.data_types.base_data_type import BaseDataType
+from dreadnode.data_types.base import DataType
TableDataType = (
pd.DataFrame | dict[t.Any, t.Any] | list[t.Any] | str | Path | np.ndarray[t.Any, t.Any]
)
-class Table(BaseDataType):
+class Table(DataType):
"""
Table data type for Dreadnode logging.
diff --git a/dreadnode/data_types/text.py b/dreadnode/data_types/text.py
new file mode 100644
index 00000000..bc4ef24d
--- /dev/null
+++ b/dreadnode/data_types/text.py
@@ -0,0 +1,59 @@
+import typing as t
+
+from dreadnode.data_types.base import DataType
+
+
+class Text(DataType):
+ """
+ Text data type for Dreadnode logging.
+ """
+
+ def __init__(self, text: str, format: str):
+ """
+ Initialize a Text object.
+
+ Args:
+ text: The text content to log
+ format: The format hint of the text, returned as the "format" metadata entry
+ """
+ self._text = text
+ self._format = format
+
+ def to_serializable(self) -> tuple[str, dict[str, t.Any]]:
+ return self._text, {"format": self._format}
+
+
+class Markdown(Text):
+ """
+ Hint type for markdown-formatted text.
+
+ Equivalent to `Text(text, format="markdown")`; only the text needs to be supplied.
+
+ Example:
+ ```
+ log_output("report", Markdown("..."))
+ ```
+ """
+
+ def __init__(self, text: str):
+ super().__init__(text, format="markdown")
+
+
+class Code(Text):
+ """
+ Hint type for code-formatted text.
+
+ This is a subclass of Text with format set to "code"; `language` is added to the metadata as "code-language".
+
+ Example:
+ ```
+ log_output("code_snippet", Code("print('Hello, World!')", language="python"))
+ ```
+ """
+
+ def __init__(self, text: str, language: str = ""):
+ super().__init__(text, format="code")
+ self._language = language
+
+ def to_serializable(self) -> tuple[str, dict[str, t.Any]]:
+ return self._text, {"format": self._format, "code-language": self._language}
diff --git a/dreadnode/data_types/video.py b/dreadnode/data_types/video.py
index 73fc1a74..a475c76d 100644
--- a/dreadnode/data_types/video.py
+++ b/dreadnode/data_types/video.py
@@ -6,7 +6,7 @@
import numpy as np
from numpy.typing import NDArray
-from dreadnode.data_types.base_data_type import BaseDataType
+from dreadnode.data_types.base import DataType
try:
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip # type: ignore # noqa: PGH003
@@ -19,7 +19,7 @@
VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | t.Any
-class Video(BaseDataType):
+class Video(DataType):
"""
Video media type for Dreadnode logging.
diff --git a/dreadnode/serialization.py b/dreadnode/serialization.py
index fcafc7fa..1d9fc7bc 100644
--- a/dreadnode/serialization.py
+++ b/dreadnode/serialization.py
@@ -23,7 +23,7 @@
from re import Pattern
from uuid import UUID
-from dreadnode.data_types.base_data_type import BaseDataType
+from dreadnode.data_types.base import DataType
from dreadnode.types import JsonDict, JsonValue
from dreadnode.util import safe_repr
@@ -408,16 +408,12 @@ def _handle_dataset(obj: t.Any, _seen: set[int]) -> tuple[JsonValue, JsonDict]:
)
-def _handle_custom_data_type(obj: BaseDataType, _seen: set[int]) -> tuple[JsonValue, JsonDict]:
+def _handle_custom_data_type(obj: DataType, _seen: set[int]) -> tuple[JsonValue, JsonDict]:
"""Handler for Dreadnode custom data types."""
- if not isinstance(obj, BaseDataType):
+ if not isinstance(obj, DataType):
return safe_repr(obj), UNKNOWN_OBJECT_SCHEMA
- # Get the serialized data and metadata from the media type
data, metadata = obj.to_serializable()
-
- if isinstance(data, bytes):
- return _handle_bytes(data, _seen, metadata)
serialized, schema = _serialize(data, _seen)
schema.update(metadata)
@@ -511,7 +507,7 @@ def _get_handlers() -> dict[type, HandlerFunc]:
handlers[datasets.Dataset] = _handle_dataset
with contextlib.suppress(Exception):
- handlers[BaseDataType] = _handle_custom_data_type
+ handlers[DataType] = _handle_custom_data_type
return handlers
@@ -619,7 +615,7 @@ class Serialized:
EMPTY_HASH = "0" * 16
-def serialize(obj: t.Any) -> Serialized:
+def serialize(obj: t.Any, *, schema_extras: JsonDict | None = None) -> Serialized:
"""
Serializes a Python object into a JSON-compatible structure and
generates a corresponding JSON Schema, ensuring consistency between
@@ -627,6 +623,7 @@ def serialize(obj: t.Any) -> Serialized:
Args:
obj: The Python object to process.
+ schema_extras: Additional JSON Schema properties to include.
Returns:
An object containing the serialized data, schema, and their hashes.
@@ -638,6 +635,9 @@ def serialize(obj: t.Any) -> Serialized:
else:
serialized_bytes = json.dumps(serialized, separators=(",", ":")).encode()
+ if schema_extras:
+ schema = {**schema, **schema_extras}
+
schema_str = json.dumps(schema, separators=(",", ":"))
data_hash = EMPTY_HASH