From 367fee00dcc28edf31ae82f2458d4fee8a113525 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Mon, 28 Jul 2025 21:22:07 -0700 Subject: [PATCH 1/5] feat: Add automatic credential refresh with configurable buffer for storage operations --- dreadnode/api/client.py | 10 ++++-- dreadnode/artifact/storage.py | 16 ++++++++- dreadnode/constants.py | 4 +++ dreadnode/main.py | 65 +++++++++++++++++++++++++++++++---- dreadnode/storage_utils.py | 37 ++++++++++++++++++++ dreadnode/tracing/span.py | 14 +++++++- 6 files changed, 135 insertions(+), 11 deletions(-) create mode 100644 dreadnode/storage_utils.py diff --git a/dreadnode/api/client.py b/dreadnode/api/client.py index 789edc23..777728e7 100644 --- a/dreadnode/api/client.py +++ b/dreadnode/api/client.py @@ -13,6 +13,7 @@ process_run, process_task, ) +from dreadnode.constants import DEFAULT_FS_CREDENTIAL_DURATION from dreadnode.util import logger from dreadnode.version import VERSION @@ -430,12 +431,17 @@ def export_timeseries( # User data access - def get_user_data_credentials(self) -> UserDataCredentials: + def get_user_data_credentials( + self, duration: int = DEFAULT_FS_CREDENTIAL_DURATION + ) -> UserDataCredentials: """ Retrieves user data credentials for secondary storage access. + Args: + duration: Credential lifetime in seconds (default: 4 hours) + Returns: The user data credentials object. 
""" - response = self.request("GET", "/user-data/credentials") + response = self._request("GET", "/user-data/credentials", params={"duration": duration}) return UserDataCredentials(**response.json()) diff --git a/dreadnode/artifact/storage.py b/dreadnode/artifact/storage.py index 3b482589..f6fa180a 100644 --- a/dreadnode/artifact/storage.py +++ b/dreadnode/artifact/storage.py @@ -4,10 +4,12 @@ """ import hashlib +import typing as t from pathlib import Path import fsspec # type: ignore[import-untyped] +from dreadnode.storage_utils import with_credential_refresh from dreadnode.util import logger CHUNK_SIZE = 8 * 1024 * 1024 # 8MB @@ -22,15 +24,27 @@ class ArtifactStorage: - Batch uploads for directories handled by fsspec """ - def __init__(self, file_system: fsspec.AbstractFileSystem): + def __init__( + self, + file_system: fsspec.AbstractFileSystem, + credential_refresher: t.Callable[[], bool] | None = None, + ): """ Initialize artifact storage with a file system and prefix path. Args: file_system: FSSpec-compatible file system + credential_refresher: Optional function to refresh credentials when it's about to expire """ self._file_system = file_system + self._credential_refresher = credential_refresher + def _refresh_credentials_if_needed(self) -> None: + """Refresh credentials if refresher is available.""" + if self._credential_refresher: + self._credential_refresher() + + @with_credential_refresh def store_file(self, file_path: Path, target_key: str) -> str: """ Store a file in the storage system, using multipart upload for large files. 
diff --git a/dreadnode/constants.py b/dreadnode/constants.py index ae9dc730..d0eb1075 100644 --- a/dreadnode/constants.py +++ b/dreadnode/constants.py @@ -14,3 +14,7 @@ # Default values for the S3 storage MAX_INLINE_OBJECT_BYTES = 10 * 1024 # 10KB + +# Default values for the file system credential management +DEFAULT_FS_CREDENTIAL_DURATION = 14400 # 4 hours in seconds +FS_CREDENTIAL_REFRESH_BUFFER = 300 # 5 minutes in seconds diff --git a/dreadnode/main.py b/dreadnode/main.py index 6fee3c8e..3968c705 100644 --- a/dreadnode/main.py +++ b/dreadnode/main.py @@ -25,6 +25,7 @@ from dreadnode.api.client import ApiClient from dreadnode.constants import ( + DEFAULT_FS_CREDENTIAL_DURATION, DEFAULT_SERVER_URL, ENV_API_KEY, ENV_API_TOKEN, @@ -32,6 +33,7 @@ ENV_PROJECT, ENV_SERVER, ENV_SERVER_URL, + FS_CREDENTIAL_REFRESH_BUFFER, ) from dreadnode.metric import ( Metric, @@ -70,6 +72,8 @@ from opentelemetry.sdk.trace import SpanProcessor from opentelemetry.trace import Tracer + from dreadnode.api.models import UserDataCredentials + ToObject = t.Literal["task-or-run", "run"] @@ -135,6 +139,9 @@ def __init__( self._initialized = False + self._credentials: UserDataCredentials | None = None + self._credentials_expiry: datetime | None = None + @staticmethod def _resolve_endpoint(endpoint: str | None) -> str | None: """Automatically resolve endpoints based on environment @@ -358,18 +365,21 @@ def initialize(self) -> None: # ) # ) - credentials = self._api.get_user_data_credentials() - resolved_endpoint = self._resolve_endpoint(credentials.endpoint) + self._credentials = self._api.get_user_data_credentials( + duration=DEFAULT_FS_CREDENTIAL_DURATION + ) + self._credentials_expiry = self._credentials.expiration + resolved_endpoint = self._resolve_endpoint(self._credentials.endpoint) self._fs = S3FileSystem( - key=credentials.access_key_id, - secret=credentials.secret_access_key, - token=credentials.session_token, + key=self._credentials.access_key_id, + 
secret=self._credentials.secret_access_key, + token=self._credentials.session_token, client_kwargs={ "endpoint_url": resolved_endpoint, - "region_name": credentials.region, + "region_name": self._credentials.region, }, ) - self._fs_prefix = f"{credentials.bucket}/{credentials.prefix}/" + self._fs_prefix = f"{self._credentials.bucket}/{self._credentials.prefix}/" self._logfire = logfire.configure( local=not self.is_default, @@ -416,6 +426,45 @@ def api(self, *, server: str | None = None, token: str | None = None) -> ApiClie return self._api + def _refresh_storage_credentials(self) -> bool: + """Refresh storage credentials if they are about to expire.""" + if not self._api or not self._credentials: + return False + + now = datetime.now(timezone.utc) + + if ( + self._credentials_expiry is None + or (self._credentials_expiry - now).total_seconds() < FS_CREDENTIAL_REFRESH_BUFFER + ): + try: + logger.info("Refreshing storage credentials") + self._credentials = self._api.get_user_data_credentials( + duration=DEFAULT_FS_CREDENTIAL_DURATION + ) + self._credentials_expiry = self._credentials.expiration + + resolved_endpoint = self._resolve_endpoint(self._credentials.endpoint) + self._fs = S3FileSystem( + key=self._credentials.access_key_id, + secret=self._credentials.secret_access_key, + token=self._credentials.session_token, + client_kwargs={ + "endpoint_url": resolved_endpoint, + "region_name": self._credentials.region, + }, + ) + logger.info( + f"Storage credentials refreshed, valid until {self._credentials_expiry}" + ) + return True # noqa: TRY300 + + except Exception as e: # noqa: BLE001 + logger.error(f"Failed to refresh storage credentials: {e}") + return False + + return True + def _get_tracer(self, *, is_span_tracer: bool = True) -> "Tracer": return self._logfire._tracer_provider.get_tracer( # noqa: SLF001 self.otel_scope, @@ -791,6 +840,7 @@ def run( file_system=self._fs, prefix_path=self._fs_prefix, autolog=autolog, + 
credential_refresher=self._refresh_storage_credentials if self._credentials else None, ) def get_run_context(self) -> RunContext: @@ -837,6 +887,7 @@ def continue_run(self, run_context: RunContext) -> RunSpan: tracer=self._get_tracer(), file_system=self._fs, prefix_path=self._fs_prefix, + credential_refresher=self._refresh_storage_credentials if self._credentials else None, ) def tag(self, *tag: str, to: ToObject = "task-or-run") -> None: diff --git a/dreadnode/storage_utils.py b/dreadnode/storage_utils.py new file mode 100644 index 00000000..9599a238 --- /dev/null +++ b/dreadnode/storage_utils.py @@ -0,0 +1,37 @@ +import functools +import typing as t + +from dreadnode.util import logger + + +def with_credential_refresh(func: t.Callable[..., t.Any]) -> t.Callable[..., t.Any]: + """Decorator that automatically handles credential refresh on storage errors.""" + + @functools.wraps(func) + def wrapper(self: t.Any, *args: t.Any, **kwargs: t.Any) -> t.Any: + # Try to refresh credentials before operation + if hasattr(self, "_refresh_credentials_if_needed"): + self._refresh_credentials_if_needed() + + try: + return func(self, *args, **kwargs) + except Exception as e: + error_str = str(e) + if any( + error in error_str + for error in [ + "ExpiredToken", + "TokenRefreshRequired", + "InvalidAccessKeyId", + "The Access Key Id you provided does not exist", + ] + ): + logger.info("Storage credential error, forcing refresh and retrying") + + if hasattr(self, "_refresh_credentials_if_needed"): + self._refresh_credentials_if_needed() + + return func(self, *args, **kwargs) + raise + + return wrapper diff --git a/dreadnode/tracing/span.py b/dreadnode/tracing/span.py index 525d5124..d6dadcb7 100644 --- a/dreadnode/tracing/span.py +++ b/dreadnode/tracing/span.py @@ -36,6 +36,7 @@ from dreadnode.metric import Metric, MetricAggMode, MetricsDict from dreadnode.object import Object, ObjectRef, ObjectUri, ObjectVal from dreadnode.serialization import Serialized, serialize +from 
dreadnode.storage_utils import with_credential_refresh from dreadnode.types import UNSET, AnyDict, JsonDict, Unset from dreadnode.util import clean_str from dreadnode.version import VERSION @@ -366,6 +367,7 @@ def __init__( update_frequency: int = 5, run_id: str | ULID | None = None, type: SpanType = "run", + credential_refresher: t.Callable[[], bool] | None = None, ) -> None: self.autolog = autolog self.project = project @@ -376,7 +378,9 @@ def __init__( self._object_schemas: dict[str, JsonDict] = {} self._inputs: list[ObjectRef] = [] self._outputs: list[ObjectRef] = [] - self._artifact_storage = ArtifactStorage(file_system=file_system) + self._artifact_storage = ArtifactStorage( + file_system=file_system, credential_refresher=credential_refresher + ) self._artifacts: list[DirectoryNode] = [] self._artifact_merger = ArtifactMerger() self._artifact_tree_builder = ArtifactTreeBuilder( @@ -407,6 +411,7 @@ def __init__( SPAN_ATTRIBUTE_PROJECT: project, **(attributes or {}), } + self._credential_refresher = credential_refresher super().__init__(name, tracer, attributes=attributes, type=type, tags=tags) @classmethod @@ -416,6 +421,7 @@ def from_context( tracer: Tracer, file_system: AbstractFileSystem, prefix_path: str, + credential_refresher: t.Callable[[], bool] | None = None, ) -> "RunSpan": self = RunSpan( name=f"run.{context['run_id']}.fragment", @@ -426,6 +432,7 @@ def from_context( prefix_path=prefix_path, type="run_fragment", run_id=context["run_id"], + credential_refresher=credential_refresher, ) self._remote_context = context["trace_context"] @@ -501,6 +508,10 @@ def __exit__( if self._context_token is not None: current_run_span.reset(self._context_token) + def _refresh_credentials_if_needed(self) -> None: + if self._credential_refresher: + self._credential_refresher() + def push_update(self, *, force: bool = False) -> None: if self._span is None: return @@ -604,6 +615,7 @@ def log_object( return composite_hash + @with_credential_refresh def 
_store_file_by_hash(self, data: bytes, full_path: str) -> str: """ Writes data to the given full_path in the object store if it doesn't already exist. From c324c7eb3e388dcbb0955c80c238065b50ceb062 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Mon, 28 Jul 2025 21:28:01 -0700 Subject: [PATCH 2/5] Add docstring-parser dependency and generate docs for credential refresh changes --- docs/sdk/docs.json | 4 + docs/sdk/dreadnode/main.mdx | 536 +++++++++++++ docs/sdk/dreadnode/storage_utils.mdx | 23 + docs/sdk/dreadnode/tracing/span.mdx | 1113 ++++++++++++++++++++++++++ poetry.lock | 27 +- pyproject.toml | 1 + 6 files changed, 1699 insertions(+), 5 deletions(-) create mode 100644 docs/sdk/docs.json create mode 100644 docs/sdk/dreadnode/main.mdx create mode 100644 docs/sdk/dreadnode/storage_utils.mdx create mode 100644 docs/sdk/dreadnode/tracing/span.mdx diff --git a/docs/sdk/docs.json b/docs/sdk/docs.json new file mode 100644 index 00000000..8cb7c821 --- /dev/null +++ b/docs/sdk/docs.json @@ -0,0 +1,4 @@ +{ + "group": "API Reference", + "pages": [] +} \ No newline at end of file diff --git a/docs/sdk/dreadnode/main.mdx b/docs/sdk/dreadnode/main.mdx new file mode 100644 index 00000000..c8e965f4 --- /dev/null +++ b/docs/sdk/dreadnode/main.mdx @@ -0,0 +1,536 @@ +--- +title: 'main' +sidebarTitle: 'main' +groups: ["strikes"] +--- + +# Module `main` + +*(Full name: `dreadnode.main`)* + +**Source file:** `main.py` + +## Classes + + +### Class `Dreadnode` + +**Inherits from:** `object` + +The core Dreadnode SDK class. + +A default instance of this class is created and can be used directly with `dreadnode.*`. + +Otherwise, you can create your own instance and configure it with `configure()`. + +#### Properties + +##### `is_default` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +#### Methods + +##### `__eq__` + +```python +__eq__(self, other) +``` + +Return self==value. 
+ +--- + +##### `__init__` + +```python +__init__(self, *, server: str | None = None, token: str | None = None, local_dir: Union[str, pathlib.Path, Literal[False]] = False, project: str | None = None, service_name: str | None = None, service_version: str | None = None, console: Union[logfire._internal.config.ConsoleOptions, Literal[False, True]] = True, send_to_logfire: Union[bool, Literal['if-token-present']] = False, otel_scope: str = 'dreadnode') -> None +``` + +Initialize self. See help(type(self)) for accurate signature. + +--- + +##### `__repr__` + +```python +__repr__(self) +``` + +Return repr(self). + +--- + +##### `api` + +```python +api(self, *, server: str | None = None, token: str | None = None) -> dreadnode.api.client.ApiClient +``` + +Get an API client based on the current configuration or the provided server and token. + +If the server and token are not provided, the method will use the current configuration +and `configure()` needs to be called first. + +**Parameters:** + +- **`server`**: The server URL to use for the API client. +- **`token`**: The API token to use for authentication. + +**Returns:** An ApiClient instance. + +--- + +##### `configure` + +```python +configure(self, *, server: str | None = None, token: str | None = None, local_dir: Union[str, pathlib.Path, Literal[False]] = False, project: str | None = None, service_name: str | None = None, service_version: str | None = None, console: Union[logfire._internal.config.ConsoleOptions, Literal[False, True]] = True, send_to_logfire: Union[bool, Literal['if-token-present']] = False, otel_scope: str = 'dreadnode') -> None +``` + +Configure the Dreadnode SDK and call `initialize()`. + +This method should always be called before using the SDK. 
+ +If `server` and `token` are not provided, the SDK will look in +the associated environment variables: + +- `DREADNODE_SERVER_URL` or `DREADNODE_SERVER` +- `DREADNODE_API_TOKEN` or `DREADNODE_API_KEY` + +**Parameters:** + +- **`server`**: The Dreadnode server URL. +- **`token`**: The Dreadnode API token. +- **`local_dir`**: The local directory to store data in. +- **`project`**: The default project name to associate all runs with. +- **`service_name`**: The service name to use for OpenTelemetry. +- **`service_version`**: The service version to use for OpenTelemetry. +- **`console`**: Whether to log span information to the console. +- **`send_to_logfire`**: Whether to send data to Logfire. +- **`otel_scope`**: The OpenTelemetry scope name. + +--- + +##### `continue_run` + +```python +continue_run(self, run_context: dreadnode.tracing.span.RunContext) -> dreadnode.tracing.span.RunSpan +``` + +Continue a run from captured context on a remote host. + +**Parameters:** + +- **`run_context`**: The RunContext captured from get_run_context(). + +**Returns:** A RunSpan object that can be used as a context manager. + +--- + +##### `get_run_context` + +```python +get_run_context(self) -> dreadnode.tracing.span.RunContext +``` + +Capture the current run context for transfer to another host, thread, or process. + +Use `continue_run()` to continue the run anywhere else. + +**Returns:** RunContext containing run state and trace propagation headers. + +**Raises:** + +- `RuntimeError` — If called outside of an active run. + +--- + +##### `initialize` + +```python +initialize(self) -> None +``` + +Initialize the Dreadnode SDK. + +This method is called automatically when you call `configure()`. + +--- + +##### `link_objects` + +```python +link_objects(self, origin: Any, link: Any, attributes: dict[str, typing.Any] | None = None) -> None +``` + +Associate two runtime objects with each other. 
+ +This is useful for linking any two objects which are related to +each other, such as a model and its training data, or an input +prompt and the resulting output. + +**Parameters:** + +- **`origin`**: The origin object to link from. +- **`link`**: The linked object to link to. +- **`attributes`**: Additional attributes to attach to the link. + +--- + +##### `log_artifact` + +```python +log_artifact(self, local_uri: str | pathlib.Path) -> None +``` + +Log a file or directory artifact to the current run. + +This method uploads a local file or directory to the artifact storage associated with the run. + +**Parameters:** + +- **`local_uri`**: The local path to the file to upload. + +--- + +##### `log_input` + +```python +log_input(self, name: str, value: Any, *, label: str | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run', attributes: dict[str, typing.Any] | None = None) -> None +``` + +Log a single input to the current task or run. + +Inputs can be any runtime object, which are serialized, stored, and tracked +in the Dreadnode UI. + +--- + +##### `log_inputs` + +```python +log_inputs(self, to: Literal['task-or-run', 'run'] = 'task-or-run', **inputs: Any) -> None +``` + +Log multiple inputs to the current task or run. + +See `log_input()` for more details. + +--- + +##### `log_metric` + +```python +log_metric(self, name: str, value: float | bool | dreadnode.metric.Metric, *, step: int = 0, origin: typing.Any | None = None, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, attributes: dict[str, typing.Any] | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run') -> dreadnode.metric.Metric +``` + +Log a single metric to the current task or run. + +Metrics are some measurement or recorded value related to the task or run. +They can be used to track performance, resource usage, or other quantitative data. + +**Parameters:** + +- **`name`**: The name of the metric. 
+- **`value`**: The value of the metric, either as a raw float/bool or a Metric object. +- **`step`**: The step of the metric. +- **`origin`**: The origin of the metric - can be provided any object which was logged +as an input or output anywhere in the run. +- **`timestamp`**: The timestamp of the metric - defaults to the current time. +- **`mode`**: The aggregation mode to use for the metric. Helpful when you want to let +the library take care of translating your raw values into better representations. +- direct: do not modify the value at all (default) +- min: the lowest observed value reported for this metric +- max: the highest observed value reported for this metric +- avg: the average of all reported values for this metric +- sum: the cumulative sum of all reported values for this metric +- count: increment every time this metric is logged - disregard value +- **`attributes`**: A dictionary of additional attributes to attach to the metric. +- **`to`**: The target object to log the metric to. Can be "task-or-run" or "run". +Defaults to "task-or-run". If "task-or-run", the metric will be logged +to the current task or run, whichever is the nearest ancestor. + +**Returns:** The logged metric object. + +--- + +##### `log_metrics` + +```python +log_metrics(self, metrics: dict[str, float | bool] | list[dreadnode.metric.MetricDict], *, step: int = 0, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, attributes: dict[str, typing.Any] | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run') -> list[dreadnode.metric.Metric] +``` + +Log multiple metrics to the current task or run. + +**Parameters:** + +- **`metrics`**: Either a dictionary of name/value pairs or a list of MetricDicts to log. +- **`step`**: Default step value for metrics if not supplied. +- **`timestamp`**: Default timestamp for metrics if not supplied. +- **`mode`**: Default aggregation mode for metrics if not supplied. 
+- **`attributes`**: Default attributes for metrics if not supplied. +- **`to`**: The target object to log metrics to. Can be "task-or-run" or "run". +Defaults to "task-or-run". If "task-or-run", the metrics will be logged +to the current task or run, whichever is the nearest ancestor. + +**Returns:** List of logged Metric objects. + +--- + +##### `log_output` + +```python +log_output(self, name: str, value: Any, *, label: str | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run', attributes: dict[str, typing.Any] | None = None) -> None +``` + +Log a single output to the current task or run. + +Outputs can be any runtime object, which are serialized, stored, and tracked +in the Dreadnode UI. + +**Parameters:** + +- **`name`**: The name of the output. +- **`value`**: The value of the output. +- **`label`**: An optional label for the output, useful for filtering in the UI. +- **`to`**: The target object to log the output to. Can be "task-or-run" or "run". +Defaults to "task-or-run". If "task-or-run", the output will be logged +to the current task or run, whichever is the nearest ancestor. +- **`attributes`**: Additional attributes to attach to the output. + +--- + +##### `log_outputs` + +```python +log_outputs(self, to: Literal['task-or-run', 'run'] = 'task-or-run', **outputs: Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]) -> None +``` + +Log multiple outputs to the current task or run. + +See `log_output()` for more details. + +--- + +##### `log_param` + +```python +log_param(self, key: str, value: Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]) -> None +``` + +Log a single parameter to the current task or run. + +Parameters are key-value pairs that are associated with the task or run +and can be used to track configuration values, hyperparameters, or other +metadata. + +**Parameters:** + +- **`key`**: The name of the parameter. 
+- **`value`**: The value of the parameter. + +--- + +##### `log_params` + +```python +log_params(self, **params: Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]) -> None +``` + +Log multiple parameters to the current task or run. + +Parameters are key-value pairs that are associated with the task or run +and can be used to track configuration values, hyperparameters, or other +metadata. + +**Parameters:** + +- **`**params`**: The parameters to log. Each parameter is a key-value pair. + +--- + +##### `push_update` + +```python +push_update(self) -> None +``` + +Push any pending run data to the server before run completion. + +This is useful for ensuring that the UI is up to date with the +latest data. Data is automatically pushed periodically, but +you can call this method to force a push. + +--- + +##### `run` + +```python +run(self, name: str | None = None, *, tags: Optional[Sequence[str]] = None, params: dict[str, typing.Any] | None = None, project: str | None = None, autolog: bool = True, attributes: dict[str, typing.Any] | None = None) -> dreadnode.tracing.span.RunSpan +``` + +Create a new run. + +Runs are the main way to track work in Dreadnode. They are +associated with a specific project and can have parameters, +inputs, and outputs logged to them. + +You cannot create runs inside other runs. + +**Parameters:** + +- **`name`**: The name of the run. If not provided, a random name will be generated. +- **`tags`**: A list of tags to attach to the run. +- **`params`**: A dictionary of parameters to attach to the run. +- **`project`**: The project name to associate the run with. If not provided, +the project passed to `configure()` will be used, or the +run will be associated with a default project. +- **`autolog`**: Whether to automatically log task inputs, outputs, and execution metrics if otherwise unspecified. +- **`attributes`**: Additional attributes to attach to the run span. 
+ +**Returns:** A RunSpan object that can be used as a context manager. +The run will automatically be completed when the context manager exits. + +--- + +##### `scorer` + +```python +scorer(self, *, name: str | None = None, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> Callable[[Union[Callable[[~T], Awaitable[float | int | bool | dreadnode.metric.Metric]], Callable[[~T], float | int | bool | dreadnode.metric.Metric]]], dreadnode.metric.Scorer[~T]] +``` + +Make a scorer from a callable function. + +This is useful when you want to change the name of the scorer +or add additional attributes to it. + +**Parameters:** + +- **`name`**: The name of the scorer. +- **`tags`**: A list of tags to attach to the scorer. +- **`attributes`**: A dictionary of attributes to attach to the scorer. + +**Returns:** A new Scorer object. + +--- + +##### `shutdown` + +```python +shutdown(self) -> None +``` + +Shut down any associated OpenTelemetry components and flush any pending spans. + +It is not required to call this method, as the SDK will automatically +flush and shut down when the process exits. + +However, if you want to ensure that all spans are flushed before +exiting, you can call this method manually. + +--- + +##### `span` + +```python +span(self, name: str, *, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> dreadnode.tracing.span.Span +``` + +Create a new OpenTelemetry span. + +Spans are more lightweight than tasks, but still let you track +work being performed and view it in the UI. You cannot +log parameters, inputs, or outputs to spans. + +**Parameters:** + +- **`name`**: The name of the span. +- **`tags`**: A list of tags to attach to the span. +- **`attributes`**: A dictionary of attributes to attach to the span. + +**Returns:** A Span object. 
+ +--- + +##### `tag` + +```python +tag(self, *tag: str, to: Literal['task-or-run', 'run'] = 'task-or-run') -> None +``` + +Add one or many tags to the current task or run. + +**Parameters:** + +- **`tag`**: The tag to attach to the task or run. +- **`to`**: The target object to log the tag to. Can be "task-or-run" or "run". +Defaults to "task-or-run". If "task-or-run", the tag will be logged +to the current task or run, whichever is the nearest ancestor. + +--- + +##### `task` + +```python +task(self, *, scorers: Optional[Sequence[Union[dreadnode.metric.Scorer[Any], Callable[[Any], Awaitable[float | int | bool | dreadnode.metric.Metric]], Callable[[Any], float | int | bool | dreadnode.metric.Metric]]]] = None, name: str | None = None, label: str | None = None, log_inputs: Union[Sequence[str], bool, dreadnode.types.Inherited] = Inherited, log_output: bool | dreadnode.types.Inherited = Inherited, log_execution_metrics: bool = False, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> dreadnode.main.TaskDecorator +``` + +Create a new task from a function. + +**Parameters:** + +- **`scorers`**: A list of scorers to attach to the task. These will be called after every execution +of the task and will be passed the task's output. +- **`name`**: The name of the task. +- **`label`**: The label of the task - useful for filtering in the UI. +- **`log_inputs`**: Log all, or specific, incoming arguments to the function as inputs. +- **`log_output`**: Log the result of the function as an output. +- **`log_execution_metrics`**: Log execution metrics for the task, such as success rate and run count. +- **`tags`**: A list of tags to attach to the task span. +- **`attributes`**: A dictionary of attributes to attach to the task span. + +**Returns:** A new Task object. 
+ +--- + +##### `task_span` + +```python +task_span(self, name: str, *, label: str | None = None, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> dreadnode.tracing.span.TaskSpan[typing.Any] +``` + +Create a task span without an explicit associated function. + +This is useful for creating tasks on the fly without having to +define a function. + +**Parameters:** + +- **`name`**: The name of the task. +- **`label`**: The label of the task - useful for filtering in the UI. +- **`tags`**: A list of tags to attach to the task span. +- **`attributes`**: A dictionary of attributes to attach to the task span. + +**Returns:** A TaskSpan object. + +--- + + +### Class `DreadnodeConfigWarning` + +**Inherits from:** `UserWarning` + +Base class for warnings generated by user code. + + +### Class `DreadnodeUsageWarning` + +**Inherits from:** `UserWarning` + +Base class for warnings generated by user code. + + diff --git a/docs/sdk/dreadnode/storage_utils.mdx b/docs/sdk/dreadnode/storage_utils.mdx new file mode 100644 index 00000000..4c8559f4 --- /dev/null +++ b/docs/sdk/dreadnode/storage_utils.mdx @@ -0,0 +1,23 @@ +--- +title: 'storage_utils' +sidebarTitle: 'storage_utils' +groups: ["strikes"] +--- + +# Module `storage_utils` + +*(Full name: `dreadnode.storage_utils`)* + +**Source file:** `storage_utils.py` + +## Functions + +### `with_credential_refresh` + +```python +with_credential_refresh(func: Callable[..., Any]) -> Callable[..., Any] +``` + +Decorator that automatically handles credential refresh on storage errors. 
+ + diff --git a/docs/sdk/dreadnode/tracing/span.mdx b/docs/sdk/dreadnode/tracing/span.mdx new file mode 100644 index 00000000..52e3ddd2 --- /dev/null +++ b/docs/sdk/dreadnode/tracing/span.mdx @@ -0,0 +1,1113 @@ +--- +title: 'span' +sidebarTitle: 'span' +groups: ["strikes"] +--- + +# Module `span` + +*(Full name: `dreadnode.tracing.span`)* + +**Source file:** `span.py` + +## Classes + + +### Class `RunContext` + +**Inherits from:** `dict` + +Context for transferring and continuing runs in other places. + + +### Class `RunSpan` + +**Inherits from:** `dreadnode.tracing.span.Span` + +Provides read-only access to span attributes. + +Users should NOT be creating these objects directly. `ReadableSpan`s are created as +a direct result from using the tracing pipeline via the `Tracer`. + +#### Properties + +##### `active` + +**Type:** `\` *(property)* + +Check if the span is currently active (recording). + +*Has: getter* + +--- + +##### `all_tasks` + +**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* + +Get all tasks, including subtasks. + +*Has: getter* + +--- + +##### `duration` + +**Type:** `\` *(property)* + +Get the duration of the span in seconds. + +*Has: getter* + +--- + +##### `failed` + +**Type:** `\` *(property)* + +Check if the span has failed. + +*Has: getter* + +--- + +##### `inputs` + +**Type:** `dict[str, Any]` *(property)* + +*Has: getter* + +--- + +##### `is_recording` + +**Type:** `\` *(property)* + +Check if the span is currently recording. + +*Has: getter* + +--- + +##### `label` + +**Type:** `\` *(property)* + +Get the label of the span. 
+ +*Has: getter* + +--- + +##### `metrics` + +**Type:** `dict[str, list[dreadnode.metric.Metric]]` *(property)* + +*Has: getter* + +--- + +##### `outputs` + +**Type:** `dict[str, Any]` *(property)* + +*Has: getter* + +--- + +##### `params` + +**Type:** `dict[str, Any]` *(property)* + +*Has: getter* + +--- + +##### `run_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +##### `span_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +##### `tasks` + +**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* + +*Has: getter* + +--- + +##### `trace_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +#### Methods + +##### `__enter__` + +```python +__enter__(self) -> Self +``` + +--- + +##### `__exit__` + +```python +__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None +``` + +--- + +##### `__getattr__` + +```python +__getattr__(self, name: str) -> Any +``` + +--- + +##### `__init__` + +```python +__init__(self, name: str, project: str, tracer: opentelemetry.trace.Tracer, file_system: fsspec.spec.AbstractFileSystem, prefix_path: str, *, attributes: dict[str, typing.Any] | None = None, params: dict[str, typing.Any] | None = None, metrics: dict[str, list[dreadnode.metric.Metric]] | None = None, tags: Optional[Sequence[str]] = None, autolog: bool = True, update_frequency: int = 5, run_id: str | ulid.ULID | None = None, type: Literal['run', 'task', 'span', 'run_update', 'run_fragment'] = 'run', credential_refresher: Optional[Callable[[], bool]] = None) -> None +``` + +Initialize self. See help(type(self)) for accurate signature. + +--- + +##### `__repr__` + +```python +__repr__(self) -> str +``` + +Return repr(self). + +--- + +##### `__str__` + +```python +__str__(self) -> str +``` + +Return str(self). 
+ +--- + +##### `add_tags` + +```python +add_tags(self, tags: Sequence[str]) -> None +``` + +--- + +##### `get_attribute` + +```python +get_attribute(self, key: str, default: Any) -> Any +``` + +--- + +##### `get_attributes` + +```python +get_attributes(self) -> dict[str, typing.Any] +``` + +--- + +##### `get_object` + +```python +get_object(self, hash_: str) -> Any +``` + +--- + +##### `link_objects` + +```python +link_objects(self, object_hash: str, link_hash: str, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `log_artifact` + +```python +log_artifact(self, local_uri: str | pathlib.Path) -> None +``` + +Logs a local file or directory as an artifact to the object store. + +Preserves directory structure and uses content hashing for deduplication. + +**Parameters:** + +- **`local_uri`**: Path to the local file or directory + +**Returns:** DirectoryNode representing the artifact's tree structure + +**Raises:** + +- `FileNotFoundError` — If the path doesn't exist + +--- + +##### `log_event` + +```python +log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `log_input` + +```python +log_input(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `log_metric` + +```python +log_metric(self, name: str, value: float | bool | dreadnode.metric.Metric, *, step: int = 0, origin: typing.Any | None = None, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, prefix: str | None = None, attributes: dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]] | None = None) -> dreadnode.metric.Metric +``` + +--- + +##### `log_object` + +```python +log_object(self, value: Any, *, label: str | None = None, event_name: str = 'dreadnode.object', attributes: dict[str, typing.Any] | None = None) -> str 
+``` + +--- + +##### `log_output` + +```python +log_output(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `log_param` + +```python +log_param(self, key: str, value: Any) -> None +``` + +--- + +##### `log_params` + +```python +log_params(self, **params: Any) -> None +``` + +--- + +##### `push_update` + +```python +push_update(self, *, force: bool = False) -> None +``` + +--- + +##### `set_attribute` + +```python +set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None +``` + +--- + +##### `set_attributes` + +```python +set_attributes(self, attributes: dict[str, typing.Any]) -> None +``` + +--- + +##### `set_exception` + +```python +set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> None +``` + +--- + +##### `set_tags` + +```python +set_tags(self, tags: Sequence[str]) -> None +``` + +--- + +##### `to_graph` + +```python +to_graph(self) -> 'nx.DiGraph' +``` + +--- + + +### Class `RunUpdateSpan` + +**Inherits from:** `dreadnode.tracing.span.Span` + +Provides read-only access to span attributes. + +Users should NOT be creating these objects directly. `ReadableSpan`s are created as +a direct result from using the tracing pipeline via the `Tracer`. + +#### Properties + +##### `active` + +**Type:** `\` *(property)* + +Check if the span is currently active (recording). + +*Has: getter* + +--- + +##### `duration` + +**Type:** `\` *(property)* + +Get the duration of the span in seconds. + +*Has: getter* + +--- + +##### `failed` + +**Type:** `\` *(property)* + +Check if the span has failed. + +*Has: getter* + +--- + +##### `is_recording` + +**Type:** `\` *(property)* + +Check if the span is currently recording. + +*Has: getter* + +--- + +##### `label` + +**Type:** `\` *(property)* + +Get the label of the span. 
+ +*Has: getter* + +--- + +##### `span_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +##### `trace_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +#### Methods + +##### `__enter__` + +```python +__enter__(self) -> Self +``` + +--- + +##### `__exit__` + +```python +__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None +``` + +--- + +##### `__getattr__` + +```python +__getattr__(self, name: str) -> Any +``` + +--- + +##### `__init__` + +```python +__init__(self, run_id: str, tracer: opentelemetry.trace.Tracer, project: str, *, metrics: dict[str, list[dreadnode.metric.Metric]] | None = None, params: dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]] | None = None, inputs: list[dreadnode.object.ObjectRef] | None = None, outputs: list[dreadnode.object.ObjectRef] | None = None, objects: dict[str, dreadnode.object.ObjectUri | dreadnode.object.ObjectVal] | None = None, object_schemas: dict[str, dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]]] | None = None) -> None +``` + +Initialize self. See help(type(self)) for accurate signature. + +--- + +##### `__repr__` + +```python +__repr__(self) -> str +``` + +Return repr(self). + +--- + +##### `__str__` + +```python +__str__(self) -> str +``` + +Return str(self). 
+ +--- + +##### `add_tags` + +```python +add_tags(self, tags: Sequence[str]) -> None +``` + +--- + +##### `get_attribute` + +```python +get_attribute(self, key: str, default: Any) -> Any +``` + +--- + +##### `get_attributes` + +```python +get_attributes(self) -> dict[str, typing.Any] +``` + +--- + +##### `log_event` + +```python +log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `set_attribute` + +```python +set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None +``` + +--- + +##### `set_attributes` + +```python +set_attributes(self, attributes: dict[str, typing.Any]) -> None +``` + +--- + +##### `set_exception` + +```python +set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> None +``` + +--- + +##### `set_tags` + +```python +set_tags(self, tags: Sequence[str]) -> None +``` + +--- + + +### Class `Span` + +**Inherits from:** `opentelemetry.sdk.trace.ReadableSpan` + +Provides read-only access to span attributes. + +Users should NOT be creating these objects directly. `ReadableSpan`s are created as +a direct result from using the tracing pipeline via the `Tracer`. + +#### Properties + +##### `active` + +**Type:** `\` *(property)* + +Check if the span is currently active (recording). + +*Has: getter* + +--- + +##### `duration` + +**Type:** `\` *(property)* + +Get the duration of the span in seconds. + +*Has: getter* + +--- + +##### `failed` + +**Type:** `\` *(property)* + +Check if the span has failed. + +*Has: getter* + +--- + +##### `is_recording` + +**Type:** `\` *(property)* + +Check if the span is currently recording. + +*Has: getter* + +--- + +##### `label` + +**Type:** `\` *(property)* + +Get the label of the span. 
+ +*Has: getter* + +--- + +##### `span_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +##### `trace_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +#### Methods + +##### `__enter__` + +```python +__enter__(self) -> Self +``` + +--- + +##### `__exit__` + +```python +__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None +``` + +--- + +##### `__getattr__` + +```python +__getattr__(self, name: str) -> Any +``` + +--- + +##### `__init__` + +```python +__init__(self, name: str, tracer: opentelemetry.trace.Tracer, *, attributes: dict[str, typing.Any] | None = None, label: str | None = None, type: Literal['run', 'task', 'span', 'run_update', 'run_fragment'] = 'span', tags: Optional[Sequence[str]] = None) -> None +``` + +Initialize self. See help(type(self)) for accurate signature. + +--- + +##### `__repr__` + +```python +__repr__(self) -> str +``` + +Return repr(self). + +--- + +##### `__str__` + +```python +__str__(self) -> str +``` + +Return str(self). 
+ +--- + +##### `add_tags` + +```python +add_tags(self, tags: Sequence[str]) -> None +``` + +--- + +##### `get_attribute` + +```python +get_attribute(self, key: str, default: Any) -> Any +``` + +--- + +##### `get_attributes` + +```python +get_attributes(self) -> dict[str, typing.Any] +``` + +--- + +##### `log_event` + +```python +log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `set_attribute` + +```python +set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None +``` + +--- + +##### `set_attributes` + +```python +set_attributes(self, attributes: dict[str, typing.Any]) -> None +``` + +--- + +##### `set_exception` + +```python +set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> None +``` + +--- + +##### `set_tags` + +```python +set_tags(self, tags: Sequence[str]) -> None +``` + +--- + + +### Class `TaskSpan` + +**Inherits from:** `dreadnode.tracing.span.Span`, `typing.Generic` + +Provides read-only access to span attributes. + +Users should NOT be creating these objects directly. `ReadableSpan`s are created as +a direct result from using the tracing pipeline via the `Tracer`. + +#### Properties + +##### `active` + +**Type:** `\` *(property)* + +Check if the span is currently active (recording). + +*Has: getter* + +--- + +##### `all_tasks` + +**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* + +Get all tasks, including subtasks. + +*Has: getter* + +--- + +##### `duration` + +**Type:** `\` *(property)* + +Get the duration of the span in seconds. + +*Has: getter* + +--- + +##### `failed` + +**Type:** `\` *(property)* + +Check if the span has failed. + +*Has: getter* + +--- + +##### `inputs` + +**Type:** `dict[str, Any]` *(property)* + +*Has: getter* + +--- + +##### `is_recording` + +**Type:** `\` *(property)* + +Check if the span is currently recording. 
+ +*Has: getter* + +--- + +##### `label` + +**Type:** `\` *(property)* + +Get the label of the span. + +*Has: getter* + +--- + +##### `metrics` + +**Type:** `dict[str, list[dreadnode.metric.Metric]]` *(property)* + +*Has: getter* + +--- + +##### `output` + +**Type:** `~R` *(property)* + +*Has: getter, setter* + +--- + +##### `outputs` + +**Type:** `dict[str, Any]` *(property)* + +*Has: getter* + +--- + +##### `parent_task` + +**Type:** `Optional[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* + +Get the parent task if it exists. + +*Has: getter* + +--- + +##### `parent_task_id` + +**Type:** `\` *(property)* + +Get the parent task ID if it exists (may be empty). + +*Has: getter* + +--- + +##### `run` + +**Type:** `\` *(property)* + +Get the run this task is associated with. + +*Has: getter* + +--- + +##### `run_id` + +**Type:** `\` *(property)* + +Get the run id this task is associated with (may be empty). + +*Has: getter* + +--- + +##### `span_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +##### `tasks` + +**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* + +Get the list of children tasks. + +*Has: getter* + +--- + +##### `trace_id` + +**Type:** `\` *(property)* + +*Has: getter* + +--- + +#### Methods + +##### `__enter__` + +```python +__enter__(self) -> Self +``` + +--- + +##### `__exit__` + +```python +__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None +``` + +--- + +##### `__getattr__` + +```python +__getattr__(self, name: str) -> Any +``` + +--- + +##### `__init__` + +```python +__init__(self, name: str, run_id: str, tracer: opentelemetry.trace.Tracer, *, attributes: dict[str, typing.Any] | None = None, label: str | None = None, metrics: dict[str, list[dreadnode.metric.Metric]] | None = None, tags: Optional[Sequence[str]] = None) -> None +``` + +Initialize self. See help(type(self)) for accurate signature. 
+ +--- + +##### `__repr__` + +```python +__repr__(self) -> str +``` + +Return repr(self). + +--- + +##### `__str__` + +```python +__str__(self) -> str +``` + +Return str(self). + +--- + +##### `add_tags` + +```python +add_tags(self, tags: Sequence[str]) -> None +``` + +--- + +##### `get_attribute` + +```python +get_attribute(self, key: str, default: Any) -> Any +``` + +--- + +##### `get_attributes` + +```python +get_attributes(self) -> dict[str, typing.Any] +``` + +--- + +##### `get_average_metric_value` + +```python +get_average_metric_value(self, key: str | None = None) -> float +``` + +--- + +##### `log_event` + +```python +log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None +``` + +--- + +##### `log_input` + +```python +log_input(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> str +``` + +--- + +##### `log_metric` + +```python +log_metric(self, name: str, value: float | bool | dreadnode.metric.Metric, *, step: int = 0, origin: typing.Any | None = None, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, attributes: dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]] | None = None) -> dreadnode.metric.Metric +``` + +--- + +##### `log_output` + +```python +log_output(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> str +``` + +--- + +##### `set_attribute` + +```python +set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None +``` + +--- + +##### `set_attributes` + +```python +set_attributes(self, attributes: dict[str, typing.Any]) -> None +``` + +--- + +##### `set_exception` + +```python +set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> 
None +``` + +--- + +##### `set_tags` + +```python +set_tags(self, tags: Sequence[str]) -> None +``` + +--- + + +## Functions + +### `prepare_otlp_attribute` + +```python +prepare_otlp_attribute(value: Any) -> Union[str, bool, int, float, Sequence[str], Sequence[bool], Sequence[int], Sequence[float]] +``` + +### `prepare_otlp_attributes` + +```python +prepare_otlp_attributes(attributes: dict[str, typing.Any]) -> dict[str, typing.Union[str, bool, int, float, typing.Sequence[str], typing.Sequence[bool], typing.Sequence[int], typing.Sequence[float]]] +``` + + diff --git a/poetry.lock b/poetry.lock index 92aa60ad..13bcc746 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -173,7 +173,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["dev"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -1046,6 +1046,23 @@ files = [ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "docstring-parser" +version = "0.17.0" +description = "Parse Python docstrings in reST, Google and Numpydoc format" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708"}, + {file = "docstring_parser-0.17.0.tar.gz", hash = 
"sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912"}, +] + +[package.extras] +dev = ["pre-commit (>=2.16.0) ; python_version >= \"3.9\"", "pydoctor (>=25.4.0)", "pytest"] +docs = ["pydoctor (>=25.4.0)"] +test = ["pytest"] + [[package]] name = "elastic-transport" version = "8.17.1" @@ -1113,7 +1130,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, @@ -3935,7 +3952,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["main", "dev"] -markers = "python_version == \"3.10\"" +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -4624,4 +4641,4 @@ training = ["transformers"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "21fe5cf29eefa6f77e8bb811529fa19adff4f32d8e64f13432402631c4d3808f" +content-hash = "8663d657b6902842020781e652acc419fc9ccb3b2edb023e863f1d24530bacbb" diff --git a/pyproject.toml b/pyproject.toml index 91df662a..6d6c6638 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ rigging = "^2.3.0" typer = "^0.15.2" datasets = "^3.5.0" pyarrow = "^19.0.1" +docstring-parser = "^0.17.0" [build-system] requires = ["poetry-core>=1.0.0", "setuptools>=42", "wheel"] From b06b56e8b7c2e42a33fb22c15f7967f5e154c4f3 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: 
Mon, 28 Jul 2025 21:49:44 -0700 Subject: [PATCH 3/5] Revert docs --- docs/sdk/docs.json | 4 - docs/sdk/dreadnode/main.mdx | 536 ------------- docs/sdk/dreadnode/tracing/span.mdx | 1113 --------------------------- 3 files changed, 1653 deletions(-) delete mode 100644 docs/sdk/docs.json delete mode 100644 docs/sdk/dreadnode/main.mdx delete mode 100644 docs/sdk/dreadnode/tracing/span.mdx diff --git a/docs/sdk/docs.json b/docs/sdk/docs.json deleted file mode 100644 index 8cb7c821..00000000 --- a/docs/sdk/docs.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "group": "API Reference", - "pages": [] -} \ No newline at end of file diff --git a/docs/sdk/dreadnode/main.mdx b/docs/sdk/dreadnode/main.mdx deleted file mode 100644 index c8e965f4..00000000 --- a/docs/sdk/dreadnode/main.mdx +++ /dev/null @@ -1,536 +0,0 @@ ---- -title: 'main' -sidebarTitle: 'main' -groups: ["strikes"] ---- - -# Module `main` - -*(Full name: `dreadnode.main`)* - -**Source file:** `main.py` - -## Classes - - -### Class `Dreadnode` - -**Inherits from:** `object` - -The core Dreadnode SDK class. - -A default instance of this class is created and can be used directly with `dreadnode.*`. - -Otherwise, you can create your own instance and configure it with `configure()`. - -#### Properties - -##### `is_default` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -#### Methods - -##### `__eq__` - -```python -__eq__(self, other) -``` - -Return self==value. - ---- - -##### `__init__` - -```python -__init__(self, *, server: str | None = None, token: str | None = None, local_dir: Union[str, pathlib.Path, Literal[False]] = False, project: str | None = None, service_name: str | None = None, service_version: str | None = None, console: Union[logfire._internal.config.ConsoleOptions, Literal[False, True]] = True, send_to_logfire: Union[bool, Literal['if-token-present']] = False, otel_scope: str = 'dreadnode') -> None -``` - -Initialize self. See help(type(self)) for accurate signature. 
- ---- - -##### `__repr__` - -```python -__repr__(self) -``` - -Return repr(self). - ---- - -##### `api` - -```python -api(self, *, server: str | None = None, token: str | None = None) -> dreadnode.api.client.ApiClient -``` - -Get an API client based on the current configuration or the provided server and token. - -If the server and token are not provided, the method will use the current configuration -and `configure()` needs to be called first. - -**Parameters:** - -- **`server`**: The server URL to use for the API client. -- **`token`**: The API token to use for authentication. - -**Returns:** An ApiClient instance. - ---- - -##### `configure` - -```python -configure(self, *, server: str | None = None, token: str | None = None, local_dir: Union[str, pathlib.Path, Literal[False]] = False, project: str | None = None, service_name: str | None = None, service_version: str | None = None, console: Union[logfire._internal.config.ConsoleOptions, Literal[False, True]] = True, send_to_logfire: Union[bool, Literal['if-token-present']] = False, otel_scope: str = 'dreadnode') -> None -``` - -Configure the Dreadnode SDK and call `initialize()`. - -This method should always be called before using the SDK. - -If `server` and `token` are not provided, the SDK will look in -the associated environment variables: - -- `DREADNODE_SERVER_URL` or `DREADNODE_SERVER` -- `DREADNODE_API_TOKEN` or `DREADNODE_API_KEY` - -**Parameters:** - -- **`server`**: The Dreadnode server URL. -- **`token`**: The Dreadnode API token. -- **`local_dir`**: The local directory to store data in. -- **`project`**: The default project name to associate all runs with. -- **`service_name`**: The service name to use for OpenTelemetry. -- **`service_version`**: The service version to use for OpenTelemetry. -- **`console`**: Whether to log span information to the console. -- **`send_to_logfire`**: Whether to send data to Logfire. -- **`otel_scope`**: The OpenTelemetry scope name. 
- ---- - -##### `continue_run` - -```python -continue_run(self, run_context: dreadnode.tracing.span.RunContext) -> dreadnode.tracing.span.RunSpan -``` - -Continue a run from captured context on a remote host. - -**Parameters:** - -- **`run_context`**: The RunContext captured from get_run_context(). - -**Returns:** A RunSpan object that can be used as a context manager. - ---- - -##### `get_run_context` - -```python -get_run_context(self) -> dreadnode.tracing.span.RunContext -``` - -Capture the current run context for transfer to another host, thread, or process. - -Use `continue_run()` to continue the run anywhere else. - -**Returns:** RunContext containing run state and trace propagation headers. - -**Raises:** - -- `RuntimeError` — If called outside of an active run. - ---- - -##### `initialize` - -```python -initialize(self) -> None -``` - -Initialize the Dreadnode SDK. - -This method is called automatically when you call `configure()`. - ---- - -##### `link_objects` - -```python -link_objects(self, origin: Any, link: Any, attributes: dict[str, typing.Any] | None = None) -> None -``` - -Associate two runtime objects with each other. - -This is useful for linking any two objects which are related to -each other, such as a model and its training data, or an input -prompt and the resulting output. - -**Parameters:** - -- **`origin`**: The origin object to link from. -- **`link`**: The linked object to link to. -- **`attributes`**: Additional attributes to attach to the link. - ---- - -##### `log_artifact` - -```python -log_artifact(self, local_uri: str | pathlib.Path) -> None -``` - -Log a file or directory artifact to the current run. - -This method uploads a local file or directory to the artifact storage associated with the run. - -**Parameters:** - -- **`local_uri`**: The local path to the file to upload. 
- ---- - -##### `log_input` - -```python -log_input(self, name: str, value: Any, *, label: str | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run', attributes: dict[str, typing.Any] | None = None) -> None -``` - -Log a single input to the current task or run. - -Inputs can be any runtime object, which are serialized, stored, and tracked -in the Dreadnode UI. - ---- - -##### `log_inputs` - -```python -log_inputs(self, to: Literal['task-or-run', 'run'] = 'task-or-run', **inputs: Any) -> None -``` - -Log multiple inputs to the current task or run. - -See `log_input()` for more details. - ---- - -##### `log_metric` - -```python -log_metric(self, name: str, value: float | bool | dreadnode.metric.Metric, *, step: int = 0, origin: typing.Any | None = None, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, attributes: dict[str, typing.Any] | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run') -> dreadnode.metric.Metric -``` - -Log a single metric to the current task or run. - -Metrics are some measurement or recorded value related to the task or run. -They can be used to track performance, resource usage, or other quantitative data. - -**Parameters:** - -- **`name`**: The name of the metric. -- **`value`**: The value of the metric, either as a raw float/bool or a Metric object. -- **`step`**: The step of the metric. -- **`origin`**: The origin of the metric - can be provided any object which was logged -as an input or output anywhere in the run. -- **`timestamp`**: The timestamp of the metric - defaults to the current time. -- **`mode`**: The aggregation mode to use for the metric. Helpful when you want to let -the library take care of translating your raw values into better representations. 
-- direct: do not modify the value at all (default) -- min: the lowest observed value reported for this metric -- max: the highest observed value reported for this metric -- avg: the average of all reported values for this metric -- sum: the cumulative sum of all reported values for this metric -- count: increment every time this metric is logged - disregard value -- **`attributes`**: A dictionary of additional attributes to attach to the metric. -- **`to`**: The target object to log the metric to. Can be "task-or-run" or "run". -Defaults to "task-or-run". If "task-or-run", the metric will be logged -to the current task or run, whichever is the nearest ancestor. - -**Returns:** The logged metric object. - ---- - -##### `log_metrics` - -```python -log_metrics(self, metrics: dict[str, float | bool] | list[dreadnode.metric.MetricDict], *, step: int = 0, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, attributes: dict[str, typing.Any] | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run') -> list[dreadnode.metric.Metric] -``` - -Log multiple metrics to the current task or run. - -**Parameters:** - -- **`metrics`**: Either a dictionary of name/value pairs or a list of MetricDicts to log. -- **`step`**: Default step value for metrics if not supplied. -- **`timestamp`**: Default timestamp for metrics if not supplied. -- **`mode`**: Default aggregation mode for metrics if not supplied. -- **`attributes`**: Default attributes for metrics if not supplied. -- **`to`**: The target object to log metrics to. Can be "task-or-run" or "run". -Defaults to "task-or-run". If "task-or-run", the metrics will be logged -to the current task or run, whichever is the nearest ancestor. - -**Returns:** List of logged Metric objects. 
- ---- - -##### `log_output` - -```python -log_output(self, name: str, value: Any, *, label: str | None = None, to: Literal['task-or-run', 'run'] = 'task-or-run', attributes: dict[str, typing.Any] | None = None) -> None -``` - -Log a single output to the current task or run. - -Outputs can be any runtime object, which are serialized, stored, and tracked -in the Dreadnode UI. - -**Parameters:** - -- **`name`**: The name of the output. -- **`value`**: The value of the output. -- **`label`**: An optional label for the output, useful for filtering in the UI. -- **`to`**: The target object to log the output to. Can be "task-or-run" or "run". -Defaults to "task-or-run". If "task-or-run", the output will be logged -to the current task or run, whichever is the nearest ancestor. -- **`attributes`**: Additional attributes to attach to the output. - ---- - -##### `log_outputs` - -```python -log_outputs(self, to: Literal['task-or-run', 'run'] = 'task-or-run', **outputs: Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]) -> None -``` - -Log multiple outputs to the current task or run. - -See `log_output()` for more details. - ---- - -##### `log_param` - -```python -log_param(self, key: str, value: Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]) -> None -``` - -Log a single parameter to the current task or run. - -Parameters are key-value pairs that are associated with the task or run -and can be used to track configuration values, hyperparameters, or other -metadata. - -**Parameters:** - -- **`key`**: The name of the parameter. -- **`value`**: The value of the parameter. - ---- - -##### `log_params` - -```python -log_params(self, **params: Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]) -> None -``` - -Log multiple parameters to the current task or run. 
- -Parameters are key-value pairs that are associated with the task or run -and can be used to track configuration values, hyperparameters, or other -metadata. - -**Parameters:** - -- **`**params`**: The parameters to log. Each parameter is a key-value pair. - ---- - -##### `push_update` - -```python -push_update(self) -> None -``` - -Push any pending run data to the server before run completion. - -This is useful for ensuring that the UI is up to date with the -latest data. Data is automatically pushed periodically, but -you can call this method to force a push. - ---- - -##### `run` - -```python -run(self, name: str | None = None, *, tags: Optional[Sequence[str]] = None, params: dict[str, typing.Any] | None = None, project: str | None = None, autolog: bool = True, attributes: dict[str, typing.Any] | None = None) -> dreadnode.tracing.span.RunSpan -``` - -Create a new run. - -Runs are the main way to track work in Dreadnode. They are -associated with a specific project and can have parameters, -inputs, and outputs logged to them. - -You cannot create runs inside other runs. - -**Parameters:** - -- **`name`**: The name of the run. If not provided, a random name will be generated. -- **`tags`**: A list of tags to attach to the run. -- **`params`**: A dictionary of parameters to attach to the run. -- **`project`**: The project name to associate the run with. If not provided, -the project passed to `configure()` will be used, or the -run will be associated with a default project. -- **`autolog`**: Whether to automatically log task inputs, outputs, and execution metrics if otherwise unspecified. -- **`attributes`**: Additional attributes to attach to the run span. - -**Returns:** A RunSpan object that can be used as a context manager. -The run will automatically be completed when the context manager exits. 
- ---- - -##### `scorer` - -```python -scorer(self, *, name: str | None = None, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> Callable[[Union[Callable[[~T], Awaitable[float | int | bool | dreadnode.metric.Metric]], Callable[[~T], float | int | bool | dreadnode.metric.Metric]]], dreadnode.metric.Scorer[~T]] -``` - -Make a scorer from a callable function. - -This is useful when you want to change the name of the scorer -or add additional attributes to it. - -**Parameters:** - -- **`name`**: The name of the scorer. -- **`tags`**: A list of tags to attach to the scorer. -- **`attributes`**: A dictionary of attributes to attach to the scorer. - -**Returns:** A new Scorer object. - ---- - -##### `shutdown` - -```python -shutdown(self) -> None -``` - -Shutdown any associate OpenTelemetry components and flush any pending spans. - -It is not required to call this method, as the SDK will automatically -flush and shutdown when the process exits. - -However, if you want to ensure that all spans are flushed before -exiting, you can call this method manually. - ---- - -##### `span` - -```python -span(self, name: str, *, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> dreadnode.tracing.span.Span -``` - -Create a new OpenTelemety span. - -Spans are more lightweight than tasks, but still let you track -work being performed and view it in the UI. You cannot -log parameters, inputs, or outputs to spans. - -**Parameters:** - -- **`name`**: The name of the span. -- **`tags`**: A list of tags to attach to the span. -- **`attributes`**: A dictionary of attributes to attach to the span. - -**Returns:** A Span object. - ---- - -##### `tag` - -```python -tag(self, *tag: str, to: Literal['task-or-run', 'run'] = 'task-or-run') -> None -``` - -Add one or many tags to the current task or run. - -**Parameters:** - -- **`tag`**: The tag to attach to the task or run. 
-- **`to`**: The target object to log the tag to. Can be "task-or-run" or "run". -Defaults to "task-or-run". If "task-or-run", the tag will be logged -to the current task or run, whichever is the nearest ancestor. - ---- - -##### `task` - -```python -task(self, *, scorers: Optional[Sequence[Union[dreadnode.metric.Scorer[Any], Callable[[Any], Awaitable[float | int | bool | dreadnode.metric.Metric]], Callable[[Any], float | int | bool | dreadnode.metric.Metric]]]] = None, name: str | None = None, label: str | None = None, log_inputs: Union[Sequence[str], bool, dreadnode.types.Inherited] = Inherited, log_output: bool | dreadnode.types.Inherited = Inherited, log_execution_metrics: bool = False, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> dreadnode.main.TaskDecorator -``` - -Create a new task from a function. - -**Parameters:** - -- **`scorers`**: A list of scorers to attach to the task. These will be called after every execution -of the task and will be passed the task's output. -- **`name`**: The name of the task. -- **`label`**: The label of the task - useful for filtering in the UI. -- **`log_inputs`**: Log all, or specific, incoming arguments to the function as inputs. -- **`log_output`**: Log the result of the function as an output. -- **`log_execution_metrics`**: Log execution metrics for the task, such as success rate and run count. -- **`tags`**: A list of tags to attach to the task span. -- **`attributes`**: A dictionary of attributes to attach to the task span. - -**Returns:** A new Task object. - ---- - -##### `task_span` - -```python -task_span(self, name: str, *, label: str | None = None, tags: Optional[Sequence[str]] = None, attributes: dict[str, typing.Any] | None = None) -> dreadnode.tracing.span.TaskSpan[typing.Any] -``` - -Create a task span without an explicit associated function. - -This is useful for creating tasks on the fly without having to -define a function. 
- -**Parameters:** - -- **`name`**: The name of the task. -- **`label`**: The label of the task - useful for filtering in the UI. -- **`tags`**: A list of tags to attach to the task span. -- **`attributes`**: A dictionary of attributes to attach to the task span. - -**Returns:** A TaskSpan object. - ---- - - -### Class `DreadnodeConfigWarning` - -**Inherits from:** `UserWarning` - -Base class for warnings generated by user code. - - -### Class `DreadnodeUsageWarning` - -**Inherits from:** `UserWarning` - -Base class for warnings generated by user code. - - diff --git a/docs/sdk/dreadnode/tracing/span.mdx b/docs/sdk/dreadnode/tracing/span.mdx deleted file mode 100644 index 52e3ddd2..00000000 --- a/docs/sdk/dreadnode/tracing/span.mdx +++ /dev/null @@ -1,1113 +0,0 @@ ---- -title: 'span' -sidebarTitle: 'span' -groups: ["strikes"] ---- - -# Module `span` - -*(Full name: `dreadnode.tracing.span`)* - -**Source file:** `span.py` - -## Classes - - -### Class `RunContext` - -**Inherits from:** `dict` - -Context for transferring and continuing runs in other places. - - -### Class `RunSpan` - -**Inherits from:** `dreadnode.tracing.span.Span` - -Provides read-only access to span attributes. - -Users should NOT be creating these objects directly. `ReadableSpan`s are created as -a direct result from using the tracing pipeline via the `Tracer`. - -#### Properties - -##### `active` - -**Type:** `\` *(property)* - -Check if the span is currently active (recording). - -*Has: getter* - ---- - -##### `all_tasks` - -**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* - -Get all tasks, including subtasks. - -*Has: getter* - ---- - -##### `duration` - -**Type:** `\` *(property)* - -Get the duration of the span in seconds. - -*Has: getter* - ---- - -##### `failed` - -**Type:** `\` *(property)* - -Check if the span has failed. 
- -*Has: getter* - ---- - -##### `inputs` - -**Type:** `dict[str, Any]` *(property)* - -*Has: getter* - ---- - -##### `is_recording` - -**Type:** `\` *(property)* - -Check if the span is currently recording. - -*Has: getter* - ---- - -##### `label` - -**Type:** `\` *(property)* - -Get the label of the span. - -*Has: getter* - ---- - -##### `metrics` - -**Type:** `dict[str, list[dreadnode.metric.Metric]]` *(property)* - -*Has: getter* - ---- - -##### `outputs` - -**Type:** `dict[str, Any]` *(property)* - -*Has: getter* - ---- - -##### `params` - -**Type:** `dict[str, Any]` *(property)* - -*Has: getter* - ---- - -##### `run_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -##### `span_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -##### `tasks` - -**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* - -*Has: getter* - ---- - -##### `trace_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -#### Methods - -##### `__enter__` - -```python -__enter__(self) -> Self -``` - ---- - -##### `__exit__` - -```python -__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None -``` - ---- - -##### `__getattr__` - -```python -__getattr__(self, name: str) -> Any -``` - ---- - -##### `__init__` - -```python -__init__(self, name: str, project: str, tracer: opentelemetry.trace.Tracer, file_system: fsspec.spec.AbstractFileSystem, prefix_path: str, *, attributes: dict[str, typing.Any] | None = None, params: dict[str, typing.Any] | None = None, metrics: dict[str, list[dreadnode.metric.Metric]] | None = None, tags: Optional[Sequence[str]] = None, autolog: bool = True, update_frequency: int = 5, run_id: str | ulid.ULID | None = None, type: Literal['run', 'task', 'span', 'run_update', 'run_fragment'] = 'run', credential_refresher: Optional[Callable[[], bool]] = None) -> None -``` - -Initialize self. See help(type(self)) for accurate signature. 
- ---- - -##### `__repr__` - -```python -__repr__(self) -> str -``` - -Return repr(self). - ---- - -##### `__str__` - -```python -__str__(self) -> str -``` - -Return str(self). - ---- - -##### `add_tags` - -```python -add_tags(self, tags: Sequence[str]) -> None -``` - ---- - -##### `get_attribute` - -```python -get_attribute(self, key: str, default: Any) -> Any -``` - ---- - -##### `get_attributes` - -```python -get_attributes(self) -> dict[str, typing.Any] -``` - ---- - -##### `get_object` - -```python -get_object(self, hash_: str) -> Any -``` - ---- - -##### `link_objects` - -```python -link_objects(self, object_hash: str, link_hash: str, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `log_artifact` - -```python -log_artifact(self, local_uri: str | pathlib.Path) -> None -``` - -Logs a local file or directory as an artifact to the object store. - -Preserves directory structure and uses content hashing for deduplication. - -**Parameters:** - -- **`local_uri`**: Path to the local file or directory - -**Returns:** DirectoryNode representing the artifact's tree structure - -**Raises:** - -- `FileNotFoundError` — If the path doesn't exist - ---- - -##### `log_event` - -```python -log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `log_input` - -```python -log_input(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `log_metric` - -```python -log_metric(self, name: str, value: float | bool | dreadnode.metric.Metric, *, step: int = 0, origin: typing.Any | None = None, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, prefix: str | None = None, attributes: dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]] | None = None) -> dreadnode.metric.Metric -``` - ---- - -##### 
`log_object` - -```python -log_object(self, value: Any, *, label: str | None = None, event_name: str = 'dreadnode.object', attributes: dict[str, typing.Any] | None = None) -> str -``` - ---- - -##### `log_output` - -```python -log_output(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `log_param` - -```python -log_param(self, key: str, value: Any) -> None -``` - ---- - -##### `log_params` - -```python -log_params(self, **params: Any) -> None -``` - ---- - -##### `push_update` - -```python -push_update(self, *, force: bool = False) -> None -``` - ---- - -##### `set_attribute` - -```python -set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None -``` - ---- - -##### `set_attributes` - -```python -set_attributes(self, attributes: dict[str, typing.Any]) -> None -``` - ---- - -##### `set_exception` - -```python -set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> None -``` - ---- - -##### `set_tags` - -```python -set_tags(self, tags: Sequence[str]) -> None -``` - ---- - -##### `to_graph` - -```python -to_graph(self) -> 'nx.DiGraph' -``` - ---- - - -### Class `RunUpdateSpan` - -**Inherits from:** `dreadnode.tracing.span.Span` - -Provides read-only access to span attributes. - -Users should NOT be creating these objects directly. `ReadableSpan`s are created as -a direct result from using the tracing pipeline via the `Tracer`. - -#### Properties - -##### `active` - -**Type:** `\` *(property)* - -Check if the span is currently active (recording). - -*Has: getter* - ---- - -##### `duration` - -**Type:** `\` *(property)* - -Get the duration of the span in seconds. - -*Has: getter* - ---- - -##### `failed` - -**Type:** `\` *(property)* - -Check if the span has failed. 
- -*Has: getter* - ---- - -##### `is_recording` - -**Type:** `\` *(property)* - -Check if the span is currently recording. - -*Has: getter* - ---- - -##### `label` - -**Type:** `\` *(property)* - -Get the label of the span. - -*Has: getter* - ---- - -##### `span_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -##### `trace_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -#### Methods - -##### `__enter__` - -```python -__enter__(self) -> Self -``` - ---- - -##### `__exit__` - -```python -__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None -``` - ---- - -##### `__getattr__` - -```python -__getattr__(self, name: str) -> Any -``` - ---- - -##### `__init__` - -```python -__init__(self, run_id: str, tracer: opentelemetry.trace.Tracer, project: str, *, metrics: dict[str, list[dreadnode.metric.Metric]] | None = None, params: dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]] | None = None, inputs: list[dreadnode.object.ObjectRef] | None = None, outputs: list[dreadnode.object.ObjectRef] | None = None, objects: dict[str, dreadnode.object.ObjectUri | dreadnode.object.ObjectVal] | None = None, object_schemas: dict[str, dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]]] | None = None) -> None -``` - -Initialize self. See help(type(self)) for accurate signature. - ---- - -##### `__repr__` - -```python -__repr__(self) -> str -``` - -Return repr(self). - ---- - -##### `__str__` - -```python -__str__(self) -> str -``` - -Return str(self). 
- ---- - -##### `add_tags` - -```python -add_tags(self, tags: Sequence[str]) -> None -``` - ---- - -##### `get_attribute` - -```python -get_attribute(self, key: str, default: Any) -> Any -``` - ---- - -##### `get_attributes` - -```python -get_attributes(self) -> dict[str, typing.Any] -``` - ---- - -##### `log_event` - -```python -log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `set_attribute` - -```python -set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None -``` - ---- - -##### `set_attributes` - -```python -set_attributes(self, attributes: dict[str, typing.Any]) -> None -``` - ---- - -##### `set_exception` - -```python -set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> None -``` - ---- - -##### `set_tags` - -```python -set_tags(self, tags: Sequence[str]) -> None -``` - ---- - - -### Class `Span` - -**Inherits from:** `opentelemetry.sdk.trace.ReadableSpan` - -Provides read-only access to span attributes. - -Users should NOT be creating these objects directly. `ReadableSpan`s are created as -a direct result from using the tracing pipeline via the `Tracer`. - -#### Properties - -##### `active` - -**Type:** `\` *(property)* - -Check if the span is currently active (recording). - -*Has: getter* - ---- - -##### `duration` - -**Type:** `\` *(property)* - -Get the duration of the span in seconds. - -*Has: getter* - ---- - -##### `failed` - -**Type:** `\` *(property)* - -Check if the span has failed. - -*Has: getter* - ---- - -##### `is_recording` - -**Type:** `\` *(property)* - -Check if the span is currently recording. - -*Has: getter* - ---- - -##### `label` - -**Type:** `\` *(property)* - -Get the label of the span. 
- -*Has: getter* - ---- - -##### `span_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -##### `trace_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -#### Methods - -##### `__enter__` - -```python -__enter__(self) -> Self -``` - ---- - -##### `__exit__` - -```python -__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None -``` - ---- - -##### `__getattr__` - -```python -__getattr__(self, name: str) -> Any -``` - ---- - -##### `__init__` - -```python -__init__(self, name: str, tracer: opentelemetry.trace.Tracer, *, attributes: dict[str, typing.Any] | None = None, label: str | None = None, type: Literal['run', 'task', 'span', 'run_update', 'run_fragment'] = 'span', tags: Optional[Sequence[str]] = None) -> None -``` - -Initialize self. See help(type(self)) for accurate signature. - ---- - -##### `__repr__` - -```python -__repr__(self) -> str -``` - -Return repr(self). - ---- - -##### `__str__` - -```python -__str__(self) -> str -``` - -Return str(self). 
- ---- - -##### `add_tags` - -```python -add_tags(self, tags: Sequence[str]) -> None -``` - ---- - -##### `get_attribute` - -```python -get_attribute(self, key: str, default: Any) -> Any -``` - ---- - -##### `get_attributes` - -```python -get_attributes(self) -> dict[str, typing.Any] -``` - ---- - -##### `log_event` - -```python -log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `set_attribute` - -```python -set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None -``` - ---- - -##### `set_attributes` - -```python -set_attributes(self, attributes: dict[str, typing.Any]) -> None -``` - ---- - -##### `set_exception` - -```python -set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> None -``` - ---- - -##### `set_tags` - -```python -set_tags(self, tags: Sequence[str]) -> None -``` - ---- - - -### Class `TaskSpan` - -**Inherits from:** `dreadnode.tracing.span.Span`, `typing.Generic` - -Provides read-only access to span attributes. - -Users should NOT be creating these objects directly. `ReadableSpan`s are created as -a direct result from using the tracing pipeline via the `Tracer`. - -#### Properties - -##### `active` - -**Type:** `\` *(property)* - -Check if the span is currently active (recording). - -*Has: getter* - ---- - -##### `all_tasks` - -**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* - -Get all tasks, including subtasks. - -*Has: getter* - ---- - -##### `duration` - -**Type:** `\` *(property)* - -Get the duration of the span in seconds. - -*Has: getter* - ---- - -##### `failed` - -**Type:** `\` *(property)* - -Check if the span has failed. - -*Has: getter* - ---- - -##### `inputs` - -**Type:** `dict[str, Any]` *(property)* - -*Has: getter* - ---- - -##### `is_recording` - -**Type:** `\` *(property)* - -Check if the span is currently recording. 
- -*Has: getter* - ---- - -##### `label` - -**Type:** `\` *(property)* - -Get the label of the span. - -*Has: getter* - ---- - -##### `metrics` - -**Type:** `dict[str, list[dreadnode.metric.Metric]]` *(property)* - -*Has: getter* - ---- - -##### `output` - -**Type:** `~R` *(property)* - -*Has: getter, setter* - ---- - -##### `outputs` - -**Type:** `dict[str, Any]` *(property)* - -*Has: getter* - ---- - -##### `parent_task` - -**Type:** `Optional[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* - -Get the parent task if it exists. - -*Has: getter* - ---- - -##### `parent_task_id` - -**Type:** `\` *(property)* - -Get the parent task ID if it exists (may be empty). - -*Has: getter* - ---- - -##### `run` - -**Type:** `\` *(property)* - -Get the run this task is associated with. - -*Has: getter* - ---- - -##### `run_id` - -**Type:** `\` *(property)* - -Get the run id this task is associated with (may be empty). - -*Has: getter* - ---- - -##### `span_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -##### `tasks` - -**Type:** `list[dreadnode.tracing.span.TaskSpan[Any]]` *(property)* - -Get the list of children tasks. - -*Has: getter* - ---- - -##### `trace_id` - -**Type:** `\` *(property)* - -*Has: getter* - ---- - -#### Methods - -##### `__enter__` - -```python -__enter__(self) -> Self -``` - ---- - -##### `__exit__` - -```python -__exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: traceback | None) -> None -``` - ---- - -##### `__getattr__` - -```python -__getattr__(self, name: str) -> Any -``` - ---- - -##### `__init__` - -```python -__init__(self, name: str, run_id: str, tracer: opentelemetry.trace.Tracer, *, attributes: dict[str, typing.Any] | None = None, label: str | None = None, metrics: dict[str, list[dreadnode.metric.Metric]] | None = None, tags: Optional[Sequence[str]] = None) -> None -``` - -Initialize self. See help(type(self)) for accurate signature. 
- ---- - -##### `__repr__` - -```python -__repr__(self) -> str -``` - -Return repr(self). - ---- - -##### `__str__` - -```python -__str__(self) -> str -``` - -Return str(self). - ---- - -##### `add_tags` - -```python -add_tags(self, tags: Sequence[str]) -> None -``` - ---- - -##### `get_attribute` - -```python -get_attribute(self, key: str, default: Any) -> Any -``` - ---- - -##### `get_attributes` - -```python -get_attributes(self) -> dict[str, typing.Any] -``` - ---- - -##### `get_average_metric_value` - -```python -get_average_metric_value(self, key: str | None = None) -> float -``` - ---- - -##### `log_event` - -```python -log_event(self, name: str, attributes: dict[str, typing.Any] | None = None) -> None -``` - ---- - -##### `log_input` - -```python -log_input(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> str -``` - ---- - -##### `log_metric` - -```python -log_metric(self, name: str, value: float | bool | dreadnode.metric.Metric, *, step: int = 0, origin: typing.Any | None = None, timestamp: datetime.datetime | None = None, mode: Optional[Literal['avg', 'sum', 'min', 'max', 'count']] = None, attributes: dict[str, typing.Union[int, float, str, bool, NoneType, list['JsonValue'], tuple['JsonValue', ...], ForwardRef('JsonDict')]] | None = None) -> dreadnode.metric.Metric -``` - ---- - -##### `log_output` - -```python -log_output(self, name: str, value: Any, *, label: str | None = None, attributes: dict[str, typing.Any] | None = None) -> str -``` - ---- - -##### `set_attribute` - -```python -set_attribute(self, key: str, value: Any, *, schema: bool = True, raw: bool = False) -> None -``` - ---- - -##### `set_attributes` - -```python -set_attributes(self, attributes: dict[str, typing.Any]) -> None -``` - ---- - -##### `set_exception` - -```python -set_exception(self, exception: BaseException, *, attributes: dict[str, typing.Any] | None = None, status: opentelemetry.trace.status.Status | None = None) -> 
None -``` - ---- - -##### `set_tags` - -```python -set_tags(self, tags: Sequence[str]) -> None -``` - ---- - - -## Functions - -### `prepare_otlp_attribute` - -```python -prepare_otlp_attribute(value: Any) -> Union[str, bool, int, float, Sequence[str], Sequence[bool], Sequence[int], Sequence[float]] -``` - -### `prepare_otlp_attributes` - -```python -prepare_otlp_attributes(attributes: dict[str, typing.Any]) -> dict[str, typing.Union[str, bool, int, float, typing.Sequence[str], typing.Sequence[bool], typing.Sequence[int], typing.Sequence[float]]] -``` - - From 00c66cab1e9ed6213280474d7a4bf080c9ad8231 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Mon, 28 Jul 2025 22:12:28 -0700 Subject: [PATCH 4/5] Remove manual generate docs --- docs/sdk/dreadnode/storage_utils.mdx | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 docs/sdk/dreadnode/storage_utils.mdx diff --git a/docs/sdk/dreadnode/storage_utils.mdx b/docs/sdk/dreadnode/storage_utils.mdx deleted file mode 100644 index 4c8559f4..00000000 --- a/docs/sdk/dreadnode/storage_utils.mdx +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: 'storage_utils' -sidebarTitle: 'storage_utils' -groups: ["strikes"] ---- - -# Module `storage_utils` - -*(Full name: `dreadnode.storage_utils`)* - -**Source file:** `storage_utils.py` - -## Functions - -### `with_credential_refresh` - -```python -with_credential_refresh(func: Callable[..., Any]) -> Callable[..., Any] -``` - -Decorator that automatically handles credential refresh on storage errors. 
- - From 5c7f660f8a775fb56bba80e36c4fdd7b89bf82a3 Mon Sep 17 00:00:00 2001 From: Raja Sekhar Rao Dheekonda Date: Mon, 28 Jul 2025 22:31:40 -0700 Subject: [PATCH 5/5] Update docs --- docs/sdk/api.mdx | 21 +- docs/sdk/artifact.mdx | 19 +- docs/sdk/main.mdx | 22 +- docs/sdk/metric.mdx | 4 +- poetry.lock | 311 ++++++++++++++++++- pyproject.toml | 3 + scripts/__init__.py | 0 scripts/make_docs.py | 676 ------------------------------------------ 8 files changed, 365 insertions(+), 691 deletions(-) delete mode 100644 scripts/__init__.py delete mode 100644 scripts/make_docs.py diff --git a/docs/sdk/api.mdx b/docs/sdk/api.mdx index b5017b0f..2d8970ac 100644 --- a/docs/sdk/api.mdx +++ b/docs/sdk/api.mdx @@ -639,11 +639,21 @@ def get_run_trace( ### get\_user\_data\_credentials ```python -get_user_data_credentials() -> UserDataCredentials +get_user_data_credentials( + duration: int = DEFAULT_FS_CREDENTIAL_DURATION, +) -> UserDataCredentials ``` Retrieves user data credentials for secondary storage access. +**Parameters:** + +* **`duration`** + (`int`, default: + `DEFAULT_FS_CREDENTIAL_DURATION` + ) + –Credential lifetime in seconds (default: 4 hours) + **Returns:** * `UserDataCredentials` @@ -651,14 +661,19 @@ Retrieves user data credentials for secondary storage access. ```python -def get_user_data_credentials(self) -> UserDataCredentials: +def get_user_data_credentials( + self, duration: int = DEFAULT_FS_CREDENTIAL_DURATION +) -> UserDataCredentials: """ Retrieves user data credentials for secondary storage access. + Args: + duration: Credential lifetime in seconds (default: 4 hours) + Returns: The user data credentials object. 
""" - response = self.request("GET", "/user-data/credentials") + response = self._request("GET", "/user-data/credentials", params={"duration": duration}) return UserDataCredentials(**response.json()) ``` diff --git a/docs/sdk/artifact.mdx b/docs/sdk/artifact.mdx index 6e902b22..7c6495f1 100644 --- a/docs/sdk/artifact.mdx +++ b/docs/sdk/artifact.mdx @@ -244,7 +244,10 @@ ArtifactStorage --------------- ```python -ArtifactStorage(file_system: AbstractFileSystem) +ArtifactStorage( + file_system: AbstractFileSystem, + credential_refresher: Callable[[], bool] | None = None, +) ``` Storage for artifacts with efficient handling of large files and directories. @@ -260,17 +263,28 @@ Initialize artifact storage with a file system and prefix path. * **`file_system`** (`AbstractFileSystem`) –FSSpec-compatible file system +* **`credential_refresher`** + (`Callable[[], bool] | None`, default: + `None` + ) + –Optional function to refresh credentials when it's about to expire ```python -def __init__(self, file_system: fsspec.AbstractFileSystem): +def __init__( + self, + file_system: fsspec.AbstractFileSystem, + credential_refresher: t.Callable[[], bool] | None = None, +): """ Initialize artifact storage with a file system and prefix path. Args: file_system: FSSpec-compatible file system + credential_refresher: Optional function to refresh credentials when it's about to expire """ self._file_system = file_system + self._credential_refresher = credential_refresher ``` @@ -464,6 +478,7 @@ Store a file in the storage system, using multipart upload for large files. ```python +@with_credential_refresh def store_file(self, file_path: Path, target_key: str) -> str: """ Store a file in the storage system, using multipart upload for large files. 
diff --git a/docs/sdk/main.mdx b/docs/sdk/main.mdx index c479c08c..42006bd2 100644 --- a/docs/sdk/main.mdx +++ b/docs/sdk/main.mdx @@ -65,6 +65,9 @@ def __init__( self._fs_prefix: str = ".dreadnode/storage/" self._initialized = False + + self._credentials: UserDataCredentials | None = None + self._credentials_expiry: datetime | None = None ``` @@ -312,6 +315,7 @@ def continue_run(self, run_context: RunContext) -> RunSpan: tracer=self._get_tracer(), file_system=self._fs, prefix_path=self._fs_prefix, + credential_refresher=self._refresh_storage_credentials if self._credentials else None, ) ``` @@ -455,18 +459,21 @@ def initialize(self) -> None: # ) # ) - credentials = self._api.get_user_data_credentials() - resolved_endpoint = self._resolve_endpoint(credentials.endpoint) + self._credentials = self._api.get_user_data_credentials( + duration=DEFAULT_FS_CREDENTIAL_DURATION + ) + self._credentials_expiry = self._credentials.expiration + resolved_endpoint = self._resolve_endpoint(self._credentials.endpoint) self._fs = S3FileSystem( - key=credentials.access_key_id, - secret=credentials.secret_access_key, - token=credentials.session_token, + key=self._credentials.access_key_id, + secret=self._credentials.secret_access_key, + token=self._credentials.session_token, client_kwargs={ "endpoint_url": resolved_endpoint, - "region_name": credentials.region, + "region_name": self._credentials.region, }, ) - self._fs_prefix = f"{credentials.bucket}/{credentials.prefix}/" + self._fs_prefix = f"{self._credentials.bucket}/{self._credentials.prefix}/" self._logfire = logfire.configure( local=not self.is_default, @@ -1654,6 +1661,7 @@ def run( file_system=self._fs, prefix_path=self._fs_prefix, autolog=autolog, + credential_refresher=self._refresh_storage_credentials if self._credentials else None, ) ``` diff --git a/docs/sdk/metric.mdx b/docs/sdk/metric.mdx index 68609cde..368e89c2 100644 --- a/docs/sdk/metric.mdx +++ b/docs/sdk/metric.mdx @@ -31,8 +31,8 @@ Metric Metric( value: float, 
step: int = 0, - timestamp: datetime = lambda: datetime.now( - timezone.utc + timestamp: datetime = ( + lambda: datetime.now(timezone.utc) )(), attributes: JsonDict = dict(), ) diff --git a/poetry.lock b/poetry.lock index 13bcc746..f5162502 100644 --- a/poetry.lock +++ b/poetry.lock @@ -199,6 +199,29 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] +[[package]] +name = "beautifulsoup4" +version = "4.13.4" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.7.0" +groups = ["dev"] +files = [ + {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, + {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, +] + +[package.dependencies] +soupsieve = ">1.2" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "boto3" version = "1.38.14" @@ -1333,6 +1356,24 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", 
"pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] +[[package]] +name = "ghp-import" +version = "2.1.0" +description = "Copy your docs directly to the gh-pages branch." +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, + {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, +] + +[package.dependencies] +python-dateutil = ">=2.8.1" + +[package.extras] +dev = ["flake8", "markdown", "twine", "wheel"] + [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -1351,6 +1392,21 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4 [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0)"] +[[package]] +name = "griffe" +version = "1.9.0" +description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "griffe-1.9.0-py3-none-any.whl", hash = "sha256:bcf90ee3ad42bbae70a2a490c782fc8e443de9b84aa089d857c278a4e23215fc"}, + {file = "griffe-1.9.0.tar.gz", hash = "sha256:b5531cf45e9b73f0842c2121cc4d4bcbb98a55475e191fc9830e7aef87a920a0"}, +] + +[package.dependencies] +colorama = ">=0.4" + [[package]] name = "h11" version = "0.16.0" @@ -1874,6 +1930,22 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] +[[package]] +name = "markdown" +version = "3.8.2" +description = "Python implementation of John Gruber's Markdown." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24"}, + {file = "markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45"}, +] + +[package.extras] +docs = ["mdx_gh_links (>=0.2)", "mkdocs (>=1.6)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -1899,6 +1971,22 @@ profiling = ["gprof2dot"] rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] +[[package]] +name = "markdownify" +version = "1.1.0" +description = "Convert HTML to markdown." +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef"}, + {file = "markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.9,<5" +six = ">=1.15,<2" + [[package]] name = "markupsafe" version = "3.0.2" @@ -1982,6 +2070,126 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "mergedeep" +version = "1.3.4" +description = "A deep merge function for 🐍." 
+optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, + {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, +] + +[[package]] +name = "mkdocs" +version = "1.6.1" +description = "Project documentation with Markdown." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, + {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} +ghp-import = ">=1.0" +jinja2 = ">=2.11.1" +markdown = ">=3.3.6" +markupsafe = ">=2.0.1" +mergedeep = ">=1.3.4" +mkdocs-get-deps = ">=0.2.0" +packaging = ">=20.5" +pathspec = ">=0.11.1" +pyyaml = ">=5.1" +pyyaml-env-tag = ">=0.1" +watchdog = ">=2.0" + +[package.extras] +i18n = ["babel (>=2.9.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4) ; platform_system == \"Windows\"", "ghp-import (==1.0)", "importlib-metadata (==4.4) ; python_version < \"3.10\"", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"] + +[[package]] +name = "mkdocs-autorefs" +version = "1.4.2" +description = "Automatically link across pages in MkDocs." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocs_autorefs-1.4.2-py3-none-any.whl", hash = "sha256:83d6d777b66ec3c372a1aad4ae0cf77c243ba5bcda5bf0c6b8a2c5e7a3d89f13"}, + {file = "mkdocs_autorefs-1.4.2.tar.gz", hash = "sha256:e2ebe1abd2b67d597ed19378c0fff84d73d1dbce411fce7a7cc6f161888b6749"}, +] + +[package.dependencies] +Markdown = ">=3.3" +markupsafe = ">=2.0.1" +mkdocs = ">=1.1" + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"}, + {file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"}, +] + +[package.dependencies] +mergedeep = ">=1.3.4" +platformdirs = ">=2.2.0" +pyyaml = ">=5.1" + +[[package]] +name = "mkdocstrings" +version = "0.30.0" +description = "Automatic documentation from sources, for MkDocs." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocstrings-0.30.0-py3-none-any.whl", hash = "sha256:ae9e4a0d8c1789697ac776f2e034e2ddd71054ae1cf2c2bb1433ccfd07c226f2"}, + {file = "mkdocstrings-0.30.0.tar.gz", hash = "sha256:5d8019b9c31ddacd780b6784ffcdd6f21c408f34c0bd1103b5351d609d5b4444"}, +] + +[package.dependencies] +Jinja2 = ">=2.11.1" +Markdown = ">=3.6" +MarkupSafe = ">=1.1" +mkdocs = ">=1.6" +mkdocs-autorefs = ">=1.4" +pymdown-extensions = ">=6.3" + +[package.extras] +crystal = ["mkdocstrings-crystal (>=0.3.4)"] +python = ["mkdocstrings-python (>=1.16.2)"] +python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] + +[[package]] +name = "mkdocstrings-python" +version = "1.16.12" +description = "A Python handler for mkdocstrings." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mkdocstrings_python-1.16.12-py3-none-any.whl", hash = "sha256:22ded3a63b3d823d57457a70ff9860d5a4de9e8b1e482876fc9baabaf6f5f374"}, + {file = "mkdocstrings_python-1.16.12.tar.gz", hash = "sha256:9b9eaa066e0024342d433e332a41095c4e429937024945fea511afe58f63175d"}, +] + +[package.dependencies] +griffe = ">=1.6.2" +mkdocs-autorefs = ">=1.4" +mkdocstrings = ">=0.28.3" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + [[package]] name = "moviepy" version = "2.2.1" @@ -2569,6 +2777,18 @@ files = [ numpy = ">=1.23.5" types-pytz = ">=2022.1.1" +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + [[package]] name = "pillow" version = "11.3.0" @@ -3141,6 +3361,25 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pymdown-extensions" +version = "10.16.1" +description = "Extension pack for Python Markdown." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d"}, + {file = "pymdown_extensions-10.16.1.tar.gz", hash = "sha256:aace82bcccba3efc03e25d584e6a22d27a8e17caa3f4dd9f207e49b787aa9a91"}, +] + +[package.dependencies] +markdown = ">=3.6" +pyyaml = "*" + +[package.extras] +extra = ["pygments (>=2.19.1)"] + [[package]] name = "pytest" version = "8.4.1" @@ -3306,6 +3545,21 @@ files = [ ] markers = {main = "extra == \"training\""} +[[package]] +name = "pyyaml-env-tag" +version = "1.1" +description = "A custom YAML tag for referencing environment variables in YAML files." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04"}, + {file = "pyyaml_env_tag-1.1.tar.gz", hash = "sha256:2eb38b75a2d21ee0475d6d97ec19c63287a7e140231e4214969d0eac923cd7ff"}, +] + +[package.dependencies] +pyyaml = "*" + [[package]] name = "referencing" version = "0.36.2" @@ -3863,6 +4117,18 @@ files = [ cffi = ">=1.0" numpy = "*" +[[package]] +name = "soupsieve" +version = "2.7" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, + {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, +] + [[package]] name = "tiktoken" version = "0.9.0" @@ -4244,6 +4510,49 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +[[package]] +name = "watchdog" +version = "6.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112"}, + {file = "watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"}, + {file = "watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e6f0e77c9417e7cd62af82529b10563db3423625c5fce018430b249bf977f9e8"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90c8e78f3b94014f7aaae121e6b909674df5b46ec24d6bebc45c44c56729af2a"}, + {file = "watchdog-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881"}, + {file = "watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11"}, + {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7a0e56874cfbc4b9b05c60c8a1926fedf56324bb08cfbc188969777940aef3aa"}, 
+ {file = "watchdog-6.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6439e374fc012255b4ec786ae3c4bc838cd7309a540e5fe0952d03687d8804e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"}, + {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"}, + {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"}, + {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"}, + {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [[package]] name = "win32-setctime" version = "1.2.0" @@ -4641,4 +4950,4 @@ training = ["transformers"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "8663d657b6902842020781e652acc419fc9ccb3b2edb023e863f1d24530bacbb" 
+content-hash = "7fe42bec3e3d9c9a4c725c149c8d9ae5920b83d4688dfcf1e032a2b49b363052" diff --git a/pyproject.toml b/pyproject.toml index 6d6c6638..3fd5753e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,9 @@ typer = "^0.15.2" datasets = "^3.5.0" pyarrow = "^19.0.1" docstring-parser = "^0.17.0" +markdown = "^3.8.2" +markdownify = "^1.1.0" +mkdocstrings-python = "^1.16.12" [build-system] requires = ["poetry-core>=1.0.0", "setuptools>=42", "wheel"] diff --git a/scripts/__init__.py b/scripts/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/scripts/make_docs.py b/scripts/make_docs.py deleted file mode 100644 index e47ffebe..00000000 --- a/scripts/make_docs.py +++ /dev/null @@ -1,676 +0,0 @@ -# type: ignore # noqa: PGH003 - - -import argparse -import builtins -import inspect -import io -import json -import logging -import pkgutil -import pydoc -import re -import sys -import types -import typing as t -from importlib import import_module -from pathlib import Path - -from docstring_parser import Docstring, DocstringStyle, ParseError -from docstring_parser import parse as parse_docstring - -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") - -logger = logging.getLogger(__name__) - - -# --- Helper Functions --- -def get_raw_doc(obj: t.Any) -> str | None: - """Gets the raw, uncleaned docstring.""" - return inspect.getdoc(obj) - - -def format_signature(obj: t.Any, class_name: str | None = None) -> str: - """Formats the signature of a callable object for code display.""" - try: - actual_obj = obj - if isinstance(obj, property): - actual_obj = obj.fget if obj.fget else obj - elif isinstance(obj, (staticmethod, classmethod)): - actual_obj = getattr(obj, "__func__", obj) - if not callable(actual_obj): - return "" - sig = inspect.signature(actual_obj) - sig_str = str(sig) - if class_name: - sig_str = re.sub(rf"\b{re.escape(class_name)}\.([\w]+)\b", r"\1", sig_str) - except (ValueError, TypeError): - 
return "(...)" - except Exception as e: # noqa: BLE001 - logger.warning( - "Warning: Could not get signature for %s: %s", getattr(obj, "__name__", "unknown"), e - ) - return "(...)" - - return sig_str - - -def format_type_annotation(annotation_str: str) -> str: - """Convert pipe-style union types to MDX-compatible format.""" - if "|" in annotation_str: - # Handle simple Optional types - if " | None" in annotation_str: - base_type = annotation_str.replace(" | None", "").strip() - return f"Optional[{base_type}]" - - # General case for union types - parts = [part.strip() for part in annotation_str.split("|")] - return f"Union[{', '.join(parts)}]" - - return annotation_str - - -# --- Core MDX Generator Class --- - - -class MDXDoc(pydoc.HTMLDoc): - """Formatter class for creating clean, readable MDX documentation.""" - - def __init__(self, auth_group: str | None = None): - super().__init__() - self.auth_group = auth_group - - # --- Docstring Formatting Logic --- - def _format_docstring(self, obj: t.Any) -> str: - """Parses and formats the docstring using a clean, traditional style with concise sections.""" - raw_doc = get_raw_doc(obj) - if not raw_doc: - return "" - - try: - parsed = parse_docstring(raw_doc, style=DocstringStyle.GOOGLE) - output = io.StringIO() - - # Process each section of the docstring - self._write_description(parsed, output) - self._write_parameters(parsed, output) - self._write_returns(parsed, output) - self._write_raises(parsed, output) - - return output.getvalue() - - except ParseError as e: - logger.warning( - "Warning: Could not parse docstring for %s: %s", - getattr(obj, "__name__", "object"), - e, - ) - return raw_doc.replace("<", r"\<") + "\n\n" - except Exception as e: # noqa: BLE001 - logger.info( - "Error formatting docstring for %s: %s", getattr(obj, "__name__", "object"), e - ) - return raw_doc.replace("<", r"\<") + "\n\n" - - def _write_description(self, parsed: Docstring, output: io.StringIO) -> None: - """Writes the description 
section of the docstring.""" - description = "" - if parsed.short_description: - description += parsed.short_description - if parsed.long_description: - if description: - description += "\n\n" - description += parsed.long_description - if description: - output.write(description.replace("<", r"\<") + "\n\n") - - def _write_parameters(self, parsed: Docstring, output: io.StringIO) -> None: - """Writes the parameters section of the docstring.""" - if parsed.params: - output.write("**Parameters:**\n\n") - for param in parsed.params: - param_header = f"**`{param.arg_name}`**" - if param.type_name: - safe_type = param.type_name.replace("`", r"\`").replace("<", r"\<") - safe_type = format_type_annotation(safe_type) - param_header += f" (`{safe_type}`)" - if param.is_optional: - param_header += " *(optional)*" - output.write(f"- {param_header}") - if param.description: - updated_description = param.description.replace("<", r"\<") - output.write(f": {updated_description}") - if param.default: - safe_default = param.default.replace("`", r"\`") - output.write(f" Default: `{safe_default}`") - output.write("\n") - output.write("\n") - - def _write_returns(self, parsed: Docstring, output: io.StringIO) -> None: - """Writes the returns section of the docstring.""" - if parsed.returns: - return_line = "**Returns:** " - if parsed.returns.type_name: - safe_type = parsed.returns.type_name.replace("`", r"\`").replace("<", r"\<") - safe_type = format_type_annotation(safe_type) - return_line += f"`{safe_type}`" - if parsed.returns.description: - if parsed.returns.type_name: - return_line += " — " - return_line += parsed.returns.description.replace("<", r"\<") - output.write(return_line + "\n\n") - - def _write_raises(self, parsed: Docstring, output: io.StringIO) -> None: - """Writes the raises section of the docstring.""" - if parsed.raises: - output.write("**Raises:**\n\n") - for exc in parsed.raises: - exc_line = "- " - if exc.type_name: - safe_type = exc.type_name.replace("`", 
r"\`").replace("<", r"\<") - safe_type = format_type_annotation(safe_type) - exc_line += f"`{safe_type}`" - if exc.description: - if exc.type_name: - exc_line += " — " - exc_line += exc.description.replace("<", r"\<") - output.write(exc_line + "\n") - output.write("\n") - - # --- Overridden pydoc methods --- - - def page(self, title: str, contents: str) -> str: - safe_title = title.replace("'", "''") - return f"---\ntitle: '{safe_title}'\n---\n\n{contents}" - - def heading(self, title: str, level: str = 1) -> str: - return f"{'#' * level} {title}\n" - - def section(self, title: str, contents: str, level: int = 2) -> str: - return f"\n{'#' * level} {title}\n\n{contents}\n" - - def docmodule( - self, - object: types.ModuleType, - ) -> str: - full_name = object.__name__ - short_name = full_name.split(".")[-1] - safe_short_name = short_name.replace("'", "''") - output = io.StringIO() - - # Write frontmatter and module header - self._write_frontmatter(output, safe_short_name, short_name, full_name) - - # Write source file information - self._write_source_file_info(output, object) - - # Write module docstring - module_doc_formatted = self._format_docstring(object) - output.write(module_doc_formatted) - - # Collect and document members - classes, functions = self._collect_members(object, full_name) - self._write_classes(output, classes, full_name) - self._write_functions(output, functions) - - return output.getvalue() - - def _write_frontmatter(self, output, safe_short_name, short_name, full_name): - """Writes the frontmatter and module header.""" - output.write(f"---\ntitle: '{safe_short_name}'\nsidebarTitle: '{safe_short_name}'\n") - if self.auth_group: - output.write(f'groups: ["{self.auth_group}"]\n') - output.write("---\n\n") - output.write(f"# Module `{short_name}`\n\n") - output.write(f"*(Full name: `{full_name}`)*\n\n") - - def _write_source_file_info(self, output, object): - """Writes the source file information.""" - try: - source_file = 
inspect.getsourcefile(object) - if source_file: - output.write(f"**Source file:** `{Path(source_file).name}`\n\n") - except (TypeError, OSError): - pass - except Exception as e: # noqa: BLE001 - logger.warning("Warning: Could not get source file: %s", e) - - def _collect_members(self, object, full_name): - """Collects classes and functions defined in the module.""" - classes, functions = [], [] - try: - for member_name, member_obj in inspect.getmembers(object): - if member_name.startswith("_") and not member_name.startswith("__"): - continue - if self._is_defined_here(member_obj, full_name): - if inspect.isclass(member_obj): - classes.append((member_name, member_obj)) - elif inspect.isfunction(member_obj): - functions.append((member_name, member_obj)) - except Exception as e: # noqa: BLE001 - logger.warning("Error inspecting members: %s", e) - return classes, functions - - def _is_defined_here(self, member_obj, full_name): - """Checks if a member is defined in the current module.""" - try: - member_module = inspect.getmodule(member_obj) - except Exception: # noqa: BLE001 - return False - return member_module is not None and member_module.__name__ == full_name - - def _write_classes(self, output, classes, full_name): - """Writes the documentation for classes.""" - if classes: - output.write("## Classes\n\n") - for class_name, class_obj in sorted(classes, key=lambda item: item[0]): - output.write(self.docclass(class_obj, class_name, module_name=full_name)) - output.write("\n") - - def _write_functions(self, output, functions): - """Writes the documentation for functions.""" - if functions: - output.write("## Functions\n\n") - for func_name, func_obj in sorted(functions, key=lambda item: item[0]): - output.write(self.docroutine(func_obj, func_name, class_name=None)) - output.write("\n") - - def docclass( - self, object: type, name: str | None = None, module_name: str | None = None - ) -> str: - real_name = name or object.__name__ - output = io.StringIO() - 
output.write(f"\n### Class `{real_name}`\n\n") - - self._write_inheritance_info(object, output) - self._write_class_docstring(object, output) - - methods, properties = self._collect_class_members(object, module_name) - self._write_properties(properties, output, real_name) - self._write_methods(methods, output, real_name) - - return output.getvalue() - - def _write_inheritance_info(self, object: type, output: io.StringIO): - """Writes inheritance information for a class.""" - if object.__bases__: - bases = [] - for b in object.__bases__: - if b is object or (b is builtins.object and len(object.__bases__) > 1): - continue - base_module = getattr(b, "__module__", "") - base_name_str = getattr(b, "__name__", str(b)) - if base_module and base_module != "builtins": - bases.append(f"`{base_module}.{base_name_str}`") - else: - bases.append(f"`{base_name_str}`") - if bases: - output.write(f"**Inherits from:** {', '.join(bases)}\n\n") - - def _write_class_docstring(self, object: type, output: io.StringIO): - """Writes the formatted docstring for a class.""" - class_doc_formatted = self._format_docstring(object) - output.write(class_doc_formatted) - - def _collect_class_members(self, object: type, module_name: str | None) -> tuple[list, list]: - """Collects methods and properties of a class.""" - methods, properties = [], [] - try: - for member_name, member_obj in inspect.getmembers(object): - if member_name.startswith("_") and not member_name.startswith("__"): - continue - if self._is_relevant_member(member_name, member_obj, object, module_name): - if isinstance(member_obj, property): - properties.append((member_name, member_obj)) - elif self._is_method(member_obj): - methods.append((member_name, member_obj)) - except Exception as e: # noqa: BLE001 - logger.warning("Error inspecting members of %s: %s", object.__name__, e) - return methods, properties - - def _is_relevant_member( - self, member_name: str, member_obj: t.Any, object: type, module_name: str | None - ) -> bool: - 
"""Determines if a member is relevant for documentation.""" - is_directly_defined = member_name in object.__dict__ - try: - target_obj = member_obj.fget if isinstance(member_obj, property) else member_obj - member_origin_module = inspect.getmodule(target_obj) - except Exception: # noqa: BLE001 - return False - - return is_directly_defined or ( - member_origin_module and member_origin_module.__name__ == module_name - ) - - def _is_method(self, member_obj: t.Any) -> bool: - """Checks if a member is a method.""" - return inspect.isfunction(member_obj) or isinstance(member_obj, (classmethod, staticmethod)) - - def _write_properties(self, properties: list, output: io.StringIO, class_name: str): - """Writes properties of a class.""" - if properties: - output.write("#### Properties\n\n") - for prop_name, prop_obj in sorted(properties, key=lambda item: item[0]): - output.write(self._docproperty(prop_obj, prop_name, class_name=class_name)) - - def _write_methods(self, methods: list, output: io.StringIO, class_name: str): - """Writes methods of a class.""" - if methods: - output.write("#### Methods\n\n") - for method_name, method_obj in sorted(methods, key=lambda item: item[0]): - output.write(self.docroutine(method_obj, method_name, class_name=class_name)) - - def docroutine( - self, - object: t.Any, - name: str | None = None, - class_name: str | None = None, - ) -> str: - real_name = name or getattr(object, "__name__", "unknown_routine") - output = io.StringIO() - - # Heading level based on context (class method vs standalone function) - heading_level = 5 if class_name else 3 - output.write(f"{'#' * heading_level} `{real_name}`\n\n") - - # Function/method signature - signature = format_signature(object, class_name=class_name) - if signature and signature != "(...)": - output.write(f"```python\n{real_name}{signature}\n```\n\n") - elif real_name: - output.write(f"`{real_name}(...)`\n\n") - - # Docstring content - doc_formatted = self._format_docstring(object) - 
output.write(doc_formatted) - - # Add a separator only if we're not at the end of a section - if class_name: - output.write("---\n\n") - - return output.getvalue() - - def _docproperty(self, prop: property, name: str, class_name: str | None = None) -> str: - output = io.StringIO() - output.write(f"##### `{name}`\n\n") - - # Get property type annotation - type_hint_str = "" - target_for_type = prop.fget if prop.fget else prop - if hasattr(target_for_type, "__annotations__"): - try: - return_annotation = t.get_type_hints(target_for_type).get("return") - if return_annotation: - annotation = str(return_annotation) - annotation = re.sub(r"\btyping\.", "", annotation) - if class_name: - annotation = re.sub(rf"\b{re.escape(class_name)}\.", "", annotation) - annotation = format_type_annotation(annotation) - safe_annotation = annotation.replace("`", r"\`").replace("<", r"\<") - type_hint_str = f"`{safe_annotation}`" - except Exception as e: # noqa: BLE001 - logger.warning( - "Warning: Error getting type hint for property %s: %s", - getattr(prop, "__name__", "unknown_property"), - e, - ) - - # Show type compactly - if type_hint_str: - output.write(f"**Type:** {type_hint_str} *(property)*\n\n") - else: - output.write("*(property)*\n\n") - - # Documentation - doc_obj = prop - raw_doc = get_raw_doc(prop) - if not raw_doc and prop.fget: - doc_obj = prop.fget - - doc_formatted = self._format_docstring(doc_obj) - output.write(doc_formatted) - - # Property details (compact) - details = [] - if prop.fget: - details.append("getter") - if prop.fset: - details.append("setter") - if prop.fdel: - details.append("deleter") - if details: - output.write(f"*Has: {', '.join(details)}*\n\n") - - # Add separator - output.write("---\n\n") - - return output.getvalue() - - def link(self, text: str, url: str) -> str: - return f"[{text}]({url})" - - def strong(self, text: str) -> str: - return f"**{text}**" - - def emphasis(self, text: str) -> str: - return f"*{text}*" - - def escape(self, text: 
str) -> str: - return text.replace("<", r"\<") - - def preformat(self, text: str) -> str: - return f"```\n{text}\n```" - - def multicolumn(self, list_items: list[t.Any], fmt: t.Callable) -> str: - return "\n".join(f"- {fmt(item)}" for item in list_items) - - def grey(self, text: str) -> str: - return text - - def write(self, *args, **kwargs): - pass - - -# --- Main execution logic --- -def generate_mdx_docs( - module_paths: list[str], - output_dir: str, - auth_group: str | None = None, - project_root: str | None = None, -): - """Generates clean, traditional MDX documentation for Python modules.""" - output_path = Path(output_dir) - output_path.mkdir(parents=True, exist_ok=True) - logger.info("Output directory: %s", output_path.resolve()) - - mdx_formatter = MDXDoc(auth_group=auth_group) - _project_root_path = Path(project_root).resolve() if project_root else Path.cwd() - _setup_sys_path(_project_root_path) - - processed_modules = set() - generated_files = [] - - for path_str in module_paths: - path = Path(path_str).resolve() - logger.info("Processing path: %s", path) - if path.is_file() and path.suffix == ".py" and path.name != "__init__.py": - _process_file(path, mdx_formatter, processed_modules, generated_files, output_path) - elif path.is_dir(): - _process_directory(path, mdx_formatter, processed_modules, generated_files, output_path) - else: - logger.warning("Warning: Path is not Python file/directory: %s", path) - - _write_docs_json(generated_files, output_path) - - -def _setup_sys_path(project_root: Path): - """Sets up the system path for module imports.""" - if str(project_root) not in sys.path: - sys.path.insert(0, str(project_root)) - if Path.cwd() not in sys.path: - sys.path.insert(0, Path.cwd()) - - -def _process_file(path, mdx_formatter, processed_modules, generated_files, output_path): - """Processes a single Python file.""" - module_name = _determine_module_name(path) - if not module_name: - return - - logger.info(" Attempting to import module: %s", 
module_name) - try: - module = import_module(module_name) - if module.__name__ in processed_modules: - return - logger.info(" Generating MDX for module: %s", module.__name__) - _generate_mdx(module, mdx_formatter, processed_modules, generated_files, output_path) - except ImportError: - logger.exception("Error importing module '%s'", module_name) - except Exception: - logger.exception("Error processing module %s", module_name) - - -def _process_directory(path, mdx_formatter, processed_modules, generated_files, output_path): - """Processes a directory as a package.""" - logger.info(" Processing directory as package: %s", path.name) - package_name = path.name - if str(path.parent) not in sys.path: - sys.path.insert(0, str(path.parent)) - for _, modname, _ in pkgutil.walk_packages([str(path)], prefix=f"{package_name}."): - if modname in processed_modules: - continue - logger.info(" Attempting to import package module: %s", modname) - try: - module = import_module(modname) - logger.info(" Generating MDX for module: %s", module.__name__) - _generate_mdx(module, mdx_formatter, processed_modules, generated_files, output_path) - except ImportError: - logger.exception("Error importing package module %s", modname) - except Exception: - logger.exception("Error processing package module %s", modname) - - -def _determine_module_name(path): - """Determines the module name for a given file path.""" - try: - best_match_len = -1 - module_name = None - for p_str in sys.path: - p = Path(p_str).resolve() - try: - rel_path = path.relative_to(p) - if ".." 
not in rel_path.parts: - current_len = len(p.parts) - if current_len > best_match_len: - best_match_len = current_len - module_name_parts = [*list(rel_path.parts[:-1]), path.stem] - module_name = ".".join(part for part in module_name_parts if part) - except ValueError: - continue - if not module_name: - module_name = path.stem - if str(path.parent) not in sys.path: - sys.path.insert(0, str(path.parent)) - else: - return module_name - except Exception as e: # noqa: BLE001 - logger.warning("Warning: Error determining module name for %s: %s", path, e) - return None - - -def _generate_mdx(module, mdx_formatter, processed_modules, generated_files, output_path): - """Generates MDX documentation for a module.""" - mdx_content = mdx_formatter.docmodule(module) - output_filename = f"{module.__name__.replace('.', '/')}.mdx" - output_file = output_path / output_filename - output_file.parent.mkdir(parents=True, exist_ok=True) - with Path.open(output_file, "w", encoding="utf-8") as f: - f.write(mdx_content) - logger.info(" -> Wrote %s", output_file) - processed_modules.add(module.__name__) - relative_path = str(output_file.relative_to(output_path.parent)).replace(".mdx", "") - generated_files.append(relative_path) - - -def _write_docs_json(generated_files, output_path): - """Writes the docs.json file.""" - docs_json_path = output_path / "docs.json" - nav_file_paths = [ - str(file_path).replace(str(output_path) + "/", "") for file_path in generated_files - ] - for i, file_path in enumerate(nav_file_paths): - if "dreadnode/" in file_path: - parts = file_path.split("/") - if len(parts) > 1 and parts[0] == "dreadnode": - parts.insert(1, "library") - nav_file_paths[i] = "/".join(parts) - nested_pages = _build_nested_structure(nav_file_paths) - docs_structure = {"group": "API Reference", "pages": nested_pages} - with Path.open(docs_json_path, "w", encoding="utf-8") as f: - json.dump(docs_structure, f, indent=2) - logger.info("Generated navigation structure written to %s", 
docs_json_path) - - -def _build_nested_structure(file_paths, base_prefix="dreadnode/library"): - """Builds a nested structure for the navigation based on file paths.""" - nested_structure = {} - for file_path in file_paths: - if file_path.startswith(base_prefix): - relative_path = file_path[len(base_prefix) + 1 :] - parts = relative_path.split("/") - current_level = nested_structure - for part in parts[:-1]: - if part not in current_level: - current_level[part] = {} - elif isinstance(current_level[part], str): - current_level[part] = {"index": current_level[part]} - current_level = current_level[part] - if parts[-1] in current_level and isinstance(current_level[parts[-1]], dict): - if "dreadnode/" in file_path: - modified_path = file_path.replace("dreadnode/", "strikes/") - current_level[parts[-1]]["index"] = modified_path - else: - current_level[parts[-1]]["index"] = file_path - elif "dreadnode/" in file_path: - modified_path = file_path.replace("dreadnode/", "strikes/") - current_level[parts[-1]] = modified_path - else: - current_level[parts[-1]] = file_path - - def convert_to_list(structure): - result = [] - for key, value in sorted(structure.items()): - if isinstance(value, dict): - result.append({"group": key, "pages": convert_to_list(value)}) - else: - result.append(value) - return result - - return convert_to_list(nested_structure) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate simple, clean MDX documentation for Python modules." - ) - parser.add_argument("modules", nargs="+", help="Paths to Python files or package directories.") - parser.add_argument( - "-o", "--output-dir", default="docs", help="Directory to write MDX files (default: ./docs)." - ) - parser.add_argument( - "-p", - "--project-root", - default=None, - help="Optional path to the project root directory (assists with import resolution). 
Defaults to CWD.", - ) - parser.add_argument( - "-g", - "--auth-group", - choices=["crucible", "strikes", "spyglass"], - help="Optional authentication group to add to frontmatter.", - ) - - args = parser.parse_args() - generate_mdx_docs(args.modules, args.output_dir, args.auth_group, args.project_root) - logger.info("MDX generation complete.")