Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
3862e6f
feat: schedules
echarles Jun 8, 2026
bc4ea78
exec: examples
echarles Jun 8, 2026
2fa1e85
exec
echarles Jun 8, 2026
c458536
time
echarles Jun 8, 2026
184fb42
exec
echarles Jun 8, 2026
ebf6cf5
timeline
echarles Jun 9, 2026
85faea9
schedule
echarles Jun 9, 2026
842badf
env
echarles Jun 10, 2026
ed1193f
api key
echarles Jun 10, 2026
5f880ce
api keys
echarles Jun 11, 2026
101ac1d
api keys
echarles Jun 11, 2026
59509c8
fix: packaging
echarles Jun 14, 2026
3a065b8
api
echarles Jun 14, 2026
20d81a6
api keys
echarles Jun 16, 2026
afede80
refactor
echarles Jun 17, 2026
69ff4ad
evals
echarles Jun 17, 2026
4c6cc30
evals
echarles Jun 17, 2026
dbff36c
evals
echarles Jun 18, 2026
dbf2555
whoami
echarles Jun 18, 2026
2370a1e
refactor
echarles Jun 18, 2026
0519730
bump
echarles Jun 18, 2026
41f7219
bump
echarles Jun 19, 2026
bc540db
Automatic application of license header
github-actions[bot] Jun 19, 2026
47766d7
evals
echarles Jun 19, 2026
1d07cbd
bump
echarles Jun 19, 2026
1f241b3
Automatic application of license header
github-actions[bot] Jun 19, 2026
6d56408
bump
echarles Jun 19, 2026
ad3d258
runner
echarles Jun 19, 2026
8b31051
Automatic application of license header
github-actions[bot] Jun 19, 2026
d1dc4ff
lcal runner
echarles Jun 19, 2026
3914aa4
bump
echarles Jun 19, 2026
1baa7e3
runner
echarles Jun 19, 2026
a7e0dee
runner
echarles Jun 19, 2026
9996beb
runner
echarles Jun 19, 2026
c4b8196
runner
echarles Jun 19, 2026
1d49624
report
echarles Jun 20, 2026
102b98b
Automatic application of license header
github-actions[bot] Jun 20, 2026
11e3538
is public
echarles Jun 22, 2026
1991aaf
wip
echarles Jun 22, 2026
a3704a3
principal
echarles Jun 22, 2026
de54d81
evals
echarles Jun 24, 2026
0a0dc81
usage
echarles Jun 24, 2026
5900368
Automatic application of license header
github-actions[bot] Jun 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
724 changes: 0 additions & 724 deletions API.md

This file was deleted.

444 changes: 0 additions & 444 deletions CLAUDE.md

This file was deleted.

27 changes: 25 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Become a Sponsor](https://img.shields.io/static/v1?label=Become%20a%20Sponsor&message=%E2%9D%A4&logo=GitHub&style=flat&color=1ABC9C)](https://github.com/sponsors/datalayer)

# ☰ Datalayer Core
# ☰ ☢️ Datalayer Core

<p align="center">
<strong>Python and Typescript libraries for Datalayer</strong>
Expand Down Expand Up @@ -118,7 +118,7 @@ datalayer runtime list
datalayer runtime create ai-env --given-name my-runtime-123

# Execute a script in a runtime
datalayer runtime exec my-script.py --runtime <runtime-id>
datalayer runtime exec my-script.py --agent <agent-id>

# Create a snapshot from a runtime but do not terminate the runtime
datalayer snapshots create <pod-name> my-snapshot 'AI work!' False
Expand Down Expand Up @@ -151,6 +151,29 @@ datalayer usage team-allocate-member --team-uid <team_uid> --member-uid <member_
datalayer usage team-revoke-member --team-uid <team_uid> --member-uid <member_uid> --amount 5
```

### 5. Evals CLI (Multi-Agentspec)

Use comma-separated agentspec ids to create one experiment per agentspec variant:

```bash
# Creates one experiment per agentspec in the list
datalayer evals experiments create my-exp \
--evalset-id <evalset_id> \
--agent-spec-ids example-evals,example-evals-nocodemode,example-custom
```

Generate a comparison report:

```bash
datalayer evals report <evalset_id> --run-limit 50 --export
```

How to interpret grouped comparisons in the report:

- `Within-Agentspec Pairwise Latest-Pass Deltas`: compares experiments using the same agentspec id.
- `Cross-Agentspec Pairwise Latest-Pass Deltas`: compares experiments using different agentspec ids.
- Pairwise sections compare every pair of the selected experiments, so they also cover runs with more than two agentspecs.

## Examples

### Python Examples
Expand Down
2 changes: 1 addition & 1 deletion datalayer_core/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

"""Datalayer Core version information."""

__version__ = "1.1.24"
__version__ = "1.1.38"
40 changes: 40 additions & 0 deletions datalayer_core/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright (c) 2023-2025 Datalayer, Inc.
# Distributed under the terms of the Modified BSD License.

"""Runtime and agent execution helpers."""

from datalayer_core.agents.agent_cloud import RuntimeService
from datalayer_core.agents.agent_local import (
DEFAULT_LOCAL_AGENT_NAME,
DEFAULT_LOCAL_HOST,
DEFAULT_LOCAL_LOG_LEVEL,
DEFAULT_LOCAL_PROTOCOL,
LocalAgentRuntime,
ensure_local_agent,
start_local_agent_runtime,
terminate_local_agent_runtime,
)
from datalayer_core.agents.utils import (
compute_time_reservation_minutes,
create_cloud_agent_runtime,
resolve_environment_burning_rate,
teardown_agent_execution_resources,
terminate_cloud_agent_runtime,
)
Comment on lines +7 to +23

# Public API of the package: re-exports the cloud runtime service, the local
# agent runtime helpers and defaults, and the shared runtime utilities that
# are imported at the top of this module.
__all__ = [
"RuntimeService",
"LocalAgentRuntime",
"DEFAULT_LOCAL_AGENT_NAME",
"DEFAULT_LOCAL_HOST",
"DEFAULT_LOCAL_LOG_LEVEL",
"DEFAULT_LOCAL_PROTOCOL",
"ensure_local_agent",
"start_local_agent_runtime",
"terminate_local_agent_runtime",
"resolve_environment_burning_rate",
"compute_time_reservation_minutes",
"create_cloud_agent_runtime",
"terminate_cloud_agent_runtime",
"teardown_agent_execution_resources",
]
Comment on lines +25 to +40
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from datalayer_core.mixins.sandbox_snapshots import SandboxSnapshotsMixin
from datalayer_core.mixins.runtimes import RuntimesMixin
from datalayer_core.models import ExecutionResponse
from datalayer_core.models.sandbox_snapshot import SandboxSnapshotModel
from datalayer_core.models.runtime import RuntimeModel
from datalayer_core.runtimes.sandbox_snapshot import (
SandboxSnapshotModel,
from datalayer_core.sandboxes.code_sandbox_snapshots import (
as_code_sandbox_snapshots,
create_snapshot,
)
Expand Down Expand Up @@ -60,6 +60,7 @@ def __init__(
run_url: str = DEFAULT_DATALAYER_RUN_URL,
iam_url: Optional[str] = None,
token: Optional[str] = None,
api_key: Optional[str] = None,
pod_name: Optional[str] = None,
ingress: Optional[str] = None,
reservation_id: Optional[str] = None,
Expand All @@ -86,6 +87,8 @@ def __init__(
Datalayer IAM server URL. If not provided, defaults to run_url.
token : Optional[str]
Authentication token (can also be set via DATALAYER_API_KEY env var).
api_key : Optional[str]
Authentication API key alias for ``token``.
pod_name : Optional[str]
Name of the pod running the runtime.
ingress : Optional[str]
Expand All @@ -110,7 +113,7 @@ def __init__(
time_reservation=time_reservation,
run_url=run_url,
iam_url=iam_url or run_url,
token=token,
token=token or api_key,
external_token=None,
pod_name=pod_name,
ingress=ingress,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright (c) 2023-2025 Datalayer, Inc.
# Distributed under the terms of the Modified BSD License.

# Copyright (c) 2023-2026 Datalayer, Inc.
# Distributed under the terms of the Modified BSD License.

Expand Down Expand Up @@ -122,13 +125,14 @@ def start_local_agent_runtime(
protocol: str = DEFAULT_LOCAL_PROTOCOL,
log_level: str = DEFAULT_LOCAL_LOG_LEVEL,
wait: bool = True,
disable_tool_approvals: bool = False,
) -> LocalAgentRuntime:
"""Launch a local ``agent-runtimes`` server as a subprocess.

Parameters
----------
agent_spec_id : str
Agent spec id to boot the runtime with.
Agentspec id to boot the runtime with.
agent_name : str
Registered agent name/id served by the runtime.
host : str
Expand Down Expand Up @@ -172,6 +176,8 @@ def start_local_agent_runtime(
"--log-level",
log_level,
]
if disable_tool_approvals:
command.append("--disable-tool-approvals")

runtime_env, mapped_targets = build_agent_runtime_env()
if mapped_targets:
Expand Down Expand Up @@ -238,6 +244,7 @@ def ensure_local_agent(
enable_skills: bool = True,
description: Optional[str] = None,
timeout: int = 120,
disable_tool_approvals: bool = False,
) -> None:
"""Ensure a local agent with the expected transport is registered.

Expand Down Expand Up @@ -298,6 +305,7 @@ def ensure_local_agent(
"agent_spec_id": agent_spec_id,
"enable_skills": enable_skills,
"tools": [],
"disableToolApprovals": disable_tool_approvals,
}
try:
response = requests.post(
Expand Down Expand Up @@ -456,6 +464,129 @@ def extract_vercel_stream_text(raw: str) -> str:
return "".join(text_parts).strip()


def _coerce_usage_payload(candidate: Any) -> dict[str, Any]:
if not isinstance(candidate, dict) or not candidate:
return {}
nested = candidate.get("usage")
if isinstance(nested, dict) and nested:
merged = dict(nested)
for key, value in candidate.items():
if key == "usage":
continue
merged.setdefault(str(key), value)
return merged
return dict(candidate)


def _usage_payload_score(payload: dict[str, Any]) -> int:
if not payload:
return 0
token_keys = {
"prompt_tokens",
"promptTokens",
"input_tokens",
"inputTokens",
"completion_tokens",
"completionTokens",
"output_tokens",
"outputTokens",
"total_tokens",
"totalTokens",
"tokens_total",
"token_total",
}
score = len(payload)
if any(key in payload for key in token_keys):
score += 100
if any(
key in payload
for key in (
"credits_consumed",
"creditsConsumed",
"credits",
"total_credits",
"cost_credits",
)
):
score += 10
return score


def extract_vercel_stream_usage(raw: str) -> dict[str, Any]:
    """Extract best-effort usage metadata from a Vercel AI SSE stream.

    Each ``data:`` line is decoded as JSON. Usage candidates are collected from
    the event's ``messageMetadata`` (``pydantic_ai``, ``pydanticAI``,
    ``usage``) and from the event itself (``pydantic_ai_usage``,
    ``pydantic_ai``, ``usage``). The candidate with the highest score wins;
    on ties the first one seen is kept.

    Parameters
    ----------
    raw : str
        Raw response body of the chat request.

    Returns
    -------
    dict[str, Any]
        The highest-scoring usage payload, or an empty dict when the stream
        carries no usable usage information.
    """
    best: dict[str, Any] = {}
    best_score = 0
    for line in raw.splitlines():
        # The SSE format makes the space after ``data:`` optional, so accept
        # both ``data: {...}`` and ``data:{...}``.
        if not line.startswith("data:"):
            continue
        payload = line[5:].strip()
        if not payload or payload == "[DONE]":
            continue
        try:
            event = json.loads(payload)
        except json.JSONDecodeError:
            # Best effort: ignore lines that are not valid JSON.
            continue
        if not isinstance(event, dict):
            continue

        candidates: list[dict[str, Any]] = []
        message_metadata = event.get("messageMetadata")
        if isinstance(message_metadata, dict):
            candidates.extend(
                [
                    _coerce_usage_payload(message_metadata.get("pydantic_ai")),
                    _coerce_usage_payload(message_metadata.get("pydanticAI")),
                    _coerce_usage_payload(message_metadata.get("usage")),
                ]
            )
        candidates.extend(
            [
                _coerce_usage_payload(event.get("pydantic_ai_usage")),
                _coerce_usage_payload(event.get("pydantic_ai")),
                _coerce_usage_payload(event.get("usage")),
            ]
        )
        for candidate in candidates:
            score = _usage_payload_score(candidate)
            # Strict comparison keeps the earliest candidate on equal scores.
            if score > best_score:
                best = candidate
                best_score = score
    return best


def _vercel_ai_error_message(raw: str) -> Optional[str]:
"""Detect a non-stream error body returned with an HTTP 200 status.

The ``agent-runtimes`` server answers an unknown agent route with HTTP 200
and a JSON error body (for example
``{"error": "Agent '...' not found", "message": "No agent registered ..."}``)
instead of an SSE stream. Such a body must NOT be treated as a successful
completion, otherwise route-candidate fallback stops at the first wrong
route and an empty answer is recorded.

Returns
-------
Optional[str]
The error message when the body is an error payload (or an empty body),
otherwise ``None`` when the body is a genuine SSE stream.
"""
text = (raw or "").strip()
if not text:
return "Empty response body"
# A genuine Vercel AI response is an SSE stream of ``data:`` lines.
if "data:" in text:
return None
try:
payload = json.loads(text)
except json.JSONDecodeError:
return None
if isinstance(payload, dict):
error = payload.get("error") or payload.get("message")
if error:
return str(error)
return None


def _post_vercel_ai_chat(
*,
endpoint: str,
Expand Down Expand Up @@ -528,13 +659,36 @@ def _post_vercel_ai_chat(
}

output_text = extract_vercel_stream_text(raw)
return {
usage = extract_vercel_stream_usage(raw)
if not output_text:
error_message = _vercel_ai_error_message(raw)
if error_message is not None:
message_text = (
f"{source_label} chat returned no output: {error_message}"
)
return {
"status": "failed",
"output": {"text": "", "raw_stream_excerpt": raw[:2000]},
"failure_cause": {
"stage": "runtime_execution",
"type": "runtime_agent_unavailable",
"message": message_text,
"detail_excerpt": raw[:2000] or message_text,
"execution_url": endpoint,
},
}
output: dict[str, Any] = {
"text": output_text,
"raw_stream_excerpt": raw[:2000],
}
result: dict[str, Any] = {
"status": "completed",
"output": {
"text": output_text,
"raw_stream_excerpt": raw[:2000],
},
"output": output,
}
if usage:
output["pydantic_ai_usage"] = usage
result["usage"] = usage
return result


def run_local_agent_chat(
Expand Down Expand Up @@ -606,7 +760,7 @@ def runtime_route_candidates(

The ``agent-runtimes`` server inside a cloud runtime may register its agent
under different names depending on how it was launched. Trying a few known
candidates (explicit agent name, agent spec id, pod name, then the default
candidates (explicit agent name, agentspec id, pod name, then the default
route) makes cloud execution resilient.
"""
candidates: list[str] = []
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright (c) 2023-2025 Datalayer, Inc.
# Distributed under the terms of the Modified BSD License.

# Copyright (c) 2023-2026 Datalayer, Inc.
# Distributed under the terms of the Modified BSD License.

Expand Down Expand Up @@ -144,9 +147,9 @@ def create_cloud_agent_runtime(
name : Optional[str]
Optional runtime name.
agent_spec_id : Optional[str]
Registered agent spec id (ignored when ``agent_spec`` is provided).
Registered agentspec id (ignored when ``agent_spec`` is provided).
agent_spec : Optional[dict[str, Any]]
Inline agent spec payload (takes precedence over ``agent_spec_id``).
Inline agentspec payload (takes precedence over ``agent_spec_id``).
credits_limit : Optional[float]
Target credits budget used to derive ``time_reservation`` when the
latter is not supplied.
Expand Down Expand Up @@ -284,15 +287,15 @@ def teardown_agent_execution_resources(

if target == "local":
if local_base_url and token and local_agent_name:
from datalayer_core.runtimes.local import delete_local_agent
from datalayer_core.agents.agent_local import delete_local_agent

result["local_agent_deleted"] = delete_local_agent(
base_url=local_base_url,
token=token,
agent_name=local_agent_name,
)
if local_runtime is not None:
from datalayer_core.runtimes.local import terminate_local_agent_runtime
from datalayer_core.agents.agent_local import terminate_local_agent_runtime

terminate_local_agent_runtime(local_runtime)
result["local_runtime_terminated"] = True
Expand Down
Loading