diff --git a/.agents/skills/nemoclaw-user-deploy-remote/evals/evals.json b/.agents/skills/nemoclaw-user-deploy-remote/evals/evals.json index 3d76a9ca82..41af478f9e 100644 --- a/.agents/skills/nemoclaw-user-deploy-remote/evals/evals.json +++ b/.agents/skills/nemoclaw-user-deploy-remote/evals/evals.json @@ -3,168 +3,72 @@ "id": "docs-deployment-deploy-to-remote-gpu-001", "question": "I'm deploying NemoClaw to a remote GPU instance. Help me move the sandboxed assistant off my local machine so I can support persistent or GPU-backed operation.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user move the sandboxed assistant off my local machine and gives enough concrete guidance, decision criteria, verification steps, or risk framing to support persistent or GPU-backed operation.", - "expected_behavior": [ - "The output directly addresses the user's situation: deploying NemoClaw to a remote GPU instance.", - "The AI coding assistant loads the expected_skill and SKILL.md", - "The output helps the user move the sandboxed assistant off my local machine with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to support persistent or GPU-backed operation.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the SKILL.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user move the sandboxed assistant off my local machine and gives enough concrete guidance, decision criteria, verification steps, or risk framing to support persistent or GPU-backed operation." }, { "id": "docs-deployment-deploy-to-remote-gpu-002", "question": "I'm using the legacy Brev compatibility flow. 
Help me understand what the flow still does and where it is deprecated so I can avoid depending on an outdated path blindly.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user understand what the flow still does and where it is deprecated and gives enough concrete guidance, decision criteria, verification steps, or risk framing to avoid depending on an outdated path blindly.", - "expected_behavior": [ - "The output directly addresses the user's situation: using the legacy Brev compatibility flow.", - "The AI coding assistant loads the expected_skill and SKILL.md", - "The output helps the user understand what the flow still does and where it is deprecated with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to avoid depending on an outdated path blindly.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the SKILL.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user understand what the flow still does and where it is deprecated and gives enough concrete guidance, decision criteria, verification steps, or risk framing to avoid depending on an outdated path blindly." }, { "id": "docs-deployment-deploy-to-remote-gpu-003", "question": "I'm after remote deployment succeeds. 
Help me find the connection, operation, and recovery details so I can operate the sandbox after initial setup.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user find the connection, operation, and recovery details and gives enough concrete guidance, decision criteria, verification steps, or risk framing to operate the sandbox after initial setup.", - "expected_behavior": [ - "The output directly addresses the user's situation: after remote deployment succeeds.", - "The AI coding assistant loads the expected_skill and SKILL.md", - "The output helps the user find the connection, operation, and recovery details with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to operate the sandbox after initial setup.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the SKILL.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user find the connection, operation, and recovery details and gives enough concrete guidance, decision criteria, verification steps, or risk framing to operate the sandbox after initial setup." }, { "id": "docs-deployment-brev-web-ui-001", "question": "I'm launching NemoClaw from the Brev web UI. 
Help me avoid local CLI setup and local GPU requirements so I can start a hosted sandbox quickly.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user avoid local CLI setup and local GPU requirements and gives enough concrete guidance, decision criteria, verification steps, or risk framing to start a hosted sandbox quickly.", - "expected_behavior": [ - "The output directly addresses the user's situation: launching NemoClaw from the Brev web UI.", - "The AI coding assistant loads the expected_skill and references/brev-web-ui.md", - "The output helps the user avoid local CLI setup and local GPU requirements with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to start a hosted sandbox quickly.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/brev-web-ui.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user avoid local CLI setup and local GPU requirements and gives enough concrete guidance, decision criteria, verification steps, or risk framing to start a hosted sandbox quickly." }, { "id": "docs-deployment-brev-web-ui-002", "question": "I'm reviewing hosted launch choices. 
Help me understand each web UI option before creating the instance so I can choose settings that match my expected sandbox workflow.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user understand each web UI option before creating the instance and gives enough concrete guidance, decision criteria, verification steps, or risk framing to choose settings that match my expected sandbox workflow.", - "expected_behavior": [ - "The output directly addresses the user's situation: reviewing hosted launch choices.", - "The AI coding assistant loads the expected_skill and references/brev-web-ui.md", - "The output helps the user understand each web UI option before creating the instance with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to choose settings that match my expected sandbox workflow.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/brev-web-ui.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user understand each web UI option before creating the instance and gives enough concrete guidance, decision criteria, verification steps, or risk framing to choose settings that match my expected sandbox workflow." }, { "id": "docs-deployment-brev-web-ui-003", "question": "I'm the hosted sandbox is created. 
Help me confirm where to connect and how to start using it so I can move from provisioning to actual agent work.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user confirm where to connect and how to start using it and gives enough concrete guidance, decision criteria, verification steps, or risk framing to move from provisioning to actual agent work.", - "expected_behavior": [ - "The output directly addresses the user's situation: the hosted sandbox is created.", - "The AI coding assistant loads the expected_skill and references/brev-web-ui.md", - "The output helps the user confirm where to connect and how to start using it with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to move from provisioning to actual agent work.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/brev-web-ui.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user confirm where to connect and how to start using it and gives enough concrete guidance, decision criteria, verification steps, or risk framing to move from provisioning to actual agent work." }, { "id": "docs-deployment-install-openclaw-plugins-001", "question": "I'm installing an OpenClaw plugin in a NemoClaw-managed sandbox. 
Help me add a new agent capability inside the sandbox so I can extend the assistant without weakening the host boundary.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user add a new agent capability inside the sandbox and gives enough concrete guidance, decision criteria, verification steps, or risk framing to extend the assistant without weakening the host boundary.", - "expected_behavior": [ - "The output directly addresses the user's situation: installing an OpenClaw plugin in a NemoClaw-managed sandbox.", - "The AI coding assistant loads the expected_skill and references/install-openclaw-plugins.md", - "The output helps the user add a new agent capability inside the sandbox with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to extend the assistant without weakening the host boundary.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/install-openclaw-plugins.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user add a new agent capability inside the sandbox and gives enough concrete guidance, decision criteria, verification steps, or risk framing to extend the assistant without weakening the host boundary." }, { "id": "docs-deployment-install-openclaw-plugins-002", "question": "I'm deciding where to install a plugin. 
Help me distinguish host environment changes from sandbox environment changes so I can modify the right filesystem and runtime.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user distinguish host environment changes from sandbox environment changes and gives enough concrete guidance, decision criteria, verification steps, or risk framing to modify the right filesystem and runtime.", - "expected_behavior": [ - "The output directly addresses the user's situation: deciding where to install a plugin.", - "The AI coding assistant loads the expected_skill and references/install-openclaw-plugins.md", - "The output helps the user distinguish host environment changes from sandbox environment changes with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to modify the right filesystem and runtime.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/install-openclaw-plugins.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user distinguish host environment changes from sandbox environment changes and gives enough concrete guidance, decision criteria, verification steps, or risk framing to modify the right filesystem and runtime." }, { "id": "docs-deployment-install-openclaw-plugins-003", "question": "I'm verifying a plugin installation. 
Help me confirm the agent can discover and use the plugin so I can trust that the capability works inside NemoClaw's security model.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user confirm the agent can discover and use the plugin and gives enough concrete guidance, decision criteria, verification steps, or risk framing to trust that the capability works inside NemoClaw's security model.", - "expected_behavior": [ - "The output directly addresses the user's situation: verifying a plugin installation.", - "The AI coding assistant loads the expected_skill and references/install-openclaw-plugins.md", - "The output helps the user confirm the agent can discover and use the plugin with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to trust that the capability works inside NemoClaw's security model.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/install-openclaw-plugins.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user confirm the agent can discover and use the plugin and gives enough concrete guidance, decision criteria, verification steps, or risk framing to trust that the capability works inside NemoClaw's security model." }, { "id": "docs-deployment-sandbox-hardening-001", "question": "I'm reviewing sandbox image hardening. 
Help me understand which container risks NemoClaw reduces so I can decide whether unattended agents are acceptable in my environment.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user understand which container risks NemoClaw reduces and gives enough concrete guidance, decision criteria, verification steps, or risk framing to decide whether unattended agents are acceptable in my environment.", - "expected_behavior": [ - "The output directly addresses the user's situation: reviewing sandbox image hardening.", - "The AI coding assistant loads the expected_skill and references/sandbox-hardening.md", - "The output helps the user understand which container risks NemoClaw reduces with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to decide whether unattended agents are acceptable in my environment.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/sandbox-hardening.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user understand which container risks NemoClaw reduces and gives enough concrete guidance, decision criteria, verification steps, or risk framing to decide whether unattended agents are acceptable in my environment." }, { "id": "docs-deployment-sandbox-hardening-002", "question": "I'm mapping NemoClaw to an organizational security baseline. 
Help me identify capability drops, least privilege, and runtime protections so I can document how the sandbox meets or misses required controls.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user identify capability drops, least privilege, and runtime protections and gives enough concrete guidance, decision criteria, verification steps, or risk framing to document how the sandbox meets or misses required controls.", - "expected_behavior": [ - "The output directly addresses the user's situation: mapping NemoClaw to an organizational security baseline.", - "The AI coding assistant loads the expected_skill and references/sandbox-hardening.md", - "The output helps the user identify capability drops, least privilege, and runtime protections with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to document how the sandbox meets or misses required controls.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/sandbox-hardening.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user identify capability drops, least privilege, and runtime protections and gives enough concrete guidance, decision criteria, verification steps, or risk framing to document how the sandbox meets or misses required controls." }, { "id": "docs-deployment-sandbox-hardening-003", "question": "I'm considering production use. 
Help me see the limitations and residual risks of the hardened image so I can avoid overstating what container hardening guarantees.", "expected_skill": "nemoclaw-user-deploy-remote", - "ground_truth": "A NemoClaw-specific answer that helps the user see the limitations and residual risks of the hardened image and gives enough concrete guidance, decision criteria, verification steps, or risk framing to avoid overstating what container hardening guarantees.", - "expected_behavior": [ - "The output directly addresses the user's situation: considering production use.", - "The AI coding assistant loads the expected_skill and references/sandbox-hardening.md", - "The output helps the user see the limitations and residual risks of the hardened image with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to avoid overstating what container hardening guarantees.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the references/sandbox-hardening.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user see the limitations and residual risks of the hardened image and gives enough concrete guidance, decision criteria, verification steps, or risk framing to avoid overstating what container hardening guarantees." } ] diff --git a/.agents/skills/nemoclaw-user-monitor-sandbox/evals/evals.json b/.agents/skills/nemoclaw-user-monitor-sandbox/evals/evals.json index 72aed43b0d..260e8ec64e 100644 --- a/.agents/skills/nemoclaw-user-monitor-sandbox/evals/evals.json +++ b/.agents/skills/nemoclaw-user-monitor-sandbox/evals/evals.json @@ -3,42 +3,18 @@ "id": "docs-monitoring-monitor-sandbox-activity-001", "question": "I'm monitoring sandbox activity. 
Help me understand what the agent and sandbox are doing now so I can detect unhealthy or unexpected behavior early.", "expected_skill": "nemoclaw-user-monitor-sandbox", - "ground_truth": "A NemoClaw-specific answer that helps the user understand what the agent and sandbox are doing now and gives enough concrete guidance, decision criteria, verification steps, or risk framing to detect unhealthy or unexpected behavior early.", - "expected_behavior": [ - "The output directly addresses the user's situation: monitoring sandbox activity.", - "The AI coding assistant loads the expected_skill and SKILL.md", - "The output helps the user understand what the agent and sandbox are doing now with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to detect unhealthy or unexpected behavior early.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the SKILL.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user understand what the agent and sandbox are doing now and gives enough concrete guidance, decision criteria, verification steps, or risk framing to detect unhealthy or unexpected behavior early." }, { "id": "docs-monitoring-monitor-sandbox-activity-002", "question": "I'm diagnosing a runtime failure. 
Help me use health, logs, and traces to locate the failing layer so I can separate host, gateway, sandbox, policy, and inference issues.", "expected_skill": "nemoclaw-user-monitor-sandbox", - "ground_truth": "A NemoClaw-specific answer that helps the user use health, logs, and traces to locate the failing layer and gives enough concrete guidance, decision criteria, verification steps, or risk framing to separate host, gateway, sandbox, policy, and inference issues.", - "expected_behavior": [ - "The output directly addresses the user's situation: diagnosing a runtime failure.", - "The AI coding assistant loads the expected_skill and SKILL.md", - "The output helps the user use health, logs, and traces to locate the failing layer with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to separate host, gateway, sandbox, policy, and inference issues.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the SKILL.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user use health, logs, and traces to locate the failing layer and gives enough concrete guidance, decision criteria, verification steps, or risk framing to separate host, gateway, sandbox, policy, and inference issues." }, { "id": "docs-monitoring-monitor-sandbox-activity-003", "question": "I'm collecting debugging evidence. 
Help me gather enough information without weakening controls so I can investigate safely and share useful diagnostics.", "expected_skill": "nemoclaw-user-monitor-sandbox", - "ground_truth": "A NemoClaw-specific answer that helps the user gather enough information without weakening controls and gives enough concrete guidance, decision criteria, verification steps, or risk framing to investigate safely and share useful diagnostics.", - "expected_behavior": [ - "The output directly addresses the user's situation: collecting debugging evidence.", - "The AI coding assistant loads the expected_skill and SKILL.md", - "The output helps the user gather enough information without weakening controls with NemoClaw-specific guidance rather than generic advice.", - "The output gives enough concrete guidance, decision criteria, verification steps, or risk framing for the user to investigate safely and share useful diagnostics.", - "The output avoids inventing unsupported NemoClaw behavior.", - "The output follows progressive disclosure: it answers the current request without dumping unrelated details other than the expected_skill and the SKILL.md file." - ] + "ground_truth": "A NemoClaw-specific answer that helps the user gather enough information without weakening controls and gives enough concrete guidance, decision criteria, verification steps, or risk framing to investigate safely and share useful diagnostics." } ] diff --git a/skills/nemoclaw-user-deploy-remote/BENCHMARK.md b/skills/nemoclaw-user-deploy-remote/BENCHMARK.md new file mode 100644 index 0000000000..82914876a1 --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/BENCHMARK.md @@ -0,0 +1,70 @@ +# Evaluation Report + +Evaluation of the `nemoclaw-user-deploy-remote` skill before publication through NVSkills-Eval. + +This benchmark summarizes 3-Tier Evaluation from NVSkills-Eval results for the skill. 
The goal is to document whether the skill is safe, discoverable, effective, and useful for agents before it is published for broader workflow use. + +## Evaluation Summary + +- Skill: `nemoclaw-user-deploy-remote` +- Evaluation date: 2026-05-28 +- NVSkills-Eval profile: `external` +- Overall verdict: FAIL +- Tier 3 live agent evaluation: not available in this report + +## Agents Used + +- Tier 3 agent details were not available in this report. + +## Metrics Used + +Reported benchmark dimensions: + +- Security: checks whether skill-assisted execution avoids unsafe behavior such as secret leakage, destructive commands, or unauthorized access. +- Correctness: checks whether the agent follows the expected workflow and produces the correct final output. +- Discoverability: checks whether the agent loads the skill when relevant and avoids using it when irrelevant. +- Effectiveness: checks whether the agent performs measurably better with the skill than without it. +- Efficiency: checks whether the agent uses fewer tokens and avoids redundant work. + +Underlying evaluation signals used in this run: + +- No Tier 3 evaluation signal details were available in this report. + +## Test Tasks + +Tier 3 evaluation task details were not available in this report. + +## Results + +Tier 3 dimension rollup was not available in this report. + +## Tier 1: Static Validation Summary + +Tier 1 validation passed with observations. NVSkills-Eval ran 9 checks and found 13 total findings. 
+ +Top findings: + +- MEDIUM QUALITY/quality_correctness: SKILL_SPEC recommended field missing: 'metadata.author' (`skills/nemoclaw-user-deploy-remote/SKILL.md`) +- MEDIUM QUALITY/quality_correctness: SKILL_SPEC recommended field missing: 'metadata.tags' (`skills/nemoclaw-user-deploy-remote/SKILL.md`) +- MEDIUM QUALITY/quality_efficiency: Deeply nested references in brev-web-ui.md (`skills/nemoclaw-user-deploy-remote/SKILL.md`) +- MEDIUM SCHEMA/body_recommended_section: Missing recommended section: '## Instructions' (`skills/nemoclaw-user-deploy-remote/SKILL.md`) +- MEDIUM SCHEMA/body_recommended_section: Missing recommended section: '## Examples' (`skills/nemoclaw-user-deploy-remote/SKILL.md`) + +## Tier 2: Deduplication Summary + +Tier 2 validation reported findings. NVSkills-Eval ran 2 checks and found 2 total findings. + +Top findings: + +- HIGH DUPLICATE/duplicate: Duplicate content found within references/install-openclaw-plugins.md: + "## Network Access" in references/install-openclaw-plugins.md (lines 64-73) + vs "## Next Steps" in references/install-openclaw-plugins.md (lines 86-93) (`references/install-openclaw-plugins.md:64`) +- HIGH DUPLICATE/duplicate: Duplicate content found across SKILL.md and references/brev-web-ui.md and references/install-openclaw-plugins.md and references/sandbox-hardening.md: + "(preamble)" in SKILL.md (lines 1-3) + vs "(preamble)" in references/brev-web-ui.md (lines 1-2) + vs "(preamble)" in references/install-openclaw-plugins.md (lines 1-2) + vs "(preamble)" in references/sandbox-hardening.md (lines 1-2) (`SKILL.md:1`) + +## Publication Recommendation + +The skill should be reviewed before NVSkills-Eval publication. Skill owners should address the findings above and rerun NVSkills-Eval to refresh this benchmark. 
diff --git a/skills/nemoclaw-user-deploy-remote/SKILL.md b/skills/nemoclaw-user-deploy-remote/SKILL.md new file mode 100644 index 0000000000..d2ac40e834 --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/SKILL.md @@ -0,0 +1,177 @@ +--- +name: "nemoclaw-user-deploy-remote" +description: "Explains how to run NemoClaw on a remote GPU instance, including the deprecated Brev compatibility path and the preferred installer plus onboard flow. Use when deploying NemoClaw to a remote VM, onboarding a Brev instance, or migrating away from the legacy `nemoclaw deploy` wrapper. Trigger keywords - deploy nemoclaw remote gpu, nemoclaw brev cloud deployment, nemoclaw plugins, openclaw plugins, install openclaw plugin, nemoclaw onboard from dockerfile, nemoclaw brev web ui, nemoclaw getting started, brev quickstart, nvidia nemotron agent, nemoclaw sandbox hardening, container security, docker capabilities, process limits." +license: "Apache-2.0" +--- + + + + +# Deploy NemoClaw to a Remote GPU Instance + +## Gotchas + +- The `nemoclaw deploy` command is deprecated. +- On Brev, set `CHAT_UI_URL` in the launchable environment configuration so it is available when the installer builds the sandbox image. + +## Prerequisites + +- The [Brev CLI](https://brev.nvidia.com) installed and authenticated. +- A provider credential for the inference backend you want to use during onboarding. +- `HF_TOKEN` or `HUGGING_FACE_HUB_TOKEN` exported when your remote vLLM or Hugging Face workflow needs access to gated models. +- NemoClaw installed locally if you plan to use the deprecated `nemoclaw deploy` wrapper. Otherwise, install NemoClaw directly on the remote host after provisioning it. + +Run NemoClaw on a remote GPU instance through [Brev](https://brev.nvidia.com). +The preferred path is to provision the VM, run the standard NemoClaw installer on that host, and then run `nemoclaw onboard`. 
+ +## Quick Start + +If your Brev instance is already up and has already been onboarded with a sandbox, start with the standard sandbox chat flow: + +```console +$ nemoclaw my-assistant connect +$ openclaw tui +``` + +This gets you into the sandbox shell first and opens the OpenClaw chat UI right away. +If the VM is fresh, run the standard installer on that host and then run `nemoclaw onboard` before trying `nemoclaw my-assistant connect`. + +If you are connecting from your local machine and still need to provision the remote VM, you can still use `nemoclaw deploy <instance-name>` as the legacy compatibility path described below. + +## Deploy the Instance + +**Warning:** + +The `nemoclaw deploy` command is deprecated. +Prefer provisioning the remote host separately, then running the standard NemoClaw installer and `nemoclaw onboard` on that host. + +Create a Brev instance and run the legacy compatibility flow: + +```console +$ nemoclaw deploy <instance-name> +``` + +Replace `<instance-name>` with a name for your remote instance, for example `my-gpu-box`. +The sandbox created on the remote VM uses `NEMOCLAW_SANDBOX_NAME`, or `my-assistant` when the variable is unset. +Sandbox names must be lowercase, start with a letter, contain only letters, numbers, and internal hyphens, and end with a letter or number. +The deploy wrapper validates the sandbox name before it provisions the Brev instance, opens SSH, or starts the remote installer. + +The legacy compatibility flow performs the following steps on the VM: + +1. Installs Docker and the NVIDIA Container Toolkit if a GPU is present. +2. Installs the OpenShell CLI. +3. Runs `nemoclaw onboard` (the setup wizard) to create the gateway, register providers, and launch the sandbox. +4. Starts optional host auxiliary services (for example the cloudflared tunnel) when `cloudflared` is available. Channel messaging is configured during onboarding and runs through OpenShell-managed processes, not through `nemoclaw tunnel start`. 
+ +By default, the compatibility wrapper asks Brev to provision on `gcp`. Override this with `NEMOCLAW_BREV_PROVIDER` if you need a different Brev cloud provider. +If you export `HF_TOKEN` or `HUGGING_FACE_HUB_TOKEN`, the wrapper forwards those values to the VM so remote setup can pull gated Hugging Face model repositories. + +## Connect to the Remote Sandbox + +After deployment finishes, the deploy command opens an interactive shell inside the remote sandbox. +To reconnect after closing the session, run the command again: + +```console +$ nemoclaw deploy <instance-name> +``` + +## Monitor the Remote Sandbox + +SSH to the instance and run the OpenShell TUI to monitor activity and approve network requests: + +```console +$ ssh <instance-name> 'cd ~/nemoclaw && set -a && . .env && set +a && openshell term' +``` + +## Verify Inference + +Run a test agent prompt inside the remote sandbox: + +```console +$ openclaw agent --agent main -m "Hello from the remote sandbox" --session-id test +``` + +## Remote Dashboard Access + +The NemoClaw dashboard validates the browser origin against an allowlist baked +into the sandbox image at build time. By default the allowlist only contains +`http://127.0.0.1:18789`. When accessing the dashboard from a remote browser +(for example through a Brev public URL or an SSH port-forward), set +`CHAT_UI_URL` to the origin the browser will use **before** running setup: + +```console +$ export CHAT_UI_URL="https://openclaw0-<id>.brevlab.com" +$ nemoclaw deploy <instance-name> +``` + +For SSH port-forwarding, the origin is typically `http://127.0.0.1:18789` (the +default), so no extra configuration is needed. + +**Warning:** + +On Brev, set `CHAT_UI_URL` in the launchable environment configuration so it is +available when the installer builds the sandbox image. If `CHAT_UI_URL` is not +set on a headless host, the compatibility wrapper prints a warning. + +`NEMOCLAW_DISABLE_DEVICE_AUTH` is also evaluated at image build time. 
+When `CHAT_UI_URL` points at a non-loopback origin, NemoClaw disables OpenClaw device pairing in the generated sandbox configuration because browser-only remote users cannot complete terminal-based pairing. +Any device that can reach the configured dashboard origin can connect without pairing, so avoid exposing that origin on internet-reachable or shared-network deployments. + +## First-Run Readiness Budget + +On a remote GPU host, the first `nemoclaw onboard` typically does the slowest work of the lifecycle: the sandbox image is built locally and uploaded into the OpenShell gateway, which can stream hundreds of MiB over the VM's link before the readiness wait even starts. +The post-create readiness wait defaults to 180 seconds (`NEMOCLAW_SANDBOX_READY_TIMEOUT`), which is sized for warm-cache, workstation-class onboarding and can be exceeded on: + +- DGX Station first runs with large quantised models (70B+ parameter footprints, NVFP4 weights). +- Cloud VMs where the local image-build cache is cold and the upload runs over the public network. +- Hosts onboarding the Brave Web Search preset on the first run (the egress policy stack adds boot work). + +Raise the budget before re-running onboard: + +```console +$ export NEMOCLAW_SANDBOX_READY_TIMEOUT=600 +$ nemoclaw onboard +``` + +If onboard ends with `Sandbox '<name>' was created but did not become ready within 180s`, onboard deletes the partially-created sandbox first, so the next attempt with the raised budget starts from a clean state. +For the inference-probe budget that runs earlier in onboarding, see `NEMOCLAW_LOCAL_INFERENCE_TIMEOUT` (use the `nemoclaw-user-configure-inference` skill). + +## Proxy Configuration + +NemoClaw routes sandbox traffic through a gateway proxy that defaults to `10.200.0.1:3128`. 
+If your network requires a different proxy, set `NEMOCLAW_PROXY_HOST` and `NEMOCLAW_PROXY_PORT` before onboarding: + +```console +$ export NEMOCLAW_PROXY_HOST=proxy.example.com +$ export NEMOCLAW_PROXY_PORT=8080 +$ nemoclaw onboard +``` + +These values are baked into the sandbox image at build time. +They are also forwarded into the runtime container during sandbox creation, so `/tmp/nemoclaw-proxy-env.sh` uses the same host and port that the image build used. +Only alphanumeric characters, dots, hyphens, and colons are accepted for the host. +The port must be numeric (0-65535). +Changing the proxy after onboarding requires re-running `nemoclaw onboard`. + +## GPU Configuration + +The deploy script uses the `NEMOCLAW_GPU` environment variable to select the GPU type. +The default value is `a2-highgpu-1g:nvidia-tesla-a100:1`. +Set this variable before running `nemoclaw deploy` to use a different GPU configuration: + +```console +$ export NEMOCLAW_GPU="a2-highgpu-1g:nvidia-tesla-a100:2" +$ nemoclaw deploy <instance-name> +``` + +## References + +- **Load [references/install-openclaw-plugins.md](references/install-openclaw-plugins.md)** when users ask how to install, build, or configure OpenClaw plugins under NemoClaw. Explains the difference between OpenClaw plugins and agent skills, and shows the current Dockerfile-based workflow for baking a plugin into a NemoClaw sandbox. +- **Load [references/brev-web-ui.md](references/brev-web-ui.md)** when a user wants to try NemoClaw without installing the CLI, or asks how to get started on Brev. Guides users through deploying NemoClaw with the Brev web UI. +- **Load [references/sandbox-hardening.md](references/sandbox-hardening.md)** when reviewing sandbox image security controls, auditing capability drops, or looking up the runtime resource limits. Includes the sandbox container image hardening reference, covering Docker capabilities and process limits. 
+ +## Related Skills + +- `nemoclaw-user-manage-sandboxes` — Set Up Messaging Channels (use the `nemoclaw-user-manage-sandboxes` skill) to connect Telegram, Discord, or Slack through OpenShell-managed channel messaging +- `nemoclaw-user-monitor-sandbox` — Monitor Sandbox Activity (use the `nemoclaw-user-monitor-sandbox` skill) for sandbox monitoring tools +- `nemoclaw-user-reference` — Commands (use the `nemoclaw-user-reference` skill) for the full `deploy` command reference diff --git a/skills/nemoclaw-user-deploy-remote/evals/evals.json b/skills/nemoclaw-user-deploy-remote/evals/evals.json new file mode 100644 index 0000000000..41af478f9e --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/evals/evals.json @@ -0,0 +1,74 @@ +[ + { + "id": "docs-deployment-deploy-to-remote-gpu-001", + "question": "I'm deploying NemoClaw to a remote GPU instance. Help me move the sandboxed assistant off my local machine so I can support persistent or GPU-backed operation.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user move the sandboxed assistant off my local machine and gives enough concrete guidance, decision criteria, verification steps, or risk framing to support persistent or GPU-backed operation." + }, + { + "id": "docs-deployment-deploy-to-remote-gpu-002", + "question": "I'm using the legacy Brev compatibility flow. Help me understand what the flow still does and where it is deprecated so I can avoid depending on an outdated path blindly.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user understand what the flow still does and where it is deprecated and gives enough concrete guidance, decision criteria, verification steps, or risk framing to avoid depending on an outdated path blindly." + }, + { + "id": "docs-deployment-deploy-to-remote-gpu-003", + "question": "My remote deployment has succeeded. 
Help me find the connection, operation, and recovery details so I can operate the sandbox after initial setup.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user find the connection, operation, and recovery details and gives enough concrete guidance, decision criteria, verification steps, or risk framing to operate the sandbox after initial setup." + }, + { + "id": "docs-deployment-brev-web-ui-001", + "question": "I'm launching NemoClaw from the Brev web UI. Help me avoid local CLI setup and local GPU requirements so I can start a hosted sandbox quickly.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user avoid local CLI setup and local GPU requirements and gives enough concrete guidance, decision criteria, verification steps, or risk framing to start a hosted sandbox quickly." + }, + { + "id": "docs-deployment-brev-web-ui-002", + "question": "I'm reviewing hosted launch choices. Help me understand each web UI option before creating the instance so I can choose settings that match my expected sandbox workflow.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user understand each web UI option before creating the instance and gives enough concrete guidance, decision criteria, verification steps, or risk framing to choose settings that match my expected sandbox workflow." + }, + { + "id": "docs-deployment-brev-web-ui-003", + "question": "My hosted sandbox has been created. 
Help me confirm where to connect and how to start using it so I can move from provisioning to actual agent work.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user confirm where to connect and how to start using it and gives enough concrete guidance, decision criteria, verification steps, or risk framing to move from provisioning to actual agent work." + }, + { + "id": "docs-deployment-install-openclaw-plugins-001", + "question": "I'm installing an OpenClaw plugin in a NemoClaw-managed sandbox. Help me add a new agent capability inside the sandbox so I can extend the assistant without weakening the host boundary.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user add a new agent capability inside the sandbox and gives enough concrete guidance, decision criteria, verification steps, or risk framing to extend the assistant without weakening the host boundary." + }, + { + "id": "docs-deployment-install-openclaw-plugins-002", + "question": "I'm deciding where to install a plugin. Help me distinguish host environment changes from sandbox environment changes so I can modify the right filesystem and runtime.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user distinguish host environment changes from sandbox environment changes and gives enough concrete guidance, decision criteria, verification steps, or risk framing to modify the right filesystem and runtime." + }, + { + "id": "docs-deployment-install-openclaw-plugins-003", + "question": "I'm verifying a plugin installation. 
Help me confirm the agent can discover and use the plugin so I can trust that the capability works inside NemoClaw's security model.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user confirm the agent can discover and use the plugin and gives enough concrete guidance, decision criteria, verification steps, or risk framing to trust that the capability works inside NemoClaw's security model." + }, + { + "id": "docs-deployment-sandbox-hardening-001", + "question": "I'm reviewing sandbox image hardening. Help me understand which container risks NemoClaw reduces so I can decide whether unattended agents are acceptable in my environment.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user understand which container risks NemoClaw reduces and gives enough concrete guidance, decision criteria, verification steps, or risk framing to decide whether unattended agents are acceptable in my environment." + }, + { + "id": "docs-deployment-sandbox-hardening-002", + "question": "I'm mapping NemoClaw to an organizational security baseline. Help me identify capability drops, least privilege, and runtime protections so I can document how the sandbox meets or misses required controls.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user identify capability drops, least privilege, and runtime protections and gives enough concrete guidance, decision criteria, verification steps, or risk framing to document how the sandbox meets or misses required controls." + }, + { + "id": "docs-deployment-sandbox-hardening-003", + "question": "I'm considering production use. 
Help me see the limitations and residual risks of the hardened image so I can avoid overstating what container hardening guarantees.", + "expected_skill": "nemoclaw-user-deploy-remote", + "ground_truth": "A NemoClaw-specific answer that helps the user see the limitations and residual risks of the hardened image and gives enough concrete guidance, decision criteria, verification steps, or risk framing to avoid overstating what container hardening guarantees." + } +] diff --git a/skills/nemoclaw-user-deploy-remote/references/brev-web-ui.md b/skills/nemoclaw-user-deploy-remote/references/brev-web-ui.md new file mode 100644 index 0000000000..517e14b48d --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/references/brev-web-ui.md @@ -0,0 +1,155 @@ + + +# Launch NemoClaw with the Brev Web UI + +Use the Brev web UI to launch a hosted NemoClaw sandbox from your browser. +This flow provisions a remote VM, configures inference, starts OpenClaw inside an OpenShell sandbox, and opens the OpenClaw dashboard. + +**Note:** + +Use this guide when you want to try NemoClaw without installing the CLI or using a local GPU. +If you want to manage the remote host from a terminal, see [Deploy to a Remote GPU Instance](../SKILL.md). + +## What This Flow Creates + +The Brev web flow creates the following resources: + +- A Brev-managed Linux VM. +- Docker and the OpenShell runtime on that VM. +- A NemoClaw sandbox running OpenClaw. +- Inference routing for the provider you select during setup. +- A browser-accessible OpenClaw dashboard. + +## Prerequisites + +- An NVIDIA Brev account at [brev.nvidia.com](https://brev.nvidia.com). +- An NVIDIA API key from [build.nvidia.com](https://build.nvidia.com/settings/api-keys) if you use the default NVIDIA Cloud provider. + +You do not need to install local software for this flow. + +## Get Your NVIDIA API Key + +If you already have an NVIDIA API key skip this section. Otherwise, follow these steps to generate a new key: + +1. 
Go to [build.nvidia.com](https://build.nvidia.com). +2. Sign in or create an account. +3. Click your profile icon in the top right. +4. Select **API Keys**. +5. Click **Generate API Key**. +6. Copy the key. It starts with `nvapi-`. + +Keep this key ready for the next step. + +## Launch NemoClaw from Brev + +Use the [NemoClaw Brev launchable](https://brev.nvidia.com/launchable/deploy/now?launchableID=env-3Azt0aYgVNFEuz7opyx3gscmowS) to launch a NemoClaw sandbox from your browser. + +1. Open the [NemoClaw Brev launchable](https://brev.nvidia.com/launchable/deploy/now?launchableID=env-3Azt0aYgVNFEuz7opyx3gscmowS) and sign in if prompted. +2. Review the instance type, cloud provider, and estimated hourly cost on the NemoClaw setup page. +3. Click **Deploy NemoClaw**. + +The right-side deployment panel shows progress while Brev deploys the CPU instance and prepares VM mode. +Keep this page open until the deployment completes. +When the panel shows the **NemoClaw** button, click it to open the agent setup page. + +## Configure Your Agent + +The setup page walks you through three stages: **Configure**, **Setup**, and **Launch**. + +### Configure + +The Configure stage opens the **Connect to AI** screen. +Use the NVIDIA Cloud provider shown on this screen. + +1. Leave **NVIDIA Cloud** selected. +2. Paste your `nvapi-` API key. +3. Click **Create Agent**. + +**Note:** + +The **Show Other Providers** dropdown appears below the **NVIDIA Cloud** card and can be easy to miss. +Click it to expand the provider list. +The expanded list includes **OpenAI**, **Anthropic**, and **Google Gemini**. +For these providers, get the API key from the provider's own console before you create the agent. + +### Setup + +NemoClaw configures the remote host and sandbox automatically. +This stage usually takes about 5 minutes. + +During setup, NemoClaw installs the runtime, prepares the sandboxed agent environment, and configures inference routing for the provider you selected. 
+ +### Launch + +When setup finishes, Brev shows the following confirmation: + +```text +AGENT CREATED SUCCESSFULLY +Your agent is running in a secure sandbox and ready to use. + +Agent: agent +Model: nemotron-3-super-120b +Provider: NVIDIA Cloud +``` + +Click **Chat With Agent** to open the OpenClaw dashboard. + +**Note:** + +The dashboard might initially show a **Pairing required** warning. +This means the gateway is still completing pairing in the background. +Wait a few minutes for pairing to finish automatically. Refresh the dashboard to check whether the warning has cleared and the connection is established. +If pairing does not finish, go to the **Overview** page in the OpenClaw UI, find the **Gateway Access** panel, and click **Connect**. + +## Start a Chat + +Use the dashboard chat box to send your first message: + +```text +Hello! What can you do for me? What skills do you have available? +``` + +The agent reads its workspace files and introduces itself. +The starter workspace includes example skills such as: + +- **Weather** gets current weather and forecasts. +- **Healthcheck** runs security audit and hardening checks. +- **Skill-Creator** creates new custom skills. + +## Personalize Agent Memory + +The agent starts with an empty `USER.md` file. +Ask the agent to add details that help it personalize future responses. + +In the chat, type the following: + +```text +Please update my USER.md file with the following: +Name: [your name] +Timezone: [your timezone, such as "America/New_York"] +Notes: [what you are working on] +``` + +The agent writes this information to its workspace so it can use it across sessions on the same sandbox. + +## Stop Your Instance When Done + +Brev continues billing while the instance runs. +Stop the instance when you finish experimenting. + +1. Go back to [brev.nvidia.com](https://brev.nvidia.com). +2. Click **GPUs** in the nav bar. +3. Find your NemoClaw instance. +4. Click **Stop**. 
+ +Check the Brev UI for the current hourly price before leaving the instance running. + +## Next Steps + +After your agent is running, explore these related tasks: + +- Set Up Messaging Channels (use the `nemoclaw-user-manage-sandboxes` skill) to learn how to connect Telegram, Slack, or Discord. +- Switch Inference Providers (use the `nemoclaw-user-configure-inference` skill) to learn how to change the model provider after setup. +- Monitor Sandbox Activity (use the `nemoclaw-user-monitor-sandbox` skill) to learn how to inspect sandbox health and logs. +- [Deploy to a Remote GPU Instance](../SKILL.md) to learn how to deploy NemoClaw to a remote GPU instance using the CLI. +- Troubleshooting (use the `nemoclaw-user-reference` skill) to learn how to fix common setup and runtime issues. diff --git a/skills/nemoclaw-user-deploy-remote/references/install-openclaw-plugins.md b/skills/nemoclaw-user-deploy-remote/references/install-openclaw-plugins.md new file mode 100644 index 0000000000..b4b2f5beb8 --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/references/install-openclaw-plugins.md @@ -0,0 +1,93 @@ + + +# Install OpenClaw Plugins + +OpenClaw plugins extend the OpenClaw runtime with hooks, services, tools, or +provider integrations. They are different from NemoClaw-managed agent skills: + +- **Plugins** are code packages loaded by OpenClaw. +- **Skills** are `SKILL.md` directories that teach an agent how to perform a task. +- **Policy presets** are network-egress rules that control what sandboxed code can reach. + +Today, the supported NemoClaw path for OpenClaw plugins is to bake the plugin +into a custom sandbox image and onboard from that Dockerfile. + +## Prepare a Build Directory + +Put the Dockerfile and everything it needs to `COPY` in one directory. +`nemoclaw onboard --from <Dockerfile>` uses the Dockerfile's parent directory as +the Docker build context. 
+ +```text +my-plugin-sandbox/ +├── Dockerfile +└── my-plugin/ + ├── package.json + └── src/ +``` + +## Example Dockerfile + +Use the custom image to copy the plugin into the OpenClaw extensions directory +and let OpenClaw refresh its config before NemoClaw starts the sandbox. + +```dockerfile +ARG SANDBOX_BASE=ghcr.io/nvidia/nemoclaw/sandbox-base:latest +FROM ${SANDBOX_BASE} + +COPY my-plugin/ /opt/my-plugin/ +WORKDIR /opt/my-plugin +RUN npm ci --no-audit --no-fund && npm run build + +RUN mkdir -p /sandbox/.openclaw/extensions \ + && cp -a /opt/my-plugin /sandbox/.openclaw/extensions/my-plugin \ + && openclaw doctor --fix + +WORKDIR /opt/nemoclaw +``` + +If the plugin needs configuration in `openclaw.json`, apply it after +`openclaw doctor --fix` so the base config exists first. + +## Create the Sandbox + +Point `nemoclaw onboard --from` at the Dockerfile in the build directory. + +```console +$ nemoclaw onboard --from ./my-plugin-sandbox/Dockerfile +``` + +If you need a second sandbox alongside an existing one, use a dedicated build +directory and rerun onboarding with the sandbox name and ports you intend to +use. + +## Network Access + +Plugins still run inside the sandbox policy boundary. If a plugin needs network +egress, add or update a policy preset for the required hostnames and binaries +before rebuilding the sandbox. + +For example, see Network Policies (use the `nemoclaw-user-reference` skill) for +policy concepts and Customize Network Policy (use the `nemoclaw-user-manage-policy` skill) +for custom preset workflows. + +## Common Mistakes + +These are the most common places where plugin installation gets mixed up with +other NemoClaw extension paths. + +- Do not use `nemoclaw skill install` for OpenClaw plugins. That + command only installs `SKILL.md` agent skills. +- Do not put a Dockerfile in a broad directory such as `/tmp` unless you intend + to send that whole directory as the Docker build context. 
+- Keep plugin dependencies in the build stage or plugin directory; avoid copying + unrelated host files into the sandbox image. + +## Next Steps + +- Review [Sandbox Hardening](sandbox-hardening.md) before adding plugin code to a + shared or long-lived sandbox. +- Review Network Policies (use the `nemoclaw-user-reference` skill) to plan plugin + egress rules. +- Follow Customize Network Policy (use the `nemoclaw-user-manage-policy` skill) + if the plugin needs a custom preset. diff --git a/skills/nemoclaw-user-deploy-remote/references/sandbox-hardening.md b/skills/nemoclaw-user-deploy-remote/references/sandbox-hardening.md new file mode 100644 index 0000000000..669096f180 --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/references/sandbox-hardening.md @@ -0,0 +1,127 @@ + + +# Sandbox Image Hardening + +The NemoClaw sandbox image applies several security measures to reduce attack +surface and limit the blast radius of untrusted workloads. + +## Removed Unnecessary Tools + +Build toolchains (`gcc`, `g++`, `make`) and network probes (`netcat`) are +explicitly purged from the runtime image. These tools are not needed at runtime +and would unnecessarily widen the attack surface. + +The runtime image keeps a small set of operational utilities for normal sandbox +workflows, including `vi`, `jq`, and `dos2unix`. Use these for lightweight +inspection and file cleanup inside the sandbox, but make durable image or policy +changes in the NemoClaw source tree and rebuild the sandbox. + +If you need a compiler during build, use the existing multi-stage build +(the `builder` stage has full Node.js tooling) and copy only artifacts into the +runtime stage. + +## Process Limits + +The container ENTRYPOINT sets `ulimit -u 512` to cap the number of processes +a sandbox user can spawn. This mitigates fork-bomb attacks. The startup script +(`nemoclaw-start.sh`) applies the same limit. 
+ +Adjust the value via the `--ulimit nproc=512:512` flag if launching with +`docker run` directly. + +## Dropping Linux Capabilities + +The NemoClaw entrypoint drops dangerous capabilities from the process bounding +set before it starts agent services. +It removes `CAP_SYS_ADMIN`, `CAP_SYS_PTRACE`, `CAP_NET_RAW`, +`CAP_DAC_OVERRIDE`, `CAP_SYS_CHROOT`, `CAP_FSETID`, `CAP_SETFCAP`, +`CAP_MKNOD`, `CAP_AUDIT_WRITE`, and `CAP_NET_BIND_SERVICE`. +When `setpriv` is available, the entrypoint also removes the remaining +privilege-separation capabilities during the switch from root to the +`sandbox` and `gateway` users. + +For defense-in-depth, also drop all Linux capabilities at the container runtime +when you launch the image directly: + +```console +$ docker run --rm \ + --cap-drop=ALL \ + --ulimit nproc=512:512 \ + nemoclaw-sandbox +``` + +### Docker Compose Example + +```yaml +services: + nemoclaw-sandbox: + image: nemoclaw-sandbox:latest + cap_drop: + - ALL + cap_add: + - NET_BIND_SERVICE + ulimits: + nproc: + soft: 512 + hard: 512 + security_opt: + - no-new-privileges:true + read_only: true + tmpfs: + - /tmp:size=64m +``` + +> **Note:** The `Dockerfile` itself cannot enforce `--cap-drop`. That is a +> runtime concern controlled by the container orchestrator. Always configure +> capability dropping in your `docker run` flags, Compose file, or Kubernetes +> `securityContext`. + +## Filesystem Layout + +The sandbox Landlock policy declares which paths are writable. +The agent's home directory (`/sandbox`) is writable by default: + +| Path | Access | Purpose | +|------|--------|---------| +| `/sandbox` | read-write | Home directory — agents can create files and use standard home paths | +| `/sandbox/.openclaw` | read-write | Agent config, state, workspace, plugins | +| `/sandbox/.nemoclaw` | read-write | Plugin state and config; blueprints within are DAC-protected (root-owned) | +| `/tmp` | read-write | Temporary files and logs | + +This writable default is intentional. 
+Seeing the sandbox user create files under `/sandbox` or `/sandbox/.openclaw` in a fresh sandbox does not mean Landlock failed. +Landlock still enforces the fixed read-only system paths below. + +System paths remain read-only to prevent agents from: + +- Replacing system binaries with trojanized versions +- Modifying DNS resolution or TLS trust stores +- Tampering with libraries or shell configuration outside `/sandbox` + +The image build pre-creates locked shell init files `.bashrc` and `.profile` without proxy entries. +Runtime proxy configuration is sourced from system-wide shell hooks that read `/tmp/nemoclaw-proxy-env.sh`. + +### Landlock Kernel Requirements + +Landlock LSM requires Linux kernel 5.13 or later with `CONFIG_SECURITY_LANDLOCK=y`. +The NemoClaw sandbox policy uses `compatibility: best_effort`, which means Landlock enforcement is silently skipped on kernels that do not support it. + +On such kernels, protection falls back to DAC (file ownership and permissions) only. +When Landlock is enforced, by contrast, files outside the writable paths would be inaccessible to the agent regardless of DAC permissions. + +Operators should verify Landlock availability: + +```console +$ ls /sys/kernel/security/landlock +``` + +For production deployments, kernel 5.13+ with Landlock enabled is strongly recommended. +The `test/e2e/e2e-cloud-experimental/checks/04-landlock-readonly.sh` script validates enforcement at runtime. 
+ +## References + +- [#804](https://github.com/NVIDIA/NemoClaw/issues/804): Filesystem layout and Landlock policy +- [#807](https://github.com/NVIDIA/NemoClaw/issues/807): gcc in sandbox image +- [#808](https://github.com/NVIDIA/NemoClaw/issues/808): netcat in sandbox image +- [#809](https://github.com/NVIDIA/NemoClaw/issues/809): No process limit +- [#797](https://github.com/NVIDIA/NemoClaw/issues/797): Drop Linux capabilities diff --git a/skills/nemoclaw-user-deploy-remote/skill-card.md b/skills/nemoclaw-user-deploy-remote/skill-card.md new file mode 100644 index 0000000000..796f23bb77 --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/skill-card.md @@ -0,0 +1,50 @@ +## Description:
+Explains how to run NemoClaw on a remote GPU instance, including the deprecated Brev compatibility path and the preferred installer plus onboard flow.
+ +This skill is ready for commercial/non-commercial use.
+ +## Owner +NVIDIA
+ +### License/Terms of Use:
+Apache 2.0
+## Use Case:
+Developers and engineers deploying NemoClaw to remote GPU instances using Brev or other cloud VMs for always-on AI assistant workloads.
+ +### Deployment Geography for Use:
+Global
+ +## Known Risks and Mitigations:
+Risk: Unreviewed skill content or proposed changes could introduce incorrect or misleading guidance into skills.
+Mitigation: Review and scan skill before deployment.
+ +## Reference(s):
+- [Install OpenClaw Plugins](references/install-openclaw-plugins.md)
+- [Launch NemoClaw with the Brev Web UI](references/brev-web-ui.md)
+- [Sandbox Hardening](references/sandbox-hardening.md)
+ + +## Skill Output:
+**Output Type(s):** [Shell commands, Configuration instructions]
+**Output Format:** [Markdown with inline bash code blocks]
+**Output Parameters:** [1D]
+**Other Properties Related to Output:** [None]
+ +## Evaluation Metrics Used:
+Reported benchmark dimensions:
+- Security: Checks whether skill-assisted execution avoids unsafe behavior such as secret leakage, destructive commands, or unauthorized access.
+- Correctness: Checks whether the agent follows the expected workflow and produces the correct final output.
+- Discoverability: Checks whether the agent loads the skill when relevant and avoids using it when irrelevant.
+- Effectiveness: Checks whether the agent performs measurably better with the skill than without it.
+- Efficiency: Checks whether the agent uses fewer tokens and avoids redundant work.
+ + + +## Skill Version(s):
+0.1.0 (source: package.json)
+ +## Ethical Considerations:
+NVIDIA believes Trustworthy AI is a shared responsibility and we have established policies and practices to enable development for a wide array of AI applications. When downloaded or used in accordance with our terms of service, developers should work with their internal team to ensure this skill meets requirements for the relevant industry and use case and addresses unforeseen product misuse.
+ +(For Release on NVIDIA Platforms Only)
+Please report quality, risk, security vulnerabilities or NVIDIA AI Concerns [here](https://app.intigriti.com/programs/nvidia/nvidiavdp/detail).
diff --git a/skills/nemoclaw-user-deploy-remote/skill.oms.sig b/skills/nemoclaw-user-deploy-remote/skill.oms.sig new file mode 100644 index 0000000000..21d3848ad1 --- /dev/null +++ b/skills/nemoclaw-user-deploy-remote/skill.oms.sig @@ -0,0 +1 @@ +{"mediaType":"application/vnd.dev.sigstore.bundle.v0.3+json","verificationMaterial":{"x509CertificateChain":{"certificates":[{"rawBytes":"MIICgzCCAgmgAwIBAgIUKIyS7SxNteQIiWzK1dWj85E6520wCgYIKoZIzj0EAwMwVTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjEpMCcGA1UEAwwgTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBJQ0EgMDEwHhcNMjYwNDAxMDAwMDAwWhcNMjgwNDIyMTUzMzA5WjBUMQswCQYDVQQGEwJVUzEbMBkGA1UECgwSTlZJRElBIENvcnBvcmF0aW9uMSgwJgYDVQQDDB9OVklESUEgQWdlbnQgU2tpbGxzIFNpZ25pbmcgMDAxMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEYoRM9bQl/dGlwSRNi6bTpIJUXH8Nv9GciP6LSflJYYMLCc296kpyuTSsk5ddbAWiDcFX3C/ydX3jwc+qCLYP6uHy9XphyLjOQ27Yb2J6rBLVtRBS1mgGco/Gr7fL6ODco4GaMIGXMB0GA1UdDgQWBBRQ/5ZW3nJ6lmo9SVk7I15o7UGmpTAfBgNVHSMEGDAWgBRPGpILxMBBleJSsBGjrMKsby1CgjAMBgNVHRMBAf8EAjAAMA4GA1UdDwEB/wQEAwIHgDA3BggrBgEFBQcBAQQrMCkwJwYIKwYBBQUHMAGGG2h0dHA6Ly9vY3NwLm5kaXMubnZpZGlhLmNvbTAKBggqhkjOPQQDAwNoADBlAjAUygu/GiOCIXrgGr4SmLgeEVDcEitfFUv7ALbvLVGVyMysB3mxmO/uInZfXzWcJZsCMQDxuoxj4ZmO30jhkPIcCxGFCOvnUsnfU3TfGcouYm4M6iRpbKvtVnHPiy4bi6pcKf0="},{"rawBytes":"MIICiDCCAg6gAwIBAgIUZsIuSv9NkpJCNqtYEfCouVv5BzowCgYIKoZIzj0EAwMwUTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjElMCMGA1UEAwwcTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBDQTAgFw0yNjA0MDEwMDAwMDBaGA85OTk5MTIzMTIzNTk1OVowVTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjEpMCcGA1UEAwwgTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBJQ0EgMDEwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASI72cR3ctKGg4VWnB3bNja6g1Z2PnOmFEopkPof+QeIcPk9rT+g9MjJnq51EQXL93a7C2GJ9J985G4o2V85VD7wJ1RaXhluHW2rf3y8bQGeAYaKMr5s/hUgn+M3/9WlWejgaAwgZ0wHQYDVR0OBBYEFE8akgvEwEGV4lKwEaOswqxvLUKCMB8GA1UdIwQYMBaAFItnoAjjfuCEUvzyvWyI2vOGvwPjMBIGA1UdEwEB/wQIMAYBAf8CAQAwDgYDVR0PAQH/BAQDAgEGMDcGCCsGAQUFBwEBBCswKTAnBggrBgEFBQcwAYYbaHR0cDovL29jc3AubmRpcy5udmlkaWEuY29tMAoGCCqGSM49BAMDA2gAMGU
CMQCeIMMfAbyzPDacw2MxG+Yt1cikrJX/DVxiGfXuHmkkXn6VgSzE79+lkqDErpVO2gYCMCNEColOyvUvkzZGUEI1hQ3PfMgi3FIo9tHoBKMw4/wGBLFpu/0ubtmbBXM6/UMOEw=="},{"rawBytes":"MIICRTCCAcygAwIBAgIUeJdY3rV86EdvFmG7L8LJBsyQFYkwCgYIKoZIzj0EAwMwUTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjElMCMGA1UEAwwcTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBDQTAgFw0yNjA0MDEwMDAwMDBaGA85OTk5MTIzMTIzNTk1OVowUTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjElMCMGA1UEAwwcTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBDQTB2MBAGByqGSM49AgEGBSuBBAAiA2IABAYpiXCDjJ9NT2eSDhyHJVSw1Tbze18cGG2F/578oWvHxg23eQAhNRYdq88i1iOshZSO6C29doKui5Xpmo/7Ctw9Sx4PP2RzOmIuOLCuTdNtKcTRwi4GEsd5BAFvWj42M6NjMGEwHQYDVR0OBBYEFItnoAjjfuCEUvzyvWyI2vOGvwPjMB8GA1UdIwQYMBaAFItnoAjjfuCEUvzyvWyI2vOGvwPjMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMAoGCCqGSM49BAMDA2cAMGQCMCwtAjWLaNwgGWNCgdyNoTyvNhqWRECRJV2r3+7w8g0PL6NHLOsbkgE09BH95h8XlgIwTaQmbbUh2ChAJ5TA1wRiVDnCcvbzHlZl2jM2FcwQQZlk19LOAbyGMRixbu2Ww/rj"}]},"tlogEntries":[]},"dsseEnvelope":{"payload":"ewogICJfdHlwZSI6ICJodHRwczovL2luLXRvdG8uaW8vU3RhdGVtZW50L3YxIiwKICAic3ViamVjdCI6IFsKICAgIHsKICAgICAgIm5hbWUiOiAibmVtb2NsYXctdXNlci1kZXBsb3ktcmVtb3RlIiwKICAgICAgImRpZ2VzdCI6IHsKICAgICAgICAic2hhMjU2IjogIjFhZDZhNWQzNWMwNDE5NmFkYTE5MWJjNTZmZDZhNzMwODk2ZWU1MGU5ZDlmZjdkZDQ0ZmI4Yzg1YjBjZDdiZTYiCiAgICAgIH0KICAgIH0KICBdLAogICJwcmVkaWNhdGVUeXBlIjogImh0dHBzOi8vbW9kZWxfc2lnbmluZy9zaWduYXR1cmUvdjEuMCIsCiAgInByZWRpY2F0ZSI6IHsKICAgICJzZXJpYWxpemF0aW9uIjogewogICAgICAiaGFzaF90eXBlIjogInNoYTI1NiIsCiAgICAgICJhbGxvd19zeW1saW5rcyI6IGZhbHNlLAogICAgICAibWV0aG9kIjogImZpbGVzIiwKICAgICAgImlnbm9yZV9wYXRocyI6IFsKICAgICAgICAiLmdpdGlnbm9yZSIsCiAgICAgICAgIi5naXQiLAogICAgICAgICIuZ2l0YXR0cmlidXRlcyIsCiAgICAgICAgIi5naXRodWIiCiAgICAgIF0KICAgIH0sCiAgICAicmVzb3VyY2VzIjogWwogICAgICB7CiAgICAgICAgIm5hbWUiOiAiQkVOQ0hNQVJLLm1kIiwKICAgICAgICAiYWxnb3JpdGhtIjogInNoYTI1NiIsCiAgICAgICAgImRpZ2VzdCI6ICI5MTBmMjc1NzI5M2ZlZTJmMzljY2NmN2U1OGI1NGU0MDQwYjNlYzA0MDkxYTVhZjg1ZGZiNWVkMDRhYTU0ZTdlIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAiU0tJTEwubWQiLAogI
CAgICAgICJhbGdvcml0aG0iOiAic2hhMjU2IiwKICAgICAgICAiZGlnZXN0IjogIjQ1MThkZDVkMjU3NzE0ZDEyYWJlZDBiZTU5ZWU2NDlkY2QwNDUyZjE2MWUzZjcxMzdhMTBlNWNiNGViZjg3MjgiCiAgICAgIH0sCiAgICAgIHsKICAgICAgICAibmFtZSI6ICJldmFscy9ldmFscy5qc29uIiwKICAgICAgICAiYWxnb3JpdGhtIjogInNoYTI1NiIsCiAgICAgICAgImRpZ2VzdCI6ICIxY2EwYThhZjZhYjgyNzRlYTgwMmY3OWQ3NzQ3NmE3MGJmYWMzMjRjNjY0N2YyYmZjNmNmZjIwNmFhMjdkNGUxIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAicmVmZXJlbmNlcy9icmV2LXdlYi11aS5tZCIsCiAgICAgICAgImFsZ29yaXRobSI6ICJzaGEyNTYiLAogICAgICAgICJkaWdlc3QiOiAiN2RlYzA0ZjViNmUxYjc5OWNlYzI5NGZkYTVmYjRiNTljOWYyOWRmZGNhYzNjNzBmMmMzZGIwNTM5MDFlYzAzOCIKICAgICAgfSwKICAgICAgewogICAgICAgICJuYW1lIjogInJlZmVyZW5jZXMvaW5zdGFsbC1vcGVuY2xhdy1wbHVnaW5zLm1kIiwKICAgICAgICAiYWxnb3JpdGhtIjogInNoYTI1NiIsCiAgICAgICAgImRpZ2VzdCI6ICJjMmExNmZlMjM0YTUzNDdmNTg2NzFiNmIyOTBhNzlmNGQ0ODI4Nzk2YzVmMmE5YTNlMWJkMGQ3YmU3YzBjYTAxIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAicmVmZXJlbmNlcy9zYW5kYm94LWhhcmRlbmluZy5tZCIsCiAgICAgICAgImFsZ29yaXRobSI6ICJzaGEyNTYiLAogICAgICAgICJkaWdlc3QiOiAiMzFkMWI1NzhkYTcxNWE0N2NhYWE4ZDA0MGFmY2IyZTU0NzY2ZWViMDQ2OGViMDllNDVkNTQ1NDYzZmFjZTQ3YyIKICAgICAgfSwKICAgICAgewogICAgICAgICJuYW1lIjogInNraWxsLWNhcmQubWQiLAogICAgICAgICJhbGdvcml0aG0iOiAic2hhMjU2IiwKICAgICAgICAiZGlnZXN0IjogIjU1MDZiMGRhMzI0ODFiMjkxMDRlMDcwZDI5MmFkMTcxNzI1NjFkYjBjOGE0MGU5YjE3MWFmNWEyMTYwNGQyMWMiCiAgICAgIH0KICAgIF0KICB9Cn0=","payloadType":"application/vnd.in-toto+json","signatures":[{"sig":"MGUCMQDhrZK1PNAefQFEPLVZHwp9U2ygWTY6H/YEHy5bKoJm7SPqt+waDMdcYiX/mLv7gNwCMHaEuQAd/zvus/5pzesukATe1cXXhdot1ykv/wddtXCzhIKNVk6QI8SJrUJpQkNNsA==","keyid":""}]}} \ No newline at end of file diff --git a/skills/nemoclaw-user-monitor-sandbox/BENCHMARK.md b/skills/nemoclaw-user-monitor-sandbox/BENCHMARK.md new file mode 100644 index 0000000000..bc06cf21cd --- /dev/null +++ b/skills/nemoclaw-user-monitor-sandbox/BENCHMARK.md @@ -0,0 +1,64 @@ +# Evaluation Report + +Evaluation of the `nemoclaw-user-monitor-sandbox` skill before publication through NVSkills-Eval. 
+ +This benchmark summarizes 3-Tier Evaluation from NVSkills-Eval results for the skill. The goal is to document whether the skill is safe, discoverable, effective, and useful for agents before it is published for broader workflow use. + +## Evaluation Summary + +- Skill: `nemoclaw-user-monitor-sandbox` +- Evaluation date: 2026-05-28 +- NVSkills-Eval profile: `external` +- Overall verdict: PASS +- Tier 3 live agent evaluation: not available in this report + +## Agents Used + +- Tier 3 agent details were not available in this report. + +## Metrics Used + +Reported benchmark dimensions: + +- Security: checks whether skill-assisted execution avoids unsafe behavior such as secret leakage, destructive commands, or unauthorized access. +- Correctness: checks whether the agent follows the expected workflow and produces the correct final output. +- Discoverability: checks whether the agent loads the skill when relevant and avoids using it when irrelevant. +- Effectiveness: checks whether the agent performs measurably better with the skill than without it. +- Efficiency: checks whether the agent uses fewer tokens and avoids redundant work. + +Underlying evaluation signals used in this run: + +- No Tier 3 evaluation signal details were available in this report. + +## Test Tasks + +Tier 3 evaluation task details were not available in this report. + +## Results + +Tier 3 dimension rollup was not available in this report. + +## Tier 1: Static Validation Summary + +Tier 1 validation passed with observations. NVSkills-Eval ran 9 checks and found 9 total findings. 
+ +Top findings: + +- MEDIUM QUALITY/quality_correctness: SKILL_SPEC recommended field missing: 'metadata.author' (`skills/nemoclaw-user-monitor-sandbox/SKILL.md`) +- MEDIUM QUALITY/quality_correctness: SKILL_SPEC recommended field missing: 'metadata.tags' (`skills/nemoclaw-user-monitor-sandbox/SKILL.md`) +- MEDIUM SCHEMA/body_recommended_section: Missing recommended section: '## Instructions' (`skills/nemoclaw-user-monitor-sandbox/SKILL.md`) +- MEDIUM SCHEMA/body_recommended_section: Missing recommended section: '## Examples' (`skills/nemoclaw-user-monitor-sandbox/SKILL.md`) +- MEDIUM SCHEMA/author_missing: Author not specified in metadata (`skills/nemoclaw-user-monitor-sandbox/SKILL.md`) + +## Tier 2: Deduplication Summary + +Tier 2 validation passed. NVSkills-Eval ran 2 checks and found 0 total findings. + +Notable observations: + +- Context Deduplication: Collected 1 file(s) +- Inter-Skill Deduplication: Parsed skill 'nemoclaw-user-monitor-sandbox': 234 char description + +## Publication Recommendation + +The skill is suitable to proceed toward NVSkills-Eval publication based on this benchmark. Skill owners should keep this file with the skill and refresh it when the evaluation dataset, skill behavior, or target agents materially change. diff --git a/skills/nemoclaw-user-monitor-sandbox/SKILL.md b/skills/nemoclaw-user-monitor-sandbox/SKILL.md new file mode 100644 index 0000000000..80192e7c14 --- /dev/null +++ b/skills/nemoclaw-user-monitor-sandbox/SKILL.md @@ -0,0 +1,93 @@ +--- +name: "nemoclaw-user-monitor-sandbox" +description: "Inspects sandbox health, traces agent behavior, and diagnoses problems. Use when monitoring a running sandbox, debugging agent issues, or checking sandbox logs. Trigger keywords - monitor nemoclaw sandbox, debug nemoclaw agent issues." +license: "Apache-2.0" +--- + + + + +# Monitor Sandbox Activity and Debug Issues + +## Prerequisites + +- A running NemoClaw sandbox. +- The OpenShell CLI on your `PATH`. 
+ +Use the NemoClaw status, logs, and TUI tools together to inspect sandbox health, trace agent behavior, and diagnose problems. + +## Check Sandbox Health + +Run the status command to view the sandbox state, gateway health, and active inference configuration: + +```console +$ nemoclaw status +``` + +For local Ollama and local vLLM routes, `nemoclaw status` also probes the host-side health endpoint directly. +This catches a stopped local backend before you retry `inference.local` from inside the sandbox. + +Key fields in the output include the following: + +- Sandbox details, which show the configured model, provider, GPU mode, and applied policy presets. +- Gateway and process health, which show whether NemoClaw can still reach the OpenShell gateway and whether the in-sandbox agent process is running. +- Inference health for local Ollama and local vLLM, which shows `healthy` or `unreachable` together with the probed local URL. +- NIM status, which shows whether a NIM container is running and healthy when that path is in use. + +Run `nemoclaw status` on the host to check sandbox state. +Use `openshell sandbox list` for the underlying sandbox details. + +## View Blueprint and Sandbox Logs + +Stream the most recent log output from the blueprint runner and sandbox: + +```console +$ nemoclaw logs +``` + +To follow the log output in real time: + +```console +$ nemoclaw logs --follow +``` + +## Monitor Network Activity in the TUI + +Open the OpenShell terminal UI for a live view of sandbox network activity and egress requests: + +```console +$ openshell term +``` + +For a remote sandbox, SSH to the instance and run `openshell term` there. + +The TUI shows the following information: + +- Active network connections from the sandbox. +- Blocked egress requests awaiting operator approval. +- Inference routing status. + +Refer to Approve or Deny Agent Network Requests (use the `nemoclaw-user-manage-policy` skill) for details on handling blocked requests. 
+ +## Test Inference + +Run a test inference request to verify that the provider is responding: + +```console +$ nemoclaw my-assistant connect +$ openclaw agent --agent main -m "Test inference" --session-id debug +``` + +If the request fails, check the following: + +1. Run `nemoclaw status` to confirm the active provider and endpoint. + For local Ollama and local vLLM, check the `Inference` line first. + If it shows `unreachable`, restart the local backend before retrying from inside the sandbox. +2. Run `nemoclaw logs --follow` to view error messages from the blueprint runner. +3. Verify that the inference endpoint is reachable from the host. + +## Related Skills + +- `nemoclaw-user-reference` — Troubleshooting (use the `nemoclaw-user-reference` skill) for common issues and resolution steps +- `nemoclaw-user-manage-policy` — Approve or Deny Agent Network Requests (use the `nemoclaw-user-manage-policy` skill) for the operator approval flow +- `nemoclaw-user-configure-inference` — Switch Inference Providers (use the `nemoclaw-user-configure-inference` skill) to change the active provider diff --git a/skills/nemoclaw-user-monitor-sandbox/evals/evals.json b/skills/nemoclaw-user-monitor-sandbox/evals/evals.json new file mode 100644 index 0000000000..260e8ec64e --- /dev/null +++ b/skills/nemoclaw-user-monitor-sandbox/evals/evals.json @@ -0,0 +1,20 @@ +[ + { + "id": "docs-monitoring-monitor-sandbox-activity-001", + "question": "I'm monitoring sandbox activity. Help me understand what the agent and sandbox are doing now so I can detect unhealthy or unexpected behavior early.", + "expected_skill": "nemoclaw-user-monitor-sandbox", + "ground_truth": "A NemoClaw-specific answer that helps the user understand what the agent and sandbox are doing now and gives enough concrete guidance, decision criteria, verification steps, or risk framing to detect unhealthy or unexpected behavior early." 
+ }, + { + "id": "docs-monitoring-monitor-sandbox-activity-002", + "question": "I'm diagnosing a runtime failure. Help me use health, logs, and traces to locate the failing layer so I can separate host, gateway, sandbox, policy, and inference issues.", + "expected_skill": "nemoclaw-user-monitor-sandbox", + "ground_truth": "A NemoClaw-specific answer that helps the user use health, logs, and traces to locate the failing layer and gives enough concrete guidance, decision criteria, verification steps, or risk framing to separate host, gateway, sandbox, policy, and inference issues." + }, + { + "id": "docs-monitoring-monitor-sandbox-activity-003", + "question": "I'm collecting debugging evidence. Help me gather enough information without weakening controls so I can investigate safely and share useful diagnostics.", + "expected_skill": "nemoclaw-user-monitor-sandbox", + "ground_truth": "A NemoClaw-specific answer that helps the user gather enough information without weakening controls and gives enough concrete guidance, decision criteria, verification steps, or risk framing to investigate safely and share useful diagnostics." + } +] diff --git a/skills/nemoclaw-user-monitor-sandbox/skill-card.md b/skills/nemoclaw-user-monitor-sandbox/skill-card.md new file mode 100644 index 0000000000..f2bb0e6597 --- /dev/null +++ b/skills/nemoclaw-user-monitor-sandbox/skill-card.md @@ -0,0 +1,51 @@ +## Description:
+Inspects sandbox health, traces agent behavior, and diagnoses problems.
+ +This skill is ready for commercial/non-commercial use.
+ +## Owner +NVIDIA
+ +### License/Terms of Use:
+Apache 2.0
+## Use Case:
+Developers and operators use this skill to monitor running NemoClaw sandboxes, debug agent issues, and diagnose problems across host, gateway, sandbox, policy, and inference layers.
+ +### Deployment Geography for Use:
+Global
+ +## Known Risks and Mitigations:
+Risk: Proposed changes could introduce incorrect or misleading guidance into skills, so skill content requires review before execution.
+Mitigation: Review and scan skill before deployment.
+ +## Reference(s):
+- [NVIDIA NemoClaw GitHub Repository](https://github.com/NVIDIA/NemoClaw)
+ + +## Skill Output:
+**Output Type(s):** [Shell commands, Diagnostic guidance]
+**Output Format:** [Markdown with inline bash code blocks]
+**Output Parameters:** [1D]
+**Other Properties Related to Output:** [None]
+ +## Evaluation Tasks:
+Evaluated against 3 scenario-based tasks covering sandbox monitoring, runtime failure diagnosis, and debugging evidence collection.
+ +## Evaluation Metrics Used:
+Reported benchmark dimensions:
+- Security: Checks whether skill-assisted execution avoids unsafe behavior such as secret leakage, destructive commands, or unauthorized access.
+- Correctness: Checks whether the agent follows the expected workflow and produces the correct final output.
+- Discoverability: Checks whether the agent loads the skill when relevant and avoids using it when irrelevant.
+- Effectiveness: Checks whether the agent performs measurably better with the skill than without it.
+- Efficiency: Checks whether the agent uses fewer tokens and avoids redundant work.
+ + + +## Skill Version(s):
+0.1.0 (source: package.json)
+ +## Ethical Considerations:
+NVIDIA believes Trustworthy AI is a shared responsibility and we have established policies and practices to enable development for a wide array of AI applications. When downloaded or used in accordance with our terms of service, developers should work with their internal team to ensure this skill meets requirements for the relevant industry and use case and addresses unforeseen product misuse.
+ +(For Release on NVIDIA Platforms Only)
+Please report quality, risk, security vulnerabilities or NVIDIA AI Concerns [here](https://app.intigriti.com/programs/nvidia/nvidiavdp/detail).
diff --git a/skills/nemoclaw-user-monitor-sandbox/skill.oms.sig b/skills/nemoclaw-user-monitor-sandbox/skill.oms.sig new file mode 100644 index 0000000000..6457ffedf4 --- /dev/null +++ b/skills/nemoclaw-user-monitor-sandbox/skill.oms.sig @@ -0,0 +1 @@ +{"mediaType":"application/vnd.dev.sigstore.bundle.v0.3+json","verificationMaterial":{"x509CertificateChain":{"certificates":[{"rawBytes":"MIICgzCCAgmgAwIBAgIUKIyS7SxNteQIiWzK1dWj85E6520wCgYIKoZIzj0EAwMwVTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjEpMCcGA1UEAwwgTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBJQ0EgMDEwHhcNMjYwNDAxMDAwMDAwWhcNMjgwNDIyMTUzMzA5WjBUMQswCQYDVQQGEwJVUzEbMBkGA1UECgwSTlZJRElBIENvcnBvcmF0aW9uMSgwJgYDVQQDDB9OVklESUEgQWdlbnQgU2tpbGxzIFNpZ25pbmcgMDAxMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEYoRM9bQl/dGlwSRNi6bTpIJUXH8Nv9GciP6LSflJYYMLCc296kpyuTSsk5ddbAWiDcFX3C/ydX3jwc+qCLYP6uHy9XphyLjOQ27Yb2J6rBLVtRBS1mgGco/Gr7fL6ODco4GaMIGXMB0GA1UdDgQWBBRQ/5ZW3nJ6lmo9SVk7I15o7UGmpTAfBgNVHSMEGDAWgBRPGpILxMBBleJSsBGjrMKsby1CgjAMBgNVHRMBAf8EAjAAMA4GA1UdDwEB/wQEAwIHgDA3BggrBgEFBQcBAQQrMCkwJwYIKwYBBQUHMAGGG2h0dHA6Ly9vY3NwLm5kaXMubnZpZGlhLmNvbTAKBggqhkjOPQQDAwNoADBlAjAUygu/GiOCIXrgGr4SmLgeEVDcEitfFUv7ALbvLVGVyMysB3mxmO/uInZfXzWcJZsCMQDxuoxj4ZmO30jhkPIcCxGFCOvnUsnfU3TfGcouYm4M6iRpbKvtVnHPiy4bi6pcKf0="},{"rawBytes":"MIICiDCCAg6gAwIBAgIUZsIuSv9NkpJCNqtYEfCouVv5BzowCgYIKoZIzj0EAwMwUTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjElMCMGA1UEAwwcTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBDQTAgFw0yNjA0MDEwMDAwMDBaGA85OTk5MTIzMTIzNTk1OVowVTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjEpMCcGA1UEAwwgTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBJQ0EgMDEwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASI72cR3ctKGg4VWnB3bNja6g1Z2PnOmFEopkPof+QeIcPk9rT+g9MjJnq51EQXL93a7C2GJ9J985G4o2V85VD7wJ1RaXhluHW2rf3y8bQGeAYaKMr5s/hUgn+M3/9WlWejgaAwgZ0wHQYDVR0OBBYEFE8akgvEwEGV4lKwEaOswqxvLUKCMB8GA1UdIwQYMBaAFItnoAjjfuCEUvzyvWyI2vOGvwPjMBIGA1UdEwEB/wQIMAYBAf8CAQAwDgYDVR0PAQH/BAQDAgEGMDcGCCsGAQUFBwEBBCswKTAnBggrBgEFBQcwAYYbaHR0cDovL29jc3AubmRpcy5udmlkaWEuY29tMAoGCCqGSM49BAMDA
2gAMGUCMQCeIMMfAbyzPDacw2MxG+Yt1cikrJX/DVxiGfXuHmkkXn6VgSzE79+lkqDErpVO2gYCMCNEColOyvUvkzZGUEI1hQ3PfMgi3FIo9tHoBKMw4/wGBLFpu/0ubtmbBXM6/UMOEw=="},{"rawBytes":"MIICRTCCAcygAwIBAgIUeJdY3rV86EdvFmG7L8LJBsyQFYkwCgYIKoZIzj0EAwMwUTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjElMCMGA1UEAwwcTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBDQTAgFw0yNjA0MDEwMDAwMDBaGA85OTk5MTIzMTIzNTk1OVowUTELMAkGA1UEBhMCVVMxGzAZBgNVBAoMEk5WSURJQSBDb3Jwb3JhdGlvbjElMCMGA1UEAwwcTlZJRElBIEFnZW50IENhcGFiaWxpdGllcyBDQTB2MBAGByqGSM49AgEGBSuBBAAiA2IABAYpiXCDjJ9NT2eSDhyHJVSw1Tbze18cGG2F/578oWvHxg23eQAhNRYdq88i1iOshZSO6C29doKui5Xpmo/7Ctw9Sx4PP2RzOmIuOLCuTdNtKcTRwi4GEsd5BAFvWj42M6NjMGEwHQYDVR0OBBYEFItnoAjjfuCEUvzyvWyI2vOGvwPjMB8GA1UdIwQYMBaAFItnoAjjfuCEUvzyvWyI2vOGvwPjMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMAoGCCqGSM49BAMDA2cAMGQCMCwtAjWLaNwgGWNCgdyNoTyvNhqWRECRJV2r3+7w8g0PL6NHLOsbkgE09BH95h8XlgIwTaQmbbUh2ChAJ5TA1wRiVDnCcvbzHlZl2jM2FcwQQZlk19LOAbyGMRixbu2Ww/rj"}]},"tlogEntries":[]},"dsseEnvelope":{"payload":"ewogICJfdHlwZSI6ICJodHRwczovL2luLXRvdG8uaW8vU3RhdGVtZW50L3YxIiwKICAic3ViamVjdCI6IFsKICAgIHsKICAgICAgIm5hbWUiOiAibmVtb2NsYXctdXNlci1tb25pdG9yLXNhbmRib3giLAogICAgICAiZGlnZXN0IjogewogICAgICAgICJzaGEyNTYiOiAiY2NlZjRiOTZiOGU1ODI0MGRiOTkxNjgwNzJhYThhMzg1ZGI2OGJjZGNlZjk1ZWJmMDBkYmNiN2Q0NDFiMGIzYSIKICAgICAgfQogICAgfQogIF0sCiAgInByZWRpY2F0ZVR5cGUiOiAiaHR0cHM6Ly9tb2RlbF9zaWduaW5nL3NpZ25hdHVyZS92MS4wIiwKICAicHJlZGljYXRlIjogewogICAgInJlc291cmNlcyI6IFsKICAgICAgewogICAgICAgICJuYW1lIjogIkJFTkNITUFSSy5tZCIsCiAgICAgICAgImRpZ2VzdCI6ICIyNDJiOTE1YWE2NDAwMTk4ZDJmMDQxMzU1NzQ3OTZkY2NhYWQwYmVlMjIwZWZmZDVlZWY4MTRkYWY3OTI4M2IyIiwKICAgICAgICAiYWxnb3JpdGhtIjogInNoYTI1NiIKICAgICAgfSwKICAgICAgewogICAgICAgICJuYW1lIjogIlNLSUxMLm1kIiwKICAgICAgICAiZGlnZXN0IjogImJiYzlmMDFmNTM2YjcyMGY3MzNmM2UyNmU1YmE3YjI4MzI0NjU4NjZlNGI1ODc4YjQ5NmVjNjRiMTJkNTk0ZjUiLAogICAgICAgICJhbGdvcml0aG0iOiAic2hhMjU2IgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAiZXZhbHMvZXZhbHMuanNvbiIsCiAgICAgICAgImRpZ2VzdCI6ICIxMTc1OGE4MmU3MTZjNTY0MWU1NzlkMGIyZWZhMmIyY2M5Y2Q
wYjgzOTMzZmY0ODIyZWNhZTdlOTAyN2UzZWM2IiwKICAgICAgICAiYWxnb3JpdGhtIjogInNoYTI1NiIKICAgICAgfSwKICAgICAgewogICAgICAgICJuYW1lIjogInNraWxsLWNhcmQubWQiLAogICAgICAgICJkaWdlc3QiOiAiYWVlMzk1M2UwMDkzYjcwZGE3ODYxMmQ5MjFjYzFiZjE2YjRhYzEzYWE4M2JkOGVhY2ZmMzE2MTc3YThhZDBmZSIsCiAgICAgICAgImFsZ29yaXRobSI6ICJzaGEyNTYiCiAgICAgIH0KICAgIF0sCiAgICAic2VyaWFsaXphdGlvbiI6IHsKICAgICAgImFsbG93X3N5bWxpbmtzIjogZmFsc2UsCiAgICAgICJoYXNoX3R5cGUiOiAic2hhMjU2IiwKICAgICAgImlnbm9yZV9wYXRocyI6IFsKICAgICAgICAiLmdpdGh1YiIsCiAgICAgICAgIi5naXRpZ25vcmUiLAogICAgICAgICIuZ2l0YXR0cmlidXRlcyIsCiAgICAgICAgIi5naXQiCiAgICAgIF0sCiAgICAgICJtZXRob2QiOiAiZmlsZXMiCiAgICB9CiAgfQp9","payloadType":"application/vnd.in-toto+json","signatures":[{"sig":"MGYCMQDKD8rxlkEuZb7q02FBtvb03a+0XEM1YFhwSaw6D1las8eKgCHtLsa7VpOOXEb2GlcCMQC6qHYs4V/47WkBr62QIipz1L5+kROLI1tov14UrfLSiBYtojVXg7QlQqei6AW0bFQ=","keyid":""}]}} \ No newline at end of file