From 0a60e8275428290f8c67fb1674e777d27a984002 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 13:43:26 +0000 Subject: [PATCH 01/16] feat(policy-schema): create openshell-policy-schema crate skeleton Signed-off-by: Philippe Martin --- Cargo.lock | 10 ++++++++++ crates/openshell-policy-schema/Cargo.toml | 20 ++++++++++++++++++++ crates/openshell-policy-schema/src/lib.rs | 13 +++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 crates/openshell-policy-schema/Cargo.toml create mode 100644 crates/openshell-policy-schema/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 4bc657be3..dd306ff78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3596,6 +3596,16 @@ dependencies = [ "serde_yml", ] +[[package]] +name = "openshell-policy-schema" +version = "0.0.0" +dependencies = [ + "miette", + "serde", + "serde_json", + "serde_yml", +] + [[package]] name = "openshell-prover" version = "0.0.0" diff --git a/crates/openshell-policy-schema/Cargo.toml b/crates/openshell-policy-schema/Cargo.toml new file mode 100644 index 000000000..c4815d459 --- /dev/null +++ b/crates/openshell-policy-schema/Cargo.toml @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "openshell-policy-schema" +description = "YAML schema types and pure-Rust parsing for OpenShell sandbox policies" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +serde = { workspace = true } +serde_json = { workspace = true } +serde_yml = { workspace = true } +miette = { workspace = true } + +[lints] +workspace = true diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs new file mode 100644 index 000000000..47515eb0a --- /dev/null +++ b/crates/openshell-policy-schema/src/lib.rs @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! YAML schema types and pure-Rust parsing for `OpenShell` sandbox policies. +//! +//! This crate is intentionally dependency-light: `serde`, `serde_yml`, +//! `serde_json`, and `miette`. It has **no** dependency on `openshell-core`, +//! `tonic`, or `prost`, making it usable from projects that only need YAML +//! parsing and serialization without pulling in gRPC infrastructure. +//! +//! The types here are the **single canonical representation** of the YAML +//! policy schema. Both parsing (YAML→types) and serialization (types→YAML) +//! use these types, ensuring round-trip fidelity. From e2385dd4d4401df7a0bd70a063e618a9ab6099be Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 13:45:51 +0000 Subject: [PATCH 02/16] feat(policy-schema): add YAML serde types (verbatim, names unchanged) Signed-off-by: Philippe Martin --- crates/openshell-policy-schema/src/lib.rs | 202 ++++++++++++++++++++++ 1 file changed, 202 insertions(+) diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs index 47515eb0a..d34a172af 100644 --- a/crates/openshell-policy-schema/src/lib.rs +++ b/crates/openshell-policy-schema/src/lib.rs @@ -11,3 +11,205 @@ //! The types here are the **single canonical representation** of the YAML //! policy schema. Both parsing (YAML→types) and serialization (types→YAML) //! use these types, ensuring round-trip fidelity. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// YAML serde types (canonical — used for both parsing and serialization) +// --------------------------------------------------------------------------- + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct PolicyFile { + pub version: u32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub filesystem_policy: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub landlock: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub process: Option, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub network_policies: BTreeMap, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct FilesystemDef { + #[serde(default)] + pub include_workdir: bool, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub read_only: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub read_write: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct LandlockDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub compatibility: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct ProcessDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub run_as_user: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub run_as_group: String, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct NetworkPolicyRuleDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub name: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub endpoints: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub binaries: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct NetworkEndpointDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub host: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub path: String, + /// Single port (backwards compat). Mutually exclusive with `ports`. + /// Uses `u16` to reject invalid values >65535 at parse time. + #[serde(default, skip_serializing_if = "is_zero")] + pub port: u16, + /// Multiple ports. When non-empty, this endpoint covers all listed ports. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub ports: Vec, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub protocol: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub tls: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub enforcement: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub access: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub rules: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub allowed_ips: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub deny_rules: Vec, + /// When true, percent-encoded `/` (`%2F`) is preserved in path segments + /// rather than rejected by the L7 path canonicalizer. Required for + /// upstreams like GitLab that embed `%2F` in namespaced resource paths. + /// Defaults to false (strict). + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub allow_encoded_slash: bool, + /// When true, client-to-server WebSocket text messages on this REST + /// endpoint rewrite credential placeholders after an allowed 101 upgrade. + /// Defaults to false. + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub websocket_credential_rewrite: bool, + /// When true, supported textual REST request bodies rewrite credential + /// placeholders before forwarding upstream. Defaults to false. + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub request_body_credential_rewrite: bool, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub persisted_queries: String, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub graphql_persisted_queries: BTreeMap, + #[serde(default, skip_serializing_if = "is_zero_u32")] + pub graphql_max_body_bytes: u32, +} + +// Signature dictated by serde's `skip_serializing_if`, which requires `&T`. +#[allow(clippy::trivially_copy_pass_by_ref)] +fn is_zero(v: &u16) -> bool { + *v == 0 +} + +// Signature dictated by serde's `skip_serializing_if`, which requires `&T`. +#[allow(clippy::trivially_copy_pass_by_ref)] +fn is_zero_u32(v: &u32) -> bool { + *v == 0 +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct GraphqlOperationDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub operation_type: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub operation_name: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub fields: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct L7RuleDef { + pub allow: L7AllowDef, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct L7AllowDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub method: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub path: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub command: String, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub query: BTreeMap, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub operation_type: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub operation_name: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub fields: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged)] +pub enum QueryMatcherDef { + Glob(String), + Any(QueryAnyDef), +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct QueryAnyDef { + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub any: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct L7DenyRuleDef { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub method: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub path: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub command: String, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub query: BTreeMap, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub operation_type: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub operation_name: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub fields: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct NetworkBinaryDef { + pub path: String, + /// Deprecated: ignored. Kept for backward compat with existing YAML files. + #[serde(default, skip_serializing)] + #[allow(dead_code)] + pub harness: bool, +} From cc8400821ccf235a1a9806471730169b421a1a69 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 13:48:30 +0000 Subject: [PATCH 03/16] feat(policy-schema): add constants and utility functions Verbatim copy from crates/openshell-policy/src/lib.rs. Only visibility changes: MAX_FILESYSTEM_PATHS, MAX_PATH_LENGTH, and truncate_for_display promoted to pub(crate). Signed-off-by: Philippe Martin --- crates/openshell-policy-schema/src/lib.rs | 62 +++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs index d34a172af..10e428a3c 100644 --- a/crates/openshell-policy-schema/src/lib.rs +++ b/crates/openshell-policy-schema/src/lib.rs @@ -213,3 +213,65 @@ pub struct NetworkBinaryDef { #[allow(dead_code)] pub harness: bool, } + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/// Well-known path where a sandbox container image can ship a policy YAML file. +/// +/// When the gateway provides no policy at sandbox creation time, the sandbox +/// supervisor probes this path before falling back to the restrictive default. +pub const CONTAINER_POLICY_PATH: &str = "/etc/openshell/policy.yaml"; + +/// Legacy path used before the navigator → openshell rename. +/// +/// Existing community sandbox images still ship their policy at this path. +/// The sandbox supervisor tries [`CONTAINER_POLICY_PATH`] first, then falls +/// back to this legacy path for backward compatibility. +pub const LEGACY_CONTAINER_POLICY_PATH: &str = "/etc/navigator/policy.yaml"; + +/// Maximum number of filesystem paths (`read_only` + `read_write` combined). +pub(crate) const MAX_FILESYSTEM_PATHS: usize = 256; + +/// Maximum length of any single filesystem path string. +pub(crate) const MAX_PATH_LENGTH: usize = 4096; + +// --------------------------------------------------------------------------- +// Utility functions +// --------------------------------------------------------------------------- + +/// Normalize a filesystem path by collapsing redundant separators +/// and removing trailing slashes, without requiring the path to exist on disk. +/// +/// This is a lexical normalization only — it does NOT resolve symlinks or +/// check the filesystem. +pub fn normalize_path(path: &str) -> String { + use std::path::Component; + + let p = std::path::Path::new(path); + let mut normalized = std::path::PathBuf::new(); + for component in p.components() { + match component { + Component::Prefix(prefix) => normalized.push(prefix.as_os_str()), + #[allow(clippy::path_buf_push_overwrite)] + Component::RootDir => normalized.push("/"), + Component::CurDir => {} // skip "." + Component::ParentDir => { + // Keep ".." — validation will catch it separately + normalized.push(".."); + } + Component::Normal(c) => normalized.push(c), + } + } + normalized.to_string_lossy().to_string() +} + +/// Truncate a string for safe inclusion in error messages. +pub(crate) fn truncate_for_display(s: &str) -> String { + if s.len() <= 80 { + s.to_string() + } else { + format!("{}...", &s[..77]) + } +} From 110bad2497668321096aad2e8a88bd8120e5676e Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 13:53:12 +0000 Subject: [PATCH 04/16] feat(policy-schema): add PolicyViolation enum and Display impl Verbatim copy from crates/openshell-policy/src/lib.rs lines 681-735. Signed-off-by: Philippe Martin --- crates/openshell-policy-schema/src/lib.rs | 61 +++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs index 10e428a3c..4828a8673 100644 --- a/crates/openshell-policy-schema/src/lib.rs +++ b/crates/openshell-policy-schema/src/lib.rs @@ -267,6 +267,67 @@ pub fn normalize_path(path: &str) -> String { normalized.to_string_lossy().to_string() } +// --------------------------------------------------------------------------- +// Policy safety validation types +// --------------------------------------------------------------------------- + +/// A safety violation found in a sandbox policy. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PolicyViolation { + /// `run_as_user` or `run_as_group` is not "sandbox". + InvalidProcessIdentity { field: &'static str, value: String }, + /// A filesystem path contains `..` components. + PathTraversal { path: String }, + /// A filesystem path is not absolute (does not start with `/`). + RelativePath { path: String }, + /// A read-write filesystem path is overly broad (e.g. `/`). + OverlyBroadPath { path: String }, + /// A filesystem path exceeds the maximum allowed length. + FieldTooLong { path: String, length: usize }, + /// Too many filesystem paths in the policy. + TooManyPaths { count: usize }, + /// A network endpoint uses a TLD wildcard (e.g. `*.com`). + TldWildcard { policy_name: String, host: String }, +} + +impl std::fmt::Display for PolicyViolation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::InvalidProcessIdentity { field, value } => { + write!(f, "{field} must be 'sandbox', got '{value}'") + } + Self::PathTraversal { path } => { + write!(f, "path contains '..' traversal component: {path}") + } + Self::RelativePath { path } => { + write!(f, "path must be absolute (start with '/'): {path}") + } + Self::OverlyBroadPath { path } => { + write!(f, "read-write path is overly broad: {path}") + } + Self::FieldTooLong { path, length } => { + write!( + f, + "path exceeds maximum length ({length} > {MAX_PATH_LENGTH}): {path}" + ) + } + Self::TooManyPaths { count } => { + write!( + f, + "too many filesystem paths ({count} > {MAX_FILESYSTEM_PATHS})" + ) + } + Self::TldWildcard { policy_name, host } => { + write!( + f, + "network policy '{policy_name}': TLD wildcard '{host}' is not allowed; \ + use subdomain wildcards like '*.example.com' instead" + ) + } + } + } +} + /// Truncate a string for safe inclusion in error messages. pub(crate) fn truncate_for_display(s: &str) -> String { if s.len() <= 80 { From 9a435f55e1198db868fedd0b92319edec3d34f5d Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:02:12 +0000 Subject: [PATCH 05/16] feat(policy-schema): add public API functions Verbatim copy of function bodies from crates/openshell-policy/src/lib.rs, adapted to operate on schema types (PolicyFile, FilesystemDef, ProcessDef, etc.) instead of proto types, and using policy.filesystem_policy instead of policy.filesystem. Added Default derive to ProcessDef: the original ensure_sandbox_process_identity uses ProcessPolicy::default(), which proto-generated types provide automatically. ProcessDef needs an explicit derive to achieve the same. Signed-off-by: Philippe Martin --- crates/openshell-policy-schema/src/lib.rs | 229 +++++++++++++++++++++- 1 file changed, 227 insertions(+), 2 deletions(-) diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs index 4828a8673..a2fa377d6 100644 --- a/crates/openshell-policy-schema/src/lib.rs +++ b/crates/openshell-policy-schema/src/lib.rs @@ -13,7 +13,9 @@ //! use these types, ensuring round-trip fidelity. use std::collections::BTreeMap; +use std::path::Path; +use miette::{IntoDiagnostic, Result, WrapErr}; use serde::{Deserialize, Serialize}; // --------------------------------------------------------------------------- @@ -52,7 +54,7 @@ pub struct LandlockDef { pub compatibility: String, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Default, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct ProcessDef { #[serde(default, skip_serializing_if = "String::is_empty")] @@ -249,7 +251,7 @@ pub(crate) const MAX_PATH_LENGTH: usize = 4096; pub fn normalize_path(path: &str) -> String { use std::path::Component; - let p = std::path::Path::new(path); + let p = Path::new(path); let mut normalized = std::path::PathBuf::new(); for component in p.components() { match component { @@ -336,3 +338,226 @@ pub(crate) fn truncate_for_display(s: &str) -> String { format!("{}...", &s[..77]) } } + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Parse a sandbox policy from a YAML string. +pub fn parse_policy(yaml: &str) -> Result { + let raw: PolicyFile = serde_yml::from_str(yaml) + .into_diagnostic() + .wrap_err("failed to parse sandbox policy YAML")?; + Ok(raw) +} + +/// Serialize a sandbox policy to a YAML string. +/// +/// This is the inverse of [`parse_policy`] — the output uses the +/// canonical YAML field names (e.g. `filesystem_policy`, not `filesystem`) +/// and is round-trippable through `parse_policy`. +pub fn serialize_policy(policy: &PolicyFile) -> Result { + serde_yml::to_string(policy) + .into_diagnostic() + .wrap_err("failed to serialize policy to YAML") +} + +/// Convert a sandbox policy into the canonical policy JSON representation. +/// +/// The shape mirrors the YAML schema used by [`serialize_policy`], so +/// automation can use the same documented field names in either format. +pub fn policy_to_json_value(policy: &PolicyFile) -> Result { + serde_json::to_value(policy) + .into_diagnostic() + .wrap_err("failed to serialize policy to JSON") +} + +/// Serialize a sandbox policy to a pretty-printed JSON string. +pub fn serialize_policy_json(policy: &PolicyFile) -> Result { + let json_repr = policy_to_json_value(policy)?; + serde_json::to_string_pretty(&json_repr) + .into_diagnostic() + .wrap_err("failed to serialize policy to JSON") +} + +/// Load a sandbox policy from an explicit source. +/// +/// Resolution order: +/// 1. `cli_path` argument (e.g. from a `--policy` flag) +/// 2. `OPENSHELL_SANDBOX_POLICY` environment variable +/// +/// Returns `Ok(None)` when no policy source is configured, allowing the +/// caller to omit the policy and let the server / sandbox apply its own +/// default. +pub fn load_policy(cli_path: Option<&str>) -> Result> { + let contents = if let Some(p) = cli_path { + let path = Path::new(p); + std::fs::read_to_string(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))? + } else if let Ok(policy_path) = std::env::var("OPENSHELL_SANDBOX_POLICY") { + let path = Path::new(&policy_path); + std::fs::read_to_string(path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))? + } else { + return Ok(None); + }; + parse_policy(&contents).map(Some) +} + +/// Return a restrictive default policy suitable for sandboxes that have no +/// explicit policy configured. +/// +/// This policy grants filesystem access to standard system paths, runs as the +/// `sandbox` user, enables Landlock in best-effort mode, and **blocks all +/// network access** (no network policies, no inference routing). +pub fn restrictive_default() -> PolicyFile { + PolicyFile { + version: 1, + filesystem_policy: Some(FilesystemDef { + include_workdir: true, + read_only: vec![ + "/usr".into(), + "/lib".into(), + "/proc".into(), + "/dev/urandom".into(), + "/app".into(), + "/etc".into(), + "/var/log".into(), + ], + read_write: vec!["/sandbox".into(), "/tmp".into(), "/dev/null".into()], + }), + landlock: Some(LandlockDef { + compatibility: "best_effort".into(), + }), + process: Some(ProcessDef { + run_as_user: "sandbox".into(), + run_as_group: "sandbox".into(), + }), + network_policies: BTreeMap::new(), + } +} + +/// Ensure the policy has `run_as_user: sandbox` and `run_as_group: sandbox`. +/// +/// If the process section is missing, or either field is empty, this fills in +/// the required `"sandbox"` value. Call this before validation so that +/// policies without an explicit process section get the correct default. +pub fn ensure_sandbox_process_identity(policy: &mut PolicyFile) { + let process = policy.process.get_or_insert_with(ProcessDef::default); + if process.run_as_user.is_empty() { + process.run_as_user = "sandbox".into(); + } + if process.run_as_group.is_empty() { + process.run_as_group = "sandbox".into(); + } +} + +/// Validate that a sandbox policy does not contain unsafe content. +/// +/// Returns `Ok(())` if the policy is safe, or `Err(violations)` listing all +/// safety violations found. Callers decide how to handle violations (hard +/// error vs. logged warning). +/// +/// Checks performed: +/// - `run_as_user` / `run_as_group` must be "sandbox" +/// - Filesystem paths must be absolute (start with `/`) +/// - Filesystem paths must not contain `..` components +/// - Read-write paths must not be overly broad (just `/`) +/// - Individual path lengths must not exceed [`MAX_PATH_LENGTH`] +/// - Total path count must not exceed [`MAX_FILESYSTEM_PATHS`] +/// - Network endpoint hosts must not use TLD wildcards (e.g. `*.com`) +pub fn validate_policy(policy: &PolicyFile) -> std::result::Result<(), Vec> { + let mut violations = Vec::new(); + + // Check process identity — must be "sandbox". + // `ensure_sandbox_process_identity` should be called before this to + // fill in defaults; anything other than "sandbox" is rejected. + if let Some(ref process) = policy.process { + if process.run_as_user != "sandbox" { + violations.push(PolicyViolation::InvalidProcessIdentity { + field: "run_as_user", + value: process.run_as_user.clone(), + }); + } + if process.run_as_group != "sandbox" { + violations.push(PolicyViolation::InvalidProcessIdentity { + field: "run_as_group", + value: process.run_as_group.clone(), + }); + } + } + + // Check filesystem paths + if let Some(ref fs) = policy.filesystem_policy { + let total_paths = fs.read_only.len() + fs.read_write.len(); + if total_paths > MAX_FILESYSTEM_PATHS { + violations.push(PolicyViolation::TooManyPaths { count: total_paths }); + } + + for path_str in fs.read_only.iter().chain(fs.read_write.iter()) { + if path_str.len() > MAX_PATH_LENGTH { + violations.push(PolicyViolation::FieldTooLong { + path: truncate_for_display(path_str), + length: path_str.len(), + }); + continue; + } + + let path = Path::new(path_str); + + if !path.has_root() { + violations.push(PolicyViolation::RelativePath { + path: path_str.clone(), + }); + } + + if path + .components() + .any(|c| matches!(c, std::path::Component::ParentDir)) + { + violations.push(PolicyViolation::PathTraversal { + path: path_str.clone(), + }); + } + } + + // Only reject "/" as read-write (overly broad) + for path_str in &fs.read_write { + let normalized = path_str.trim_end_matches('/'); + if normalized.is_empty() { + // Path is "/" or "///" etc. + violations.push(PolicyViolation::OverlyBroadPath { + path: path_str.clone(), + }); + } + } + } + + // Check network policy endpoint hosts for TLD wildcards. + for (key, rule) in &policy.network_policies { + let name = if rule.name.is_empty() { + key.clone() + } else { + rule.name.clone() + }; + for ep in &rule.endpoints { + if ep.host.contains('*') && (ep.host.starts_with("*.") || ep.host.starts_with("**.")) { + let label_count = ep.host.split('.').count(); + if label_count <= 2 { + violations.push(PolicyViolation::TldWildcard { + policy_name: name.clone(), + host: ep.host.clone(), + }); + } + } + } + } + + if violations.is_empty() { + Ok(()) + } else { + Err(violations) + } +} From 4ecefd17b316a70e5c2e5fb4a4917bf236436d51 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:07:33 +0000 Subject: [PATCH 06/16] feat(policy-schema): add tests Ported from crates/openshell-policy/src/lib.rs tests section. Adaptations required vs the original: - parse_sandbox_policy/serialize_sandbox_policy -> parse_policy/serialize_policy - policy.filesystem -> policy.filesystem_policy - FilesystemPolicy/ProcessPolicy -> FilesystemDef/ProcessDef - SandboxPolicy{..., network_policies: HashMap::new()} -> PolicyFile{..., BTreeMap::new()} - rule.allow.as_ref().unwrap() -> rule.allow (L7RuleDef.allow is not an Option) - .query[k].glob/.any proto fields -> QueryMatcherDef::Glob/Any enum matching - NetworkPolicyRule/NetworkEndpoint ..Default::default() construction -> parse_policy() helper (those types have no Default derive) - parse_ports_array/parse_single_port: no port normalization in schema crate (normalization happens in to_proto in openshell-policy) Signed-off-by: Philippe Martin --- crates/openshell-policy-schema/src/lib.rs | 953 ++++++++++++++++++++++ 1 file changed, 953 insertions(+) diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs index a2fa377d6..145863318 100644 --- a/crates/openshell-policy-schema/src/lib.rs +++ b/crates/openshell-policy-schema/src/lib.rs @@ -561,3 +561,956 @@ pub fn validate_policy(policy: &PolicyFile) -> std::result::Result<(), Vec = (0..300).map(|i| format!("/path/{i}")).collect(); + policy.filesystem_policy = Some(FilesystemDef { + include_workdir: true, + read_only: many_paths, + read_write: vec!["/tmp".into()], + }); + let violations = validate_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::TooManyPaths { .. })) + ); + } + + #[test] + fn validate_rejects_path_too_long() { + let mut policy = restrictive_default(); + let long_path = format!("/{}", "a".repeat(5000)); + policy.filesystem_policy = Some(FilesystemDef { + include_workdir: true, + read_only: vec![long_path], + read_write: vec!["/tmp".into()], + }); + let violations = validate_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::FieldTooLong { .. })) + ); + } + + // The original tests constructed NetworkPolicyRule/NetworkEndpoint proto + // structs directly. Here we parse YAML instead, since NetworkPolicyRuleDef + // and NetworkEndpointDef do not implement Default. + #[test] + fn validate_rejects_tld_wildcard() { + let mut policy = restrictive_default(); + policy.network_policies.insert( + "bad".into(), + parse_policy( + "version: 1\nnetwork_policies:\n bad:\n name: bad-rule\n endpoints:\n - host: \"*.com\"\n port: 443\n", + ) + .unwrap() + .network_policies + .remove("bad") + .unwrap(), + ); + let violations = validate_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::TldWildcard { .. })) + ); + } + + #[test] + fn validate_rejects_double_star_tld_wildcard() { + let mut policy = restrictive_default(); + policy.network_policies.insert( + "bad".into(), + parse_policy( + "version: 1\nnetwork_policies:\n bad:\n name: bad-rule\n endpoints:\n - host: \"**.org\"\n port: 443\n", + ) + .unwrap() + .network_policies + .remove("bad") + .unwrap(), + ); + let violations = validate_policy(&policy).unwrap_err(); + assert!( + violations + .iter() + .any(|v| matches!(v, PolicyViolation::TldWildcard { .. })) + ); + } + + #[test] + fn validate_accepts_subdomain_wildcard() { + let mut policy = restrictive_default(); + policy.network_policies.insert( + "ok".into(), + parse_policy( + "version: 1\nnetwork_policies:\n ok:\n name: ok-rule\n endpoints:\n - host: \"*.example.com\"\n port: 443\n", + ) + .unwrap() + .network_policies + .remove("ok") + .unwrap(), + ); + assert!(validate_policy(&policy).is_ok()); + } + + #[test] + fn validate_accepts_explicit_domain() { + let mut policy = restrictive_default(); + policy.network_policies.insert( + "ok".into(), + parse_policy( + "version: 1\nnetwork_policies:\n ok:\n name: ok-rule\n endpoints:\n - host: example.com\n port: 443\n", + ) + .unwrap() + .network_policies + .remove("ok") + .unwrap(), + ); + assert!(validate_policy(&policy).is_ok()); + } + + #[test] + fn normalize_path_collapses_separators() { + assert_eq!(normalize_path("/usr//lib"), "/usr/lib"); + assert_eq!(normalize_path("/usr/./lib"), "/usr/lib"); + assert_eq!(normalize_path("/tmp/"), "/tmp"); + } + + #[test] + fn normalize_path_preserves_parent_dir() { + // normalize_path preserves ".." — validation catches it separately + assert_eq!(normalize_path("/usr/../etc"), "/usr/../etc"); + } + + #[test] + fn policy_violation_display() { + let v = PolicyViolation::InvalidProcessIdentity { + field: "run_as_user", + value: "root".into(), + }; + let s = format!("{v}"); + assert!(s.contains("root")); + assert!(s.contains("run_as_user")); + assert!(s.contains("sandbox")); + } + + // ---- Multi-port and host wildcard tests ---- + + // In the schema crate there is no port normalization (that happens in + // to_proto in openshell-policy). When only `ports` is specified, `port` + // stays 0; when only `port` is specified, `ports` stays empty. + #[test] + fn parse_ports_array() { + let yaml = r" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: api.example.com, ports: [80, 443] } + binaries: + - { path: /usr/bin/curl } +"; + let policy = parse_policy(yaml).expect("should parse"); + let ep = &policy.network_policies["test"].endpoints[0]; + assert_eq!(ep.ports, vec![80, 443]); + assert_eq!(ep.port, 0); // no normalization in schema crate + } + + #[test] + fn parse_single_port() { + let yaml = r" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: api.example.com, port: 443 } + binaries: + - { path: /usr/bin/curl } +"; + let policy = parse_policy(yaml).expect("should parse"); + let ep = &policy.network_policies["test"].endpoints[0]; + assert_eq!(ep.port, 443); + assert!(ep.ports.is_empty()); // no normalization in schema crate + } + + #[test] + fn round_trip_preserves_endpoint_path() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - host: api.example.com + port: 443 + path: "/graphql" + protocol: graphql + rules: + - allow: + operation_type: query + binaries: + - { path: /usr/bin/curl } +"#; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + + let ep1 = &p1.network_policies["test"].endpoints[0]; + let ep2 = &p2.network_policies["test"].endpoints[0]; + assert_eq!(ep1.path, "/graphql"); + assert_eq!(ep1.path, ep2.path); + } + + #[test] + fn round_trip_preserves_multi_port() { + let yaml = r" +version: 1 +network_policies: + test: + name: test + endpoints: + - host: api.example.com + ports: + - 80 + - 443 + binaries: + - { path: /usr/bin/curl } +"; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + + let ep1 = &p1.network_policies["test"].endpoints[0]; + let ep2 = &p2.network_policies["test"].endpoints[0]; + assert_eq!(ep1.ports, ep2.ports); + assert_eq!(ep1.ports, vec![80, 443]); + } + + #[test] + fn serialize_single_port_uses_compact_form() { + let yaml = r" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: api.example.com, port: 443 } + binaries: + - { path: /usr/bin/curl } +"; + let policy = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&policy).expect("serialize failed"); + assert!( + yaml_out.contains("port: 443"), + "Single port should serialize as compact form, got:\n{yaml_out}" + ); + assert!( + !yaml_out.contains("ports:"), + "Single port should not produce ports array, got:\n{yaml_out}" + ); + } + + #[test] + fn parse_wildcard_host() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: "*.example.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let policy = parse_policy(yaml).expect("should parse"); + let ep = &policy.network_policies["test"].endpoints[0]; + assert_eq!(ep.host, "*.example.com"); + } + + #[test] + fn round_trip_preserves_wildcard_host() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - host: "*.example.com" + port: 443 + binaries: + - { path: /usr/bin/curl } +"#; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + assert_eq!( + p1.network_policies["test"].endpoints[0].host, + p2.network_policies["test"].endpoints[0].host + ); + } + + #[test] + fn parse_deny_rules_from_yaml() { + let yaml = r#" +version: 1 +network_policies: + github: + name: github + endpoints: + - host: api.github.com + port: 443 + protocol: rest + access: read-write + deny_rules: + - method: POST + path: "/repos/*/pulls/*/reviews" + - method: PUT + path: "/repos/*/branches/*/protection" + binaries: + - path: /usr/bin/curl +"#; + let policy = parse_policy(yaml).expect("parse failed"); + let ep = &policy.network_policies["github"].endpoints[0]; + assert_eq!(ep.deny_rules.len(), 2); + assert_eq!(ep.deny_rules[0].method, "POST"); + assert_eq!(ep.deny_rules[0].path, "/repos/*/pulls/*/reviews"); + assert_eq!(ep.deny_rules[1].method, "PUT"); + assert_eq!(ep.deny_rules[1].path, "/repos/*/branches/*/protection"); + } + + // In the original, deny_rules[1].query["force"].glob accessed a proto + // L7QueryMatcher field. Here we match on QueryMatcherDef::Glob instead. + #[test] + fn round_trip_preserves_deny_rules() { + let yaml = r#" +version: 1 +network_policies: + github: + name: github + endpoints: + - host: api.github.com + port: 443 + protocol: rest + access: full + deny_rules: + - method: POST + path: "/repos/*/pulls/*/reviews" + - method: DELETE + path: "/repos/*/branches/*/protection" + query: + force: "true" + binaries: + - path: /usr/bin/curl +"#; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + + let ep1 = &p1.network_policies["github"].endpoints[0]; + let ep2 = &p2.network_policies["github"].endpoints[0]; + assert_eq!(ep1.deny_rules.len(), ep2.deny_rules.len()); + assert_eq!(ep2.deny_rules[0].method, "POST"); + assert_eq!(ep2.deny_rules[0].path, "/repos/*/pulls/*/reviews"); + assert_eq!(ep2.deny_rules[1].method, "DELETE"); + assert!( + matches!(&ep2.deny_rules[1].query["force"], QueryMatcherDef::Glob(g) if g == "true") + ); + } + + // In the original, deny_rules[0].query["type"].any accessed a proto field. + // Here we match on QueryMatcherDef::Any instead. + #[test] + fn parse_deny_rules_with_query_any() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - host: api.example.com + port: 443 + protocol: rest + access: full + deny_rules: + - method: POST + path: /action + query: + type: + any: ["admin-*", "root-*"] + binaries: + - path: /usr/bin/curl +"#; + let policy = parse_policy(yaml).expect("parse failed"); + let deny = &policy.network_policies["test"].endpoints[0].deny_rules[0]; + assert!( + matches!(&deny.query["type"], QueryMatcherDef::Any(a) if a.any == vec!["admin-*", "root-*"]) + ); + } + + // In the original, rules[0].allow.as_ref().unwrap() was needed because + // L7Rule.allow is Option in proto. In schema, L7RuleDef.allow + // is L7AllowDef directly (not an Option). + #[test] + fn round_trip_preserves_graphql_policy_fields() { + let yaml = r" +version: 1 +network_policies: + github_graphql: + name: github_graphql + endpoints: + - host: api.github.com + port: 443 + protocol: graphql + enforcement: enforce + persisted_queries: allow_registered + graphql_max_body_bytes: 131072 + graphql_persisted_queries: + abc123: + operation_type: query + operation_name: Viewer + fields: [viewer] + rules: + - allow: + operation_type: query + fields: [viewer, repository] + - allow: + operation_type: mutation + operation_name: Issue* + fields: [createIssue] + deny_rules: + - operation_type: mutation + fields: [deleteRepository] + binaries: + - path: /usr/bin/curl +"; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + + let ep = &p2.network_policies["github_graphql"].endpoints[0]; + assert_eq!(ep.protocol, "graphql"); + assert_eq!(ep.persisted_queries, "allow_registered"); + assert_eq!(ep.graphql_max_body_bytes, 131_072); + assert_eq!( + ep.graphql_persisted_queries["abc123"].operation_type, + "query" + ); + assert_eq!(ep.rules[0].allow.operation_type, "query"); + assert_eq!(ep.rules[1].allow.operation_name, "Issue*"); + assert_eq!(ep.deny_rules[0].operation_type, "mutation"); + assert_eq!(ep.deny_rules[0].fields, vec!["deleteRepository"]); + } + + #[test] + fn round_trip_preserves_websocket_credential_rewrite() { + let yaml = r" +version: 1 +network_policies: + discord_gateway: + name: discord_gateway + endpoints: + - host: gateway.example.com + port: 443 + protocol: rest + enforcement: enforce + access: full + websocket_credential_rewrite: true + binaries: + - path: /usr/bin/node +"; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + + let ep = &p2.network_policies["discord_gateway"].endpoints[0]; + assert_eq!(ep.protocol, "rest"); + assert!(ep.websocket_credential_rewrite); + assert!(yaml_out.contains("websocket_credential_rewrite: true")); + } + + #[test] + fn round_trip_preserves_request_body_credential_rewrite() { + let yaml = r" +version: 1 +network_policies: + slack_api: + name: slack_api + endpoints: + - host: slack.com + port: 443 + protocol: rest + enforcement: enforce + access: read-write + request_body_credential_rewrite: true + binaries: + - path: /usr/bin/node +"; + let p1 = parse_policy(yaml).expect("parse failed"); + let yaml_out = serialize_policy(&p1).expect("serialize failed"); + let p2 = parse_policy(&yaml_out).expect("re-parse failed"); + + let ep = &p2.network_policies["slack_api"].endpoints[0]; + assert_eq!(ep.protocol, "rest"); + assert!(ep.request_body_credential_rewrite); + assert!(yaml_out.contains("request_body_credential_rewrite: true")); + } + + #[test] + fn websocket_credential_rewrite_defaults_false() { + let yaml = r" +version: 1 +network_policies: + gateway: + endpoints: + - host: gateway.example.com + port: 443 + protocol: rest + access: full + binaries: + - path: /usr/bin/node +"; + let policy = parse_policy(yaml).expect("parse failed"); + let ep = &policy.network_policies["gateway"].endpoints[0]; + assert!(!ep.websocket_credential_rewrite); + assert!(!ep.request_body_credential_rewrite); + } + + #[test] + fn parse_rejects_unknown_fields_in_deny_rule() { + let yaml = r" +version: 1 +network_policies: + test: + endpoints: + - host: example.com + port: 443 + deny_rules: + - method: POST + path: /foo + bogus: true +"; + assert!(parse_policy(yaml).is_err()); + } + + #[test] + fn rejects_port_above_65535() { + let yaml = r" +version: 1 +network_policies: + test: + endpoints: + - host: example.com + port: 70000 +"; + assert!( + parse_policy(yaml).is_err(), + "port >65535 should fail to parse" + ); + } +} From f0c2affe787b4d773083f0e13617880e3d2a678a Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:11:17 +0000 Subject: [PATCH 07/16] feat(policy): add openshell-policy-schema dependency Signed-off-by: Philippe Martin --- Cargo.lock | 1 + crates/openshell-policy/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index dd306ff78..47df4c8ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3591,6 +3591,7 @@ version = "0.0.0" dependencies = [ "miette", "openshell-core", + "openshell-policy-schema", "serde", "serde_json", "serde_yml", diff --git a/crates/openshell-policy/Cargo.toml b/crates/openshell-policy/Cargo.toml index 8936b85be..394fad714 100644 --- a/crates/openshell-policy/Cargo.toml +++ b/crates/openshell-policy/Cargo.toml @@ -12,6 +12,7 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } +openshell-policy-schema = { path = "../openshell-policy-schema" } serde = { workspace = true } serde_json = { workspace = true } serde_yml = { workspace = true } From 2dcd7bff20e820f0085e09d59b02998afa42cb1f Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:15:06 +0000 Subject: [PATCH 08/16] style(policy-schema): apply rustfmt formatting Signed-off-by: Philippe Martin --- crates/openshell-policy-schema/src/lib.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/openshell-policy-schema/src/lib.rs b/crates/openshell-policy-schema/src/lib.rs index 145863318..aad8f9782 100644 --- a/crates/openshell-policy-schema/src/lib.rs +++ b/crates/openshell-policy-schema/src/lib.rs @@ -671,7 +671,9 @@ network_policies: #[test] fn restrictive_default_has_filesystem_policy() { let policy = restrictive_default(); - let fs = policy.filesystem_policy.expect("must have filesystem policy"); + let fs = policy + .filesystem_policy + .expect("must have filesystem policy"); assert!(fs.include_workdir); assert!( fs.read_only.iter().any(|p| p == "/usr"), @@ -765,7 +767,9 @@ network_policies: - path: /usr/bin/curl "#; let policy = parse_policy(yaml).expect("parse failed"); - let query = &policy.network_policies["query_test"].endpoints[0].rules[0].allow.query; + let query = &policy.network_policies["query_test"].endpoints[0].rules[0] + .allow + .query; assert!( matches!(&query["slug"], QueryMatcherDef::Glob(g) if g == "my-*"), "expected Glob(my-*)" @@ -777,8 +781,9 @@ network_policies: let yaml_out = serialize_policy(&policy).expect("serialize failed"); let policy2 = parse_policy(&yaml_out).expect("re-parse failed"); - let query2 = - &policy2.network_policies["query_test"].endpoints[0].rules[0].allow.query; + let query2 = &policy2.network_policies["query_test"].endpoints[0].rules[0] + .allow + .query; assert!(matches!(&query2["slug"], QueryMatcherDef::Glob(g) if g == "my-*")); assert!( matches!(&query2["tag"], QueryMatcherDef::Any(a) if a.any == vec!["foo-*", "bar-*"]) From e6fb0eda070dfc54b6063394b74045962ddb0fe2 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:15:10 +0000 Subject: [PATCH 09/16] refactor(policy): import YAML serde types from openshell-policy-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the private serde type definitions (PolicyFile, FilesystemDef, LandlockDef, ProcessDef, NetworkPolicyRuleDef, NetworkEndpointDef, GraphqlOperationDef, L7RuleDef, L7AllowDef, QueryMatcherDef, QueryAnyDef, L7DenyRuleDef, NetworkBinaryDef, is_zero, is_zero_u32) and import them from openshell-policy-schema instead. to_proto, from_proto, and all public functions are unchanged — they reference the same type names, now resolved from the schema crate. Signed-off-by: Philippe Martin --- crates/openshell-policy/src/lib.rs | 206 +---------------------------- 1 file changed, 6 insertions(+), 200 deletions(-) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 26c8fc9d3..7b1ce5c38 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -12,7 +12,7 @@ mod compose; mod merge; -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use std::fmt; use std::path::Path; @@ -22,7 +22,11 @@ use openshell_core::proto::{ LandlockPolicy, NetworkBinary, NetworkEndpoint, NetworkPolicyRule, ProcessPolicy, SandboxPolicy, }; -use serde::{Deserialize, Serialize}; +use openshell_policy_schema::{ + FilesystemDef, GraphqlOperationDef, L7AllowDef, L7DenyRuleDef, L7RuleDef, LandlockDef, + NetworkBinaryDef, NetworkEndpointDef, NetworkPolicyRuleDef, PolicyFile, ProcessDef, + QueryAnyDef, QueryMatcherDef, +}; pub use compose::{ProviderPolicyLayer, compose_effective_policy, provider_rule_name}; pub use merge::{ @@ -30,204 +34,6 @@ pub use merge::{ merge_policy, policy_covers_rule, }; -// --------------------------------------------------------------------------- -// YAML serde types (canonical — used for both parsing and serialization) -// --------------------------------------------------------------------------- - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct PolicyFile { - version: u32, - #[serde(default, skip_serializing_if = "Option::is_none")] - filesystem_policy: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - landlock: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - process: Option, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] - network_policies: BTreeMap, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct FilesystemDef { - #[serde(default)] - include_workdir: bool, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - read_only: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - read_write: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct LandlockDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - compatibility: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct ProcessDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - run_as_user: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - run_as_group: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct NetworkPolicyRuleDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - name: String, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - endpoints: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - binaries: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct NetworkEndpointDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - host: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - path: String, - /// Single port (backwards compat). Mutually exclusive with `ports`. - /// Uses `u16` to reject invalid values >65535 at parse time. - #[serde(default, skip_serializing_if = "is_zero")] - port: u16, - /// Multiple ports. When non-empty, this endpoint covers all listed ports. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - ports: Vec, - #[serde(default, skip_serializing_if = "String::is_empty")] - protocol: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - tls: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - enforcement: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - access: String, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - rules: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - allowed_ips: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - deny_rules: Vec, - /// When true, percent-encoded `/` (`%2F`) is preserved in path segments - /// rather than rejected by the L7 path canonicalizer. Required for - /// upstreams like GitLab that embed `%2F` in namespaced resource paths. - /// Defaults to false (strict). - #[serde(default, skip_serializing_if = "std::ops::Not::not")] - allow_encoded_slash: bool, - /// When true, client-to-server WebSocket text messages on this REST - /// endpoint rewrite credential placeholders after an allowed 101 upgrade. - /// Defaults to false. - #[serde(default, skip_serializing_if = "std::ops::Not::not")] - websocket_credential_rewrite: bool, - /// When true, supported textual REST request bodies rewrite credential - /// placeholders before forwarding upstream. Defaults to false. - #[serde(default, skip_serializing_if = "std::ops::Not::not")] - request_body_credential_rewrite: bool, - #[serde(default, skip_serializing_if = "String::is_empty")] - persisted_queries: String, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] - graphql_persisted_queries: BTreeMap, - #[serde(default, skip_serializing_if = "is_zero_u32")] - graphql_max_body_bytes: u32, -} - -// Signature dictated by serde's `skip_serializing_if`, which requires `&T`. -#[allow(clippy::trivially_copy_pass_by_ref)] -fn is_zero(v: &u16) -> bool { - *v == 0 -} - -// Signature dictated by serde's `skip_serializing_if`, which requires `&T`. -#[allow(clippy::trivially_copy_pass_by_ref)] -fn is_zero_u32(v: &u32) -> bool { - *v == 0 -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct GraphqlOperationDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - operation_type: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - operation_name: String, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - fields: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct L7RuleDef { - allow: L7AllowDef, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct L7AllowDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - method: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - path: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - command: String, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] - query: BTreeMap, - #[serde(default, skip_serializing_if = "String::is_empty")] - operation_type: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - operation_name: String, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - fields: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(untagged)] -enum QueryMatcherDef { - Glob(String), - Any(QueryAnyDef), -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct QueryAnyDef { - #[serde(default, skip_serializing_if = "Vec::is_empty")] - any: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct L7DenyRuleDef { - #[serde(default, skip_serializing_if = "String::is_empty")] - method: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - path: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - command: String, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] - query: BTreeMap, - #[serde(default, skip_serializing_if = "String::is_empty")] - operation_type: String, - #[serde(default, skip_serializing_if = "String::is_empty")] - operation_name: String, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - fields: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -struct NetworkBinaryDef { - path: String, - /// Deprecated: ignored. Kept for backward compat with existing YAML files. - #[serde(default, skip_serializing)] - #[allow(dead_code)] - harness: bool, -} - // --------------------------------------------------------------------------- // YAML → proto conversion // --------------------------------------------------------------------------- From e3b060ae93199a34071b50323b21620295cd4200 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:24:26 +0000 Subject: [PATCH 10/16] refactor(policy): delegate YAML functions to openshell-policy-schema parse_sandbox_policy, serialize_sandbox_policy, sandbox_policy_to_json_value, serialize_sandbox_policy_json, load_sandbox_policy, and restrictive_default_policy now delegate to the schema crate and apply to_proto / from_proto for the proto conversion layer. No behaviour change. Signed-off-by: Philippe Martin --- crates/openshell-policy/src/lib.rs | 62 ++++-------------------------- 1 file changed, 7 insertions(+), 55 deletions(-) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 7b1ce5c38..a31f0287e 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::fmt; use std::path::Path; -use miette::{IntoDiagnostic, Result, WrapErr}; +use miette::Result; use openshell_core::proto::{ FilesystemPolicy, GraphqlOperation, L7Allow, L7DenyRule, L7QueryMatcher, L7Rule, LandlockPolicy, NetworkBinary, NetworkEndpoint, NetworkPolicyRule, ProcessPolicy, @@ -349,10 +349,7 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { /// Parse a sandbox policy from a YAML string. pub fn parse_sandbox_policy(yaml: &str) -> Result { - let raw: PolicyFile = serde_yml::from_str(yaml) - .into_diagnostic() - .wrap_err("failed to parse sandbox policy YAML")?; - Ok(to_proto(raw)) + openshell_policy_schema::parse_policy(yaml).map(to_proto) } /// Serialize a proto sandbox policy to a YAML string. @@ -361,10 +358,7 @@ pub fn parse_sandbox_policy(yaml: &str) -> Result { /// canonical YAML field names (e.g. `filesystem_policy`, not `filesystem`) /// and is round-trippable through `parse_sandbox_policy`. pub fn serialize_sandbox_policy(policy: &SandboxPolicy) -> Result { - let yaml_repr = from_proto(policy); - serde_yml::to_string(&yaml_repr) - .into_diagnostic() - .wrap_err("failed to serialize policy to YAML") + openshell_policy_schema::serialize_policy(&from_proto(policy)) } /// Convert a proto sandbox policy into the canonical policy JSON representation. @@ -372,18 +366,12 @@ pub fn serialize_sandbox_policy(policy: &SandboxPolicy) -> Result { /// The shape mirrors the YAML schema used by [`serialize_sandbox_policy`], so /// automation can use the same documented field names in either format. pub fn sandbox_policy_to_json_value(policy: &SandboxPolicy) -> Result { - let json_repr = from_proto(policy); - serde_json::to_value(&json_repr) - .into_diagnostic() - .wrap_err("failed to serialize policy to JSON") + openshell_policy_schema::policy_to_json_value(&from_proto(policy)) } /// Serialize a proto sandbox policy to a pretty-printed JSON string. pub fn serialize_sandbox_policy_json(policy: &SandboxPolicy) -> Result { - let json_repr = sandbox_policy_to_json_value(policy)?; - serde_json::to_string_pretty(&json_repr) - .into_diagnostic() - .wrap_err("failed to serialize policy to JSON") + openshell_policy_schema::serialize_policy_json(&from_proto(policy)) } /// Load a sandbox policy from an explicit source. @@ -396,20 +384,7 @@ pub fn serialize_sandbox_policy_json(policy: &SandboxPolicy) -> Result { /// caller to omit the policy and let the server / sandbox apply its own /// default. pub fn load_sandbox_policy(cli_path: Option<&str>) -> Result> { - let contents = if let Some(p) = cli_path { - let path = Path::new(p); - std::fs::read_to_string(path) - .into_diagnostic() - .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))? - } else if let Ok(policy_path) = std::env::var("OPENSHELL_SANDBOX_POLICY") { - let path = Path::new(&policy_path); - std::fs::read_to_string(path) - .into_diagnostic() - .wrap_err_with(|| format!("failed to read sandbox policy from {}", path.display()))? - } else { - return Ok(None); - }; - parse_sandbox_policy(&contents).map(Some) + Ok(openshell_policy_schema::load_policy(cli_path)?.map(to_proto)) } /// Well-known path where a sandbox container image can ship a policy YAML file. @@ -432,30 +407,7 @@ pub const LEGACY_CONTAINER_POLICY_PATH: &str = "/etc/navigator/policy.yaml"; /// `sandbox` user, enables Landlock in best-effort mode, and **blocks all /// network access** (no network policies, no inference routing). pub fn restrictive_default_policy() -> SandboxPolicy { - SandboxPolicy { - version: 1, - filesystem: Some(FilesystemPolicy { - include_workdir: true, - read_only: vec![ - "/usr".into(), - "/lib".into(), - "/proc".into(), - "/dev/urandom".into(), - "/app".into(), - "/etc".into(), - "/var/log".into(), - ], - read_write: vec!["/sandbox".into(), "/tmp".into(), "/dev/null".into()], - }), - landlock: Some(LandlockPolicy { - compatibility: "best_effort".into(), - }), - process: Some(ProcessPolicy { - run_as_user: "sandbox".into(), - run_as_group: "sandbox".into(), - }), - network_policies: HashMap::new(), - } + to_proto(openshell_policy_schema::restrictive_default()) } /// Ensure the policy has `run_as_user: sandbox` and `run_as_group: sandbox`. From e5931e89bd8cb0961ac0b8d3263f9d13bffb4885 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:26:03 +0000 Subject: [PATCH 11/16] refactor(policy): delegate normalize_path to openshell-policy-schema truncate_for_display stays in openshell-policy as a private helper for validate_sandbox_policy, since the schema crate exposes it as pub(crate) only. Signed-off-by: Philippe Martin --- crates/openshell-policy/src/lib.rs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index a31f0287e..3dce516c7 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -617,24 +617,7 @@ fn truncate_for_display(s: &str) -> String { /// This is a lexical normalization only — it does NOT resolve symlinks or /// check the filesystem. pub fn normalize_path(path: &str) -> String { - use std::path::Component; - - let p = Path::new(path); - let mut normalized = std::path::PathBuf::new(); - for component in p.components() { - match component { - Component::Prefix(prefix) => normalized.push(prefix.as_os_str()), - #[allow(clippy::path_buf_push_overwrite)] - Component::RootDir => normalized.push("/"), - Component::CurDir => {} // skip "." - Component::ParentDir => { - // Keep ".." — validation will catch it separately - normalized.push(".."); - } - Component::Normal(c) => normalized.push(c), - } - } - normalized.to_string_lossy().to_string() + openshell_policy_schema::normalize_path(path) } // --------------------------------------------------------------------------- From f702cff2f5d748966785f8c6147e9035ee9c2ec6 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:27:03 +0000 Subject: [PATCH 12/16] refactor(policy): re-export PolicyViolation from schema, drop serde/serde_yml deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the local PolicyViolation definition and re-export it from openshell-policy-schema. Callers importing openshell_policy::PolicyViolation are unaffected — the re-export preserves the public path. serde and serde_yml are no longer direct dependencies of openshell-policy since all YAML logic is now handled by openshell-policy-schema. Signed-off-by: Philippe Martin --- Cargo.lock | 2 - crates/openshell-policy/Cargo.toml | 2 - crates/openshell-policy/src/lib.rs | 60 +----------------------------- 3 files changed, 2 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47df4c8ef..d71c880a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3592,9 +3592,7 @@ dependencies = [ "miette", "openshell-core", "openshell-policy-schema", - "serde", "serde_json", - "serde_yml", ] [[package]] diff --git a/crates/openshell-policy/Cargo.toml b/crates/openshell-policy/Cargo.toml index 394fad714..6eca2d9d5 100644 --- a/crates/openshell-policy/Cargo.toml +++ b/crates/openshell-policy/Cargo.toml @@ -13,9 +13,7 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } openshell-policy-schema = { path = "../openshell-policy-schema" } -serde = { workspace = true } serde_json = { workspace = true } -serde_yml = { workspace = true } miette = { workspace = true } [lints] diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 3dce516c7..aa8367cc4 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -13,7 +13,6 @@ mod compose; mod merge; use std::collections::HashMap; -use std::fmt; use std::path::Path; use miette::Result; @@ -429,69 +428,14 @@ pub fn ensure_sandbox_process_identity(policy: &mut SandboxPolicy) { // Policy safety validation // --------------------------------------------------------------------------- +pub use openshell_policy_schema::PolicyViolation; + /// Maximum number of filesystem paths (`read_only` + `read_write` combined). const MAX_FILESYSTEM_PATHS: usize = 256; /// Maximum length of any single filesystem path string. const MAX_PATH_LENGTH: usize = 4096; -/// A safety violation found in a sandbox policy. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum PolicyViolation { - /// `run_as_user` or `run_as_group` is not "sandbox". - InvalidProcessIdentity { field: &'static str, value: String }, - /// A filesystem path contains `..` components. - PathTraversal { path: String }, - /// A filesystem path is not absolute (does not start with `/`). - RelativePath { path: String }, - /// A read-write filesystem path is overly broad (e.g. `/`). - OverlyBroadPath { path: String }, - /// A filesystem path exceeds the maximum allowed length. - FieldTooLong { path: String, length: usize }, - /// Too many filesystem paths in the policy. - TooManyPaths { count: usize }, - /// A network endpoint uses a TLD wildcard (e.g. `*.com`). - TldWildcard { policy_name: String, host: String }, -} - -impl fmt::Display for PolicyViolation { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::InvalidProcessIdentity { field, value } => { - write!(f, "{field} must be 'sandbox', got '{value}'") - } - Self::PathTraversal { path } => { - write!(f, "path contains '..' traversal component: {path}") - } - Self::RelativePath { path } => { - write!(f, "path must be absolute (start with '/'): {path}") - } - Self::OverlyBroadPath { path } => { - write!(f, "read-write path is overly broad: {path}") - } - Self::FieldTooLong { path, length } => { - write!( - f, - "path exceeds maximum length ({length} > {MAX_PATH_LENGTH}): {path}" - ) - } - Self::TooManyPaths { count } => { - write!( - f, - "too many filesystem paths ({count} > {MAX_FILESYSTEM_PATHS})" - ) - } - Self::TldWildcard { policy_name, host } => { - write!( - f, - "network policy '{policy_name}': TLD wildcard '{host}' is not allowed; \ - use subdomain wildcards like '*.example.com' instead" - ) - } - } - } -} - /// Validate that a sandbox policy does not contain unsafe content. /// /// Returns `Ok(())` if the policy is safe, or `Err(violations)` listing all From 533d0f4b64d54385dcf1004b1ef87a45e2d79fba Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:30:29 +0000 Subject: [PATCH 13/16] refactor(policy): re-export path constants from openshell-policy-schema Signed-off-by: Philippe Martin --- crates/openshell-policy/src/lib.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index aa8367cc4..8910ae265 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -386,18 +386,7 @@ pub fn load_sandbox_policy(cli_path: Option<&str>) -> Result Date: Mon, 1 Jun 2026 14:31:12 +0000 Subject: [PATCH 14/16] fix(policy): move HashMap import into test module Signed-off-by: Philippe Martin --- crates/openshell-policy/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 8910ae265..124902cd1 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -12,7 +12,6 @@ mod compose; mod merge; -use std::collections::HashMap; use std::path::Path; use miette::Result; @@ -559,6 +558,8 @@ pub fn normalize_path(path: &str) -> String { #[cfg(test)] mod tests { + use std::collections::HashMap; + use super::*; /// Verify that the serialized YAML uses `filesystem_policy` (not From 1dc65a1e6dc7fb38dc97d5a69c6d6e95d0b8e8c7 Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:31:42 +0000 Subject: [PATCH 15/16] docs(policy): update module doc comment to reflect schema crate split Signed-off-by: Philippe Martin --- crates/openshell-policy/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 124902cd1..a930541ca 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -4,10 +4,8 @@ //! Shared sandbox policy parsing and defaults for `OpenShell`. //! //! Provides bidirectional YAML↔proto conversion for sandbox policies. -//! -//! The serde types here are the **single canonical representation** of the YAML -//! policy schema. Both parsing (YAML→proto) and serialization (proto→YAML) use -//! these types, ensuring round-trip fidelity. +//! YAML schema types and pure parsing logic live in `openshell-policy-schema`; +//! this crate adds the proto conversion layer on top. mod compose; mod merge; From 5c6717756dc344691f820d2a70df961d25624dea Mon Sep 17 00:00:00 2001 From: Philippe Martin Date: Mon, 1 Jun 2026 14:35:23 +0000 Subject: [PATCH 16/16] docs(agents): add openshell-policy-schema to architecture table Signed-off-by: Philippe Martin --- AGENTS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AGENTS.md b/AGENTS.md index 2d5f293fc..d598f3c42 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -33,6 +33,7 @@ These pipelines connect skills into end-to-end workflows. Individual skill files | `crates/openshell-server/` | Gateway server | Control-plane API, sandbox lifecycle, auth boundary | | `crates/openshell-sandbox/` | Sandbox runtime | Container supervision, policy-enforced egress routing | | `crates/openshell-policy/` | Policy engine | Filesystem, network, process, and inference constraints | +| `crates/openshell-policy-schema/` | Policy YAML schema | Dependency-light serde types and pure YAML parsing — no proto, no gRPC | | `crates/openshell-router/` | Privacy router | Privacy-aware LLM routing | | `crates/openshell-bootstrap/` | Gateway metadata | Gateway registration metadata, auth token storage, mTLS bundle storage | | `crates/openshell-ocsf/` | OCSF logging | OCSF v1.7.0 event types, builders, shorthand/JSONL formatters, tracing layers |