feat: per-script Rhai sandbox overrides with admin ceiling
Adds optional per-script overrides for the six Rhai sandbox knobs
(max_operations, max_string_size, max_array_size, max_map_size,
max_call_levels, max_expr_depth). The executor merges its defaults
with each script's overrides on every call; the manager validates
overrides against an admin-set ceiling at write time, so the
executor trusts whatever is stored.
Storage uses a JSONB column on the existing scripts table rather than
six new columns: this lets future knobs land as code-only changes,
keeps the sparse common case (most scripts override nothing) cheap
to store and serialize, and matches how the manager and executor
pass the config across the wire.
* 0002_sandbox.sql — ALTER TABLE scripts ADD COLUMN sandbox
JSONB NOT NULL DEFAULT '{}'
* shared::ScriptSandbox — six Option<u64> fields with
deny_unknown_fields so typos surface as 422
* Script.sandbox + ExecRequest.sandbox_overrides — typed end
to end; cluster mode just serializes the same struct
* executor-core::Limits::with_overrides — field-by-field
replacement; tests cover the override actually tightening
the live engine
* manager-core::SandboxCeiling — built-in conservative
defaults (10M ops, 1 MiB strings, 100k array/map, 128
call/expr depth); env vars override per knob, invalid
values warn-and-skip rather than blocking boot
* manager-core admin API — POST/PUT accept `sandbox`; values
above the ceiling return 422 with the specific field +
requested + ceiling; absent or `{}` keeps platform defaults
* picloud all-in-one — wires SandboxCeiling::from_env() into
AdminState
* memory_limit_mb stays in the schema, marked v1.3+ advisory
(no enforcement until OS-level isolation lands with
cluster-mode executors)
Verified live through Caddy:
* /version reports schema 2, product 0.3.0
* Script with max_operations: 500 → 507 on a 10k-iteration loop
* Same script after PUT raising to 1M → succeeds, returns 10000
* POST with max_operations: 1_000_000_000 → 422 (exceeds ceiling)
Tests:
* 13 executor-core unit tests (added 2 for override semantics)
* 20 integration tests (added 6 for sandbox CRUD + ceiling +
unknown-field rejection + executor honoring overrides)
* default cargo test --workspace stays green (integration tests
remain #[ignore]'d until DATABASE_URL is set)
Bumps:
* schema 1 → 2
* product 0.2.0 → 0.3.0
* SDK unchanged (scripts see nothing new)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,12 +11,15 @@ use axum::{
|
||||
routing::get,
|
||||
Json, Router,
|
||||
};
|
||||
use picloud_shared::{ExecutionLog, Script, ScriptId, ScriptValidator, ValidationError};
|
||||
use picloud_shared::{
|
||||
ExecutionLog, Script, ScriptId, ScriptSandbox, ScriptValidator, ValidationError,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::repo::{
|
||||
ExecutionLogRepository, NewScript, ScriptPatch, ScriptRepository, ScriptRepositoryError,
|
||||
};
|
||||
use crate::sandbox::{CeilingError, SandboxCeiling};
|
||||
|
||||
/// State shared by control-plane handlers. Separates concerns so the
|
||||
/// manager can validate at upload time without depending on the
|
||||
@@ -25,6 +28,7 @@ pub struct AdminState<R, L> {
|
||||
pub repo: Arc<R>,
|
||||
pub logs: Arc<L>,
|
||||
pub validator: Arc<dyn ScriptValidator>,
|
||||
pub sandbox_ceiling: SandboxCeiling,
|
||||
}
|
||||
|
||||
impl<R, L> Clone for AdminState<R, L> {
|
||||
@@ -33,6 +37,7 @@ impl<R, L> Clone for AdminState<R, L> {
|
||||
repo: self.repo.clone(),
|
||||
logs: self.logs.clone(),
|
||||
validator: self.validator.clone(),
|
||||
sandbox_ceiling: self.sandbox_ceiling,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -70,6 +75,11 @@ pub struct CreateScriptRequest {
|
||||
pub source: String,
|
||||
pub timeout_seconds: Option<i32>,
|
||||
pub memory_limit_mb: Option<i32>,
|
||||
/// Sandbox overrides; absent or empty `{}` means "use platform
|
||||
/// defaults". Each non-null field is checked against the admin
|
||||
/// ceiling at write time.
|
||||
#[serde(default)]
|
||||
pub sandbox: ScriptSandbox,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -83,6 +93,10 @@ pub struct UpdateScriptRequest {
|
||||
pub source: Option<String>,
|
||||
pub timeout_seconds: Option<i32>,
|
||||
pub memory_limit_mb: Option<i32>,
|
||||
/// `Some(sandbox)` replaces the stored overrides wholesale (use
|
||||
/// `Some(ScriptSandbox::empty())` to clear them). Absent leaves
|
||||
/// the stored value unchanged.
|
||||
pub sandbox: Option<ScriptSandbox>,
|
||||
}
|
||||
|
||||
#[allow(clippy::option_option)]
|
||||
@@ -120,6 +134,7 @@ async fn create_script<R: ScriptRepository, L: ExecutionLogRepository>(
|
||||
Json(input): Json<CreateScriptRequest>,
|
||||
) -> Result<(StatusCode, Json<Script>), ApiError> {
|
||||
state.validator.validate(&input.source)?;
|
||||
state.sandbox_ceiling.check(&input.sandbox)?;
|
||||
let created = state
|
||||
.repo
|
||||
.create(NewScript {
|
||||
@@ -128,6 +143,11 @@ async fn create_script<R: ScriptRepository, L: ExecutionLogRepository>(
|
||||
source: input.source,
|
||||
timeout_seconds: input.timeout_seconds,
|
||||
memory_limit_mb: input.memory_limit_mb,
|
||||
sandbox: if input.sandbox.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(input.sandbox)
|
||||
},
|
||||
})
|
||||
.await?;
|
||||
Ok((StatusCode::CREATED, Json(created)))
|
||||
@@ -141,6 +161,9 @@ async fn update_script<R: ScriptRepository, L: ExecutionLogRepository>(
|
||||
if let Some(src) = input.source.as_deref() {
|
||||
state.validator.validate(src)?;
|
||||
}
|
||||
if let Some(sb) = input.sandbox.as_ref() {
|
||||
state.sandbox_ceiling.check(sb)?;
|
||||
}
|
||||
let updated = state
|
||||
.repo
|
||||
.update(
|
||||
@@ -151,6 +174,7 @@ async fn update_script<R: ScriptRepository, L: ExecutionLogRepository>(
|
||||
source: input.source,
|
||||
timeout_seconds: input.timeout_seconds,
|
||||
memory_limit_mb: input.memory_limit_mb,
|
||||
sandbox: input.sandbox,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
@@ -205,6 +229,9 @@ pub enum ApiError {
|
||||
#[error("invalid script: {0}")]
|
||||
Invalid(#[from] ValidationError),
|
||||
|
||||
#[error("{0}")]
|
||||
Ceiling(#[from] CeilingError),
|
||||
|
||||
#[error("repository error: {0}")]
|
||||
Repo(#[from] ScriptRepositoryError),
|
||||
}
|
||||
@@ -214,7 +241,9 @@ impl IntoResponse for ApiError {
|
||||
let (status, message) = match &self {
|
||||
Self::NotFound(_) => (StatusCode::NOT_FOUND, self.to_string()),
|
||||
Self::Conflict(_) => (StatusCode::CONFLICT, self.to_string()),
|
||||
Self::Invalid(_) => (StatusCode::UNPROCESSABLE_ENTITY, self.to_string()),
|
||||
Self::Invalid(_) | Self::Ceiling(_) => {
|
||||
(StatusCode::UNPROCESSABLE_ENTITY, self.to_string())
|
||||
}
|
||||
Self::Repo(ScriptRepositoryError::NotFound(_)) => {
|
||||
(StatusCode::NOT_FOUND, self.to_string())
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ pub mod api;
|
||||
pub mod log_sink;
|
||||
pub mod migrations;
|
||||
pub mod repo;
|
||||
pub mod sandbox;
|
||||
pub mod scheduler;
|
||||
|
||||
pub use api::{admin_router, AdminState};
|
||||
@@ -16,3 +17,4 @@ pub use repo::{
|
||||
ExecutionLogRepository, NewScript, PostgresExecutionLogRepository, PostgresScriptRepository,
|
||||
RepoResolver, ScriptPatch, ScriptRepository, ScriptRepositoryError,
|
||||
};
|
||||
pub use sandbox::{CeilingError, SandboxCeiling};
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use picloud_orchestrator_core::{ResolverError, ScriptResolver};
|
||||
use picloud_shared::{ExecutionLog, ExecutionStatus, RequestId, Script, ScriptId};
|
||||
use picloud_shared::{ExecutionLog, ExecutionStatus, RequestId, Script, ScriptId, ScriptSandbox};
|
||||
use sqlx::PgPool;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@@ -40,6 +40,9 @@ pub struct NewScript {
|
||||
pub source: String,
|
||||
pub timeout_seconds: Option<i32>,
|
||||
pub memory_limit_mb: Option<i32>,
|
||||
/// Sandbox overrides; `None` means store an empty object (use
|
||||
/// platform defaults at exec time).
|
||||
pub sandbox: Option<ScriptSandbox>,
|
||||
}
|
||||
|
||||
/// Inbound shape for update. `None` fields are left untouched.
|
||||
@@ -50,6 +53,9 @@ pub struct ScriptPatch {
|
||||
pub source: Option<String>,
|
||||
pub timeout_seconds: Option<i32>,
|
||||
pub memory_limit_mb: Option<i32>,
|
||||
/// `Some(sandbox)` replaces the stored overrides wholesale (including
|
||||
/// `Some(empty)` to clear them); `None` leaves them untouched.
|
||||
pub sandbox: Option<ScriptSandbox>,
|
||||
}
|
||||
|
||||
pub struct PostgresScriptRepository {
|
||||
@@ -73,7 +79,7 @@ impl ScriptRepository for PostgresScriptRepository {
|
||||
async fn get(&self, id: ScriptId) -> Result<Option<Script>, ScriptRepositoryError> {
|
||||
let row = sqlx::query_as::<_, ScriptRow>(
|
||||
"SELECT id, name, description, version, source, \
|
||||
timeout_seconds, memory_limit_mb, created_at, updated_at \
|
||||
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at \
|
||||
FROM scripts WHERE id = $1",
|
||||
)
|
||||
.bind(id.into_inner())
|
||||
@@ -85,7 +91,7 @@ impl ScriptRepository for PostgresScriptRepository {
|
||||
async fn list(&self) -> Result<Vec<Script>, ScriptRepositoryError> {
|
||||
let rows = sqlx::query_as::<_, ScriptRow>(
|
||||
"SELECT id, name, description, version, source, \
|
||||
timeout_seconds, memory_limit_mb, created_at, updated_at \
|
||||
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at \
|
||||
FROM scripts ORDER BY name",
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
@@ -94,17 +100,22 @@ impl ScriptRepository for PostgresScriptRepository {
|
||||
}
|
||||
|
||||
async fn create(&self, input: NewScript) -> Result<Script, ScriptRepositoryError> {
|
||||
let sandbox_json = serde_json::to_value(input.sandbox.unwrap_or_default())
|
||||
.unwrap_or_else(|_| serde_json::json!({}));
|
||||
let res = sqlx::query_as::<_, ScriptRow>(
|
||||
"INSERT INTO scripts (name, description, source, timeout_seconds, memory_limit_mb) \
|
||||
VALUES ($1, $2, $3, COALESCE($4, 30), COALESCE($5, 256)) \
|
||||
"INSERT INTO scripts ( \
|
||||
name, description, source, \
|
||||
timeout_seconds, memory_limit_mb, sandbox \
|
||||
) VALUES ($1, $2, $3, COALESCE($4, 30), COALESCE($5, 256), $6) \
|
||||
RETURNING id, name, description, version, source, \
|
||||
timeout_seconds, memory_limit_mb, created_at, updated_at",
|
||||
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at",
|
||||
)
|
||||
.bind(&input.name)
|
||||
.bind(input.description.as_deref())
|
||||
.bind(&input.source)
|
||||
.bind(input.timeout_seconds)
|
||||
.bind(input.memory_limit_mb)
|
||||
.bind(sandbox_json)
|
||||
.fetch_one(&self.pool)
|
||||
.await;
|
||||
|
||||
@@ -128,6 +139,13 @@ impl ScriptRepository for PostgresScriptRepository {
|
||||
// COALESCE-based partial update: `NULL` parameters leave columns
|
||||
// untouched. Description is double-Optioned so callers can
|
||||
// explicitly set it to NULL (Some(None)) vs leave it alone (None).
|
||||
// Sandbox is replaced wholesale when present: the stored overrides
|
||||
// are never merged field-by-field with the incoming patch (clearer
|
||||
// semantics for a "PUT a new sandbox config" call).
|
||||
let sandbox_json = patch
|
||||
.sandbox
|
||||
.as_ref()
|
||||
.map(|s| serde_json::to_value(s).unwrap_or_else(|_| serde_json::json!({})));
|
||||
let row = sqlx::query_as::<_, ScriptRow>(
|
||||
"UPDATE scripts SET \
|
||||
name = COALESCE($2, name), \
|
||||
@@ -135,11 +153,12 @@ impl ScriptRepository for PostgresScriptRepository {
|
||||
source = COALESCE($5, source), \
|
||||
timeout_seconds = COALESCE($6, timeout_seconds), \
|
||||
memory_limit_mb = COALESCE($7, memory_limit_mb), \
|
||||
sandbox = COALESCE($8, sandbox), \
|
||||
version = version + 1, \
|
||||
updated_at = NOW() \
|
||||
WHERE id = $1 \
|
||||
RETURNING id, name, description, version, source, \
|
||||
timeout_seconds, memory_limit_mb, created_at, updated_at",
|
||||
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at",
|
||||
)
|
||||
.bind(id.into_inner())
|
||||
.bind(patch.name.as_deref())
|
||||
@@ -148,6 +167,7 @@ impl ScriptRepository for PostgresScriptRepository {
|
||||
.bind(patch.source.as_deref())
|
||||
.bind(patch.timeout_seconds)
|
||||
.bind(patch.memory_limit_mb)
|
||||
.bind(sandbox_json)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
@@ -177,12 +197,18 @@ struct ScriptRow {
|
||||
source: String,
|
||||
timeout_seconds: i32,
|
||||
memory_limit_mb: i32,
|
||||
sandbox: serde_json::Value,
|
||||
created_at: chrono::DateTime<chrono::Utc>,
|
||||
updated_at: chrono::DateTime<chrono::Utc>,
|
||||
}
|
||||
|
||||
impl From<ScriptRow> for Script {
|
||||
fn from(r: ScriptRow) -> Self {
|
||||
// Tolerate stale rows whose sandbox column predates a future
|
||||
// schema migration: unknown fields are rejected by serde, so
|
||||
// fall back to an empty ScriptSandbox rather than poisoning a
|
||||
// list response.
|
||||
let sandbox = serde_json::from_value(r.sandbox).unwrap_or_default();
|
||||
Self {
|
||||
id: r.id.into(),
|
||||
name: r.name,
|
||||
@@ -191,6 +217,7 @@ impl From<ScriptRow> for Script {
|
||||
source: r.source,
|
||||
timeout_seconds: u32::try_from(r.timeout_seconds).unwrap_or(30),
|
||||
memory_limit_mb: u32::try_from(r.memory_limit_mb).unwrap_or(256),
|
||||
sandbox,
|
||||
created_at: r.created_at,
|
||||
updated_at: r.updated_at,
|
||||
}
|
||||
|
||||
103
crates/manager-core/src/sandbox.rs
Normal file
103
crates/manager-core/src/sandbox.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
//! Admin-set ceiling for per-script sandbox overrides.
|
||||
//!
|
||||
//! The executor-core's default `Limits` is what scripts get when they
|
||||
//! don't override. The ceiling here is the per-field maximum that a
|
||||
//! script's override is allowed to request. Validation runs at write
|
||||
//! time so the executor can trust whatever's stored.
|
||||
|
||||
use std::env;
|
||||
|
||||
use picloud_shared::ScriptSandbox;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Maximum value a per-script sandbox override may request, one field
/// per knob.
///
/// Values come from [`SandboxCeiling::conservative`] (built-in
/// defaults) or [`SandboxCeiling::from_env`] (env vars layered on top
/// of those defaults). Every knob always carries a finite ceiling: the
/// fields are plain `u64`, so there is no "unbounded" setting — the
/// widest ceiling an operator can configure is `u64::MAX`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SandboxCeiling {
    /// Ceiling for the `max_operations` override.
    pub max_operations: u64,
    /// Ceiling for the `max_string_size` override.
    pub max_string_size: u64,
    /// Ceiling for the `max_array_size` override.
    pub max_array_size: u64,
    /// Ceiling for the `max_map_size` override.
    pub max_map_size: u64,
    /// Ceiling for the `max_call_levels` override.
    pub max_call_levels: u64,
    /// Ceiling for the `max_expr_depth` override.
    pub max_expr_depth: u64,
}
|
||||
|
||||
impl SandboxCeiling {
|
||||
/// Conservative built-in ceiling. Matches the executor's defaults —
|
||||
/// scripts can request anything between zero and this, but no
|
||||
/// higher. Operators can widen via env vars (see `from_env`).
|
||||
#[must_use]
|
||||
pub const fn conservative() -> Self {
|
||||
Self {
|
||||
max_operations: 10_000_000,
|
||||
max_string_size: 1024 * 1024, // 1 MiB
|
||||
max_array_size: 100_000,
|
||||
max_map_size: 100_000,
|
||||
max_call_levels: 128,
|
||||
max_expr_depth: 128,
|
||||
}
|
||||
}
|
||||
|
||||
/// Read overrides from env vars, falling back to `conservative()`
|
||||
/// for any unset knob. Invalid values are ignored with a warning
|
||||
/// (we'd rather start with the conservative default than refuse to
|
||||
/// boot on a typo).
|
||||
#[must_use]
|
||||
pub fn from_env() -> Self {
|
||||
let mut c = Self::conservative();
|
||||
macro_rules! load {
|
||||
($field:ident, $key:expr) => {
|
||||
if let Ok(v) = env::var($key) {
|
||||
match v.parse::<u64>() {
|
||||
Ok(n) => c.$field = n,
|
||||
Err(e) => tracing::warn!(env = $key, error = %e, "ignoring invalid sandbox ceiling value"),
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
load!(max_operations, "PICLOUD_SANDBOX_MAX_OPERATIONS");
|
||||
load!(max_string_size, "PICLOUD_SANDBOX_MAX_STRING_SIZE");
|
||||
load!(max_array_size, "PICLOUD_SANDBOX_MAX_ARRAY_SIZE");
|
||||
load!(max_map_size, "PICLOUD_SANDBOX_MAX_MAP_SIZE");
|
||||
load!(max_call_levels, "PICLOUD_SANDBOX_MAX_CALL_LEVELS");
|
||||
load!(max_expr_depth, "PICLOUD_SANDBOX_MAX_EXPR_DEPTH");
|
||||
c
|
||||
}
|
||||
|
||||
/// Returns `Err` if any override exceeds the ceiling on the same
|
||||
/// field. Empty overrides (`ScriptSandbox::empty()`) always pass.
|
||||
pub fn check(&self, s: &ScriptSandbox) -> Result<(), CeilingError> {
|
||||
check_field("max_operations", s.max_operations, self.max_operations)?;
|
||||
check_field("max_string_size", s.max_string_size, self.max_string_size)?;
|
||||
check_field("max_array_size", s.max_array_size, self.max_array_size)?;
|
||||
check_field("max_map_size", s.max_map_size, self.max_map_size)?;
|
||||
check_field("max_call_levels", s.max_call_levels, self.max_call_levels)?;
|
||||
check_field("max_expr_depth", s.max_expr_depth, self.max_expr_depth)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn check_field(name: &'static str, value: Option<u64>, ceiling: u64) -> Result<(), CeilingError> {
|
||||
if let Some(v) = value {
|
||||
if v > ceiling {
|
||||
return Err(CeilingError::Exceeded {
|
||||
field: name,
|
||||
requested: v,
|
||||
ceiling,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, Clone)]
|
||||
pub enum CeilingError {
|
||||
#[error("sandbox override `{field}` = {requested} exceeds admin ceiling of {ceiling}")]
|
||||
Exceeded {
|
||||
field: &'static str,
|
||||
requested: u64,
|
||||
ceiling: u64,
|
||||
},
|
||||
}
|
||||
Reference in New Issue
Block a user