feat: per-script Rhai sandbox overrides with admin ceiling

Adds optional per-script overrides for the six Rhai sandbox knobs
(max_operations, max_string_size, max_array_size, max_map_size,
max_call_levels, max_expr_depth). The executor merges its defaults
with each script's overrides on every call; the manager validates
overrides against an admin-set ceiling at write time, so the
executor trusts whatever is stored.

For storage, a single JSONB column on the existing scripts table was
chosen over six new columns: it lets future knobs land as code-only
changes, keeps the sparse common case (most scripts override nothing)
cheap to store and serialize, and matches how the manager and executor
pass the config across the wire.

  * 0002_sandbox.sql — ALTER TABLE scripts ADD COLUMN sandbox
    JSONB NOT NULL DEFAULT '{}'
  * shared::ScriptSandbox — six Option<u64> fields with
    deny_unknown_fields so typos surface as 422
  * Script.sandbox + ExecRequest.sandbox_overrides — typed end
    to end; cluster mode just serializes the same struct
  * executor-core::Limits::with_overrides — field-by-field
    replacement; tests cover the override actually tightening
    the live engine
  * manager-core::SandboxCeiling — built-in conservative
    defaults (10M ops, 1 MiB strings, 100k array/map, 128
    call/expr depth); env vars override per knob, invalid
    values warn-and-skip rather than blocking boot
  * manager-core admin API — POST/PUT accept `sandbox`; a value
    above the ceiling returns 422 naming the field, the requested
    value, and the ceiling; absent or `{}` keeps platform defaults
  * picloud all-in-one — wires SandboxCeiling::from_env() into
    AdminState
  * memory_limit_mb stays in the schema, marked v1.3+ advisory
    (no enforcement until OS-level isolation lands with
    cluster-mode executors)

Verified live through Caddy:
  * /version reports schema 2, product 0.3.0
  * Script with max_operations: 500 → 507 on a 10k-iteration loop
  * Same script after PUT raising to 1M → succeeds, returns 10000
  * POST with max_operations: 1_000_000_000 → 422 (exceeds ceiling)

Tests:
  * 13 executor-core unit tests (added 2 for override semantics)
  * 20 integration tests (added 6 for sandbox CRUD + ceiling +
    unknown-field rejection + executor honoring overrides)
  * default cargo test --workspace stays green (integration tests
    remain #[ignore]'d until DATABASE_URL is set)

Bumps:
  * schema 1 → 2
  * product 0.2.0 → 0.3.0
  * SDK unchanged (scripts see nothing new)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-23 16:26:12 +02:00
parent 4baaead642
commit f51924fdbc
18 changed files with 491 additions and 22 deletions

View File

@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
use async_trait::async_trait;
use picloud_orchestrator_core::{ResolverError, ScriptResolver};
use picloud_shared::{ExecutionLog, ExecutionStatus, RequestId, Script, ScriptId};
use picloud_shared::{ExecutionLog, ExecutionStatus, RequestId, Script, ScriptId, ScriptSandbox};
use sqlx::PgPool;
#[derive(Debug, thiserror::Error)]
@@ -40,6 +40,9 @@ pub struct NewScript {
pub source: String,
pub timeout_seconds: Option<i32>,
pub memory_limit_mb: Option<i32>,
/// Sandbox overrides; `None` means store an empty object (use
/// platform defaults at exec time).
pub sandbox: Option<ScriptSandbox>,
}
/// Inbound shape for update. `None` fields are left untouched.
@@ -50,6 +53,9 @@ pub struct ScriptPatch {
pub source: Option<String>,
pub timeout_seconds: Option<i32>,
pub memory_limit_mb: Option<i32>,
/// `Some(sandbox)` replaces the stored overrides wholesale (including
/// `Some(empty)` to clear them); `None` leaves them untouched.
pub sandbox: Option<ScriptSandbox>,
}
pub struct PostgresScriptRepository {
@@ -73,7 +79,7 @@ impl ScriptRepository for PostgresScriptRepository {
async fn get(&self, id: ScriptId) -> Result<Option<Script>, ScriptRepositoryError> {
let row = sqlx::query_as::<_, ScriptRow>(
"SELECT id, name, description, version, source, \
timeout_seconds, memory_limit_mb, created_at, updated_at \
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at \
FROM scripts WHERE id = $1",
)
.bind(id.into_inner())
@@ -85,7 +91,7 @@ impl ScriptRepository for PostgresScriptRepository {
async fn list(&self) -> Result<Vec<Script>, ScriptRepositoryError> {
let rows = sqlx::query_as::<_, ScriptRow>(
"SELECT id, name, description, version, source, \
timeout_seconds, memory_limit_mb, created_at, updated_at \
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at \
FROM scripts ORDER BY name",
)
.fetch_all(&self.pool)
@@ -94,17 +100,22 @@ impl ScriptRepository for PostgresScriptRepository {
}
async fn create(&self, input: NewScript) -> Result<Script, ScriptRepositoryError> {
let sandbox_json = serde_json::to_value(input.sandbox.unwrap_or_default())
.unwrap_or_else(|_| serde_json::json!({}));
let res = sqlx::query_as::<_, ScriptRow>(
"INSERT INTO scripts (name, description, source, timeout_seconds, memory_limit_mb) \
VALUES ($1, $2, $3, COALESCE($4, 30), COALESCE($5, 256)) \
"INSERT INTO scripts ( \
name, description, source, \
timeout_seconds, memory_limit_mb, sandbox \
) VALUES ($1, $2, $3, COALESCE($4, 30), COALESCE($5, 256), $6) \
RETURNING id, name, description, version, source, \
timeout_seconds, memory_limit_mb, created_at, updated_at",
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at",
)
.bind(&input.name)
.bind(input.description.as_deref())
.bind(&input.source)
.bind(input.timeout_seconds)
.bind(input.memory_limit_mb)
.bind(sandbox_json)
.fetch_one(&self.pool)
.await;
@@ -128,6 +139,13 @@ impl ScriptRepository for PostgresScriptRepository {
// COALESCE-based partial update: `NULL` parameters leave columns
// untouched. Description is double-Optioned so callers can
// explicitly set it to NULL (Some(None)) vs leave it alone (None).
// Sandbox is replaced wholesale when present; per-field merging
// happens in the API layer (clearer semantics for a "PUT a new
// sandbox config" call).
let sandbox_json = patch
.sandbox
.as_ref()
.map(|s| serde_json::to_value(s).unwrap_or_else(|_| serde_json::json!({})));
let row = sqlx::query_as::<_, ScriptRow>(
"UPDATE scripts SET \
name = COALESCE($2, name), \
@@ -135,11 +153,12 @@ impl ScriptRepository for PostgresScriptRepository {
source = COALESCE($5, source), \
timeout_seconds = COALESCE($6, timeout_seconds), \
memory_limit_mb = COALESCE($7, memory_limit_mb), \
sandbox = COALESCE($8, sandbox), \
version = version + 1, \
updated_at = NOW() \
WHERE id = $1 \
RETURNING id, name, description, version, source, \
timeout_seconds, memory_limit_mb, created_at, updated_at",
timeout_seconds, memory_limit_mb, sandbox, created_at, updated_at",
)
.bind(id.into_inner())
.bind(patch.name.as_deref())
@@ -148,6 +167,7 @@ impl ScriptRepository for PostgresScriptRepository {
.bind(patch.source.as_deref())
.bind(patch.timeout_seconds)
.bind(patch.memory_limit_mb)
.bind(sandbox_json)
.fetch_optional(&self.pool)
.await?;
@@ -177,12 +197,18 @@ struct ScriptRow {
source: String,
timeout_seconds: i32,
memory_limit_mb: i32,
sandbox: serde_json::Value,
created_at: chrono::DateTime<chrono::Utc>,
updated_at: chrono::DateTime<chrono::Utc>,
}
impl From<ScriptRow> for Script {
fn from(r: ScriptRow) -> Self {
// Tolerate rows whose stored sandbox JSON no longer matches
// ScriptSandbox (e.g. written before a later schema migration):
// serde rejects unknown fields, so any deserialization failure
// falls back to an empty ScriptSandbox rather than poisoning a
// list response.
let sandbox = serde_json::from_value(r.sandbox).unwrap_or_default();
Self {
id: r.id.into(),
name: r.name,
@@ -191,6 +217,7 @@ impl From<ScriptRow> for Script {
source: r.source,
timeout_seconds: u32::try_from(r.timeout_seconds).unwrap_or(30),
memory_limit_mb: u32::try_from(r.memory_limit_mb).unwrap_or(256),
sandbox,
created_at: r.created_at,
updated_at: r.updated_at,
}