Files
PiCloud/crates/executor-core/src/engine.rs
MechaCat02 3dbead426f test(v1.1.3-modules): resolver, cache, validator, kind-rejection coverage
Adds ~46 new tests across the v1.1.3 surface:

executor-core/tests/modules.rs (NEW, 23 tests):
- resolver_loads_simple_module / endpoint_can_import_module /
  module_can_import_module — end-to-end through Engine::execute.
- resolver_cross_app_blocked / resolver_cross_app_module_not_found /
  module_cache_keyed_by_app — same-name modules in different apps
  resolve independently; cross-app lookup returns ModuleNotFound.
- resolver_self_import_detected / resolver_circular_detected —
  cycle detector reports the chain.
- resolver_depth_limit_enforced / resolver_depth_limit_just_under_succeeds.
- resolver_module_not_found / resolver_backend_error_surfaces.
- resolver_runtime_validation_rejects_top_level_expr — defense-in-
  depth: a module with a top-level expression that bypassed the
  admin gate is rejected at resolve time.
- module_cache_hit_reuses_compiled_module /
  module_cache_stale_invalidated_on_updated_at_change /
  module_cache_lru_evicts_when_capacity_exceeded.
- validate_module_{accepts_fn_const_import_only,
  rejects_top_level_let, rejects_top_level_expr,
  rejects_top_level_while}.
- validate_endpoint_{extracts_literal_imports,
  top_level_expr_still_allowed,
  skips_dynamic_imports_in_imports_list}.

orchestrator-core/src/client.rs cache_tests (6 tests):
- cache_hit_when_identity_matches / cache_invalidated_when_updated_at_changes
  / distinct_script_ids_cache_independently / lru_eviction_caps_cache_size
  / script_identity_is_copy / compile_error_does_not_poison_cache.

shared/src/script.rs kind_tests (3 tests):
- default_is_endpoint / round_trips_through_serde_lowercase
  / parse_str_round_trip.

manager-core/src/triggers_api.rs v1.1.3 tests (6 tests):
- kv_trigger_rejects_module_target / docs_trigger_rejects_module_target
  / dl_trigger_rejects_module_target — modules cannot be trigger
  targets.
- kv_trigger_rejects_missing_script / kv_trigger_rejects_cross_app_script
  — closes the latent v1.1.1/v1.1.2 isolation gap.
- kv_trigger_accepts_endpoint_target — happy path through the
  validate_trigger_target check.

picloud/tests/api.rs (8 #[ignore]'d Postgres-gated integration tests):
- create_script_default_kind_is_endpoint / create_module_kind_persists.
- create_module_with_top_level_expr_rejected /
  create_module_with_reserved_name_rejected.
- route_bind_rejects_module.
- endpoint_imports_module_end_to_end /
  module_edit_visible_on_next_invocation / cross_app_import_blocked.

Lint cleanup along the way:
- `ScriptKind::from_str` renamed to `parse_str` to dodge the
  `should_implement_trait` lint (FromStr's `Result<…,Err>` shape
  doesn't fit a 0-info lookup).
- `derive(Default)` on `ScriptKind` (Endpoint marked `#[default]`).
- Match-arm collapse in `check_module_shape` for Import + Noop.
- `#[allow(clippy::too_many_lines)]` on `resolve()` (the bridge
  logic is genuinely cohesive and would lose clarity if split).
- Elided `'r` lifetime on `StackGuard`.

Three gates clean on this commit's HEAD:
- cargo fmt --all -- --check: clean
- cargo clippy --all-targets --all-features -- -D warnings: clean
- cargo test --workspace: 358 passed, 140 ignored (Postgres-gated)
- npm run check: 0 errors, 0 warnings

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-03 07:18:18 +02:00

492 lines
18 KiB
Rust

use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use chrono::Utc;
use picloud_shared::{
ScriptValidator, SdkCallCx, Services, TriggerEvent, ValidatedScript, ValidationError,
SDK_VERSION,
};
use rhai::{Dynamic, Engine as RhaiEngine, EvalAltResult, Map, Module, Scope, AST};
use serde_json::Value as Json;
use crate::module_resolver::{
extract_imports, new_module_cache, validate_module_source, ModuleCache, PicloudModuleResolver,
};
use crate::sandbox::Limits;
use crate::sdk;
use crate::sdk::bridge::{dynamic_to_json, json_to_dynamic};
use crate::types::{
ExecError, ExecRequest, ExecResponse, ExecStats, InvocationType, LogEntry, LogLevel,
};
/// Default capacity for the module cache. Sized assuming a small fleet
/// of distinct modules per process; can be overridden via
/// `PICLOUD_MODULE_CACHE_SIZE`.
const DEFAULT_MODULE_CACHE_SIZE: usize = 512;
/// Preconfigured Rhai engine with sandbox limits applied and the SDK
/// `Services` bundle attached.
///
/// One `Engine` is constructed at process startup and reused across
/// invocations. `execute` is **synchronous** — it owns the per-call
/// scope and log buffer. Wall-clock timeouts and offloading off the
/// async runtime belong to the caller (orchestrator-core's
/// `LocalExecutorClient` wraps this with `spawn_blocking` + `timeout`).
///
/// The `Services` bundle is empty in v1.1.0; subsequent v1.1.x PRs add
/// service handles (KV, docs, …) and `sdk::register_all` wires them
/// into each per-call Rhai engine.
pub struct Engine {
limits: Limits,
services: Services,
/// v1.1.3: shared compiled-module cache. Per-key
/// `(app_id, name)`; invalidated lazily by `updated_at` mismatch
/// at resolver time.
module_cache: Arc<ModuleCache>,
}
impl Engine {
#[must_use]
pub fn new(limits: Limits, services: Services) -> Self {
let cap = std::env::var("PICLOUD_MODULE_CACHE_SIZE")
.ok()
.and_then(|s| s.parse::<usize>().ok())
.unwrap_or(DEFAULT_MODULE_CACHE_SIZE);
Self::with_module_cache_capacity(limits, services, cap)
}
/// Explicit capacity for tests that exercise LRU eviction.
#[must_use]
pub fn with_module_cache_capacity(
limits: Limits,
services: Services,
module_cache_capacity: usize,
) -> Self {
Self {
limits,
services,
module_cache: new_module_cache(module_cache_capacity),
}
}
#[must_use]
pub fn limits(&self) -> &Limits {
&self.limits
}
/// Shared compiled-module cache. Exposed so tests can introspect
/// the cache state (length, contents) under a Mutex lock.
#[must_use]
pub fn module_cache(&self) -> &Arc<ModuleCache> {
&self.module_cache
}
/// Parse-only validation for endpoint scripts. Surfaced at script-
/// upload time so syntax errors are caught before the first
/// invocation. Returns the script's literal-path `import "<name>"`
/// declarations so the repo can populate the dep-graph table.
pub fn validate(&self, source: &str) -> Result<ValidatedScript, ExecError> {
// Validation uses a fresh `RhaiEngine` without service hooks
// attached — modules are only resolved at execute() time, so
// the resolver during validate is intentionally Dummy (no DB
// access here; we just need the parser).
let engine = build_engine(self.limits, None);
extract_imports(&engine, source).map_err(ExecError::Parse)
}
/// Module-shape validation (v1.1.3). Compiles, rejects any top-
/// level statement that isn't `fn`/`const`/`import`, and returns
/// the declared imports.
pub fn validate_module(&self, source: &str) -> Result<ValidatedScript, ExecError> {
let engine = build_engine(self.limits, None);
validate_module_source(&engine, source).map_err(ExecError::Parse)
}
/// Compile `source` to a reusable AST. Lets callers (the
/// orchestrator's script cache) compile once and execute many
/// times against the same AST.
pub fn compile(&self, source: &str) -> Result<Arc<AST>, ExecError> {
let engine = build_engine(self.limits, None);
engine
.compile(source)
.map(Arc::new)
.map_err(|e| ExecError::Parse(e.to_string()))
}
/// Execute `source` against `req`. Op-budget protection comes from
/// Rhai's `set_max_operations`; wall-clock enforcement is the
/// caller's responsibility. Per-script sandbox overrides on the
/// request replace the engine's defaults field-by-field; the
/// manager already clamped them against the admin ceiling.
pub fn execute(&self, source: &str, req: ExecRequest) -> Result<ExecResponse, ExecError> {
let effective_limits = self.limits.with_overrides(&req.sandbox_overrides);
// Compile inline so the source-only path stays available for
// tests and one-off callers that don't pre-cache an AST.
let engine_for_compile = build_engine(effective_limits, None);
let ast = engine_for_compile
.compile(source)
.map(Arc::new)
.map_err(|e| ExecError::Parse(e.to_string()))?;
self.execute_ast(&ast, req)
}
/// v1.1.3: execute a pre-compiled AST. The orchestrator's script
/// cache hands compiled ASTs in directly; this path skips the
/// per-call compile.
pub fn execute_ast(&self, ast: &Arc<AST>, req: ExecRequest) -> Result<ExecResponse, ExecError> {
let effective_limits = self.limits.with_overrides(&req.sandbox_overrides);
let logs: Arc<Mutex<Vec<LogEntry>>> = Arc::new(Mutex::new(Vec::new()));
let mut engine = build_engine(effective_limits, Some(logs.clone()));
// Per-call context handed to every stateful SDK service via the
// `sdk::register_all` hook. The Arc lets future service closures
// capture cheap clones of the cx for use at script-call time.
let cx = Arc::new(SdkCallCx {
app_id: req.app_id,
principal: req.principal.clone(),
execution_id: req.execution_id,
request_id: req.request_id,
trigger_depth: req.trigger_depth,
root_execution_id: req.root_execution_id,
is_dead_letter_handler: req.is_dead_letter_handler,
event: req.event.clone(),
});
// v1.1.3: replace the no-op `DummyModuleResolver` build_engine
// installed with the real per-call resolver. The resolver owns
// `cx.clone()` so cross-app isolation derives from this exact
// call's context, not from any script-passed argument.
let resolver = PicloudModuleResolver::new(
self.services.modules.clone(),
cx.clone(),
self.module_cache.clone(),
effective_limits.module_import_depth_max,
);
engine.set_module_resolver(resolver);
sdk::register_all(&mut engine, &self.services, cx);
let mut scope = Scope::new();
scope.push_constant("ctx", build_ctx_map(&req));
let started = Instant::now();
let value: Dynamic = engine
.eval_ast_with_scope(&mut scope, ast.as_ref())
.map_err(map_eval_error)?;
let duration = started.elapsed();
let logs = Arc::try_unwrap(logs).map_or_else(
|arc| arc.lock().map(|g| g.clone()).unwrap_or_default(),
|m| m.into_inner().unwrap_or_default(),
);
let (status_code, headers, body) = parse_response(value)?;
Ok(ExecResponse {
status_code,
headers,
body,
logs,
stats: ExecStats {
duration_ms: u64::try_from(duration.as_millis()).unwrap_or(u64::MAX),
operations: 0,
},
})
}
}
impl ScriptValidator for Engine {
fn validate(&self, source: &str) -> Result<ValidatedScript, ValidationError> {
Engine::validate(self, source).map_err(|e| match e {
ExecError::Parse(msg) => ValidationError::Syntax(msg),
other => ValidationError::Syntax(other.to_string()),
})
}
fn validate_module(&self, source: &str) -> Result<ValidatedScript, ValidationError> {
Engine::validate_module(self, source).map_err(|e| match e {
ExecError::Parse(msg) => ValidationError::ModuleShape(msg),
other => ValidationError::ModuleShape(other.to_string()),
})
}
}
// ----------------------------------------------------------------------------
// Engine construction
// ----------------------------------------------------------------------------
fn build_engine(limits: Limits, logs: Option<Arc<Mutex<Vec<LogEntry>>>>) -> RhaiEngine {
let mut engine = RhaiEngine::new();
engine.set_max_operations(limits.max_operations);
engine.set_max_string_size(limits.max_string_size);
engine.set_max_array_size(limits.max_array_size);
engine.set_max_map_size(limits.max_map_size);
engine.set_max_call_levels(limits.max_call_levels);
engine.set_max_expr_depths(limits.max_expr_depth, limits.max_expr_depth);
// Reject `import` — scripts cannot pull external modules.
engine.set_module_resolver(rhai::module_resolvers::DummyModuleResolver);
// Rhai's built-in `print` and `debug` map to stdout/stderr by
// default; we never want scripts dumping there directly. Disable
// them so scripts route all output through `log::*` instead.
engine.disable_symbol("print");
if let Some(logs) = logs {
engine.register_static_module("log", build_log_module(logs).into());
}
// Stateless utility modules — regex::/random::/time::/json::/base64::/
// hex::/url::. Always registered, including in the parse-only validate
// path, so script authors get consistent surface in both phases.
sdk::stdlib::register_stdlib(&mut engine);
engine
}
fn build_log_module(logs: Arc<Mutex<Vec<LogEntry>>>) -> Module {
let mut module = Module::new();
register_log_fn(&mut module, "trace", LogLevel::Trace, &logs);
register_log_fn(&mut module, "info", LogLevel::Info, &logs);
register_log_fn(&mut module, "warn", LogLevel::Warn, &logs);
register_log_fn(&mut module, "error", LogLevel::Error, &logs);
// No `log::debug` — `debug` is a Rhai reserved keyword. Use
// `log::trace` for sub-info-level diagnostics.
module
}
fn register_log_fn(
module: &mut Module,
name: &str,
level: LogLevel,
logs: &Arc<Mutex<Vec<LogEntry>>>,
) {
// Single-argument form: `log::info("message")`.
let logs_single = logs.clone();
module.set_native_fn(name, move |msg: &str| {
push_log(&logs_single, level, msg, None);
Ok::<_, Box<EvalAltResult>>(())
});
// Two-argument form: `log::info("message", #{ user: 42 })`.
let logs_struct = logs.clone();
module.set_native_fn(name, move |msg: &str, data: Dynamic| {
let json = dynamic_to_json(&data);
push_log(&logs_struct, level, msg, Some(json));
Ok::<_, Box<EvalAltResult>>(())
});
}
fn push_log(logs: &Arc<Mutex<Vec<LogEntry>>>, level: LogLevel, message: &str, data: Option<Json>) {
if let Ok(mut g) = logs.lock() {
g.push(LogEntry {
timestamp: Utc::now(),
level,
message: message.to_string(),
data,
});
}
}
// ----------------------------------------------------------------------------
// ctx construction
// ----------------------------------------------------------------------------
fn build_ctx_map(req: &ExecRequest) -> Map {
let mut ctx = Map::new();
ctx.insert("sdk_version".into(), SDK_VERSION.into());
ctx.insert("execution_id".into(), req.execution_id.to_string().into());
ctx.insert("script_id".into(), req.script_id.to_string().into());
ctx.insert("script_name".into(), req.script_name.clone().into());
ctx.insert("request_id".into(), req.request_id.to_string().into());
ctx.insert(
"invocation_type".into(),
invocation_type_str(req.invocation_type).into(),
);
let mut request = Map::new();
request.insert("path".into(), req.path.clone().into());
let mut headers = Map::new();
for (k, v) in &req.headers {
headers.insert(k.clone().into(), v.clone().into());
}
request.insert("headers".into(), headers.into());
request.insert("body".into(), json_to_dynamic(req.body.clone()));
// SDK 1.1 additions — route-captured params, query string, prefix
// tail. Empty when not applicable so scripts can always read them.
let mut params = Map::new();
for (k, v) in &req.params {
params.insert(k.clone().into(), v.clone().into());
}
request.insert("params".into(), params.into());
let mut query = Map::new();
for (k, v) in &req.query {
query.insert(k.clone().into(), v.clone().into());
}
request.insert("query".into(), query.into());
request.insert("rest".into(), req.rest.clone().into());
ctx.insert("request".into(), request.into());
// Triggered invocations: surface the originating event as
// `ctx.event`. Direct ingress (HTTP request, manual run) leaves
// the key absent so scripts can test `if "event" in ctx`.
if let Some(event) = req.event.as_ref() {
ctx.insert("event".into(), trigger_event_to_dynamic(event));
}
ctx
}
/// Convert a `TriggerEvent` into the `ctx.event` Rhai shape defined in
/// `docs/v1.1.x-design-notes.md` §4 (the dead-letter sub-shape) and
/// §2/blueprint §9 (KV). Each variant becomes a Rhai map with a
/// `source` discriminant plus per-source fields.
fn trigger_event_to_dynamic(event: &TriggerEvent) -> Dynamic {
let mut m = Map::new();
m.insert("source".into(), event.source().into());
match event {
TriggerEvent::Kv {
op,
collection,
key,
value,
} => {
m.insert("op".into(), op.as_str().into());
let mut kv_map = Map::new();
kv_map.insert("collection".into(), collection.clone().into());
kv_map.insert("key".into(), key.clone().into());
kv_map.insert(
"value".into(),
value.clone().map_or(Dynamic::UNIT, json_to_dynamic),
);
m.insert("kv".into(), kv_map.into());
}
TriggerEvent::Docs {
op,
collection,
id,
data,
prev_data,
} => {
m.insert("op".into(), op.as_str().into());
let mut docs_map = Map::new();
docs_map.insert("collection".into(), collection.clone().into());
docs_map.insert("id".into(), id.clone().into());
docs_map.insert(
"data".into(),
data.clone().map_or(Dynamic::UNIT, json_to_dynamic),
);
docs_map.insert(
"prev_data".into(),
prev_data.clone().map_or(Dynamic::UNIT, json_to_dynamic),
);
m.insert("docs".into(), docs_map.into());
}
TriggerEvent::DeadLetter {
dead_letter_id,
original,
attempts,
last_error,
trigger_id,
script_id,
first_attempt_at,
last_attempt_at,
} => {
let mut dl = Map::new();
dl.insert("id".into(), dead_letter_id.to_string().into());
dl.insert("original".into(), trigger_event_to_dynamic(original));
dl.insert("attempts".into(), i64::from(*attempts).into());
dl.insert("last_error".into(), last_error.clone().into());
dl.insert(
"trigger_id".into(),
trigger_id
.map(|id| Dynamic::from(id.to_string()))
.unwrap_or(Dynamic::UNIT),
);
dl.insert(
"script_id".into(),
script_id
.map(|id| Dynamic::from(id.to_string()))
.unwrap_or(Dynamic::UNIT),
);
dl.insert(
"first_attempt_at".into(),
first_attempt_at.to_rfc3339().into(),
);
dl.insert(
"last_attempt_at".into(),
last_attempt_at.to_rfc3339().into(),
);
m.insert("dead_letter".into(), dl.into());
}
}
m.into()
}
fn invocation_type_str(it: InvocationType) -> &'static str {
match it {
InvocationType::Http => "http",
InvocationType::Function => "function",
InvocationType::Scheduled => "scheduled",
}
}
// ----------------------------------------------------------------------------
// Response parsing
// ----------------------------------------------------------------------------
fn parse_response(value: Dynamic) -> Result<(u16, BTreeMap<String, String>, Json), ExecError> {
// Convention: a Map with a `statusCode` field is the structured shape.
// Anything else is treated as a 200 response with the value as body.
if value.is_map() {
if let Some(map) = value.clone().try_cast::<Map>() {
if map.contains_key("statusCode") {
return parse_structured_response(map);
}
}
}
Ok((200, BTreeMap::new(), dynamic_to_json(&value)))
}
fn parse_structured_response(map: Map) -> Result<(u16, BTreeMap<String, String>, Json), ExecError> {
let status_dyn = map
.get("statusCode")
.ok_or_else(|| ExecError::InvalidResponse("missing statusCode".into()))?;
let status_code: i64 = status_dyn
.as_int()
.map_err(|_| ExecError::InvalidResponse("statusCode must be an integer".into()))?;
let status_code = u16::try_from(status_code)
.map_err(|_| ExecError::InvalidResponse("statusCode out of HTTP range".into()))?;
let mut headers: BTreeMap<String, String> = BTreeMap::new();
if let Some(h) = map.get("headers") {
if let Some(h_map) = h.clone().try_cast::<Map>() {
for (k, v) in h_map {
headers.insert(k.to_string(), v.to_string());
}
}
}
let body = map.get("body").map_or(Json::Null, dynamic_to_json);
Ok((status_code, headers, body))
}
// ----------------------------------------------------------------------------
// Error mapping
// ----------------------------------------------------------------------------
fn map_eval_error(err: Box<EvalAltResult>) -> ExecError {
match *err {
EvalAltResult::ErrorTooManyOperations(_) => ExecError::OperationBudgetExceeded,
EvalAltResult::ErrorParsing(parse_err, _) => ExecError::Parse(parse_err.to_string()),
other => ExecError::Runtime(other.to_string()),
}
}