diff --git a/CLAUDE.md b/CLAUDE.md index 9e27af9..1624147 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,7 +8,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Authoritative design: [serverless_cloud_blueprint.md](serverless_cloud_blueprint.md). The blueprint is a living document — when architecture decisions are made in conversation that contradict it, treat the latest decision as truth and update the blueprint. -**Current focus (Phase 4, v1.1.0):** SDK foundation + stdlib utilities — the shape every v1.1.x service module hangs off, see [docs/sdk-shape.md](docs/sdk-shape.md). Subsequent v1.1.x releases (KV in v1.1.1, docs in v1.1.2, …) fill it in; see blueprint §12 for the full table. Phase 3 shipped end-to-end: admin auth, multi-app scoping, and Phase 3.5 capability gating (`manager-core::authz::{can, require, Capability}` + migration `0006_users_authz.sql`). Every v1.1+ table starts with `app_id UUID NOT NULL REFERENCES apps(id) ON DELETE CASCADE` and every Rhai SDK call resolves its app from the execution context. +**Current focus (Phase 4, v1.1.0):** SDK foundation + stdlib utilities — the shape every v1.1.x service module hangs off, see [docs/sdk-shape.md](docs/sdk-shape.md). Stdlib reference at [docs/stdlib-reference.md](docs/stdlib-reference.md). Subsequent v1.1.x releases (KV in v1.1.1, docs in v1.1.2, …) fill it in; see blueprint §12 for the full table. Phase 3 shipped end-to-end: admin auth, multi-app scoping, and Phase 3.5 capability gating (`manager-core::authz::{can, require, Capability}` + migration `0006_users_authz.sql`). Every v1.1+ table starts with `app_id UUID NOT NULL REFERENCES apps(id) ON DELETE CASCADE` and every Rhai SDK call resolves its app from the execution context. 
## Three-Service Architecture diff --git a/Cargo.lock b/Cargo.lock index bb8a9c4..977068c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1566,8 +1566,13 @@ dependencies = [ name = "picloud-executor-core" version = "0.6.0" dependencies = [ + "base64", "chrono", + "hex", + "percent-encoding", "picloud-shared", + "rand 0.8.6", + "regex", "rhai", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index d2f1056..b3f5ff5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,6 +74,12 @@ sha2 = "0.10" base64 = "0.22" data-encoding = "2.6" +# Stdlib utility crates (v1.1.0 stdlib PR — registered into the +# Rhai engine as the regex::/random::/etc. namespaces) +regex = "1" +hex = "0.4" +percent-encoding = "2" + [workspace.lints.rust] unsafe_code = "forbid" diff --git a/crates/executor-core/Cargo.toml b/crates/executor-core/Cargo.toml index 1ab142b..1b13667 100644 --- a/crates/executor-core/Cargo.toml +++ b/crates/executor-core/Cargo.toml @@ -18,3 +18,10 @@ tracing.workspace = true uuid.workspace = true chrono.workspace = true rhai.workspace = true + +# Stdlib utility modules — see crates/executor-core/src/sdk/stdlib/. +regex.workspace = true +rand.workspace = true +base64.workspace = true +hex.workspace = true +percent-encoding.workspace = true diff --git a/crates/executor-core/src/engine.rs b/crates/executor-core/src/engine.rs index f2849a8..d580cc4 100644 --- a/crates/executor-core/src/engine.rs +++ b/crates/executor-core/src/engine.rs @@ -143,6 +143,11 @@ fn build_engine(limits: Limits, logs: Option>>>) -> Rhai engine.register_static_module("log", build_log_module(logs).into()); } + // Stateless utility modules — regex::/random::/time::/json::/base64::/ + // hex::/url::. Always registered, including in the parse-only validate + // path, so script authors get consistent surface in both phases. 
+ sdk::stdlib::register_stdlib(&mut engine); + engine } diff --git a/crates/executor-core/src/sdk/mod.rs b/crates/executor-core/src/sdk/mod.rs index bbb478c..cff56be 100644 --- a/crates/executor-core/src/sdk/mod.rs +++ b/crates/executor-core/src/sdk/mod.rs @@ -13,6 +13,7 @@ pub mod bridge; pub mod cx; +pub mod stdlib; pub use bridge::{dynamic_to_json, json_to_dynamic}; pub use cx::SdkCallCx; diff --git a/crates/executor-core/src/sdk/stdlib/base64.rs b/crates/executor-core/src/sdk/stdlib/base64.rs new file mode 100644 index 0000000..391553e --- /dev/null +++ b/crates/executor-core/src/sdk/stdlib/base64.rs @@ -0,0 +1,48 @@ +//! `base64::` — standard and URL-safe Base64. +//! +//! Two encoders are exposed: standard alphabet with padding (`encode`/ +//! `decode`) and URL-safe alphabet without padding (`encode_url`/ +//! `decode_url`). Each encoder accepts both `String` and `Blob` inputs +//! as separate Rhai overloads; decoders always return `Blob` — the +//! caller knows whether the original bytes were textual. 
+
+use base64::engine::general_purpose::{STANDARD, URL_SAFE_NO_PAD};
+use base64::Engine as _;
+use rhai::{Blob, Engine as RhaiEngine, EvalAltResult, Module};
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+
+    module.set_native_fn("encode", |s: &str| -> Result<String, Box<EvalAltResult>> {
+        Ok(STANDARD.encode(s.as_bytes()))
+    });
+    module.set_native_fn("encode", |b: Blob| -> Result<String, Box<EvalAltResult>> {
+        Ok(STANDARD.encode(&b))
+    });
+    module.set_native_fn("decode", |s: &str| -> Result<Blob, Box<EvalAltResult>> {
+        STANDARD
+            .decode(s)
+            .map_err(|e| format!("base64::decode: {e}").into())
+    });
+
+    module.set_native_fn(
+        "encode_url",
+        |s: &str| -> Result<String, Box<EvalAltResult>> {
+            Ok(URL_SAFE_NO_PAD.encode(s.as_bytes()))
+        },
+    );
+    module.set_native_fn(
+        "encode_url",
+        |b: Blob| -> Result<String, Box<EvalAltResult>> { Ok(URL_SAFE_NO_PAD.encode(&b)) },
+    );
+    module.set_native_fn(
+        "decode_url",
+        |s: &str| -> Result<Blob, Box<EvalAltResult>> {
+            URL_SAFE_NO_PAD
+                .decode(s)
+                .map_err(|e| format!("base64::decode_url: {e}").into())
+        },
+    );
+
+    engine.register_static_module("base64", module.into());
+}
diff --git a/crates/executor-core/src/sdk/stdlib/hex.rs b/crates/executor-core/src/sdk/stdlib/hex.rs
new file mode 100644
index 0000000..4357c8a
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/hex.rs
@@ -0,0 +1,21 @@
+//! `hex::` — hexadecimal encode/decode (lowercase output, case-
+//! insensitive input). String and Blob inputs are both accepted on
+//! encode; decode always returns `Blob`.
+
+use rhai::{Blob, Engine as RhaiEngine, EvalAltResult, Module};
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+
+    module.set_native_fn("encode", |s: &str| -> Result<String, Box<EvalAltResult>> {
+        Ok(hex::encode(s.as_bytes()))
+    });
+    module.set_native_fn("encode", |b: Blob| -> Result<String, Box<EvalAltResult>> {
+        Ok(hex::encode(&b))
+    });
+    module.set_native_fn("decode", |s: &str| -> Result<Blob, Box<EvalAltResult>> {
+        hex::decode(s).map_err(|e| format!("hex::decode: {e}").into())
+    });
+
+    engine.register_static_module("hex", module.into());
+}
diff --git a/crates/executor-core/src/sdk/stdlib/json.rs b/crates/executor-core/src/sdk/stdlib/json.rs
new file mode 100644
index 0000000..ae557eb
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/json.rs
@@ -0,0 +1,43 @@
+//! `json::` — JSON parse and stringify. Reuses the bridge functions in
+//! `crate::sdk::bridge` so script-visible JSON has the same shape
+//! (numbers, maps, arrays, nulls) as `ctx.request.body` already does.
+
+use rhai::{Dynamic, Engine as RhaiEngine, EvalAltResult, Module};
+
+use crate::sdk::bridge::{dynamic_to_json, json_to_dynamic};
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+    register_parse(&mut module);
+    register_stringify(&mut module);
+    register_stringify_pretty(&mut module);
+    engine.register_static_module("json", module.into());
+}
+
+fn register_parse(module: &mut Module) {
+    module.set_native_fn("parse", |s: &str| -> Result<Dynamic, Box<EvalAltResult>> {
+        let value: serde_json::Value =
+            serde_json::from_str(s).map_err(|e| format!("json::parse: {e}"))?;
+        Ok(json_to_dynamic(value))
+    });
+}
+
+fn register_stringify(module: &mut Module) {
+    module.set_native_fn(
+        "stringify",
+        |v: Dynamic| -> Result<String, Box<EvalAltResult>> {
+            serde_json::to_string(&dynamic_to_json(&v))
+                .map_err(|e| format!("json::stringify: {e}").into())
+        },
+    );
+}
+
+fn register_stringify_pretty(module: &mut Module) {
+    module.set_native_fn(
+        "stringify_pretty",
+        |v: Dynamic| -> Result<String, Box<EvalAltResult>> {
+            serde_json::to_string_pretty(&dynamic_to_json(&v))
+                .map_err(|e| format!("json::stringify_pretty: {e}").into())
+        },
+    );
+}
diff --git a/crates/executor-core/src/sdk/stdlib/mod.rs b/crates/executor-core/src/sdk/stdlib/mod.rs
new file mode 100644
index 0000000..d0096aa
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/mod.rs
@@ -0,0 +1,25 @@
+//! Stateless utility modules registered once at engine build via
+//! `Engine::register_static_module`. They have no per-call state, no
+//! cross-app sensitivity, and no `SdkCallCx` — distinguishing them
+//! from stateful service modules (KV, docs, …) which hook into
+//! `sdk::register_all` instead. See [docs/sdk-shape.md](../../../../../docs/sdk-shape.md).
+
+use rhai::Engine as RhaiEngine;
+
+pub mod base64;
+pub mod hex;
+pub mod json;
+pub mod random;
+pub mod regex;
+pub mod time;
+pub mod url;
+
+pub fn register_stdlib(engine: &mut RhaiEngine) {
+    regex::register(engine);
+    random::register(engine);
+    time::register(engine);
+    json::register(engine);
+    base64::register(engine);
+    hex::register(engine);
+    url::register(engine);
+}
diff --git a/crates/executor-core/src/sdk/stdlib/random.rs b/crates/executor-core/src/sdk/stdlib/random.rs
new file mode 100644
index 0000000..84e14f7
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/random.rs
@@ -0,0 +1,70 @@
+//! `random::` — CSPRNG primitives (`rand::rngs::OsRng`).
+//!
+//! Only the OS RNG is exposed. No "fast non-crypto" variant — scripts
+//! should not pick between secure and insecure entropy. Output sizes
+//! are capped to keep a single script call from blowing host memory.
+
+use rand::distributions::{Alphanumeric, DistString};
+use rand::{rngs::OsRng, Rng, RngCore};
+use rhai::{Blob, Engine as RhaiEngine, EvalAltResult, Module};
+use uuid::Uuid;
+
+const MAX_BYTES: i64 = 65_536;
+const MAX_STRING: i64 = 4_096;
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+    register_int(&mut module);
+    register_float(&mut module);
+    register_bytes(&mut module);
+    register_string(&mut module);
+    register_uuid(&mut module);
+    engine.register_static_module("random", module.into());
+}
+
+fn register_int(module: &mut Module) {
+    module.set_native_fn(
+        "int",
+        |min: i64, max: i64| -> Result<i64, Box<EvalAltResult>> {
+            if min > max {
+                return Err(format!("random::int: min ({min}) > max ({max})").into());
+            }
+            Ok(OsRng.gen_range(min..=max))
+        },
+    );
+}
+
+fn register_float(module: &mut Module) {
+    module.set_native_fn("float", || -> Result<f64, Box<EvalAltResult>> {
+        Ok(OsRng.gen::<f64>())
+    });
+}
+
+fn register_bytes(module: &mut Module) {
+    module.set_native_fn("bytes", |n: i64| -> Result<Blob, Box<EvalAltResult>> {
+        if !(0..=MAX_BYTES).contains(&n) {
+            return Err(format!("random::bytes: n must be in 0..={MAX_BYTES}, got {n}").into());
+        }
+        // Safe: n is non-negative and bounded by MAX_BYTES, which fits in usize.
+        let len = usize::try_from(n).expect("n bounded above by MAX_BYTES");
+        let mut buf = vec![0u8; len];
+        OsRng.fill_bytes(&mut buf);
+        Ok(buf)
+    });
+}
+
+fn register_string(module: &mut Module) {
+    module.set_native_fn("string", |n: i64| -> Result<String, Box<EvalAltResult>> {
+        if !(0..=MAX_STRING).contains(&n) {
+            return Err(format!("random::string: n must be in 0..={MAX_STRING}, got {n}").into());
+        }
+        let len = usize::try_from(n).expect("n bounded above by MAX_STRING");
+        Ok(Alphanumeric.sample_string(&mut OsRng, len))
+    });
+}
+
+fn register_uuid(module: &mut Module) {
+    module.set_native_fn("uuid", || -> Result<String, Box<EvalAltResult>> {
+        Ok(Uuid::new_v4().to_string())
+    });
+}
diff --git a/crates/executor-core/src/sdk/stdlib/regex.rs b/crates/executor-core/src/sdk/stdlib/regex.rs
new file mode 100644
index 0000000..7ba5557
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/regex.rs
@@ -0,0 +1,105 @@
+//! `regex::` — non-backtracking regular expressions (Rust `regex` crate).
+//!
+//! Patterns compile per call. No cache: premature for v1.1.0, and the
+//! `regex` crate's linear-time guarantees keep per-call cost bounded.
+//! Catastrophic backtracking cannot occur, and oversized patterns hit
+//! the crate's compile-time size limit; no extra defense needed.
+
+use regex::Regex;
+use rhai::{Array, Dynamic, Engine as RhaiEngine, EvalAltResult, Module};
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+    register_is_match(&mut module);
+    register_find(&mut module);
+    register_find_all(&mut module);
+    register_replace(&mut module);
+    register_replace_all(&mut module);
+    register_split(&mut module);
+    register_captures(&mut module);
+    engine.register_static_module("regex", module.into());
+}
+
+fn compile(pattern: &str) -> Result<Regex, Box<EvalAltResult>> {
+    Regex::new(pattern).map_err(|e| format!("invalid regex: {e}").into())
+}
+
+fn register_is_match(module: &mut Module) {
+    module.set_native_fn(
+        "is_match",
+        |pattern: &str, text: &str| -> Result<bool, Box<EvalAltResult>> {
+            Ok(compile(pattern)?.is_match(text))
+        },
+    );
+}
+
+fn register_find(module: &mut Module) {
+    module.set_native_fn(
+        "find",
+        |pattern: &str, text: &str| -> Result<Dynamic, Box<EvalAltResult>> {
+            Ok(compile(pattern)?
+                .find(text)
+                .map_or(Dynamic::UNIT, |m| Dynamic::from(m.as_str().to_string())))
+        },
+    );
+}
+
+fn register_find_all(module: &mut Module) {
+    module.set_native_fn(
+        "find_all",
+        |pattern: &str, text: &str| -> Result<Array, Box<EvalAltResult>> {
+            Ok(compile(pattern)?
+                .find_iter(text)
+                .map(|m| Dynamic::from(m.as_str().to_string()))
+                .collect())
+        },
+    );
+}
+
+fn register_replace(module: &mut Module) {
+    module.set_native_fn(
+        "replace",
+        |pattern: &str, text: &str, replacement: &str| -> Result<String, Box<EvalAltResult>> {
+            Ok(compile(pattern)?.replace(text, replacement).into_owned())
+        },
+    );
+}
+
+fn register_replace_all(module: &mut Module) {
+    module.set_native_fn(
+        "replace_all",
+        |pattern: &str, text: &str, replacement: &str| -> Result<String, Box<EvalAltResult>> {
+            Ok(compile(pattern)?
+                .replace_all(text, replacement)
+                .into_owned())
+        },
+    );
+}
+
+fn register_split(module: &mut Module) {
+    module.set_native_fn(
+        "split",
+        |pattern: &str, text: &str| -> Result<Array, Box<EvalAltResult>> {
+            Ok(compile(pattern)?
+                .split(text)
+                .map(|s| Dynamic::from(s.to_string()))
+                .collect())
+        },
+    );
+}
+
+fn register_captures(module: &mut Module) {
+    module.set_native_fn(
+        "captures",
+        |pattern: &str, text: &str| -> Result<Dynamic, Box<EvalAltResult>> {
+            let re = compile(pattern)?;
+            Ok(re.captures(text).map_or(Dynamic::UNIT, |caps| {
+                let arr: Array = caps
+                    .iter()
+                    .map(|m| m.map_or(Dynamic::UNIT, |m| Dynamic::from(m.as_str().to_string())))
+                    .collect();
+                Dynamic::from(arr)
+            }))
+        },
+    );
+}
diff --git a/crates/executor-core/src/sdk/stdlib/time.rs b/crates/executor-core/src/sdk/stdlib/time.rs
new file mode 100644
index 0000000..7c0ff6b
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/time.rs
@@ -0,0 +1,68 @@
+//! `time::` — UTC time. The canonical "time value" is milliseconds
+//! since the Unix epoch as `i64`. ISO 8601 strings are for parsing and
+//! display only. UTC only — no timezone support in v1.1.0 (would pull
+//! in chrono-tz, deferred until a real use case demands it).
+
+use chrono::{DateTime, SecondsFormat, Utc};
+use rhai::{Engine as RhaiEngine, EvalAltResult, Module};
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+    register_now(&mut module);
+    register_now_ms(&mut module);
+    register_parse(&mut module);
+    register_format(&mut module);
+    register_add_seconds(&mut module);
+    register_diff_seconds(&mut module);
+    engine.register_static_module("time", module.into());
+}
+
+fn register_now(module: &mut Module) {
+    module.set_native_fn("now", || -> Result<String, Box<EvalAltResult>> {
+        Ok(Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true))
+    });
+}
+
+fn register_now_ms(module: &mut Module) {
+    module.set_native_fn("now_ms", || -> Result<i64, Box<EvalAltResult>> {
+        Ok(Utc::now().timestamp_millis())
+    });
+}
+
+fn register_parse(module: &mut Module) {
+    module.set_native_fn("parse", |iso: &str| -> Result<i64, Box<EvalAltResult>> {
+        DateTime::parse_from_rfc3339(iso)
+            .map(|dt| dt.timestamp_millis())
+            .map_err(|e| format!("time::parse: invalid ISO 8601 / RFC 3339: {e}").into())
+    });
+}
+
+fn register_format(module: &mut Module) {
+    module.set_native_fn("format", |ms: i64| -> Result<String, Box<EvalAltResult>> {
+        DateTime::<Utc>::from_timestamp_millis(ms)
+            .map(|dt| dt.to_rfc3339_opts(SecondsFormat::Millis, true))
+            .ok_or_else(|| format!("time::format: ms ({ms}) out of representable range").into())
+    });
+}
+
+fn register_add_seconds(module: &mut Module) {
+    module.set_native_fn(
+        "add_seconds",
+        |ms: i64, secs: i64| -> Result<i64, Box<EvalAltResult>> {
+            secs.checked_mul(1000)
+                .and_then(|delta| ms.checked_add(delta))
+                .ok_or_else(|| format!("time::add_seconds: overflow (ms={ms}, secs={secs})").into())
+        },
+    );
+}
+
+fn register_diff_seconds(module: &mut Module) {
+    module.set_native_fn(
+        "diff_seconds",
+        |a_ms: i64, b_ms: i64| -> Result<i64, Box<EvalAltResult>> {
+            b_ms.checked_sub(a_ms)
+                .map(|d| d / 1000)
+                .ok_or_else(|| format!("time::diff_seconds: overflow (a={a_ms}, b={b_ms})").into())
+        },
+    );
+}
diff --git a/crates/executor-core/src/sdk/stdlib/url.rs b/crates/executor-core/src/sdk/stdlib/url.rs
new file mode 100644
index 0000000..1f82883
--- /dev/null
+++ b/crates/executor-core/src/sdk/stdlib/url.rs
@@ -0,0 +1,64 @@
+//! `url::` — RFC 3986 percent-encoding.
+//!
+//! `encode`/`decode` operate on opaque component values; `encode_query`
+//! builds an `application/x-www-form-urlencoded`-style query string
+//! from a Rhai `Map`. Key ordering is the map's natural order (Rhai's
+//! `Map` is a `BTreeMap`, so keys come out alphabetically — fine for
+//! query strings, which RFC 3986 leaves unordered).
+
+use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC};
+use rhai::{Engine as RhaiEngine, EvalAltResult, Map, Module};
+
+/// RFC 3986 unreserved set: `A-Z / a-z / 0-9 / - / _ / . / ~`.
+/// Everything outside this set gets percent-encoded.
+const UNRESERVED: &AsciiSet = &NON_ALPHANUMERIC
+    .remove(b'-')
+    .remove(b'_')
+    .remove(b'.')
+    .remove(b'~');
+
+pub fn register(engine: &mut RhaiEngine) {
+    let mut module = Module::new();
+    register_encode(&mut module);
+    register_decode(&mut module);
+    register_encode_query(&mut module);
+    engine.register_static_module("url", module.into());
+}
+
+fn register_encode(module: &mut Module) {
+    module.set_native_fn("encode", |s: &str| -> Result<String, Box<EvalAltResult>> {
+        Ok(utf8_percent_encode(s, UNRESERVED).to_string())
+    });
+}
+
+fn register_decode(module: &mut Module) {
+    module.set_native_fn("decode", |s: &str| -> Result<String, Box<EvalAltResult>> {
+        percent_decode_str(s)
+            .decode_utf8()
+            .map(std::borrow::Cow::into_owned)
+            .map_err(|e| format!("url::decode: invalid UTF-8: {e}").into())
+    });
+}
+
+fn register_encode_query(module: &mut Module) {
+    module.set_native_fn(
+        "encode_query",
+        |m: Map| -> Result<String, Box<EvalAltResult>> {
+            let mut out = String::new();
+            for (k, v) in m {
+                if !out.is_empty() {
+                    out.push('&');
+                }
+                out.extend(utf8_percent_encode(&k, UNRESERVED));
+                out.push('=');
+                // Coerce values via `to_string` rather than throwing on
+                // non-strings — scripts commonly pass numbers/bools here
+                // and a forced cast at the call site is friction with
+                // no upside.
+                let value = v.to_string();
+                out.extend(utf8_percent_encode(&value, UNRESERVED));
+            }
+            Ok(out)
+        },
+    );
+}
diff --git a/crates/executor-core/tests/stdlib.rs b/crates/executor-core/tests/stdlib.rs
new file mode 100644
index 0000000..1f119c7
--- /dev/null
+++ b/crates/executor-core/tests/stdlib.rs
@@ -0,0 +1,382 @@
+//! Integration tests for the v1.1.0 stdlib utility modules.
+//!
+//! These exist alongside `sdk_contract.rs` rather than inside it
+//! because the stateless utilities aren't part of the same versioned
+//! SDK contract surface — `sdk_contract.rs` covers things that bump
+//! `SDK_VERSION` when they change; stdlib additions don't.
+
+use std::collections::BTreeMap;
+
+use picloud_executor_core::{Engine, ExecError, ExecRequest, InvocationType, Limits};
+use picloud_shared::{AppId, ExecutionId, RequestId, ScriptId, ScriptSandbox, Services};
+use serde_json::{json, Value};
+
+// ----------------------------------------------------------------------------
+// Test harness — duplicated from sdk_contract.rs (each integration test
+// crate has its own; there is no tests/common/).
+// ----------------------------------------------------------------------------
+
+fn engine() -> Engine {
+    Engine::new(Limits::default(), Services::new())
+}
+
+fn baseline_request() -> ExecRequest {
+    let execution_id = ExecutionId::new();
+    ExecRequest {
+        execution_id,
+        request_id: RequestId::new(),
+        script_id: ScriptId::new(),
+        script_name: "stdlib".into(),
+        invocation_type: InvocationType::Http,
+        path: "/stdlib-test".into(),
+        headers: BTreeMap::new(),
+        body: Value::Null,
+        params: BTreeMap::new(),
+        query: BTreeMap::new(),
+        rest: String::new(),
+        sandbox_overrides: ScriptSandbox::default(),
+        app_id: AppId::new(),
+        principal: None,
+        trigger_depth: 0,
+        root_execution_id: execution_id,
+    }
+}
+
+fn run(source: &str) -> Value {
+    engine()
+        .execute(source, baseline_request())
+        .expect("stdlib test should execute cleanly")
+        .body
+}
+
+fn run_err(source: &str) -> ExecError {
+    engine()
+        .execute(source, baseline_request())
+        .expect_err("stdlib test expected to throw")
+}
+
+fn assert_runtime_err(err: ExecError, needle: &str) {
+    match err {
+        ExecError::Runtime(msg) => assert!(
+            msg.contains(needle),
+            "runtime error did not contain `{needle}`: {msg}"
+        ),
+        other => panic!("expected Runtime error containing `{needle}`, got {other:?}"),
+    }
+}
+
+// ============================================================================
+// regex
+// ============================================================================
+
+#[test]
+fn regex_is_match_true_and_false() {
+    assert_eq!(run(r#"regex::is_match("^h", "hello")"#), json!(true));
+    assert_eq!(run(r#"regex::is_match("^x", "hello")"#), json!(false));
+}
+
+#[test]
+fn regex_find_returns_first_match() {
+    assert_eq!(run(r#"regex::find("\\d+", "abc 42 def 99")"#), json!("42"));
+}
+
+#[test]
+fn regex_find_returns_unit_when_no_match() {
+    // () serializes to JSON null via dynamic_to_json.
+    assert_eq!(run(r#"regex::find("\\d+", "abc")"#), Value::Null);
+}
+
+#[test]
+fn regex_find_all_returns_array() {
+    assert_eq!(
+        run(r#"regex::find_all("\\d+", "a1 b22 c333")"#),
+        json!(["1", "22", "333"])
+    );
+}
+
+#[test]
+fn regex_replace_first_only() {
+    assert_eq!(
+        run(r#"regex::replace("a", "banana", "X")"#),
+        json!("bXnana")
+    );
+}
+
+#[test]
+fn regex_replace_all() {
+    assert_eq!(
+        run(r#"regex::replace_all("a", "banana", "X")"#),
+        json!("bXnXnX")
+    );
+}
+
+#[test]
+fn regex_split() {
+    assert_eq!(
+        run(r#"regex::split(",\\s*", "a, b,c, d")"#),
+        json!(["a", "b", "c", "d"])
+    );
+}
+
+#[test]
+fn regex_captures_extracts_groups() {
+    assert_eq!(
+        run(r#"regex::captures("(\\d+)-(\\w+)", "42-abc")"#),
+        json!(["42-abc", "42", "abc"])
+    );
+}
+
+#[test]
+fn regex_captures_returns_unit_when_no_match() {
+    assert_eq!(run(r#"regex::captures("(\\d+)", "abc")"#), Value::Null);
+}
+
+#[test]
+fn regex_invalid_pattern_throws() {
+    assert_runtime_err(run_err(r#"regex::is_match("(", "x")"#), "invalid regex");
+}
+
+// ============================================================================
+// random
+// ============================================================================
+
+#[test]
+fn random_int_within_range() {
+    // One draw per test run; asserts it lands inside the inclusive range [10, 20].
+    let body = run(r"
+        let n = random::int(10, 20);
+        n >= 10 && n <= 20
+    ");
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn random_int_throws_when_min_greater_than_max() {
+    assert_runtime_err(run_err("random::int(20, 10)"), "min");
+}
+
+#[test]
+fn random_float_in_unit_interval() {
+    let body = run(r"
+        let f = random::float();
+        f >= 0.0 && f < 1.0
+    ");
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn random_bytes_returns_blob_of_correct_length() {
+    assert_eq!(run("random::bytes(16).len()"), json!(16));
+}
+
+#[test]
+fn random_bytes_rejects_negative() {
+    assert_runtime_err(run_err("random::bytes(-1)"), "random::bytes");
+}
+
+#[test]
+fn random_bytes_rejects_oversize() {
+    assert_runtime_err(run_err("random::bytes(70000)"), "random::bytes");
+}
+
+#[test]
+fn random_string_produces_alphanumeric_of_correct_length() {
+    let body = run(r#"
+        let s = random::string(32);
+        s.len == 32 && regex::is_match("^[A-Za-z0-9]+$", s)
+    "#);
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn random_uuid_has_canonical_format() {
+    let body = run(
+        r#"regex::is_match("^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", random::uuid())"#,
+    );
+    assert_eq!(body, json!(true));
+}
+
+// ============================================================================
+// time
+// ============================================================================
+
+#[test]
+fn time_now_ms_is_positive() {
+    let body = run("time::now_ms() > 0");
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn time_now_string_looks_like_iso() {
+    let body = run(r#"regex::is_match("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", time::now())"#);
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn time_parse_format_round_trip() {
+    let body = run(r"
+        let ms = 1700000000000;
+        time::parse(time::format(ms)) == ms
+    ");
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn time_add_seconds() {
+    assert_eq!(run("time::add_seconds(0, 60)"), json!(60_000));
+    assert_eq!(run("time::add_seconds(1000, -1)"), json!(0));
+}
+
+#[test]
+fn time_diff_seconds_truncates() {
+    assert_eq!(run("time::diff_seconds(0, 65_500)"), json!(65));
+}
+
+#[test]
+fn time_parse_rejects_garbage() {
+    assert_runtime_err(run_err(r#"time::parse("nonsense")"#), "time::parse");
+}
+
+// ============================================================================
+// json
+// ============================================================================
+
+#[test]
+fn json_parse_then_stringify_round_trip() {
+    let body = run(r#"
+        let src = `{"a":1,"b":"x"}`;
+        json::stringify(json::parse(src)) == src
+    "#);
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn json_stringify_compact() {
+    assert_eq!(run(r"json::stringify(#{ a: 1 })"), json!(r#"{"a":1}"#));
+}
+
+#[test]
+fn json_stringify_pretty_has_newlines() {
+    let body = run(r#"json::stringify_pretty(#{ a: 1 }).contains("\n")"#);
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn json_parse_invalid_throws() {
+    assert_runtime_err(run_err(r#"json::parse("not json")"#), "json::parse");
+}
+
+// ============================================================================
+// base64
+// ============================================================================
+
+#[test]
+fn base64_encode_string() {
+    assert_eq!(run(r#"base64::encode("hi")"#), json!("aGk="));
+}
+
+#[test]
+fn base64_decode_then_re_encode_round_trip() {
+    assert_eq!(
+        run(r#"base64::encode(base64::decode("aGVsbG8="))"#),
+        json!("aGVsbG8=")
+    );
+}
+
+#[test]
+fn base64_encode_url_has_no_padding() {
+    let body = run(r#"
+        let s = base64::encode_url("hello world!?");
+        !s.contains("=") && !s.contains("+") && !s.contains("/")
+    "#);
+    assert_eq!(body, json!(true));
+}
+
+#[test]
+fn base64_decode_url_round_trip() {
+    assert_eq!(
+        run(r#"base64::encode_url(base64::decode_url("aGVsbG8"))"#),
+        json!("aGVsbG8")
+    );
+}
+
+#[test]
+fn base64_decode_invalid_throws() {
+    assert_runtime_err(run_err(r#"base64::decode("!!!")"#), "base64::decode");
+}
+
+// ============================================================================
+// hex
+// ============================================================================
+
+#[test]
+fn hex_encode_produces_lowercase() {
+    assert_eq!(run(r#"hex::encode("Z")"#), json!("5a"));
+}
+
+#[test]
+fn hex_decode_then_re_encode_round_trip() {
+    // mixed-case input → lowercase output proves both case-insensitive
+    // decode and lowercase encode.
+    assert_eq!(
+        run(r#"hex::encode(hex::decode("DeAdBeEf"))"#),
+        json!("deadbeef")
+    );
+}
+
+#[test]
+fn hex_decode_returns_correct_length() {
+    assert_eq!(run(r#"hex::decode("deadbeef").len()"#), json!(4));
+}
+
+#[test]
+fn hex_decode_invalid_throws() {
+    assert_runtime_err(run_err(r#"hex::decode("xyz")"#), "hex::decode");
+}
+
+// ============================================================================
+// url
+// ============================================================================
+
+#[test]
+fn url_encode_basic() {
+    assert_eq!(run(r#"url::encode("hello world")"#), json!("hello%20world"));
+}
+
+#[test]
+fn url_encode_preserves_unreserved() {
+    assert_eq!(
+        run(r#"url::encode("abcXYZ123-_.~")"#),
+        json!("abcXYZ123-_.~")
+    );
+}
+
+#[test]
+fn url_decode_round_trip() {
+    assert_eq!(
+        run(r#"url::decode(url::encode("hello world!?"))"#),
+        json!("hello world!?")
+    );
+}
+
+#[test]
+fn url_encode_query_basic() {
+    // Map keys come out alphabetically (Rhai's Map is a BTreeMap).
+    assert_eq!(
+        run(r#"url::encode_query(#{ a: "1", b: "x y" })"#),
+        json!("a=1&b=x%20y")
+    );
+}
+
+#[test]
+fn url_encode_query_coerces_non_strings() {
+    // Numbers and bools shouldn't throw; they coerce via to_string().
+    let body = run(r"url::encode_query(#{ n: 42, b: true })");
+    // Order is alphabetical: b before n.
+    assert_eq!(body, json!("b=true&n=42"));
+}
+
+#[test]
+fn url_decode_rejects_invalid_utf8() {
+    assert_runtime_err(run_err(r#"url::decode("%FF%FE%80")"#), "url::decode");
+}
diff --git a/docs/stdlib-reference.md b/docs/stdlib-reference.md
new file mode 100644
index 0000000..ce371dc
--- /dev/null
+++ b/docs/stdlib-reference.md
@@ -0,0 +1,215 @@
+# Rhai stdlib reference
+
+Everything in this document is callable from any user script without
+imports — Rhai's built-in standard library plus the seven PiCloud
+utility modules added in v1.1.0. Stateful service modules (KV, docs,
+HTTP, …) ship in subsequent v1.1.x releases and are documented
+separately.
+
+For the architectural shape (why some modules are stateless and
+register at engine build, why others are per-call), see
+[sdk-shape.md](sdk-shape.md).
+
+## Conventions
+
+- **Throw on failure.** Every function throws a Rhai runtime error on
+  bad input (invalid pattern, invalid encoding, out-of-range arg). Use
+  `try { ... } catch (e) { ... }` if you want to handle it.
+- **`()` for absent.** Functions that semantically may have no result
+  (e.g. `regex::find` when nothing matches) return `()`. Test with
+  `if v == () { ... }`.
+- **`bool` for predicates.** Yes/no questions return `bool`.
+- **UTC, milliseconds, lowercase hex, RFC 3986.** Defaults chosen once,
+  not per call.
+
+---
+
+## Rhai built-ins (free with every script)
+
+These come with the Rhai engine itself. See the
+[Rhai book](https://rhai.rs/book/lib/index.html) for full signatures.
+
+**Math:** `+ - * / %`, `min`, `max`, `abs`, `sqrt`, `pow`, `floor`,
+`ceil`, `round`, `to_int`, `to_float`, `sin`, `cos`, `tan`, `asin`,
+`acos`, `atan`, `exp`, `ln`, `log`, `PI()`, `E()`.
+
+**String:** `len`, `is_empty`, `contains`, `starts_with`, `ends_with`,
+`index_of`, `split`, `trim`, `to_lower`, `to_upper`, `replace`, `chars`,
+`pad`, `sub_string`, `crop`, `+` (concatenation).
+
+**Array:** `push`, `pop`, `shift`, `insert`, `remove`, `len`, `clear`,
+`truncate`, `extend`, `filter`, `map`, `reduce`, `reduce_rev`, `find`,
+`find_map`, `any`, `all`, `index_of`, `contains`, `sort`, `reverse`,
+`dedup`, `chunks`, `splice`, `[]` indexing.
+
+**Map:** `len`, `is_empty`, `contains`, `keys`, `values`, `mixin`,
+`remove`, `clear`, `fill_with`, `+` (merge), `[]` and `.` access.
+
+**Blob:** `len`, `push`, `pop`, `clear`, `as_string`, `parse_le_int`,
+`write_*`, `[]` indexing. Blobs are `Vec<u8>` at the Rust layer.
+
+**Logging:** `log::trace`, `log::info`, `log::warn`, `log::error` —
+each takes a message and optionally a structured-data map. (Documented
+with the SDK contract; mentioned here for completeness.)
+
+---
+
+## `regex::` — regular expressions
+
+Linear-time, no backtracking (powered by the Rust `regex` crate).
+Patterns compile per call.
+
+| Function | Description |
+|---|---|
+| `regex::is_match(pattern, text) -> bool` | Whether `text` contains a match. |
+| `regex::find(pattern, text) -> String \| ()` | First match or `()` if none. |
+| `regex::find_all(pattern, text) -> Array` | All matches as `String` array. |
+| `regex::replace(pattern, text, replacement) -> String` | Replace first match only. `$1` / `$name` in `replacement` expand to capture groups; write `$$` for a literal `$`. |
+| `regex::replace_all(pattern, text, replacement) -> String` | Replace every match. Same `$` expansion rules as `regex::replace`. |
+| `regex::split(pattern, text) -> Array` | Split `text` on matches. |
+| `regex::captures(pattern, text) -> Array \| ()` | `[full, group1, group2, ...]` from the first match; unmatched optional groups appear as `()`. |
+
+Invalid patterns throw. Use `\\` to escape inside Rhai string literals
+(`"\\d+"`) or backtick strings to skip escaping (`` `\d+` ``).
+
+```rhai
+if regex::is_match(`^/api/v\d+/`, ctx.request.path) {
+    let cap = regex::captures(`/api/v(\d+)/(.+)`, ctx.request.path);
+    let version = cap[1]; // "1"
+    let rest = cap[2]; // "users"
+}
+```
+
+---
+
+## `random::` — cryptographically-secure randomness
+
+All randomness comes from `OsRng`. There is deliberately no "fast
+non-crypto" variant — scripts shouldn't have to pick.
+
+| Function | Description |
+|---|---|
+| `random::int(min, max) -> i64` | Uniform integer in `[min, max]` (inclusive). Throws if `min > max`. |
+| `random::float() -> f64` | Uniform float in `[0.0, 1.0)`. |
+| `random::bytes(n) -> Blob` | `n` random bytes. `n` in `0..=65536`. |
+| `random::string(n) -> String` | `n` random alphanumeric chars (`A-Za-z0-9`). `n` in `0..=4096`. |
+| `random::uuid() -> String` | UUID v4 in canonical 8-4-4-4-12 form. |
+
+```rhai
+let token = random::uuid();
+let salt = random::bytes(16);
+let pin = random::int(100000, 999999);
+```
+
+---
+
+## `time::` — UTC time
+
+Canonical time value is **milliseconds since the Unix epoch** as `i64`.
+ISO 8601 / RFC 3339 strings are for I/O. UTC only — no timezone support.
+
+| Function | Description |
+|---|---|
+| `time::now() -> String` | Current UTC time as ISO 8601 with ms (e.g. `"2026-05-30T20:15:00.123Z"`). |
+| `time::now_ms() -> i64` | Current ms since Unix epoch. |
+| `time::parse(iso) -> i64` | Parse RFC 3339 / ISO 8601 string to ms. Throws on bad input. |
+| `time::format(ms) -> String` | Format ms-since-epoch as ISO 8601 with ms precision. |
+| `time::add_seconds(ms, secs) -> i64` | `ms + secs*1000`, with overflow check. |
+| `time::diff_seconds(a_ms, b_ms) -> i64` | `(b_ms - a_ms) / 1000`, truncated. |
+
+```rhai
+let started_at = time::now_ms();
+// ... do work ...
+let elapsed = time::diff_seconds(started_at, time::now_ms());
+
+let deadline = time::format(time::add_seconds(time::now_ms(), 3600));
+```
+
+---
+
+## `json::` — JSON parse and stringify
+
+| Function | Description |
+|---|---|
+| `json::parse(s) -> Dynamic` | Parse a JSON string. Returns Rhai maps, arrays, scalars, or `()` for null. Throws on invalid JSON. |
+| `json::stringify(v) -> String` | Compact JSON. |
+| `json::stringify_pretty(v) -> String` | Pretty-printed (2-space indent). |
+
+```rhai
+let payload = json::parse(ctx.request.body); // if body came in as a string
+let body_str = json::stringify(#{ ok: true, items: [1, 2, 3] });
+```
+
+Note: `ctx.request.body` is *already* parsed when the request body is
+`Content-Type: application/json` — only call `json::parse` on raw
+strings.
+
+---
+
+## `base64::` — standard and URL-safe Base64
+
+Two alphabets: standard (with `=` padding) and URL-safe (no padding).
+Encoders accept both `String` and `Blob`; decoders always return `Blob`.
+
+| Function | Description |
+|---|---|
+| `base64::encode(input) -> String` | Standard alphabet, padded. `input` is `String` or `Blob`. |
+| `base64::decode(s) -> Blob` | Decode standard alphabet. Throws on invalid. |
+| `base64::encode_url(input) -> String` | URL-safe alphabet, **no padding**. |
+| `base64::decode_url(s) -> Blob` | Decode URL-safe alphabet. Throws on invalid. |
+
+```rhai
+let token = base64::encode_url(random::bytes(32)); // URL-safe session token
+let raw = base64::decode("aGVsbG8=");
+```
+
+---
+
+## `hex::` — hexadecimal
+
+Encode produces lowercase. Decode accepts mixed case.
+
+| Function | Description |
+|---|---|
+| `hex::encode(input) -> String` | Lowercase hex. `input` is `String` or `Blob`. |
+| `hex::decode(s) -> Blob` | Decode hex (case-insensitive). Throws on invalid. |
+
+```rhai
+let fingerprint = hex::encode(random::bytes(20));
+```
+
+---
+
+## `url::` — percent-encoding
+
+Unreserved set per RFC 3986 (`A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `~`)
+is preserved; everything else is percent-encoded.
+
+| Function | Description |
+|---|---|
+| `url::encode(s) -> String` | Percent-encode a component value. |
+| `url::decode(s) -> String` | Percent-decode. Throws on invalid UTF-8 in the decoded output. |
+| `url::encode_query(map) -> String` | Build `k1=v1&k2=v2` from a Map. Both keys and values are percent-encoded. Non-string values are coerced via `to_string()`.
| + +`url::encode_query` emits keys in the Map's natural order, which is +lexicographic by key (Rhai's `Map` is a `BTreeMap`). RFC 3986 leaves query +parameter ordering unspecified, so this is fine for any conforming +consumer; if you need a specific ordering, build the string by hand. + +```rhai +let qs = url::encode_query(#{ q: "rust regex", page: 2 }); +// → "page=2&q=rust%20regex" +``` + +--- + +## What's not here + +- **Crypto** (sha256/hmac/argon2/encryption) — deferred to a focused + later PR. +- **Timezones** — UTC only in v1.1.0. Format with an offset upstream + if you need local time. +- **JWT, YAML, XML, CSV, Markdown** — not planned for v1.1.x. +- **Stateful services** (KV, docs, HTTP, cron, files, pubsub, secrets, + email, users, queue, invoke) — land per the v1.1.x roadmap in the + [blueprint §12](../serverless_cloud_blueprint.md).