feat(v1.1.3-modules): top-level script AST cache in LocalExecutorClient
- New `ScriptIdentity { script_id, updated_at }` DTO.
- `ExecutorClient` trait gains an `execute_with_identity` method;
default impl forwards to `execute` so `RemoteExecutorClient` (and
cluster-mode transports later) keep working without bespoke caching.
- `LocalExecutorClient` overrides `execute_with_identity` to consult
an `LruCache<ScriptId, CachedScript>`. Cache hit only when the
cached entry's `updated_at` matches the caller's identity; mismatch
triggers a fresh `Engine::compile`. `Engine::execute_ast(&Arc<AST>, req)`
is called inside `spawn_blocking` exactly as `execute` does today.
- Cache size from `PICLOUD_SCRIPT_CACHE_SIZE` (default 256).
- Orchestrator's HTTP data-plane path and the dispatcher both switch
to `execute_with_identity`. `ResolvedTrigger` carries
`script_updated_at` for the dispatcher's identity construction.
Workspace builds; full test suite (~440 tests) green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -186,9 +186,13 @@ impl Dispatcher {
|
||||
// wait synchronously here — sync HTTP and dispatcher share the
|
||||
// semaphore so this is intentional.
|
||||
let source = resolved.script_source.clone();
|
||||
let identity = picloud_orchestrator_core::ScriptIdentity {
|
||||
script_id: resolved.script_id,
|
||||
updated_at: resolved.script_updated_at,
|
||||
};
|
||||
let outcome = self
|
||||
.executor
|
||||
.execute(&source, exec_req, ASYNC_EXEC_TIMEOUT)
|
||||
.execute_with_identity(identity, &source, exec_req, ASYNC_EXEC_TIMEOUT)
|
||||
.await;
|
||||
drop(permit);
|
||||
|
||||
@@ -230,6 +234,7 @@ impl Dispatcher {
|
||||
script_id: script.id,
|
||||
script_source: script.source,
|
||||
script_name: script.name,
|
||||
script_updated_at: script.updated_at,
|
||||
sandbox_overrides: script.sandbox,
|
||||
registered_by_principal: trigger.registered_by_principal,
|
||||
retry_max_attempts: trigger.retry_max_attempts,
|
||||
@@ -335,6 +340,7 @@ impl Dispatcher {
|
||||
script_id,
|
||||
script_source: script.source,
|
||||
script_name: payload.script_name,
|
||||
script_updated_at: script.updated_at,
|
||||
sandbox_overrides: script.sandbox,
|
||||
// HTTP outbox rows don't carry a registered_by_principal
|
||||
// — use a sentinel zero UUID since this field isn't used
|
||||
@@ -516,6 +522,11 @@ pub struct ResolvedTrigger {
|
||||
pub script_id: ScriptId,
|
||||
pub script_source: String,
|
||||
pub script_name: String,
|
||||
/// v1.1.3: freshness comparator for the orchestrator's top-level
|
||||
/// script cache. The dispatcher hands `(script_id, updated_at)`
|
||||
/// in alongside the source so cached ASTs can be reused across
|
||||
/// triggered invocations.
|
||||
pub script_updated_at: chrono::DateTime<chrono::Utc>,
|
||||
pub sandbox_overrides: ScriptSandbox,
|
||||
pub registered_by_principal: picloud_shared::AdminUserId,
|
||||
pub retry_max_attempts: u32,
|
||||
|
||||
@@ -21,5 +21,10 @@ tracing.workspace = true
|
||||
uuid.workspace = true
|
||||
chrono.workspace = true
|
||||
reqwest.workspace = true
|
||||
rhai.workspace = true
|
||||
tokio.workspace = true
|
||||
urlencoding.workspace = true
|
||||
|
||||
# v1.1.3 — top-level script AST cache lives in orchestrator-core's
|
||||
# LocalExecutorClient; key is ScriptId, value is `(updated_at, Arc<rhai::AST>)`.
|
||||
lru.workspace = true
|
||||
|
||||
@@ -129,7 +129,14 @@ where
|
||||
|
||||
let timeout = Duration::from_secs(u64::from(script.timeout_seconds));
|
||||
let started = Utc::now();
|
||||
let outcome = state.executor.execute(&script.source, req, timeout).await;
|
||||
let identity = crate::client::ScriptIdentity {
|
||||
script_id: script.id,
|
||||
updated_at: script.updated_at,
|
||||
};
|
||||
let outcome = state
|
||||
.executor
|
||||
.execute_with_identity(identity, &script.source, req, timeout)
|
||||
.await;
|
||||
let finished = Utc::now();
|
||||
|
||||
// Build and dispatch the audit log regardless of outcome. We await
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
use std::sync::Arc;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use lru::LruCache;
|
||||
use picloud_executor_core::{Engine, ExecError, ExecRequest, ExecResponse};
|
||||
use picloud_shared::ScriptId;
|
||||
|
||||
use crate::gate::{AcquireError, ExecutionGate};
|
||||
|
||||
@@ -11,6 +15,21 @@ use crate::gate::{AcquireError, ExecutionGate};
|
||||
/// resource usage independent of misconfigured scripts.
|
||||
const HARD_TIMEOUT_CAP: Duration = Duration::from_secs(300);
|
||||
|
||||
/// Default capacity for the top-level script AST cache. Override via
|
||||
/// `PICLOUD_SCRIPT_CACHE_SIZE`. Sized assuming a few hundred distinct
|
||||
/// endpoint scripts per process.
|
||||
const DEFAULT_SCRIPT_CACHE_SIZE: usize = 256;
|
||||
|
||||
/// Identity used by [`ExecutorClient::execute_with_identity`] to key
|
||||
/// the AST cache. `updated_at` is the freshness comparator — an edit
|
||||
/// that bumps `scripts.updated_at` invalidates the cached AST on the
|
||||
/// next lookup, no explicit pub/sub.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct ScriptIdentity {
|
||||
pub script_id: ScriptId,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// The seam between the orchestrator and the executor.
|
||||
///
|
||||
/// Single-node mode plugs in `LocalExecutorClient`, which calls
|
||||
@@ -25,6 +44,21 @@ pub trait ExecutorClient: Send + Sync {
|
||||
req: ExecRequest,
|
||||
timeout: Duration,
|
||||
) -> Result<ExecResponse, ExecError>;
|
||||
|
||||
/// v1.1.3: identity-aware variant for caching. Callers that already
|
||||
/// know the script's `(id, updated_at)` should use this so the local
|
||||
/// executor can reuse a compiled `rhai::AST` across invocations.
|
||||
/// Default impl forwards to `execute` so `RemoteExecutorClient` (and
|
||||
/// any future transport) keeps working without bespoke caching.
|
||||
async fn execute_with_identity(
|
||||
&self,
|
||||
_identity: ScriptIdentity,
|
||||
source: &str,
|
||||
req: ExecRequest,
|
||||
timeout: Duration,
|
||||
) -> Result<ExecResponse, ExecError> {
|
||||
self.execute(source, req, timeout).await
|
||||
}
|
||||
}
|
||||
|
||||
/// In-process executor — wraps `executor-core::Engine` directly.
|
||||
@@ -36,15 +70,106 @@ pub trait ExecutorClient: Send + Sync {
|
||||
/// Holds an `ExecutionGate` and acquires a permit before `spawn_blocking`
|
||||
/// so a script storm can't drain the blocking-thread pool. The permit
|
||||
/// drops with the future, returning the slot.
|
||||
///
|
||||
/// v1.1.3 adds a top-level AST cache keyed by `ScriptId`. On
|
||||
/// `execute_with_identity`, the client compares the caller's
|
||||
/// `updated_at` against the cached entry's; a match reuses the
|
||||
/// `Arc<rhai::AST>` and skips Rhai's parser. A mismatch (or absence)
|
||||
/// triggers a fresh `Engine::compile` + replace.
|
||||
pub struct LocalExecutorClient {
|
||||
engine: Arc<Engine>,
|
||||
gate: Arc<ExecutionGate>,
|
||||
/// `(updated_at, Arc<rhai::AST>)` keyed by `ScriptId`. `Mutex`
|
||||
/// because the cache is shared across invocations of this client;
|
||||
/// LRU eviction caps memory growth.
|
||||
script_cache: Arc<Mutex<LruCache<ScriptId, CachedScript>>>,
|
||||
}
|
||||
|
||||
pub struct CachedScript {
|
||||
pub updated_at: DateTime<Utc>,
|
||||
pub ast: Arc<rhai::AST>,
|
||||
}
|
||||
|
||||
impl LocalExecutorClient {
|
||||
#[must_use]
|
||||
pub fn new(engine: Arc<Engine>, gate: Arc<ExecutionGate>) -> Self {
|
||||
Self { engine, gate }
|
||||
let cap = std::env::var("PICLOUD_SCRIPT_CACHE_SIZE")
|
||||
.ok()
|
||||
.and_then(|s| s.parse::<usize>().ok())
|
||||
.unwrap_or(DEFAULT_SCRIPT_CACHE_SIZE);
|
||||
Self::with_script_cache_capacity(engine, gate, cap)
|
||||
}
|
||||
|
||||
/// Explicit capacity for tests that exercise LRU eviction.
|
||||
#[must_use]
|
||||
pub fn with_script_cache_capacity(
|
||||
engine: Arc<Engine>,
|
||||
gate: Arc<ExecutionGate>,
|
||||
cap: usize,
|
||||
) -> Self {
|
||||
let cap = NonZeroUsize::new(cap.max(1)).expect("max(1) is non-zero");
|
||||
Self {
|
||||
engine,
|
||||
gate,
|
||||
script_cache: Arc::new(Mutex::new(LruCache::new(cap))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Cache lookup with `updated_at` freshness check. Returns the
|
||||
/// cached AST on hit; compiles, inserts, returns the fresh AST on
|
||||
/// miss or stale. Public so tests can introspect the cache.
|
||||
pub fn get_or_compile(
|
||||
&self,
|
||||
identity: ScriptIdentity,
|
||||
source: &str,
|
||||
) -> Result<Arc<rhai::AST>, ExecError> {
|
||||
{
|
||||
let mut cache = self
|
||||
.script_cache
|
||||
.lock()
|
||||
.expect("script cache lock poisoned");
|
||||
if let Some(cached) = cache.get(&identity.script_id) {
|
||||
if cached.updated_at == identity.updated_at {
|
||||
tracing::debug!(
|
||||
target = "picloud::scripts::cache",
|
||||
script_id = %identity.script_id,
|
||||
"cache hit"
|
||||
);
|
||||
return Ok(cached.ast.clone());
|
||||
}
|
||||
tracing::debug!(
|
||||
target = "picloud::scripts::cache",
|
||||
script_id = %identity.script_id,
|
||||
"cache stale; recompiling"
|
||||
);
|
||||
} else {
|
||||
tracing::debug!(
|
||||
target = "picloud::scripts::cache",
|
||||
script_id = %identity.script_id,
|
||||
"cache miss"
|
||||
);
|
||||
}
|
||||
}
|
||||
let ast = self.engine.compile(source)?;
|
||||
let mut cache = self
|
||||
.script_cache
|
||||
.lock()
|
||||
.expect("script cache lock poisoned");
|
||||
cache.put(
|
||||
identity.script_id,
|
||||
CachedScript {
|
||||
updated_at: identity.updated_at,
|
||||
ast: ast.clone(),
|
||||
},
|
||||
);
|
||||
Ok(ast)
|
||||
}
|
||||
|
||||
/// Shared script-AST cache. Exposed so tests can introspect cache
|
||||
/// state (length / contents) under a Mutex lock.
|
||||
#[must_use]
|
||||
pub fn script_cache(&self) -> &Arc<Mutex<LruCache<ScriptId, CachedScript>>> {
|
||||
&self.script_cache
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,6 +214,39 @@ impl ExecutorClient for LocalExecutorClient {
|
||||
Ok(Ok(res)) => res,
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_with_identity(
|
||||
&self,
|
||||
identity: ScriptIdentity,
|
||||
source: &str,
|
||||
req: ExecRequest,
|
||||
timeout: Duration,
|
||||
) -> Result<ExecResponse, ExecError> {
|
||||
let _permit =
|
||||
self.gate
|
||||
.try_acquire()
|
||||
.map_err(
|
||||
|AcquireError::Overloaded { retry_after_secs }| ExecError::Overloaded {
|
||||
retry_after_secs,
|
||||
},
|
||||
)?;
|
||||
|
||||
let ast = self.get_or_compile(identity, source)?;
|
||||
|
||||
let timeout = timeout.min(HARD_TIMEOUT_CAP);
|
||||
let timeout_secs = u32::try_from(timeout.as_secs()).unwrap_or(u32::MAX);
|
||||
|
||||
let engine = self.engine.clone();
|
||||
let join = tokio::task::spawn_blocking(move || engine.execute_ast(&ast, req));
|
||||
|
||||
match tokio::time::timeout(timeout, join).await {
|
||||
Err(_) => Err(ExecError::Timeout(timeout_secs)),
|
||||
Ok(Err(join_err)) => Err(ExecError::Runtime(format!(
|
||||
"execution task panicked: {join_err}"
|
||||
))),
|
||||
Ok(Ok(res)) => res,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Remote executor — forwards to a peer executor node over HTTP.
|
||||
|
||||
@@ -16,7 +16,7 @@ pub mod resolver;
|
||||
pub mod routing;
|
||||
|
||||
pub use api::{data_plane_router, user_routes_router, DataPlaneState};
|
||||
pub use client::{ExecutorClient, LocalExecutorClient, RemoteExecutorClient};
|
||||
pub use client::{ExecutorClient, LocalExecutorClient, RemoteExecutorClient, ScriptIdentity};
|
||||
pub use gate::{AcquireError, ExecutionGate};
|
||||
pub use inbox::InboxRegistry;
|
||||
pub use resolver::{ResolverError, ScriptResolver};
|
||||
|
||||
Reference in New Issue
Block a user