use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; use picloud_executor_core::{Engine, ExecError, ExecRequest, ExecResponse}; use crate::gate::{AcquireError, ExecutionGate}; /// Maximum wall-clock time we'll wait for a single invocation, regardless /// of the per-script `timeout_seconds`. Provides a hard ceiling on /// resource usage independent of misconfigured scripts. const HARD_TIMEOUT_CAP: Duration = Duration::from_secs(300); /// The seam between the orchestrator and the executor. /// /// Single-node mode plugs in `LocalExecutorClient`, which calls /// `executor-core` in-process via `spawn_blocking`. Cluster mode plugs /// in `RemoteExecutorClient`, which forwards over HTTP to an executor /// node. Everything else in orchestrator-core depends only on this trait. #[async_trait] pub trait ExecutorClient: Send + Sync { async fn execute( &self, source: &str, req: ExecRequest, timeout: Duration, ) -> Result; } /// In-process executor — wraps `executor-core::Engine` directly. /// /// `executor-core::Engine::execute` is synchronous; we offload it to a /// blocking thread so it doesn't park a Tokio worker, and apply the /// wall-clock timeout here. /// /// Holds an `ExecutionGate` and acquires a permit before `spawn_blocking` /// so a script storm can't drain the blocking-thread pool. The permit /// drops with the future, returning the slot. pub struct LocalExecutorClient { engine: Arc, gate: Arc, } impl LocalExecutorClient { #[must_use] pub fn new(engine: Arc, gate: Arc) -> Self { Self { engine, gate } } } #[async_trait] impl ExecutorClient for LocalExecutorClient { async fn execute( &self, source: &str, req: ExecRequest, timeout: Duration, ) -> Result { // Acquire before spending any wall-clock budget. The permit is // held by this future; on `tokio::time::timeout` firing, the // future drops and the permit returns to the pool — but the // detached `spawn_blocking` thread keeps running until the // Rhai script finishes (or panics). So in-use blocking threads // can briefly exceed the gate's permit count after a timeout. // That is intentional: a new admission can be served while the // already-doomed script winds down, which is preferable to // wedging the slot for the worst-case timeout duration. let _permit = self.gate .try_acquire() .map_err( |AcquireError::Overloaded { retry_after_secs }| ExecError::Overloaded { retry_after_secs, }, )?; let timeout = timeout.min(HARD_TIMEOUT_CAP); let timeout_secs = u32::try_from(timeout.as_secs()).unwrap_or(u32::MAX); let engine = self.engine.clone(); let source = source.to_string(); let join = tokio::task::spawn_blocking(move || engine.execute(&source, req)); match tokio::time::timeout(timeout, join).await { Err(_) => Err(ExecError::Timeout(timeout_secs)), Ok(Err(join_err)) => Err(ExecError::Runtime(format!( "execution task panicked: {join_err}" ))), Ok(Ok(res)) => res, } } } /// Remote executor — forwards to a peer executor node over HTTP. /// /// Skeleton only; fleshed out when cluster mode lands. pub struct RemoteExecutorClient { _client: reqwest::Client, _base_url: String, } impl RemoteExecutorClient { #[must_use] pub fn new(base_url: impl Into) -> Self { Self { _client: reqwest::Client::new(), _base_url: base_url.into(), } } } #[async_trait] impl ExecutorClient for RemoteExecutorClient { async fn execute( &self, _source: &str, _req: ExecRequest, _timeout: Duration, ) -> Result { Err(ExecError::Runtime( "RemoteExecutorClient not implemented (cluster mode is v1.3+)".into(), )) } }