Files
PiCloud/crates/orchestrator-core/src/client.rs
MechaCat02 e375735796 docs(blueprint+gate): drop hstore from Tech Stack; note gate-vs-timeout interaction
Two review-pass nits from the v1.1.0-foundation review:

  - Blueprint §6 Tech Stack table still listed the database as
    "PostgreSQL + hstore" with an hstore-for-KV rationale — directly
    contradicting the §8.1 KV rewrite that explicitly rejected hstore
    in favour of JSONB. Updates the row so the high-level summary
    matches the §8.1 reasoning.
  - LocalExecutorClient::execute now documents the permit-vs-timeout
    interaction: when tokio::time::timeout fires the future drops and
    the permit returns, but the detached spawn_blocking thread keeps
    running until the Rhai script winds down. In-use blocking threads
    can briefly exceed the gate's permit count after a timeout. Calling
    it out so future readers don't read the implementation as buggy.

No behaviour change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-30 20:10:05 +02:00

125 lines
4.1 KiB
Rust

use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use picloud_executor_core::{Engine, ExecError, ExecRequest, ExecResponse};
use crate::gate::{AcquireError, ExecutionGate};
/// Maximum wall-clock time we'll wait for a single invocation, regardless
/// of the per-script `timeout_seconds`. Provides a hard ceiling on
/// resource usage independent of misconfigured scripts.
const HARD_TIMEOUT_CAP: Duration = Duration::from_secs(300);
/// The seam between the orchestrator and the executor.
///
/// Single-node mode plugs in `LocalExecutorClient`, which calls
/// `executor-core` in-process via `spawn_blocking`. Cluster mode plugs
/// in `RemoteExecutorClient`, which forwards over HTTP to an executor
/// node. Everything else in orchestrator-core depends only on this trait.
#[async_trait]
pub trait ExecutorClient: Send + Sync {
async fn execute(
&self,
source: &str,
req: ExecRequest,
timeout: Duration,
) -> Result<ExecResponse, ExecError>;
}
/// In-process executor — wraps `executor-core::Engine` directly.
///
/// `executor-core::Engine::execute` is synchronous; we offload it to a
/// blocking thread so it doesn't park a Tokio worker, and apply the
/// wall-clock timeout here.
///
/// Holds an `ExecutionGate` and acquires a permit before `spawn_blocking`
/// so a script storm can't drain the blocking-thread pool. The permit
/// drops with the future, returning the slot.
pub struct LocalExecutorClient {
engine: Arc<Engine>,
gate: Arc<ExecutionGate>,
}
impl LocalExecutorClient {
#[must_use]
pub fn new(engine: Arc<Engine>, gate: Arc<ExecutionGate>) -> Self {
Self { engine, gate }
}
}
#[async_trait]
impl ExecutorClient for LocalExecutorClient {
async fn execute(
&self,
source: &str,
req: ExecRequest,
timeout: Duration,
) -> Result<ExecResponse, ExecError> {
// Acquire before spending any wall-clock budget. The permit is
// held by this future; on `tokio::time::timeout` firing, the
// future drops and the permit returns to the pool — but the
// detached `spawn_blocking` thread keeps running until the
// Rhai script finishes (or panics). So in-use blocking threads
// can briefly exceed the gate's permit count after a timeout.
// That is intentional: a new admission can be served while the
// already-doomed script winds down, which is preferable to
// wedging the slot for the worst-case timeout duration.
let _permit =
self.gate
.try_acquire()
.map_err(
|AcquireError::Overloaded { retry_after_secs }| ExecError::Overloaded {
retry_after_secs,
},
)?;
let timeout = timeout.min(HARD_TIMEOUT_CAP);
let timeout_secs = u32::try_from(timeout.as_secs()).unwrap_or(u32::MAX);
let engine = self.engine.clone();
let source = source.to_string();
let join = tokio::task::spawn_blocking(move || engine.execute(&source, req));
match tokio::time::timeout(timeout, join).await {
Err(_) => Err(ExecError::Timeout(timeout_secs)),
Ok(Err(join_err)) => Err(ExecError::Runtime(format!(
"execution task panicked: {join_err}"
))),
Ok(Ok(res)) => res,
}
}
}
/// Remote executor — forwards to a peer executor node over HTTP.
///
/// Skeleton only; fleshed out when cluster mode lands.
pub struct RemoteExecutorClient {
_client: reqwest::Client,
_base_url: String,
}
impl RemoteExecutorClient {
#[must_use]
pub fn new(base_url: impl Into<String>) -> Self {
Self {
_client: reqwest::Client::new(),
_base_url: base_url.into(),
}
}
}
#[async_trait]
impl ExecutorClient for RemoteExecutorClient {
async fn execute(
&self,
_source: &str,
_req: ExecRequest,
_timeout: Duration,
) -> Result<ExecResponse, ExecError> {
Err(ExecError::Runtime(
"RemoteExecutorClient not implemented (cluster mode is v1.3+)".into(),
))
}
}