feat(orchestrator-core): ExecutionGate + 503/Retry-After on overflow
Adds a single global concurrency cap on the data-plane dispatch path:
- orchestrator-core::gate::ExecutionGate wraps tokio::sync::Semaphore.
Acquisition is a non-blocking try_acquire with no queue. The
PICLOUD_MAX_CONCURRENT_EXECUTIONS env var (default 32) sets the cap.
- LocalExecutorClient acquires a permit before spawn_blocking; the
permit drops with the future so the slot returns automatically.
- On refusal, ExecError::Overloaded { retry_after_secs: 1 } surfaces
upward. ApiError's IntoResponse impl already maps that to 503 with a
Retry-After header (landed in the previous commit alongside the
variant itself).
- picloud binary constructs the gate once at build_app and shares it
with LocalExecutorClient.
The cap exists so a Rhai script storm can't drain the blocking-thread
pool — pushing back hard beats letting requests pile up against a
finite worker count. Per-app / per-script caps stay deferred until a
real workload demands them.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,8 @@ use std::time::Duration;
|
||||
use async_trait::async_trait;
|
||||
use picloud_executor_core::{Engine, ExecError, ExecRequest, ExecResponse};
|
||||
|
||||
use crate::gate::{AcquireError, ExecutionGate};
|
||||
|
||||
/// Maximum wall-clock time we'll wait for a single invocation, regardless
|
||||
/// of the per-script `timeout_seconds`. Provides a hard ceiling on
|
||||
/// resource usage independent of misconfigured scripts.
|
||||
@@ -30,14 +32,19 @@ pub trait ExecutorClient: Send + Sync {
|
||||
/// `executor-core::Engine::execute` is synchronous; we offload it to a
|
||||
/// blocking thread so it doesn't park a Tokio worker, and apply the
|
||||
/// wall-clock timeout here.
|
||||
///
|
||||
/// Holds an `ExecutionGate` and acquires a permit before `spawn_blocking`
|
||||
/// so a script storm can't drain the blocking-thread pool. The permit
|
||||
/// drops with the future, returning the slot.
|
||||
pub struct LocalExecutorClient {
|
||||
engine: Arc<Engine>,
|
||||
gate: Arc<ExecutionGate>,
|
||||
}
|
||||
|
||||
impl LocalExecutorClient {
|
||||
#[must_use]
|
||||
pub fn new(engine: Arc<Engine>) -> Self {
|
||||
Self { engine }
|
||||
pub fn new(engine: Arc<Engine>, gate: Arc<ExecutionGate>) -> Self {
|
||||
Self { engine, gate }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,6 +56,18 @@ impl ExecutorClient for LocalExecutorClient {
|
||||
req: ExecRequest,
|
||||
timeout: Duration,
|
||||
) -> Result<ExecResponse, ExecError> {
|
||||
// Acquire before spending any wall-clock budget. The permit is
|
||||
// held until this future returns; spawn_blocking inherits the
|
||||
// gating via the captured `_permit`.
|
||||
let _permit =
|
||||
self.gate
|
||||
.try_acquire()
|
||||
.map_err(
|
||||
|AcquireError::Overloaded { retry_after_secs }| ExecError::Overloaded {
|
||||
retry_after_secs,
|
||||
},
|
||||
)?;
|
||||
|
||||
let timeout = timeout.min(HARD_TIMEOUT_CAP);
|
||||
let timeout_secs = u32::try_from(timeout.as_secs()).unwrap_or(u32::MAX);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user