feat(v1.1.1-dispatcher): dispatcher loop + retry + depth limit + outbox emitter

`OutboxEventEmitter` replaces `NoopEventEmitter` in the picloud
binary's `Services` bundle. KV mutations now fan out to the outbox
via `TriggerRepo::list_matching_kv` — one row per matching trigger,
carrying the serialized `TriggerEvent` payload + the matching
trigger's retry policy.

`Dispatcher` is the single tokio task that polls the outbox every
100ms, claims due rows via FOR UPDATE SKIP LOCKED (with a batch cap),
and routes each to the executor. Shares the `ExecutionGate` with
sync HTTP per design notes §2 — gate saturation reschedules the
row instead of dropping it.

Outcome handling matches design notes §3 and §4:
- reply_to.is_some() (sync HTTP): never retry. Deliver via
  `InboxResolver`; if the receiver was dropped, write an
  `abandoned_executions` row.
- is_dead_letter_handler == true: never retry, never DL. On
  failure, annotate the original DL row with
  `resolution = 'handler_failed'`. Stops the recursion that would
  otherwise re-fire a broken handler script.
- Otherwise async: bump attempt_count, reschedule with exponential
  backoff + ±jitter; once max_attempts is reached, write a
  `dead_letters` row and drop from outbox.
- Trigger-depth limit: `cx.trigger_depth > max_trigger_depth` skips
  execution entirely (log + future metric), NEVER dead-letters.
  Loops are not retried via the DL chain — they're terminated.

`InboxResolver` trait lands in `picloud-shared` with a
`NoopInboxResolver` bootstrap that flags every delivery as
`Abandoned`. Commit 6 replaces the noop with the real
in-process registry in `orchestrator-core`.

`AdminPrincipalResolver` builds a `Principal` from a trigger's
`registered_by_principal` user id so the dispatched script executes
as the trigger registrant (design notes §4).

Unit tests cover backoff math (exponential/linear/constant) +
jitter range + ExecError → InboxFailureKind classification + the
status-code table mapping. Integration tests for the full
dispatcher loop need a real Postgres + executor; reviewer runs them
via the manual smoke flow in the plan / HANDBACK.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-01 22:01:42 +02:00
parent 2e92691ee1
commit 6a2971ac70
10 changed files with 953 additions and 24 deletions

View File

@@ -12,22 +12,26 @@ use picloud_executor_core::{Engine, Limits};
use picloud_manager_core::{
admin_router, admins_router, api_keys_router, app_members_router, apps_api, apps_router,
attach_principal_if_present, auth_router, compile_routes, migrations, require_authenticated,
route_admin_router, triggers_router, AdminSessionRepository, AdminState, AdminUserRepository,
AdminsState, ApiKeyRepository, ApiKeysState, AppDomainRepository, AppMembersRepository,
AppMembersState, AppRepository, AppsState, AuthState, AuthzRepo, KvServiceImpl,
PostgresAdminSessionRepository, PostgresAdminUserRepository, PostgresApiKeyRepository,
PostgresAppDomainRepository, PostgresAppMembersRepository, PostgresAppRepository,
PostgresExecutionLogRepository, PostgresExecutionLogSink, PostgresKvRepo,
PostgresRouteRepository, PostgresScriptRepository, PostgresTriggerRepo, RepoResolver,
RouteAdminState, RouteRepository, SandboxCeiling, TriggerConfig, TriggerRepo, TriggersState,
route_admin_router, triggers_router, AbandonedRepo, AdminPrincipalResolver,
AdminSessionRepository, AdminState, AdminUserRepository, AdminsState, ApiKeyRepository,
ApiKeysState, AppDomainRepository, AppMembersRepository, AppMembersState, AppRepository,
AppsState, AuthState, AuthzRepo, DeadLetterRepo, Dispatcher, KvServiceImpl, OutboxEventEmitter,
OutboxRepo, PostgresAbandonedRepo, PostgresAdminSessionRepository, PostgresAdminUserRepository,
PostgresApiKeyRepository, PostgresAppDomainRepository, PostgresAppMembersRepository,
PostgresAppRepository, PostgresDeadLetterRepo, PostgresExecutionLogRepository,
PostgresExecutionLogSink, PostgresKvRepo, PostgresOutboxRepo, PostgresRouteRepository,
PostgresScriptRepository, PostgresTriggerRepo, PrincipalResolver, RepoResolver,
RouteAdminState, RouteRepository, SandboxCeiling, ScriptRepository, TriggerConfig, TriggerRepo,
TriggersState,
};
use picloud_orchestrator_core::routing::{AppDomainTable, RouteTable};
use picloud_orchestrator_core::{
data_plane_router, user_routes_router, DataPlaneState, ExecutionGate, LocalExecutorClient,
};
use picloud_shared::{
ExecutionLogSink, KvService, NoopDeadLetterService, NoopEventEmitter, ScriptValidator,
ServiceEventEmitter, Services, API_VERSION, PRODUCT_VERSION, SDK_VERSION, WIRE_VERSION,
ExecutionLogSink, InboxResolver, KvService, NoopDeadLetterService, NoopInboxResolver,
ScriptValidator, ServiceEventEmitter, Services, API_VERSION, PRODUCT_VERSION, SDK_VERSION,
WIRE_VERSION,
};
use sqlx::postgres::PgPoolOptions;
use sqlx::PgPool;
@@ -99,23 +103,28 @@ pub async fn build_app(pool: PgPool, auth: AuthDeps) -> anyhow::Result<Router> {
let members: Arc<dyn AppMembersRepository> = members_concrete.clone();
let authz: Arc<dyn AuthzRepo> = members_concrete;
// SDK services bundle. v1.1.1 ships the KV store; the outbox-backed
// event emitter replaces `NoopEventEmitter` once the triggers
// dispatcher lands. `NoopDeadLetterService` is a v1.1.1 stub that
// errors loudly until the real `PostgresDeadLetterService` ships.
let kv_repo = Arc::new(PostgresKvRepo::new(pool.clone()));
let events: Arc<dyn ServiceEventEmitter> = Arc::new(NoopEventEmitter);
// Triggers framework storage. The outbox event emitter routes
// KV mutations into the outbox; the dispatcher fans them out.
let trigger_repo: Arc<dyn TriggerRepo> = Arc::new(PostgresTriggerRepo::new(pool.clone()));
let outbox_repo: Arc<dyn OutboxRepo> = Arc::new(PostgresOutboxRepo::new(pool.clone()));
let dl_repo: Arc<dyn DeadLetterRepo> = Arc::new(PostgresDeadLetterRepo::new(pool.clone()));
let abandoned_repo: Arc<dyn AbandonedRepo> = Arc::new(PostgresAbandonedRepo::new(pool.clone()));
let trigger_config = TriggerConfig::from_env();
// SDK services bundle. v1.1.1 ships the KV store and the
// outbox-backed event emitter; `NoopDeadLetterService` is a v1.1.1
// stub that errors loudly until the real `PostgresDeadLetterService`
// ships (commit 8).
let kv_repo = Arc::new(PostgresKvRepo::new(pool));
let events: Arc<dyn ServiceEventEmitter> = Arc::new(OutboxEventEmitter::new(
trigger_repo.clone(),
outbox_repo.clone(),
));
let kv: Arc<dyn KvService> =
Arc::new(KvServiceImpl::new(kv_repo, authz.clone(), events.clone()));
let services = Services::new(kv, Arc::new(NoopDeadLetterService), events);
let engine = Arc::new(Engine::new(Limits::default(), services));
// Trigger repo + config, shared between the admin endpoint and the
// dispatcher (commit 5). Read defaults from env so operators can
// tune retry / depth without rebuilding the binary.
let trigger_repo: Arc<dyn TriggerRepo> = Arc::new(PostgresTriggerRepo::new(pool));
let trigger_config = TriggerConfig::from_env();
// Compile the routes table once at startup; admin writes refresh it.
let route_table = Arc::new(RouteTable::new());
let initial = route_repo.list_all().await?;
@@ -146,7 +155,31 @@ pub async fn build_app(pool: PgPool, auth: AuthDeps) -> anyhow::Result<Router> {
// Single global gate — overflow is rejected with 503 + Retry-After.
// See `ExecutionGate` docs and `PICLOUD_MAX_CONCURRENT_EXECUTIONS`.
let gate = Arc::new(ExecutionGate::from_env());
let executor = Arc::new(LocalExecutorClient::new(engine.clone(), gate));
let executor = Arc::new(LocalExecutorClient::new(engine.clone(), gate.clone()));
// Dispatcher — single tokio task that polls the outbox and routes
// due rows to the executor. Shares the `ExecutionGate` with sync
// HTTP per design notes §2 (one cap for everything). NoopInboxResolver
// until commit 6 wires the real in-process inbox registry.
let dispatcher_script_repo: Arc<dyn ScriptRepository> =
Arc::new(PostgresScriptRepoHandle(script_repo.clone()));
let principals: Arc<dyn PrincipalResolver> =
Arc::new(AdminPrincipalResolver::new(auth.users.clone()));
let inbox: Arc<dyn InboxResolver> = Arc::new(NoopInboxResolver);
Dispatcher {
outbox: outbox_repo.clone(),
triggers: trigger_repo.clone(),
scripts: dispatcher_script_repo,
dead_letters: dl_repo.clone(),
abandoned: abandoned_repo.clone(),
principals,
executor: executor.clone(),
gate,
inbox,
config: trigger_config,
instance_id: format!("picloud-{}", std::process::id()),
}
.spawn();
let admin = AdminState {
repo: Arc::new(PostgresScriptRepoHandle(script_repo.clone())),
@@ -170,6 +203,10 @@ pub async fn build_app(pool: PgPool, auth: AuthDeps) -> anyhow::Result<Router> {
app_domains: app_domain_table.clone(),
routes: route_table,
};
// Silence unused-import warnings for repos handed to the
// dispatcher in this commit; commit 8 wires them into the
// dead-letters admin endpoints and commit 10 into the GC sweeper.
let _ = (&dl_repo, &abandoned_repo);
let triggers_state = TriggersState {
triggers: trigger_repo,
apps: apps_repo.clone(),