fix(v1.1.7-dead-letter): wire dispatcher → list_matching_dead_letter

dead_letter triggers have been registerable since v1.1.1 but their
handlers never fired: dispatcher::handle_failure wrote the dead_letters
row and stopped — list_matching_dead_letter had no production caller.
Any deploy v1.1.1–v1.1.6 with dead_letter triggers had silently
non-functional handlers.

The fix: after the dead-letter row is inserted on retry exhaustion, fan
out to matching dead_letter triggers (filtered by source / originating
trigger_id / script_id) and enqueue one outbox row per match carrying a
real-shape TriggerEvent::DeadLetter (the field names in the §6 brief were
stale, so this uses the actual variant's fields: dead_letter_id,
original: Box<TriggerEvent>, attempts, last_error, trigger_id, script_id,
first/last_attempt_at).
The recursion-stop (a handler's own failure isn't re-dead-lettered)
is upheld by the existing is_dead_letter_handler short-circuit.

Tests (DB-gated): handler actually fires with the nested original event;
existing row-create test now also asserts handler-fire; source_filter
excludes non-matching; failing handler does not recurse.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-04 22:30:25 +02:00
parent 1f78937dd2
commit 02335a8132
2 changed files with 256 additions and 32 deletions

View File

@@ -303,23 +303,128 @@ async fn dispatcher_delivers_pubsub_to_handler() {
assert_eq!(event["pubsub"]["message"]["hello"], 1);
}
/// Returns the number of `dead_letters` rows stored for `app_id`.
///
/// # Panics
///
/// Panics if `app_id` cannot be parsed as a UUID, or if the count query
/// fails.
async fn dead_letter_count(pool: &PgPool, app_id: &str) -> i64 {
    const COUNT_SQL: &str = "SELECT COUNT(*) FROM dead_letters WHERE app_id = $1";

    let id = Uuid::parse_str(app_id).unwrap();
    let total: i64 = sqlx::query_scalar(COUNT_SQL)
        .bind(id)
        .fetch_one(pool)
        .await
        .expect("count dead_letters");
    total
}
/// Polls `dead_letter_count` until at least `want` rows exist for `app_id`.
///
/// Makes up to 100 checks, sleeping 100 ms after each unsuccessful one.
/// Returns the last observed count, which is below `want` if the target
/// was never reached — callers assert on the returned value.
async fn poll_dead_letter_count(pool: &PgPool, app_id: &str, want: i64) -> i64 {
    let mut seen = 0;
    let mut checks_left = 100;
    while checks_left > 0 {
        seen = dead_letter_count(pool, app_id).await;
        if seen >= want {
            return seen;
        }
        tokio::time::sleep(Duration::from_millis(100)).await;
        checks_left -= 1;
    }
    seen
}
/// Builds the standard dead-letter scenario for `app_id`.
///
/// In order:
/// 1. creates an always-throwing script and registers it as a KV trigger
///    on the `dlsrc` collection with a single attempt and zero retry base,
///    so its first failure exhausts the retries;
/// 2. registers a `dead_letter` trigger (no filters) whose script is
///    `dl_handler`;
/// 3. creates and executes a script that writes to `dlsrc`, producing the
///    originating KV event.
///
/// Returns once the originating script has been executed; it does not
/// wait for the dispatcher to process the event.
async fn setup_dead_letter(server: &TestServer, app_id: &str, dl_handler: &str) {
    let created = axum::http::StatusCode::CREATED;

    // Originating trigger: a handler that always throws.
    let failing = create_script(server, app_id, "dl-failing", r#"throw "boom";"#).await;
    let kv_url = format!("/api/v1/admin/apps/{app_id}/triggers/kv");
    let kv_body = json!({
        "script_id": failing,
        "collection_glob": "dlsrc",
        "retry_max_attempts": 1,
        "retry_base_ms": 0
    });
    let kv_response = server.post(&kv_url).json(&kv_body).await;
    kv_response.assert_status(created);

    // Dead-letter trigger with no filters, so it matches any dead-letter.
    let dl_url = format!("/api/v1/admin/apps/{app_id}/triggers/dead_letter");
    let dl_body = json!({ "script_id": dl_handler });
    let dl_response = server.post(&dl_url).json(&dl_body).await;
    dl_response.assert_status(created);

    // Fire the KV event that the failing handler will choke on.
    let source = create_script(
        server,
        app_id,
        "dl-source",
        r#"kv::collection("dlsrc").set("k", 1); #{ ok: true }"#,
    )
    .await;
    execute(server, source.as_str()).await;
}
#[tokio::test]
async fn dispatcher_delivers_dead_letter_to_handler() {
    // v1.1.7: the dead-letter fan-out is wired
    // (`dispatcher::handle_failure` → `list_matching_dead_letter` →
    // outbox). This test checks both halves: the `dead_letters` row is
    // written AND the registered `dead_letter`-kind handler actually fires
    // (it was silently non-functional in v1.1.1–v1.1.6).
    let pool = match pool_or_skip().await {
        Some(pool) => pool,
        None => return,
    };
    let (server, app_id) = server_for(pool.clone(), "dl").await;
    let handler = create_script(&server, &app_id, "dl-handler", MARKER_HANDLER).await;
    setup_dead_letter(&server, &app_id, &handler).await;

    // Half one: the dead-letter row exists.
    let rows = poll_dead_letter_count(&pool, &app_id, 1).await;
    assert!(rows > 0, "a dead-letter row should have been produced");

    // Half two: the handler ran and recorded the event it was given.
    let delivered = poll_marker(&pool, &app_id).await;
    let event = delivered.expect("dead-letter handler fired");
    assert_eq!(event["source"], "dead_letter");
}
#[tokio::test]
async fn dispatcher_delivers_dead_letter_to_handler_actually_fires() {
    // Handler-fire side only: the marker handler must be handed a fully
    // shaped dead-letter event — the original KV event nested under
    // `dead_letter.original`, together with the failure metadata.
    let pool = match pool_or_skip().await {
        Some(pool) => pool,
        None => return,
    };
    let (server, app_id) = server_for(pool.clone(), "dlfire").await;
    let handler = create_script(&server, &app_id, "dl-handler", MARKER_HANDLER).await;
    setup_dead_letter(&server, &app_id, &handler).await;

    let delivered = poll_marker(&pool, &app_id).await;
    let event = delivered.expect("dead-letter handler fired");
    assert_eq!(event["source"], "dead_letter");

    // The originating KV event travels along unchanged.
    let original = &event["dead_letter"]["original"];
    assert_eq!(original["source"], "kv");
    assert_eq!(original["kv"]["collection"], "dlsrc");

    // Failure metadata: the thrown message and a non-zero attempt count.
    let last_error = event["dead_letter"]["last_error"].as_str().unwrap();
    assert!(last_error.contains("boom"));
    let attempts = event["dead_letter"]["attempts"].as_i64().unwrap();
    assert!(attempts >= 1);
}
#[tokio::test]
async fn dead_letter_source_filter_excludes_nonmatching() {
// `list_matching_dead_letter` filters by source (among trigger_id /
// script_id). A dead_letter trigger whose `source_filter` is "docs"
// must NOT fire for a "kv"-sourced dead-letter — the row is still
// written, but no handler delivery is enqueued.
let Some(pool) = pool_or_skip().await else {
return;
};
let (server, app_id) = server_for(pool.clone(), "dlfilter").await;
let handler = create_script(&server, &app_id, "dl-handler", MARKER_HANDLER).await;
// A handler that always throws, with a single attempt so it
// dead-letters immediately (no retry backoff).
let failing = create_script(&server, &app_id, "dl-failing", r#"throw "boom";"#).await;
server
.post(&format!("/api/v1/admin/apps/{app_id}/triggers/kv"))
@@ -331,6 +436,12 @@ async fn dispatcher_delivers_dead_letter_to_handler() {
}))
.await
.assert_status(axum::http::StatusCode::CREATED);
// Filter to a different source so this handler must NOT match.
server
.post(&format!("/api/v1/admin/apps/{app_id}/triggers/dead_letter"))
.json(&json!({ "script_id": handler, "source_filter": "docs" }))
.await
.assert_status(axum::http::StatusCode::CREATED);
let source = create_script(
&server,
@@ -341,19 +452,38 @@ async fn dispatcher_delivers_dead_letter_to_handler() {
.await;
execute(&server, &source).await;
// Poll the dead_letters table for this app.
let app_uuid = Uuid::parse_str(&app_id).unwrap();
let mut count: i64 = 0;
for _ in 0..100 {
count = sqlx::query_scalar("SELECT COUNT(*) FROM dead_letters WHERE app_id = $1")
.bind(app_uuid)
.fetch_one(&pool)
.await
.expect("count dead_letters");
if count > 0 {
break;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
assert!(count > 0, "a dead-letter row should have been produced");
// The dead-letter row is written…
assert!(poll_dead_letter_count(&pool, &app_id, 1).await >= 1);
// …but the source-filtered handler never fires.
let marker = poll_marker_n(&pool, &app_id, 8).await;
assert!(
marker.is_none(),
"source_filter='docs' must not fire for a kv dead-letter"
);
}
#[tokio::test]
async fn dead_letter_handler_failure_does_not_recurse() {
    // Recursion-stop (design notes §4): when the dead_letter handler
    // itself throws, no second dead-letter row may be created. The
    // `is_dead_letter_handler` short-circuit is expected to annotate the
    // original row and drop the outbox row instead of re-dead-lettering.
    let pool = match pool_or_skip().await {
        Some(pool) => pool,
        None => return,
    };
    let (server, app_id) = server_for(pool.clone(), "dlrec").await;

    // Register a dead-letter handler that throws on every invocation.
    let throwing = create_script(&server, &app_id, "dl-throws", r#"throw "handler boom";"#).await;
    setup_dead_letter(&server, &app_id, &throwing).await;

    // Wait for the original dead-letter row to show up.
    assert!(poll_dead_letter_count(&pool, &app_id, 1).await >= 1);

    // Leave time for the throwing handler to run (and, if the guard were
    // broken, to recurse), then require that the count is still exactly 1.
    tokio::time::sleep(Duration::from_millis(800)).await;
    let settled = dead_letter_count(&pool, &app_id).await;
    assert_eq!(
        settled, 1,
        "a failing dead-letter handler must not create a new dead-letter row"
    );
}