feat(crawler): reliability fixes — heartbeat, streaming, jitter, timeout, breaker
A1 Lease heartbeat: jobs::renew keeps a long-but-healthy job's lease fresh so it is never stolen mid-flight nor inflated toward max_attempts. A2 Stream chapter pages straight to storage (peak memory = one image) and persist rows + page_count in one short transaction off the network path (S3-ready); roll back stored blobs on failure via Storage::delete. A3 ±20% jitter on exponential backoff to avoid a retry thundering herd. A4 Outer per-dispatch timeout (CRAWLER_JOB_TIMEOUT_SECS, default 600) so a hung job is acked-failed instead of wedging a worker. A5 Metadata circuit-breaker (CRAWLER_METADATA_MAX_CONSECUTIVE_FAILURES, default 10): abort a pass on a source outage without marking a clean exit, so the next tick recovery-sweeps. Adds CRAWLER_BROWSER_RESTART_THRESHOLD config (used by the upcoming coordinated browser restart). Bumps version 0.52.0 -> 0.53.0. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -185,6 +185,68 @@ async fn lease_marks_running_and_bumps_attempts_and_sets_leased_until(pool: PgPo
|
||||
assert!(leased_until > chrono::Utc::now());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn renew_extends_leased_until_while_running(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
EnqueueResult::Skipped => unreachable!(),
|
||||
};
|
||||
|
||||
// Lease with a short window, then collapse leased_until to the recent
|
||||
// past so the renew is unambiguously an extension.
|
||||
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(5))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(leases.len(), 1);
|
||||
sqlx::query("UPDATE crawler_jobs SET leased_until = now() - interval '1 second' WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let still_owned = jobs::renew(&pool, id, Duration::from_secs(120))
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(still_owned, "renew on a running job returns true");
|
||||
|
||||
let leased_until: chrono::DateTime<chrono::Utc> =
|
||||
sqlx::query_scalar("SELECT leased_until FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
leased_until > chrono::Utc::now() + chrono::Duration::seconds(60),
|
||||
"leased_until pushed ~120s into the future"
|
||||
);
|
||||
assert_eq!(job_state(&pool, id).await, "running");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn renew_is_noop_once_job_no_longer_running(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
EnqueueResult::Skipped => unreachable!(),
|
||||
};
|
||||
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
// Job completes — heartbeat should now see it's no longer ours.
|
||||
jobs::ack_done(&pool, leases[0].id).await.unwrap();
|
||||
|
||||
let still_owned = jobs::renew(&pool, id, Duration::from_secs(120))
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!still_owned, "renew on a non-running job returns false");
|
||||
assert_eq!(job_state(&pool, id).await, "done");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
||||
let manga_id = match jobs::enqueue(&pool, &sync_manga_payload("foo"))
|
||||
|
||||
Reference in New Issue
Block a user