fix(crawler): quarantine recently-dead chapters from re-enqueue (0.35.4)

The partial dedup index only blocks (pending|running) duplicates, so
once a SyncChapterContent job transitions to 'dead' (max_attempts
exhausted) the slot frees. Every subsequent cron tick then re-enqueued
the chapter (page_count = 0 and dropped_at IS NULL still held), the
new job burned another max_attempts retries, and it died again.
Permanent-failure chapters spun forever.

enqueue_bookmarked_pending and enqueue_pending_for_manga now skip
chapters that have a dead sync_chapter_content job updated within the
last CHAPTER_DEAD_QUARANTINE_DAYS (7) days. A failed chapter goes
silent for a week, then gets one more shot — long enough for a
transient site issue to resolve, short enough that permanent failures
don't stay permanent if conditions change.

Two integration tests pin both halves of the contract.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-29 20:42:41 +02:00
parent 766c6eebac
commit 50763addcf
5 changed files with 181 additions and 4 deletions

View File

@@ -319,8 +319,20 @@ pub async fn run_metadata_pass(
Ok(stats)
}
/// Number of days a chapter is held back from re-enqueue after one of its
/// `SyncChapterContent` jobs has ended up `dead`.
///
/// Background: the partial dedup index
/// `crawler_jobs_chapter_content_dedup_idx` rejects duplicates only while a
/// job is `pending` or `running`. Once a job is `dead` nothing stops the
/// next cron tick from enqueueing the same chapter again, which burns
/// another `max_attempts` retries and dies again, in an endless loop.
///
/// The enqueue queries therefore skip any chapter that has a `dead` job
/// whose `updated_at` falls inside this window. One week is long enough
/// for a transient site issue to clear, yet short enough that a chapter
/// is retried if conditions change.
const CHAPTER_DEAD_QUARANTINE_DAYS: i64 = 7;
/// Enqueue a `SyncChapterContent` job for every chapter of *any* bookmarked
/// manga that still has `page_count = 0` and a non-dropped source row.
/// Chapters with a `dead` `sync_chapter_content` job updated within the last
/// `CHAPTER_DEAD_QUARANTINE_DAYS` days are excluded to break the dead-letter
/// spin.
/// Returns `(inserted, skipped)` counts. Dedup index handles repeats.
pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<EnqueueSummary> {
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
@@ -331,10 +343,18 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
JOIN chapter_sources cs ON cs.chapter_id = c.id
WHERE c.page_count = 0
AND cs.dropped_at IS NULL
AND NOT EXISTS (
SELECT 1 FROM crawler_jobs cj
WHERE cj.payload->>'kind' = 'sync_chapter_content'
AND cj.payload->>'chapter_id' = c.id::text
AND cj.state = 'dead'
AND cj.updated_at > now() - ($1::bigint || ' days')::interval
)
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at
ORDER BY c.manga_id, c.created_at ASC
"#,
)
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
.fetch_all(pool)
.await
.context("query bookmarked-pending chapters")?;
@@ -363,7 +383,9 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
}
/// Enqueue chapter-content jobs for a *single* manga (the bookmark-create
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`].
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`], including
/// the dead-letter quarantine — a freshly bookmarked manga should not
/// burn retries on chapters that just died on the cron tick.
pub async fn enqueue_pending_for_manga(
pool: &PgPool,
manga_id: Uuid,
@@ -376,10 +398,18 @@ pub async fn enqueue_pending_for_manga(
WHERE c.manga_id = $1
AND c.page_count = 0
AND cs.dropped_at IS NULL
AND NOT EXISTS (
SELECT 1 FROM crawler_jobs cj
WHERE cj.payload->>'kind' = 'sync_chapter_content'
AND cj.payload->>'chapter_id' = c.id::text
AND cj.state = 'dead'
AND cj.updated_at > now() - ($2::bigint || ' days')::interval
)
ORDER BY cs.source_id, c.id
"#,
)
.bind(manga_id)
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
.fetch_all(pool)
.await
.context("query pending chapters for manga")?;