fix(crawler): quarantine recently-dead chapters from re-enqueue (0.35.4)
The partial dedup index only blocks (pending|running) duplicates, so once a SyncChapterContent job transitions to 'dead' (max_attempts exhausted) the slot frees. Every subsequent cron tick re-enqueued the chapter — page_count = 0 and dropped_at IS NULL stay true — burned another max_attempts retries, and died again. Permanent-failure chapters spun forever. enqueue_bookmarked_pending and enqueue_pending_for_manga now skip chapters whose latest sync_chapter_content job is dead within CHAPTER_DEAD_QUARANTINE_DAYS (7). A failed chapter goes silent for a week, then gets one more shot — long enough for a transient site issue to resolve, short enough that permanent failures don't stay permanent if conditions change. Two integration tests pin both halves of the contract. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -319,8 +319,20 @@ pub async fn run_metadata_pass(
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Quarantine window for chapters whose latest `SyncChapterContent` job is
|
||||
/// `dead`. The partial dedup index `crawler_jobs_chapter_content_dedup_idx`
|
||||
/// only blocks `(pending|running)` duplicates, so without this gate a
|
||||
/// permanently-failing chapter is re-enqueued every cron tick, burns
|
||||
/// `max_attempts` retries, dies again, and spins forever. With the gate,
|
||||
/// dead chapters get a week of silence before the next attempt — long
|
||||
/// enough for a transient site issue to resolve, short enough that
|
||||
/// permanent failures don't stay permanent if conditions change.
|
||||
const CHAPTER_DEAD_QUARANTINE_DAYS: i64 = 7;
|
||||
|
||||
/// Enqueue a `SyncChapterContent` job for every chapter of *any* bookmarked
|
||||
/// manga that still has `page_count = 0` and a non-dropped source row.
|
||||
/// Chapters with a `dead` job updated in the last `CHAPTER_DEAD_QUARANTINE_DAYS` days
|
||||
/// are excluded to break the dead-letter spin.
|
||||
/// Returns the inserted/skipped counts as an [`EnqueueSummary`]. Dedup index handles repeats.
|
||||
pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<EnqueueSummary> {
|
||||
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
|
||||
@@ -331,10 +343,18 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id
|
||||
WHERE c.page_count = 0
|
||||
AND cs.dropped_at IS NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM crawler_jobs cj
|
||||
WHERE cj.payload->>'kind' = 'sync_chapter_content'
|
||||
AND cj.payload->>'chapter_id' = c.id::text
|
||||
AND cj.state = 'dead'
|
||||
AND cj.updated_at > now() - ($1::bigint || ' days')::interval
|
||||
)
|
||||
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at
|
||||
ORDER BY c.manga_id, c.created_at ASC
|
||||
"#,
|
||||
)
|
||||
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("query bookmarked-pending chapters")?;
|
||||
@@ -363,7 +383,9 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
|
||||
}
|
||||
|
||||
/// Enqueue chapter-content jobs for a *single* manga (the bookmark-create
|
||||
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`].
|
||||
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`], including
|
||||
/// the dead-letter quarantine — a freshly bookmarked manga should not
|
||||
/// burn retries on chapters that just died on the cron tick.
|
||||
pub async fn enqueue_pending_for_manga(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
@@ -376,10 +398,18 @@ pub async fn enqueue_pending_for_manga(
|
||||
WHERE c.manga_id = $1
|
||||
AND c.page_count = 0
|
||||
AND cs.dropped_at IS NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM crawler_jobs cj
|
||||
WHERE cj.payload->>'kind' = 'sync_chapter_content'
|
||||
AND cj.payload->>'chapter_id' = c.id::text
|
||||
AND cj.state = 'dead'
|
||||
AND cj.updated_at > now() - ($2::bigint || ' days')::interval
|
||||
)
|
||||
ORDER BY cs.source_id, c.id
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("query pending chapters for manga")?;
|
||||
|
||||
Reference in New Issue
Block a user