From 50763addcf47ef33d83d18233a148c446ca44367 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Fri, 29 May 2026 20:42:41 +0200 Subject: [PATCH] fix(crawler): quarantine recently-dead chapters from re-enqueue (0.35.4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The partial dedup index only blocks (pending|running) duplicates, so once a SyncChapterContent job transitions to 'dead' (max_attempts exhausted) the slot frees. Every subsequent cron tick re-enqueued the chapter — page_count = 0 and dropped_at IS NULL still held — and the new job burned another max_attempts retries and died again. Permanent-failure chapters spun forever. enqueue_bookmarked_pending and enqueue_pending_for_manga now skip chapters whose latest sync_chapter_content job went dead within the last CHAPTER_DEAD_QUARANTINE_DAYS (7) days. A failed chapter goes silent for a week, then gets one more shot — long enough for a transient site issue to resolve, short enough that a chapter which only looked permanently broken is retried once conditions change. Two integration tests pin both halves of the contract (skip inside the window, re-enqueue after it expires). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/Cargo.lock | 2 +- backend/Cargo.toml | 2 +- backend/src/crawler/pipeline.rs | 32 ++++++- backend/tests/crawler_daemon.rs | 147 ++++++++++++++++++++++++++++++++ frontend/package.json | 2 +- 5 files changed, 181 insertions(+), 4 deletions(-) diff --git a/backend/Cargo.lock b/backend/Cargo.lock index 37609ca..08bfdd7 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "mangalord" -version = "0.35.3" +version = "0.35.4" dependencies = [ "anyhow", "argon2", diff --git a/backend/Cargo.toml b/backend/Cargo.toml index c592d4b..d23fdfd 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.35.3" +version = "0.35.4" edition = "2021" default-run = "mangalord" diff --git a/backend/src/crawler/pipeline.rs b/backend/src/crawler/pipeline.rs index f8afd4b..8367351 100644 --- a/backend/src/crawler/pipeline.rs +++ b/backend/src/crawler/pipeline.rs @@ -319,8 +319,20 @@ pub async fn run_metadata_pass( Ok(stats) } +/// Quarantine window for chapters whose latest `SyncChapterContent` job is +/// `dead`. The partial dedup index `crawler_jobs_chapter_content_dedup_idx` +/// only blocks `(pending|running)` duplicates, so without this gate a +/// permanently-failing chapter is re-enqueued every cron tick, burns +/// `max_attempts` retries, dies again, and spins forever. With the gate, +/// dead chapters get a week of silence before the next attempt — long +/// enough for a transient site issue to resolve, short enough that +/// permanent failures don't stay permanent if conditions change. +const CHAPTER_DEAD_QUARANTINE_DAYS: i64 = 7; + /// Enqueue a `SyncChapterContent` job for every chapter of *any* bookmarked /// manga that still has `page_count = 0` and a non-dropped source row. 
+/// Chapters whose latest job is `dead` within `CHAPTER_DEAD_QUARANTINE_DAYS` +/// are excluded to break the dead-letter spin. /// Returns `(inserted, skipped)` counts. Dedup index handles repeats. pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result { let rows: Vec<(String, Uuid, String)> = sqlx::query_as( @@ -331,10 +343,18 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result>'kind' = 'sync_chapter_content' + AND cj.payload->>'chapter_id' = c.id::text + AND cj.state = 'dead' + AND cj.updated_at > now() - ($1::bigint || ' days')::interval + ) GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at ORDER BY c.manga_id, c.created_at ASC "#, ) + .bind(CHAPTER_DEAD_QUARANTINE_DAYS) .fetch_all(pool) .await .context("query bookmarked-pending chapters")?; @@ -363,7 +383,9 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result>'kind' = 'sync_chapter_content' + AND cj.payload->>'chapter_id' = c.id::text + AND cj.state = 'dead' + AND cj.updated_at > now() - ($2::bigint || ' days')::interval + ) ORDER BY cs.source_id, c.id "#, ) .bind(manga_id) + .bind(CHAPTER_DEAD_QUARANTINE_DAYS) .fetch_all(pool) .await .context("query pending chapters for manga")?; diff --git a/backend/tests/crawler_daemon.rs b/backend/tests/crawler_daemon.rs index 8d8f71a..e57ec59 100644 --- a/backend/tests/crawler_daemon.rs +++ b/backend/tests/crawler_daemon.rs @@ -370,3 +370,150 @@ async fn enqueue_bookmarked_pending_skips_dropped_sources(pool: PgPool) { ); } +#[sqlx::test(migrations = "./migrations")] +async fn enqueue_bookmarked_pending_skips_recently_dead_chapters(pool: PgPool) { + // Setup: a chapter whose last SyncChapterContent job died yesterday. + // The cron tick must not re-enqueue — without the quarantine, the + // chapter would spin: re-enqueue → max_attempts retries → dies again + // → re-enqueue next tick → forever. 
+ let user_id: Uuid = sqlx::query_scalar( + "INSERT INTO users (username, password_hash) VALUES ($1, $2) RETURNING id", + ) + .bind("alice") + .bind("not-a-real-hash") + .fetch_one(&pool) + .await + .unwrap(); + let manga_id: Uuid = + sqlx::query_scalar("INSERT INTO mangas (title) VALUES ($1) RETURNING id") + .bind("Test") + .fetch_one(&pool) + .await + .unwrap(); + sqlx::query( + "INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING", + ) + .bind("target") + .bind("Target") + .bind("https://example.com") + .execute(&pool) + .await + .unwrap(); + let chapter_id: Uuid = sqlx::query_scalar( + "INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, 1, 0) RETURNING id", + ) + .bind(manga_id) + .fetch_one(&pool) + .await + .unwrap(); + sqlx::query( + "INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url) \ + VALUES ($1, $2, $3, $4)", + ) + .bind("target") + .bind("ch1") + .bind(chapter_id) + .bind("https://example.com/ch1") + .execute(&pool) + .await + .unwrap(); + sqlx::query("INSERT INTO bookmarks (user_id, manga_id) VALUES ($1, $2)") + .bind(user_id) + .bind(manga_id) + .execute(&pool) + .await + .unwrap(); + // The dead job from the prior tick, updated 1 day ago (well inside the + // 7-day quarantine window). 
+ sqlx::query( + "INSERT INTO crawler_jobs (payload, state, updated_at) \ + VALUES ($1::jsonb, 'dead', now() - interval '1 day')", + ) + .bind(serde_json::json!({ + "kind": "sync_chapter_content", + "source_id": "target", + "chapter_id": chapter_id.to_string(), + "source_chapter_key": "ch1", + })) + .execute(&pool) + .await + .unwrap(); + + let summary = pipeline::enqueue_bookmarked_pending(&pool).await.unwrap(); + assert_eq!(summary.inserted, 0, "recently dead chapter is quarantined"); + assert_eq!(summary.skipped, 0); +} + +#[sqlx::test(migrations = "./migrations")] +async fn enqueue_bookmarked_pending_resumes_after_quarantine_expires(pool: PgPool) { + // Same setup as above but the dead job is 10 days old — past the + // 7-day quarantine. The chapter should be re-enqueued so a once-failed + // chapter eventually gets a second shot at success. + let user_id: Uuid = sqlx::query_scalar( + "INSERT INTO users (username, password_hash) VALUES ($1, $2) RETURNING id", + ) + .bind("alice") + .bind("not-a-real-hash") + .fetch_one(&pool) + .await + .unwrap(); + let manga_id: Uuid = + sqlx::query_scalar("INSERT INTO mangas (title) VALUES ($1) RETURNING id") + .bind("Test") + .fetch_one(&pool) + .await + .unwrap(); + sqlx::query( + "INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING", + ) + .bind("target") + .bind("Target") + .bind("https://example.com") + .execute(&pool) + .await + .unwrap(); + let chapter_id: Uuid = sqlx::query_scalar( + "INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, 1, 0) RETURNING id", + ) + .bind(manga_id) + .fetch_one(&pool) + .await + .unwrap(); + sqlx::query( + "INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url) \ + VALUES ($1, $2, $3, $4)", + ) + .bind("target") + .bind("ch1") + .bind(chapter_id) + .bind("https://example.com/ch1") + .execute(&pool) + .await + .unwrap(); + sqlx::query("INSERT INTO bookmarks (user_id, manga_id) VALUES ($1, $2)") + .bind(user_id) + 
.bind(manga_id) + .execute(&pool) + .await + .unwrap(); + sqlx::query( + "INSERT INTO crawler_jobs (payload, state, updated_at) \ + VALUES ($1::jsonb, 'dead', now() - interval '10 days')", + ) + .bind(serde_json::json!({ + "kind": "sync_chapter_content", + "source_id": "target", + "chapter_id": chapter_id.to_string(), + "source_chapter_key": "ch1", + })) + .execute(&pool) + .await + .unwrap(); + + let summary = pipeline::enqueue_bookmarked_pending(&pool).await.unwrap(); + assert_eq!( + summary.inserted, 1, + "dead chapter is re-enqueued after quarantine expires" + ); +} + diff --git a/frontend/package.json b/frontend/package.json index b081d53..1aa4475 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.35.3", + "version": "0.35.4", "private": true, "type": "module", "scripts": {