fix(crawler): skip sync when empty chapters + prior > 0 (0.36.6)

The wait_for_selector wait in 0.36.2 narrows the partial-render race window but doesn't close it: a render that takes longer than SELECTOR_TIMEOUT (10s) still hands an empty Vec to sync_manga_chapters, and the soft-drop branch flips every existing chapter to dropped_at. The next tick recovers, but a manga's reader briefly stops working in between.

Close it at the pipeline level. Between fetch_manga and the upsert/sync, if the parsed chapter list is empty and the prior live count for (source_id, source_manga_key) is > 0, treat the fetch as a transient failure: log, bump mangas_failed, skip upsert + sync + the seen.insert so a later batch / tick retries. Brand-new mangas with genuinely zero chapters (prior == 0) pass through unchanged.

New repo helper repo::crawler::live_chapter_count_for_source_manga joins chapters → chapter_sources → manga_sources with dropped_at IS NULL — same lockstep as dispatch_target and the enqueue queries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-30 20:17:42 +02:00
parent 5c04b0532b
commit eaa5afda50
6 changed files with 144 additions and 3 deletions
--- a/backend/tests/crawler_sync.rs
+++ b/backend/tests/crawler_sync.rs
@@ -232,6 +232,67 @@ async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPo
    assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
 }

+#[sqlx::test(migrations = "./migrations")]
+async fn live_chapter_count_returns_zero_for_unknown_source_key(pool: PgPool) {
+    crawler::ensure_source(&pool, "target", "T", "https://x.example")
+        .await
+        .unwrap();
+    // Only the source is registered here; nothing was upserted for "nobody".
+    // The helper must return Ok(0) rather than error, so the partial-render
+    // guard lets a brand-new manga with zero chapters through (prior == 0).
+    let n = crawler::live_chapter_count_for_source_manga(&pool, "target", "nobody")
+        .await
+        .unwrap();
+    assert_eq!(n, 0);
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn live_chapter_count_only_counts_live_sources(pool: PgPool) {
+    crawler::ensure_source(&pool, "target", "T", "https://x.example")
+        .await
+        .unwrap();
+    let m = sample_manga("foo", "Foo Manga", "hash-1");
+    let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
+        .await
+        .unwrap();
+    let chapters = vec![
+        SourceChapterRef {
+            source_chapter_key: "1".into(),
+            number: 1,
+            title: Some("Ch.1".into()),
+            url: "https://x.example/foo/1".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "2".into(),
+            number: 2,
+            title: Some("Ch.2".into()),
+            url: "https://x.example/foo/2".into(),
+        },
+    ];
+    crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
+        .await
+        .unwrap();
+    assert_eq!(
+        crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
+            .await
+            .unwrap(),
+        2
+    );
+    // Soft-drop ch2's source row (UPDATE sets dropped_at, no DELETE) — live count must fall to 1.
+    sqlx::query(
+        "UPDATE chapter_sources SET dropped_at = NOW() WHERE source_chapter_key = '2'",
+    )
+    .execute(&pool)
+    .await
+    .unwrap();
+    assert_eq!(
+        crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
+            .await
+            .unwrap(),
+        1
+    );
+}
+
 /// Real-world sources publish multiple chapters at the same number
 /// (different uploaders, translator notes, re-releases). After the
 /// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`