fix(crawler): skip sync when empty chapters + prior > 0 (0.36.6)

The wait_for_selector call added in 0.36.2 narrows the partial-render
race window but doesn't close it: a render that takes longer than
SELECTOR_TIMEOUT (10s) still hands an empty Vec to sync_manga_chapters,
and the soft-drop branch then sets dropped_at on every existing chapter.
The next tick recovers, but in between the manga's reader briefly stops
working.

Close it at the pipeline level. Between fetch_manga and the upsert/
sync, if the parsed chapter list is empty and the prior live count
for (source_id, source_manga_key) is > 0, treat the fetch as a
transient failure: log, bump mangas_failed, skip upsert + sync + the
seen.insert so a later batch / tick retries. Brand-new mangas with
genuinely zero chapters (prior == 0) pass through unchanged.

New repo helper repo::crawler::live_chapter_count_for_source_manga
joins chapters → chapter_sources → manga_sources with dropped_at IS
NULL — same lockstep as dispatch_target and the enqueue queries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-30 20:17:42 +02:00
parent 5c04b0532b
commit eaa5afda50
6 changed files with 144 additions and 3 deletions

View File

@@ -232,6 +232,67 @@ async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPo
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
}
/// Looking up the live chapter count for a source key that was never
/// registered must succeed and yield zero.
#[sqlx::test(migrations = "./migrations")]
async fn live_chapter_count_returns_zero_for_unknown_source_key(pool: PgPool) {
    crawler::ensure_source(&pool, "target", "T", "https://x.example")
        .await
        .unwrap();

    // Only the source exists; no manga_sources row was created for
    // "nobody". The lookup has to come back as Ok(0) so the
    // partial-render guard still treats a brand-new manga with no
    // chapters as a legitimate result.
    let lookup = crawler::live_chapter_count_for_source_manga(&pool, "target", "nobody").await;
    let live = lookup.unwrap();
    assert_eq!(live, 0);
}
/// The live chapter count must include only chapter sources whose
/// `dropped_at` is unset: two synced chapters count as 2, and
/// soft-dropping one of them lowers the count to 1.
#[sqlx::test(migrations = "./migrations")]
async fn live_chapter_count_only_counts_live_sources(pool: PgPool) {
    crawler::ensure_source(&pool, "target", "T", "https://x.example")
        .await
        .unwrap();

    // Register the manga under source key "foo".
    let manga = sample_manga("foo", "Foo Manga", "hash-1");
    let upserted =
        crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &manga)
            .await
            .unwrap();

    // Sync two chapters for it.
    let first = SourceChapterRef {
        source_chapter_key: "1".into(),
        number: 1,
        title: Some("Ch.1".into()),
        url: "https://x.example/foo/1".into(),
    };
    let second = SourceChapterRef {
        source_chapter_key: "2".into(),
        number: 2,
        title: Some("Ch.2".into()),
        url: "https://x.example/foo/2".into(),
    };
    let refs = vec![first, second];
    crawler::sync_manga_chapters(&pool, "target", upserted.manga_id, &refs)
        .await
        .unwrap();

    let live_before = crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
        .await
        .unwrap();
    assert_eq!(live_before, 2);

    // Mark chapter "2"'s source row as dropped without deleting it; the
    // row stays in the table but must no longer be counted.
    sqlx::query(
        "UPDATE chapter_sources SET dropped_at = NOW() WHERE source_chapter_key = '2'",
    )
    .execute(&pool)
    .await
    .unwrap();

    let live_after = crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
        .await
        .unwrap();
    assert_eq!(live_after, 1);
}
/// Real-world sources publish multiple chapters at the same number
/// (different uploaders, translator notes, re-releases). After the
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`