fix(crawler): skip sync when empty chapters + prior > 0 (0.36.6)
The wait_for_selector wait in 0.36.2 narrows the partial-render race window but doesn't close it: a render that takes longer than SELECTOR_TIMEOUT (10s) still hands an empty Vec to sync_manga_chapters, and the soft-drop branch flips every existing chapter to dropped_at. The next tick recovers but a manga's reader briefly stops working in between. Close it at the pipeline level. Between fetch_manga and the upsert/sync, if the parsed chapter list is empty and the prior live count for (source_id, source_manga_key) is > 0, treat the fetch as a transient failure: log, bump mangas_failed, skip upsert + sync + the seen.insert so a later batch / tick retries. Brand-new mangas with genuinely zero chapters (prior == 0) pass through unchanged. New repo helper repo::crawler::live_chapter_count_for_source_manga joins chapters → chapter_sources → manga_sources with dropped_at IS NULL — same lockstep as dispatch_target and the enqueue queries. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -232,6 +232,67 @@ async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPo
|
||||
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
||||
}
|
||||
|
||||
/// Checks that `crawler::live_chapter_count_for_source_manga` yields `0`,
/// and does not return an error, when queried with a source manga key
/// ("nobody") that this test never upserts.
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn live_chapter_count_returns_zero_for_unknown_source_key(pool: PgPool) {
|
||||
// Only `ensure_source` runs as setup; nothing in this test upserts a
// manga for the key queried below.
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
// No manga_sources row yet → unknown key path. Must not error and
|
||||
// must report zero so the partial-render guard accepts the
|
||||
// "brand-new manga with no chapters" case as legitimate.
|
||||
let n = crawler::live_chapter_count_for_source_manga(&pool, "target", "nobody")
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n, 0);
|
||||
}
|
||||
|
||||
/// Checks that `crawler::live_chapter_count_for_source_manga` stops counting
/// a chapter once its `chapter_sources` row has `dropped_at` set.
///
/// Flow: upsert one manga, sync two chapters for it, expect a count of 2,
/// then set `dropped_at` on the row with source chapter key '2' and expect
/// a count of 1.
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn live_chapter_count_only_counts_live_sources(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
// NOTE(review): presumably the first `sample_manga` argument ("foo") is the
// source manga key that the count queries below look up — confirm against
// the helper, which is defined outside this view.
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
// Two chapter refs with distinct source keys and distinct numbers.
let chapters = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "1".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1".into()),
|
||||
url: "https://x.example/foo/1".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "2".into(),
|
||||
number: 2,
|
||||
title: Some("Ch.2".into()),
|
||||
url: "https://x.example/foo/2".into(),
|
||||
},
|
||||
];
|
||||
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
||||
.await
|
||||
.unwrap();
|
||||
// Baseline: both chapters synced above are expected to be counted.
assert_eq!(
|
||||
crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
|
||||
.await
|
||||
.unwrap(),
|
||||
2
|
||||
);
|
||||
// Soft-drop one source row — count drops by one, the row stays.
// The UPDATE matches on `source_chapter_key` alone (no manga or source
// filter in the WHERE clause).
|
||||
sqlx::query(
|
||||
"UPDATE chapter_sources SET dropped_at = NOW() WHERE source_chapter_key = '2'",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
// After the soft-drop only the chapter with key "1" should still count.
assert_eq!(
|
||||
crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
|
||||
.await
|
||||
.unwrap(),
|
||||
1
|
||||
);
|
||||
}
|
||||
|
||||
/// Real-world sources publish multiple chapters at the same number
|
||||
/// (different uploaders, translator notes, re-releases). After the
|
||||
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
||||
|
||||
Reference in New Issue
Block a user