fix(crawler): skip sync when empty chapters + prior > 0 (0.36.6)
The wait_for_selector wait in 0.36.2 narrows the partial-render race window but doesn't close it: a render that takes longer than SELECTOR_TIMEOUT (10s) still hands an empty Vec to sync_manga_chapters, and the soft-drop branch flips every existing chapter to dropped_at. The next tick recovers, but a manga's reader briefly stops working in between. Close it at the pipeline level. Between fetch_manga and the upsert/sync, if the parsed chapter list is empty and the prior live count for (source_id, source_manga_key) is > 0, treat the fetch as a transient failure: log, bump mangas_failed, skip upsert + sync + the seen.insert so a later batch / tick retries. Brand-new mangas with genuinely zero chapters (prior == 0) pass through unchanged. New repo helper repo::crawler::live_chapter_count_for_source_manga joins chapters → chapter_sources → manga_sources with dropped_at IS NULL — same lockstep as dispatch_target and the enqueue queries. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.36.5"
|
version = "0.36.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"argon2",
|
"argon2",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.36.5"
|
version = "0.36.6"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
default-run = "mangalord"
|
default-run = "mangalord"
|
||||||
|
|
||||||
|
|||||||
@@ -215,6 +215,48 @@ pub async fn run_metadata_pass(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Partial-render guard: an empty chapter list paired with a
|
||||||
|
// prior count > 0 is overwhelmingly a chromium snapshot
|
||||||
|
// taken between the #chapter_table wrapper render and its
|
||||||
|
// rows render. The wait_for_selector wait in `navigate`
|
||||||
|
// narrows this window but cannot close it for slow renders
|
||||||
|
// beyond the selector budget. Treat as a transient failure
|
||||||
|
// here — skip upsert, skip seen.insert — so the next batch
|
||||||
|
// (or the next tick) retries. Skipped in `skip_chapters`
|
||||||
|
// mode because the parser is configured to return an empty
|
||||||
|
// Vec by design there.
|
||||||
|
if !skip_chapters && manga.chapters.is_empty() {
|
||||||
|
match repo::crawler::live_chapter_count_for_source_manga(
|
||||||
|
db, source_id, &r.source_manga_key,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(prior) if prior > 0 => {
|
||||||
|
tracing::warn!(
|
||||||
|
key = %r.source_manga_key,
|
||||||
|
url = %r.url,
|
||||||
|
prior_chapter_count = prior,
|
||||||
|
"fetch_manga returned empty chapters but prior count > 0; treating as partial-render transient and skipping"
|
||||||
|
);
|
||||||
|
stats.mangas_failed += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Ok(_) => {}
|
||||||
|
Err(e) => {
|
||||||
|
// DB lookup failed — fail safe: skip rather
|
||||||
|
// than risk a soft-drop on a manga whose prior
|
||||||
|
// count we couldn't confirm.
|
||||||
|
tracing::warn!(
|
||||||
|
key = %r.source_manga_key,
|
||||||
|
error = ?e,
|
||||||
|
"live_chapter_count_for_source_manga failed; skipping cautiously"
|
||||||
|
);
|
||||||
|
stats.mangas_failed += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let upsert = match repo::crawler::upsert_manga_from_source(
|
let upsert = match repo::crawler::upsert_manga_from_source(
|
||||||
db, source_id, &r.url, &manga,
|
db, source_id, &r.url, &manga,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -458,6 +458,44 @@ pub async fn sync_manga_chapters(
|
|||||||
Ok(diff)
|
Ok(diff)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Count the chapters that the source `(source_id, source_manga_key)`
|
||||||
|
/// is currently known to attach to — i.e. the number of `chapter_sources`
|
||||||
|
/// rows for the manga identified by the (source_id, source_manga_key)
|
||||||
|
/// pair, restricted to live (`dropped_at IS NULL`) rows.
|
||||||
|
///
|
||||||
|
/// Used by the metadata pass's partial-render guard: if `fetch_manga`
|
||||||
|
/// returns an empty `chapters` Vec but the source previously surfaced
|
||||||
|
/// chapters here, that's most likely a chromium snapshot taken between
|
||||||
|
/// the `#chapter_table` wrapper render and its rows render — the
|
||||||
|
/// safest move is to skip `sync_manga_chapters` so the soft-drop
|
||||||
|
/// branch doesn't flip every existing chapter to `dropped_at`.
|
||||||
|
///
|
||||||
|
/// Returns `Ok(0)` when the manga is brand-new (no `manga_sources`
|
||||||
|
/// row yet), which is the legitimate "this manga has no chapters yet"
|
||||||
|
/// case and must NOT be flagged.
|
||||||
|
pub async fn live_chapter_count_for_source_manga(
|
||||||
|
pool: &PgPool,
|
||||||
|
source_id: &str,
|
||||||
|
source_manga_key: &str,
|
||||||
|
) -> sqlx::Result<i64> {
|
||||||
|
let row: Option<(i64,)> = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) \
|
||||||
|
FROM chapter_sources cs \
|
||||||
|
JOIN chapters c ON c.id = cs.chapter_id \
|
||||||
|
JOIN manga_sources ms \
|
||||||
|
ON ms.manga_id = c.manga_id \
|
||||||
|
AND ms.source_id = cs.source_id \
|
||||||
|
WHERE ms.source_id = $1 \
|
||||||
|
AND ms.source_manga_key = $2 \
|
||||||
|
AND cs.dropped_at IS NULL",
|
||||||
|
)
|
||||||
|
.bind(source_id)
|
||||||
|
.bind(source_manga_key)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(row.map(|(n,)| n).unwrap_or(0))
|
||||||
|
}
|
||||||
|
|
||||||
/// Mark a metadata pass as in-flight for `source_id`. Stamps
|
/// Mark a metadata pass as in-flight for `source_id`. Stamps
|
||||||
/// `last_run_completed:<source_id>` in `crawler_state` with
|
/// `last_run_completed:<source_id>` in `crawler_state` with
|
||||||
/// `{"completed": false, "at": now}`. A crash, panic, or SIGKILL after
|
/// `{"completed": false, "at": now}`. A crash, panic, or SIGKILL after
|
||||||
|
|||||||
@@ -232,6 +232,67 @@ async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPo
|
|||||||
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn live_chapter_count_returns_zero_for_unknown_source_key(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// No manga_sources row yet → unknown key path. Must not error and
|
||||||
|
// must report zero so the partial-render guard accepts the
|
||||||
|
// "brand-new manga with no chapters" case as legitimate.
|
||||||
|
let n = crawler::live_chapter_count_for_source_manga(&pool, "target", "nobody")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(n, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn live_chapter_count_only_counts_live_sources(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||||
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let chapters = vec![
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "1".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.1".into()),
|
||||||
|
url: "https://x.example/foo/1".into(),
|
||||||
|
},
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "2".into(),
|
||||||
|
number: 2,
|
||||||
|
title: Some("Ch.2".into()),
|
||||||
|
url: "https://x.example/foo/2".into(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
2
|
||||||
|
);
|
||||||
|
// Soft-drop one source row — count drops by one, the row stays.
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE chapter_sources SET dropped_at = NOW() WHERE source_chapter_key = '2'",
|
||||||
|
)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/// Real-world sources publish multiple chapters at the same number
|
/// Real-world sources publish multiple chapters at the same number
|
||||||
/// (different uploaders, translator notes, re-releases). After the
|
/// (different uploaders, translator notes, re-releases). After the
|
||||||
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "mangalord-frontend",
|
"name": "mangalord-frontend",
|
||||||
"version": "0.36.5",
|
"version": "0.36.6",
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
Reference in New Issue
Block a user