diff --git a/backend/Cargo.lock b/backend/Cargo.lock index 6250d29..c6ffb28 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "mangalord" -version = "0.36.5" +version = "0.36.6" dependencies = [ "anyhow", "argon2", diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 9bb5859..6ddd1e5 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.36.5" +version = "0.36.6" edition = "2021" default-run = "mangalord" diff --git a/backend/src/crawler/pipeline.rs b/backend/src/crawler/pipeline.rs index 3ed2d0c..320c49e 100644 --- a/backend/src/crawler/pipeline.rs +++ b/backend/src/crawler/pipeline.rs @@ -215,6 +215,48 @@ pub async fn run_metadata_pass( } }; + // Partial-render guard: an empty chapter list paired with a + // prior count > 0 is overwhelmingly a chromium snapshot + // taken between the #chapter_table wrapper render and its + // rows render. The wait_for_selector wait in `navigate` + // narrows this window but cannot close it for slow renders + // beyond the selector budget. Treat as a transient failure + // here — skip upsert, skip seen.insert — so the next batch + // (or the next tick) retries. Skipped in `skip_chapters` + // mode because the parser is configured to return an empty + // Vec by design there. 
+ if !skip_chapters && manga.chapters.is_empty() { + match repo::crawler::live_chapter_count_for_source_manga( + db, source_id, &r.source_manga_key, + ) + .await + { + Ok(prior) if prior > 0 => { + tracing::warn!( + key = %r.source_manga_key, + url = %r.url, + prior_chapter_count = prior, + "fetch_manga returned empty chapters but prior count > 0; treating as partial-render transient and skipping" + ); + stats.mangas_failed += 1; + continue; + } + Ok(_) => {} + Err(e) => { + // DB lookup failed — fail safe: skip rather + // than risk a soft-drop on a manga whose prior + // count we couldn't confirm. + tracing::warn!( + key = %r.source_manga_key, + error = ?e, + "live_chapter_count_for_source_manga failed; skipping cautiously" + ); + stats.mangas_failed += 1; + continue; + } + } + } + let upsert = match repo::crawler::upsert_manga_from_source( db, source_id, &r.url, &manga, ) diff --git a/backend/src/repo/crawler.rs b/backend/src/repo/crawler.rs index 0cb7d2d..d0b056c 100644 --- a/backend/src/repo/crawler.rs +++ b/backend/src/repo/crawler.rs @@ -458,6 +458,44 @@ pub async fn sync_manga_chapters( Ok(diff) } +/// Count the chapters that the source `(source_id, source_manga_key)` +/// is currently known to attach to — i.e. the number of `chapter_sources` +/// rows for the manga identified by the (source_id, source_manga_key) +/// pair, restricted to live (`dropped_at IS NULL`) rows. +/// +/// Used by the metadata pass's partial-render guard: if `fetch_manga` +/// returns an empty `chapters` Vec but the source previously surfaced +/// chapters here, that's most likely a chromium snapshot taken between +/// the `#chapter_table` wrapper render and its rows render — the +/// safest move is to skip `sync_manga_chapters` so the soft-drop +/// branch doesn't flip every existing chapter to `dropped_at`. 
+/// +/// Returns `Ok(0)` when the manga is brand-new (no `manga_sources` +/// row yet), which is the legitimate "this manga has no chapters yet" +/// case and must NOT be flagged. +pub async fn live_chapter_count_for_source_manga( + pool: &PgPool, + source_id: &str, + source_manga_key: &str, +) -> sqlx::Result<i64> { + let row: Option<(i64,)> = sqlx::query_as( + "SELECT COUNT(*) \ + FROM chapter_sources cs \ + JOIN chapters c ON c.id = cs.chapter_id \ + JOIN manga_sources ms \ + ON ms.manga_id = c.manga_id \ + AND ms.source_id = cs.source_id \ + WHERE ms.source_id = $1 \ + AND ms.source_manga_key = $2 \ + AND cs.dropped_at IS NULL", + ) + .bind(source_id) + .bind(source_manga_key) + .fetch_optional(pool) + .await?; + Ok(row.map(|(n,)| n).unwrap_or(0)) +} + /// Mark a metadata pass as in-flight for `source_id`. Stamps /// `last_run_completed:` in `crawler_state` with /// `{"completed": false, "at": now}`. A crash, panic, or SIGKILL after diff --git a/backend/tests/crawler_sync.rs b/backend/tests/crawler_sync.rs index 2c2be30..b4c7faf 100644 --- a/backend/tests/crawler_sync.rs +++ b/backend/tests/crawler_sync.rs @@ -232,6 +232,67 @@ async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPo assert!(dropped.0.is_some(), "ch2 should be soft-dropped"); } +#[sqlx::test(migrations = "./migrations")] +async fn live_chapter_count_returns_zero_for_unknown_source_key(pool: PgPool) { + crawler::ensure_source(&pool, "target", "T", "https://x.example") + .await + .unwrap(); + // No manga_sources row yet → unknown key path. Must not error and + // must report zero so the partial-render guard accepts the + // "brand-new manga with no chapters" case as legitimate. 
+ let n = crawler::live_chapter_count_for_source_manga(&pool, "target", "nobody") + .await + .unwrap(); + assert_eq!(n, 0); +} + +#[sqlx::test(migrations = "./migrations")] +async fn live_chapter_count_only_counts_live_sources(pool: PgPool) { + crawler::ensure_source(&pool, "target", "T", "https://x.example") + .await + .unwrap(); + let m = sample_manga("foo", "Foo Manga", "hash-1"); + let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) + .await + .unwrap(); + let chapters = vec![ + SourceChapterRef { + source_chapter_key: "1".into(), + number: 1, + title: Some("Ch.1".into()), + url: "https://x.example/foo/1".into(), + }, + SourceChapterRef { + source_chapter_key: "2".into(), + number: 2, + title: Some("Ch.2".into()), + url: "https://x.example/foo/2".into(), + }, + ]; + crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters) + .await + .unwrap(); + assert_eq!( + crawler::live_chapter_count_for_source_manga(&pool, "target", "foo") + .await + .unwrap(), + 2 + ); + // Soft-drop one source row — count drops by one, the row stays. + sqlx::query( + "UPDATE chapter_sources SET dropped_at = NOW() WHERE source_chapter_key = '2'", + ) + .execute(&pool) + .await + .unwrap(); + assert_eq!( + crawler::live_chapter_count_for_source_manga(&pool, "target", "foo") + .await + .unwrap(), + 1 + ); +} + /// Real-world sources publish multiple chapters at the same number /// (different uploaders, translator notes, re-releases). After the /// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef` diff --git a/frontend/package.json b/frontend/package.json index 9898b5b..644840e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.36.5", + "version": "0.36.6", "private": true, "type": "module", "scripts": {