diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 10022ab..38d097d 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.35.1" +version = "0.35.2" edition = "2021" default-run = "mangalord" diff --git a/backend/src/crawler/source/target.rs b/backend/src/crawler/source/target.rs index de1ed42..9ce6643 100644 --- a/backend/src/crawler/source/target.rs +++ b/backend/src/crawler/source/target.rs @@ -444,7 +444,7 @@ fn parse_manga_detail( .collect(); let chapters = if include_chapters { - parse_chapter_list(&doc) + parse_chapter_list(&doc)? } else { Vec::new() }; @@ -502,9 +502,22 @@ fn strip_tag_count(s: &str) -> String { trimmed.to_string() } -fn parse_chapter_list(doc: &scraper::Html) -> Vec { +/// Parse the chapter table on a manga detail page. Returns `Transient` if +/// `#chapter_table` isn't in the DOM at all — the table is required even +/// for mangas with no published chapters yet (the source renders an empty +/// ``), so an absent table signals a partial render (post-load JS +/// not done, layout drift) rather than a legitimately empty list. Without +/// this sentinel, an empty `Vec` reaches `sync_manga_chapters` and the +/// soft-drop branch flips every existing chapter to `dropped_at`. +fn parse_chapter_list(doc: &scraper::Html) -> Result, PageError> { + if !has_chapter_table_sentinel(doc) { + return Err(PageError::transient( + "manga detail: #chapter_table sentinel missing", + )); + } let sel = scraper::Selector::parse("#chapter_table td h4 a.chico").unwrap(); - doc.select(&sel) + Ok(doc + .select(&sel) .filter_map(|a| { let url = a.value().attr("href")?.trim().to_string(); if url.is_empty() { @@ -519,7 +532,16 @@ fn parse_chapter_list(doc: &scraper::Html) -> Vec { url, }) }) - .collect() + .collect()) +} + +/// Returns true when the chapter-table container is present in the DOM. +/// Source-specific: the target site uses `#chapter_table` as the wrapper +/// element. 
Distinguishes "table is present but empty" (legit edge case +/// for new mangas) from "table is missing entirely" (partial render). +fn has_chapter_table_sentinel(doc: &scraper::Html) -> bool { + let sel = scraper::Selector::parse("#chapter_table").expect("valid selector"); + doc.select(&sel).next().is_some() } fn parse_chapter_number(text: &str) -> Option { @@ -880,7 +902,7 @@ mod tests { "../../../tests/fixtures/target/chapter_list_uu.html" ); let doc = scraper::Html::parse_document(html); - let chapters = parse_chapter_list(&doc); + let chapters = parse_chapter_list(&doc).expect("fixture has the table"); assert_eq!(chapters.len(), 15, "every row kept (notices/hiatus included)"); @@ -1027,9 +1049,17 @@ mod tests { #[test] fn missing_optional_fields_parse_to_none() { + // Minimal but well-formed detail page: title is required, every + // other field is optional, but the chapter table is structural — + // its absence is treated as Transient (a freshly added manga + // renders the table empty, not absent). See + // `parse_chapter_list_returns_transient_when_table_missing` for + // the negative case. let html = r#"\
\ -

Minimal

"#; +

Minimal

\ +
\ + "#; let m = parse_manga_detail(html, "min", true).unwrap(); assert_eq!(m.title, "Minimal"); assert!(m.summary.is_none()); @@ -1222,4 +1252,64 @@ mod tests { assert_eq!(displaced[0].source_manga_key, "X"); assert_eq!(outcome, DisplacementOutcome::Shifted(1)); } + + #[test] + fn parse_chapter_list_returns_transient_when_table_missing() { + // Partial render (post-load JS hadn't injected the table, layout + // drift, etc). Returning Vec::new() would silently soft-drop every + // existing chapter for the manga via sync_manga_chapters; Transient + // is the signal the job system retries on. + let html = r#" +
+

Test

+ "#; + let doc = scraper::Html::parse_document(html); + let err = parse_chapter_list(&doc).expect_err("expected Transient"); + assert!(err.is_transient(), "got non-transient: {err}"); + } + + #[test] + fn parse_chapter_list_ok_empty_when_table_present_but_no_rows() { + // A freshly-added manga with no chapters yet — the source renders + // the `#chapter_table` wrapper but no chapter rows + // inside. Must stay distinguishable from a missing-table render. + let html = r#" +
+
+ "#; + let doc = scraper::Html::parse_document(html); + let chapters = parse_chapter_list(&doc).expect("present table is not transient"); + assert!(chapters.is_empty()); + } + + #[test] + fn parse_manga_detail_propagates_chapter_table_transient() { + // End-to-end: a detail page that survives the #logo sentinel but + // has the chapter table stripped must fail Transient at the parser + // boundary, not return a SourceManga with empty chapters. + let html = r#" +
+

Test Title

+
+ + "#; + let err = parse_manga_detail(html, "key", true).expect_err("expected Transient"); + assert!(err.is_transient(), "got non-transient: {err}"); + } + + #[test] + fn parse_manga_detail_skips_chapter_sentinel_when_include_chapters_false() { + // Metadata-only mode (`skip_chapters` upstream) must not require + // the chapter table — pipeline.rs avoids calling sync_manga_chapters + // for these mangas, so the absent table is not a correctness issue + // and shouldn't surface as Transient. + let html = r#" +
+

Test Title

+
+ "#; + let manga = parse_manga_detail(html, "key", false) + .expect("metadata-only parse must not require chapter table"); + assert!(manga.chapters.is_empty()); + } } diff --git a/frontend/package.json b/frontend/package.json index 0d0ad92..4b29625 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.35.1", + "version": "0.35.2", "private": true, "type": "module", "scripts": {