diff --git a/backend/Cargo.lock b/backend/Cargo.lock
index d22b297..f56c520 100644
--- a/backend/Cargo.lock
+++ b/backend/Cargo.lock
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "mangalord"
-version = "0.29.0"
+version = "0.30.0"
dependencies = [
"anyhow",
"argon2",
diff --git a/backend/Cargo.toml b/backend/Cargo.toml
index 6bfc280..c31dec1 100644
--- a/backend/Cargo.toml
+++ b/backend/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "mangalord"
-version = "0.29.0"
+version = "0.30.0"
edition = "2021"
default-run = "mangalord"
diff --git a/backend/src/crawler/content.rs b/backend/src/crawler/content.rs
index c804683..71fbfe3 100644
--- a/backend/src/crawler/content.rs
+++ b/backend/src/crawler/content.rs
@@ -16,6 +16,7 @@ use anyhow::Context;
use sqlx::PgPool;
use uuid::Uuid;
+use crate::crawler::detect::PageError;
use crate::crawler::rate_limit::HostRateLimiters;
use crate::crawler::session;
use crate::storage::Storage;
@@ -23,8 +24,18 @@ use crate::storage::Storage;
/// Parse the chapter page DOM and return the page images in `pageN`
/// order. Filters out the loader `<img class="loading">` and any
/// `<img>` without a numeric `id="pageN"`.
-pub fn parse_chapter_pages(html: &str) -> Vec {
+///
+/// Reader pages don't render the site's `#logo` element, so the
+/// universal logo-sentinel can't apply here — instead we assert
+/// `a#pic_container` is present. Its absence means the response is the
+/// transient broken-page response (or a redirect to some other layout)
+/// and the caller should retry.
+pub fn parse_chapter_pages(html: &str) -> Result, PageError> {
let doc = scraper::Html::parse_document(html);
+ let container_sel = scraper::Selector::parse("a#pic_container").unwrap();
+ if doc.select(&container_sel).next().is_none() {
+ return Err(PageError::transient("reader: a#pic_container missing"));
+ }
let sel = scraper::Selector::parse("a#pic_container img:not(.loading)").unwrap();
let mut pages: Vec = doc
.select(&sel)
@@ -39,7 +50,7 @@ pub fn parse_chapter_pages(html: &str) -> Vec {
})
.collect();
pages.sort_by_key(|p| p.page_number);
- pages
+ Ok(pages)
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -109,7 +120,8 @@ pub async fn sync_chapter_content(
let html = page.content().await.context("read chapter html")?;
page.close().await.ok();
- let images = parse_chapter_pages(&html);
+ let images = parse_chapter_pages(&html)
+ .with_context(|| format!("parse chapter pages at {source_url}"))?;
if images.is_empty() {
anyhow::bail!("no page images parsed from {source_url}");
}
@@ -205,7 +217,7 @@ mod tests {