> = vec![
+ Ok(bytes::Bytes::from_static(b"ok")),
+ Err(std::io::Error::other("network blip")),
+ ];
+ let s = stream::iter(chunks);
+ let err = accumulate_capped(s, 100).await.unwrap_err();
+ assert!(err.to_string().contains("network blip"));
+ }
+
+    #[test]
+    fn looks_like_image_accepts_jpeg() {
+        // SOI marker followed by the start of a JFIF APP0 segment.
+        let jfif_header: &[u8] = b"\xff\xd8\xff\xe0\x00\x10JFIF";
+        assert!(looks_like_image(jfif_header));
+    }
+
+    #[test]
+    fn looks_like_image_accepts_png() {
+        let signature: &[u8] = b"\x89PNG\r\n\x1a\n\x00\x00\x00\x00"; // 8-byte magic + 4 zero bytes
+        assert!(looks_like_image(signature));
+    }
+
+    #[test]
+    fn looks_like_image_rejects_html_disguised_as_image() {
+        // An HTML page served where an image was expected must not pass the sniff.
+        assert!(!looks_like_image(b"<html><body>not an image</body></html>"));
+    }
+
+    #[test]
+    fn looks_like_image_rejects_empty() {
+        assert!(!looks_like_image(b"")); // zero bytes: nothing to sniff
+    }
+
+    #[test]
+    fn looks_like_image_rejects_renderable_but_unsupported_formats() {
+        // BMP, TIFF, ICO, PSD are `infer::MatcherType::Image` but the /files/*key
+        // handler doesn't have Content-Type mappings for them, so they'd be
+        // served as application/octet-stream and download instead of render.
+        // Reject at the crawler so we never land them in storage.
+        let unsupported: [(&str, &[u8]); 4] = [
+            // BMP magic: "BM" + 4-byte size.
+            ("BMP", &[b'B', b'M', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
+            // TIFF little-endian magic: "II" + 42.
+            ("TIFF", &[0x49, 0x49, 0x2a, 0x00, 0, 0, 0, 0]),
+            // ICO magic: 0x00,0x00,0x01,0x00. PSD magic: "8BPS".
+            ("ICO", &[0x00, 0x00, 0x01, 0x00, 1, 0, 16, 16, 0, 0, 1, 0, 0x18, 0, 0x40, 0, 0, 0, 0x16, 0, 0, 0]),
+            ("PSD", &[b'8', b'B', b'P', b'S', 0, 1, 0, 0, 0, 0, 0, 0]),
+        ];
+        for (format, bytes) in unsupported {
+            assert!(!looks_like_image(bytes), "{format} must be rejected (not renderable by /files)");
+        }
+    }
+
+    #[test]
+    fn looks_like_image_accepts_webp_gif_avif() {
+        // Exercises the whitelisted formats that have no dedicated test, so
+        // removing any of them from the allow-list fails loudly here.
+
+        // WebP: "RIFF", 4-byte size, "WEBP", then the "VP8 " chunk tag.
+        let webp: &[u8] = b"RIFF\0\0\0\0WEBPVP8 ";
+        assert!(looks_like_image(webp));
+
+        // GIF: "GIF87a" signature followed by four zero bytes.
+        let gif: &[u8] = b"GIF87a\0\0\0\0";
+        assert!(looks_like_image(gif));
+
+        // AVIF: a 24-byte `ftyp` box, written out one 4-byte field
+        // per entry and concatenated before sniffing.
+        let avif_fields: [&[u8]; 6] = [
+            b"\x00\x00\x00\x18", // box size (0x18 = 24 bytes)
+            b"ftyp",
+            b"avif",
+            b"\x00\x00\x00\x00",
+            b"mif1",
+            b"avif",
+        ];
+        assert!(looks_like_image(&avif_fields.concat()));
+    }
+}
diff --git a/backend/src/crawler/session.rs b/backend/src/crawler/session.rs
index 209ea5c..b35efe5 100644
--- a/backend/src/crawler/session.rs
+++ b/backend/src/crawler/session.rs
@@ -127,6 +127,54 @@ pub fn classify_probe(html: &str) -> SessionProbe {
}
}
+ /// Three-way classification of a chapter page response.
+ ///
+ /// Reader pages don't render `#logo`, so [`classify_probe`] can't be
+ /// reused as-is. The chapter-specific marker is `a#pic_container`
+ /// (asserted by the reader-page parser at `parse_chapter_pages`).
+ ///
+ /// Order matters: broken-page body wins over selector matches, so a
+ /// transient site-wide 5xx is never blamed on the session
+ /// (`Unauthenticated`) nor mistaken for a rendered reader (`Ok`).
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ pub enum ChapterProbe {
+ /// `a#pic_container` present — reader rendered. Whether
+ /// `#avatar_menu` is also there is informational; if the reader
+ /// loaded the session is by definition still good.
+ Ok,
+ /// Site rendered a "logged out" or "please log in" page (no
+ /// reader, no broken-page body, and no avatar widget either).
+ /// Distinguishes the genuine expired-session case from a
+ /// transient site hiccup.
+ Unauthenticated,
+ /// Broken-page body, or reader didn't render but the user is
+ /// still logged in (avatar widget present). Caller should retry
+ /// rather than blame the session.
+ Transient,
+ }
+
+/// Classifies a chapter-page body: broken-body check, then reader, then avatar.
+pub fn classify_chapter_probe(html: &str) -> ChapterProbe {
+    if is_broken_page_body(html) {
+        return ChapterProbe::Transient;
+    }
+    let page = scraper::Html::parse_document(html);
+    let has = |css: &str| {
+        page.select(&scraper::Selector::parse(css).unwrap())
+            .next()
+            .is_some()
+    };
+    if has("a#pic_container") {
+        ChapterProbe::Ok // reader rendered
+    } else if has("#avatar_menu") {
+        // Still logged in, but no reader: interstitial or shifted layout.
+        ChapterProbe::Transient
+    } else {
+        // No reader, no avatar, no broken body: the "please log in" page.
+        ChapterProbe::Unauthenticated
+    }
+}
+
/// In-startup retry budget for the session probe. Small but non-zero —
/// startup hitting a 5-second site hiccup shouldn't fail the operator
/// with "PHPSESSID expired" when the session is actually fine.
@@ -273,6 +321,73 @@ mod tests {
assert_eq!(classify_probe(""), SessionProbe::Transient);
}
+    #[test]
+    fn classify_chapter_probe_ok_when_reader_rendered() {
+        // `a#pic_container` is in the document, so the reader rendered.
+        let html = r#"
+            <html><body>
+                <div id="avatar_menu"></div>
+                <a id="pic_container" href="/next"><img src="page.jpg"></a>
+            </body></html>
+        "#;
+        assert_eq!(classify_chapter_probe(html), ChapterProbe::Ok);
+    }
+
+    #[test]
+    fn classify_chapter_probe_unauthenticated_when_no_reader_and_no_avatar() {
+        // What a logged-out hit on a chapter URL renders: a normal
+        // site layout (header etc.) with a "please log in" body, but
+        // no reader and no avatar widget.
+        let html = r#"
+            <html><body>
+                <div id="header"><a href="/">Home</a></div>
+                <p>Please log in to read this chapter.</p>
+            </body></html>
+        "#;
+        assert_eq!(
+            classify_chapter_probe(html),
+            ChapterProbe::Unauthenticated
+        );
+    }
+
+    #[test]
+    fn classify_chapter_probe_transient_when_logged_in_but_reader_missing() {
+        // Avatar shows the session is still valid; reader didn't
+        // render — site is serving an interstitial or the layout
+        // momentarily shifted. Retry, don't blame the session.
+        let html = r#"
+            <html><body>
+                <div id="avatar_menu"></div>
+                <p>Site maintenance — back in 5 minutes.</p>
+            </body></html>
+        "#;
+        assert_eq!(classify_chapter_probe(html), ChapterProbe::Transient);
+    }
+
+    #[test]
+    fn classify_chapter_probe_transient_on_broken_page_body() {
+        // The broken-page check runs before any selector, so this body
+        // must classify as Transient.
+        let html = "we're sorry, the request file are not found.\n";
+        assert_eq!(classify_chapter_probe(html), ChapterProbe::Transient);
+    }
+
+    #[test]
+    fn classify_chapter_probe_does_not_misfire_on_avatar_alone_without_reader() {
+        // Regression for the original bug: the binary
+        // find_element("#avatar_menu") check treated "no avatar" as
+        // session-expired. Despite the test name, the fixture has the
+        // reader (pic_container) and *no* avatar widget; the probe
+        // must report Ok rather than trip on the missing avatar.
+        let html = r#"
+            <html><body>
+                <a id="pic_container" href="/next">
+                    <img src="page.jpg">
+                </a>
+            </body></html>
+        "#;
+        assert_eq!(classify_chapter_probe(html), ChapterProbe::Ok);
+    }
+
#[test]
fn classify_probe_trusts_broken_body_over_stray_avatar_match() {
// Defensive: if a broken-page body somehow contains an
diff --git a/frontend/package.json b/frontend/package.json
index 159dad9..72a32cd 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
{
"name": "mangalord-frontend",
- "version": "0.34.0",
+ "version": "0.34.1",
"private": true,
"type": "module",
"scripts": {