bugfix: security & correctness bundle (0.34.1)

Five fixes bundled into one release:

- preserve user-attached tags across crawler upserts
  (repo::crawler::sync_tags now scopes to added_by IS NULL; orphaned
  attachments from deleted users are reaped as crawler-owned)
- gate manga PATCH and cover endpoints on uploaded_by (require_can_edit
  in api::mangas; non-NULL uploaded_by must match the caller)
- equalise login response time across user-existence branches
  (run argon2 against a OnceLock-cached dummy hash on the no-user
  branch so timing doesn't leak username existence)
- crawler download defences (SSRF allowlist of host literals
  including IPv4-mapped IPv6 ranges, 32 MiB streamed size cap,
  reject non-whitelisted image types, three-way chapter-probe
  classifier replaces the binary #avatar_menu check)
- tighten validation and clean up dead unload path
  (attach_tag + create_token enforce 64-char caps; LocalStorage
  rejects NUL bytes explicitly; reader flushFinalProgress drops
  the always-405 sendBeacon path)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-28 20:24:51 +02:00
parent c5c1179e9d
commit 8d34132883
25 changed files with 1399 additions and 88 deletions

View File

@@ -100,6 +100,54 @@ pub fn classify_probe(html: &str) -> SessionProbe {
}
}
/// Three-way classification of a chapter page response, as produced
/// by [`classify_chapter_probe`].
///
/// Reader pages don't render `#logo`, so [`classify_probe`] can't be
/// reused as-is. The chapter-specific marker is `a#pic_container`
/// (asserted by the reader-page parser at `parse_chapter_pages`).
///
/// Order matters: the broken-page body check runs before any selector
/// match, so an error page that happens to contain reader or avatar
/// markup is reported as `Transient` and never reaches `Ok`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChapterProbe {
/// `a#pic_container` present — reader rendered. `#avatar_menu` is
/// not consulted in this case; if the reader loaded, the session
/// is by definition still good.
Ok,
/// Site rendered a "logged out" or "please log in" page: no
/// broken-page body, no reader, and no avatar widget either.
/// Distinguishes the genuine expired-session case from a
/// transient site hiccup.
Unauthenticated,
/// Broken-page body, or the reader didn't render but the user is
/// still logged in (avatar widget present). Caller should retry
/// rather than blame the session.
Transient,
}
/// Classifies the HTML of a chapter page into a [`ChapterProbe`].
///
/// Checks run in a fixed order and the first hit decides the verdict:
///
/// 1. `is_broken_page_body(html)` is true → `Transient`
/// 2. `a#pic_container` matches → `Ok`
/// 3. `#avatar_menu` matches → `Transient`
/// 4. nothing matched → `Unauthenticated`
///
/// NOTE(review): how an empty or truncated body is classified depends
/// on what `is_broken_page_body` returns for it — confirm it is treated
/// as broken, otherwise it falls through to `Unauthenticated`.
pub fn classify_chapter_probe(html: &str) -> ChapterProbe {
    // Broken-page body is checked before the document is even parsed,
    // so it takes precedence over any selector match.
    if is_broken_page_body(html) {
        return ChapterProbe::Transient;
    }

    let document = scraper::Html::parse_document(html);

    // True when at least one element in the document matches `css`.
    // Selectors are only parsed when actually needed.
    let page_has = |css: &str| {
        let selector = scraper::Selector::parse(css).unwrap();
        let found = document.select(&selector).next().is_some();
        found
    };

    if page_has("a#pic_container") {
        // Reader rendered; the avatar widget is irrelevant here.
        ChapterProbe::Ok
    } else if page_has("#avatar_menu") {
        // Still logged in, yet no reader — most likely an interstitial
        // or a shifted layout. Worth a retry.
        ChapterProbe::Transient
    } else {
        // Neither reader, nor avatar, nor broken-body marker: this is
        // the "please log in" page, the genuine session-expired signal
        // on this route.
        ChapterProbe::Unauthenticated
    }
}
/// In-startup retry budget for the session probe. Small but non-zero —
/// startup hitting a 5-second site hiccup shouldn't fail the operator
/// with "PHPSESSID expired" when the session is actually fine.
@@ -210,6 +258,73 @@ mod tests {
assert_eq!(classify_probe(""), SessionProbe::Transient);
}
#[test]
fn classify_chapter_probe_ok_when_reader_rendered() {
    // Minimal reader page: the `a#pic_container` marker is present and
    // there is no avatar widget anywhere in the document.
    let reader_page = r#"
<html><body>
<a id="pic_container">
<img id="page1" src="https://cdn/1.jpg">
</a>
</body></html>
"#;
    let verdict = classify_chapter_probe(reader_page);
    assert_eq!(verdict, ChapterProbe::Ok);
}
#[test]
fn classify_chapter_probe_unauthenticated_when_no_reader_and_no_avatar() {
    // Fixture models a logged-out hit on a chapter URL: ordinary site
    // chrome (header with `#logo`) around a "please log in" message,
    // with neither the reader container nor the avatar widget.
    let logged_out_page = r#"
<html><body>
<header><div id="logo">Catalog</div></header>
<main>Please log in to read this chapter.</main>
</body></html>
"#;
    let verdict = classify_chapter_probe(logged_out_page);
    assert_eq!(verdict, ChapterProbe::Unauthenticated);
}
#[test]
fn classify_chapter_probe_transient_when_logged_in_but_reader_missing() {
    // The avatar widget is in the header, so the session is still
    // valid, but there is no reader container — the page is an
    // interstitial. Expected verdict: retry, don't blame the session.
    let interstitial_page = r#"
<html><body>
<header><div id="logo">Catalog</div><div id="avatar_menu"></div></header>
<main>Site maintenance — back in 5 minutes.</main>
</body></html>
"#;
    let verdict = classify_chapter_probe(interstitial_page);
    assert_eq!(verdict, ChapterProbe::Transient);
}
#[test]
fn classify_chapter_probe_transient_on_broken_page_body() {
    // A page whose body carries the site's broken-page message must be
    // reported as transient, regardless of any other markup.
    let broken_page =
        "<html><body><p>we're sorry, the request file are not found.</p></body></html>";
    let verdict = classify_chapter_probe(broken_page);
    assert_eq!(verdict, ChapterProbe::Transient);
}
#[test]
fn classify_chapter_probe_does_not_misfire_on_avatar_alone_without_reader() {
    // Regression for the original bug: the binary
    // find_element("#avatar_menu") check read a missing avatar widget
    // as "session expired". Despite the test name, the fixture below
    // contains the reader container and NO avatar widget; the
    // classifier must still answer `Ok` because `a#pic_container` is
    // present.
    let reader_without_avatar = r#"
<html><body>
<a id="pic_container">
<img id="page1" src="https://cdn/1.jpg">
</a>
</body></html>
"#;
    let verdict = classify_chapter_probe(reader_without_avatar);
    assert_eq!(verdict, ChapterProbe::Ok);
}
#[test]
fn classify_probe_trusts_broken_body_over_stray_avatar_match() {
// Defensive: if a broken-page body somehow contains an