bugfix: chapter source key uses chapter id, not /pg-1/ (0.23.1)
Listing links point at the reader's page 1 (`.../uu/br_chapter-N/pg-1/`). The generic `derive_key_from_url` took the last URL segment and returned `"pg-1"` for every chapter, so all parsed chapters collapsed onto a single `chapter_sources` row downstream and the first-manga chapter was the only row that survived. New `derive_chapter_key_from_url` strips a trailing `/pg-\d+/` before picking the chapter-identifying segment (`br_chapter-N` / `to_chapter-N`). Notices, hiatus rows, and duplicate-numbered chapters are preserved as distinct parser entries. The (manga_id, number) UNIQUE collapse in the chapters table is a separate, follow-up concern handled in feat/chapter-id-routing. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1415,7 +1415,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mangalord"
|
||||
version = "0.23.0"
|
||||
version = "0.23.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.23.0"
|
||||
version = "0.23.1"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
|
||||
@@ -334,7 +334,7 @@ fn parse_chapter_list(doc: &scraper::Html) -> Vec<SourceChapterRef> {
|
||||
let title_text = collapse_whitespace(&a.text().collect::<String>());
|
||||
let number = parse_chapter_number(&title_text).unwrap_or(0);
|
||||
Some(SourceChapterRef {
|
||||
source_chapter_key: derive_key_from_url(&url),
|
||||
source_chapter_key: derive_chapter_key_from_url(&url),
|
||||
number,
|
||||
title: (!title_text.is_empty()).then_some(title_text),
|
||||
url,
|
||||
@@ -366,6 +366,29 @@ fn derive_key_from_url(url: &str) -> String {
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Chapter URLs on this source point at the reader's page 1, e.g.
|
||||
/// `.../uu/br_chapter-379272/pg-1/`. The chapter identity is the
|
||||
/// `br_chapter-N` (or `to_chapter-N`) segment — the `pg-\d+` segment
|
||||
/// identifies a page *within* a chapter, so naively taking the last
|
||||
/// path component returns `"pg-1"` for every chapter and collapses
|
||||
/// them all under one source_chapter_key downstream.
|
||||
fn derive_chapter_key_from_url(url: &str) -> String {
|
||||
let trimmed = url.split('?').next().unwrap_or(url).trim_end_matches('/');
|
||||
let without_reader_page = match trimmed.rsplit_once('/') {
|
||||
Some((prefix, last)) if is_reader_page_segment(last) => prefix,
|
||||
_ => trimmed,
|
||||
};
|
||||
without_reader_page
|
||||
.rsplit('/')
|
||||
.find(|s| !s.is_empty())
|
||||
.unwrap_or(url)
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Returns `true` when `s` is exactly `pg-` followed by one or more ASCII
/// digits (for example `pg-1` or `pg-25`), and `false` for anything else,
/// including a bare `pg-`.
fn is_reader_page_segment(s: &str) -> bool {
    match s.strip_prefix("pg-") {
        // At least one character must follow the prefix, and all of them
        // must be ASCII digits.
        Some(page_no) => !page_no.is_empty() && page_no.bytes().all(|byte| byte.is_ascii_digit()),
        None => false,
    }
}
|
||||
|
||||
fn first_text(doc: &scraper::Html, sel: &str) -> Option<String> {
|
||||
let s = scraper::Selector::parse(sel).ok()?;
|
||||
let el = doc.select(&s).next()?;
|
||||
@@ -577,6 +600,61 @@ mod tests {
|
||||
assert_eq!(strip_tag_count("Tag (a) (12)"), "Tag (a)");
|
||||
}
|
||||
|
||||
#[test]
fn parse_chapter_list_keeps_all_chapters_with_unique_keys() {
    // The fixture is a real chapter listing from the target site with
    // 15 rows: ordinary `Ch.N` rows, one hiatus row, three "notice."
    // rows, and Ch.1 / Ch.52 each uploaded twice by different people.
    // All listing hrefs end in `/pg-1/` (the reader's first page), so a
    // key derived from the last URL segment would be "pg-1" for every
    // row and the list would collapse into a single downstream chapter
    // row. Every row has to survive parsing with its own distinct
    // source_chapter_key.
    let doc = scraper::Html::parse_document(include_str!(
        "../../../tests/fixtures/target/chapter_list_uu.html"
    ));
    let chapters = parse_chapter_list(&doc);

    assert_eq!(chapters.len(), 15, "every row kept (notices/hiatus included)");

    // Sorting puts equal keys next to each other, so any duplicate shows
    // up as an adjacent pair.
    let mut keys: Vec<&str> = Vec::with_capacity(chapters.len());
    for chapter in &chapters {
        keys.push(chapter.source_chapter_key.as_str());
    }
    keys.sort();
    let dupe = keys
        .windows(2)
        .find_map(|pair| (pair[0] == pair[1]).then_some(pair[0]));
    assert!(dupe.is_none(), "duplicate chapter key: {dupe:?}");
    for c in &chapters {
        assert_ne!(
            c.source_chapter_key, "pg-1",
            "key must not be the reader-page segment: {:?}", c
        );
    }

    // The source lists newest → oldest, so the latest chapter is row 0.
    let newest = &chapters[0];
    assert_eq!(newest.number, 67);
    assert_eq!(newest.title.as_deref(), Some("Ch.67 : Official"));
    assert_eq!(newest.source_chapter_key, "br_chapter-379272");

    // Same chapter number from different uploaders stays as two parsed
    // rows. Collapsing on (manga_id, number) is a downstream schema
    // concern that is handled separately.
    let ch52_uploads = chapters.iter().filter(|ch| ch.number == 52).count();
    assert_eq!(ch52_uploads, 2, "two Ch.52 uploads must both survive parsing");
    let ch1_uploads = chapters.iter().filter(|ch| ch.number == 1).count();
    assert_eq!(ch1_uploads, 2, "Ch.1 Official and Ch.1 Team Hazama are both kept");

    // Notice and hiatus titles carry no chapter number, so they parse to
    // number=0; they must still be present in the output.
    let zero = chapters.iter().filter(|ch| ch.number == 0).count();
    assert!(zero >= 4, "hiatus + 3 notices kept; got {zero}");
}
|
||||
|
||||
#[test]
|
||||
fn parse_chapter_number_grabs_first_integer_run() {
|
||||
assert_eq!(parse_chapter_number("Ch.1"), Some(1));
|
||||
@@ -630,6 +708,45 @@ mod tests {
|
||||
assert_eq!(derive_key_from_url("/manga/bar"), "bar");
|
||||
}
|
||||
|
||||
#[test]
fn derive_chapter_key_strips_trailing_reader_page_segment() {
    // (input URL, expected key) pairs, checked in the order listed.
    let cases = [
        // Listing links go to page 1 of the reader; strip /pg-\d+/.
        (".../uu/br_chapter-379272/pg-1/", "br_chapter-379272"),
        (".../uu/to_chapter-13/pg-1/", "to_chapter-13"),
        // Defensive: a deep link to a later page must resolve to the
        // same chapter identity.
        (".../uu/br_chapter-379272/pg-25/", "br_chapter-379272"),
        // No reader-page suffix: the last path segment is the key.
        (".../uu/br_chapter-379272/", "br_chapter-379272"),
        // Query strings are stripped.
        (".../uu/br_chapter-379272/pg-1/?ref=x", "br_chapter-379272"),
        // `pg-foo` is not a valid reader-page segment; it is kept as
        // the identity.
        (".../uu/something/pg-foo/", "pg-foo"),
        // Bare `pg-` (no digits) is likewise not stripped.
        (".../uu/something/pg-/", "pg-"),
    ];

    for (url, expected) in cases {
        assert_eq!(derive_chapter_key_from_url(url), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn metadata_hash_is_stable_and_field_sensitive() {
|
||||
let base = parse_manga_detail(DETAIL_HTML, "k", true).unwrap();
|
||||
|
||||
194
backend/tests/fixtures/target/chapter_list_uu.html
vendored
Normal file
194
backend/tests/fixtures/target/chapter_list_uu.html
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
<table class="listing" id="chapter_table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-379272/pg-1/"><b>Ch.67</b>
|
||||
: Official </a>
|
||||
<b style="color:#FEFD7F;width;30px;display:inline-block;margin-left:5px">new</b>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">May 20, 2026</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-328248/pg-1/"><b>hitaus.</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Jan 15, 2026</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-326351/pg-1/"><b>Ch.66</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Jan 10, 2026</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-295078/pg-1/"><b>Ch.52</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Aug 28, 2025</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-294815/pg-1/"><b>Ch.52</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../4300634/upload/">mina</a>
|
||||
</td>
|
||||
<td class="no">Aug 27, 2025</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-249964/pg-1/"><b>Ch.10</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Jan 5, 2025</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/to_chapter-13/pg-1/"><b>Ch.13</b>
|
||||
: Thank you, we'll see you in the next one! </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no"></td>
|
||||
<td class="no">Dec 30, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-249095/pg-1/"><b>Ch.9</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Dec 28, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-248930/pg-1/"><b>Ch.1</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Dec 26, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/to_chapter-12/pg-1/"><b>Ch.12</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no"></td>
|
||||
<td class="no">Dec 1, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-244844/pg-1/"><b>notice.</b>
|
||||
: Officials </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Nov 26, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/to_chapter-11/pg-1/"><b>Ch.11</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no"></td>
|
||||
<td class="no">Nov 18, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-221180/pg-1/"><b>notice.</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../3781074/upload/">Izanami</a>
|
||||
</td>
|
||||
<td class="no">Jun 21, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-234803/pg-1/"><b>notice.</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Sep 13, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-220299/pg-1/"><b>Ch.1</b>
|
||||
: Team Hazama </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../1457681/upload/">purplepandabear</a>
|
||||
</td>
|
||||
<td class="no">Jun 16, 2024</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "mangalord-frontend",
|
||||
"version": "0.23.0",
|
||||
"version": "0.23.1",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
|
||||
Reference in New Issue
Block a user