feat(chapter): preserve source-site order in chapter list (0.52.0)
The user-facing chapter list was ordered by (number ASC, created_at ASC),
which broke the source site's order in two ways: non-numeric entries
("notice. : Officials") parsed to number=0 and clustered at the top,
even though the site placed them mid-list, and variants sharing a
number ("Ch.14 : PH" / "Ch.14 : Official") were torn apart by the
created_at tiebreak.
Capture each chapter's position in the source DOM as `source_index`
(0 = first = newest on this site) on every crawler sync, including the
UPDATE branch so a new chapter prepended on the source shifts every
existing row down by one on the next tick. The list query reverses
this with `ORDER BY source_index DESC NULLS LAST, number ASC,
created_at ASC` so the oldest chapter appears first, variants stay
adjacent in the order the site shows them, and non-numeric entries
land where the site placed them. User-uploaded chapters and pre-
migration rows keep their NULL source_index and fall through to the
prior number/created_at tiebreak via NULLS LAST.
The reader's client-side `[...chapters].sort((a,b) => a.number - b.number)`
is dropped; prev/next now walks the server-ordered array positionally
so it traverses variants and non-numeric entries in display order.
Existing rows are backfilled on the next cron tick or via an admin force-resync.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mangalord"
|
||||
version = "0.51.2"
|
||||
version = "0.52.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.51.2"
|
||||
version = "0.52.0"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
|
||||
18
backend/migrations/0021_chapter_source_index.sql
Normal file
18
backend/migrations/0021_chapter_source_index.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- Capture each chapter's position in the source site's chapter list so
|
||||
-- the user-facing list can preserve site order: variants of the same
|
||||
-- chapter number (e.g. "Ch.14 : PH" next to "Ch.14 : Official") stay
|
||||
-- adjacent, and non-numeric entries like "notice. : Officials" land
|
||||
-- where the site placed them rather than clustering at the top under
|
||||
-- number = 0.
|
||||
--
|
||||
-- Lower source_index = closer to the top of the source DOM = newer
|
||||
-- chapter on this site (it renders newest-first). The list query
|
||||
-- reverses this with ORDER BY source_index DESC so the oldest chapter
|
||||
-- appears first in our UI.
|
||||
--
|
||||
-- NULL is the sentinel for user-uploaded chapters (no source row) and
|
||||
-- for crawled rows that pre-date this migration. The list query keeps
|
||||
-- the existing (number, created_at) tiebreak via NULLS LAST so those
|
||||
-- fall through to the prior behaviour until the next crawler tick
|
||||
-- populates the column.
|
||||
ALTER TABLE chapters ADD COLUMN source_index INTEGER;
|
||||
@@ -12,15 +12,20 @@ pub async fn list_for_manga(
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> AppResult<Vec<Chapter>> {
|
||||
// Secondary sort by created_at gives duplicate-numbered chapters
|
||||
// (multiple uploaders/translations of the same number) a stable
|
||||
// order in lists and prev/next reader navigation.
|
||||
// Display order = source-site order reversed. The crawler stamps
|
||||
// `source_index` = position in the source DOM (0 = first = newest
|
||||
// on this site, see migration 0021), so DESC puts the oldest
|
||||
// chapter first and keeps the site's variant grouping and the
|
||||
// placement of non-numeric entries (e.g. "notice. : Officials")
|
||||
// intact. NULLS LAST keeps user-uploaded chapters (no source row)
|
||||
// and pre-migration rows sorted after every crawled row; the
|
||||
// (number, created_at) tail then orders them deterministically.
|
||||
let rows = sqlx::query_as::<_, Chapter>(
|
||||
r#"
|
||||
SELECT id, manga_id, number, title, page_count, created_at
|
||||
FROM chapters
|
||||
WHERE manga_id = $1
|
||||
ORDER BY number ASC, created_at ASC
|
||||
ORDER BY source_index DESC NULLS LAST, number ASC, created_at ASC
|
||||
LIMIT $2 OFFSET $3
|
||||
"#,
|
||||
)
|
||||
|
||||
@@ -352,7 +352,14 @@ pub async fn sync_manga_chapters(
|
||||
.map(|c| c.source_chapter_key.clone())
|
||||
.collect();
|
||||
|
||||
for c in chapters {
|
||||
for (idx, c) in chapters.iter().enumerate() {
|
||||
// `source_index` captures the chapter's position in the source
|
||||
// DOM (0 = first = newest on this site) so the list query can
|
||||
// reverse it for the user-facing list — see migration 0021.
|
||||
// Every sync overwrites the value on both branches, so a new
|
||||
// chapter inserted at the top of the source shifts every other
|
||||
// row down by one on the next tick.
|
||||
let source_index = idx as i32;
|
||||
// Lookup is constrained by manga_id (via the chapters join) so a
|
||||
// source whose chapter slugs collide across mangas (e.g.
|
||||
// "chapter-1" appearing under two different mangas) attributes
|
||||
@@ -382,14 +389,15 @@ pub async fn sync_manga_chapters(
|
||||
// identity is the UUID, not the number.
|
||||
let (chapter_id,): (Uuid,) = sqlx::query_as(
|
||||
r#"
|
||||
INSERT INTO chapters (manga_id, number, title, page_count)
|
||||
VALUES ($1, $2, $3, 0)
|
||||
INSERT INTO chapters (manga_id, number, title, page_count, source_index)
|
||||
VALUES ($1, $2, $3, 0, $4)
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(c.number)
|
||||
.bind(c.title.as_deref())
|
||||
.bind(source_index)
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
@@ -408,8 +416,11 @@ pub async fn sync_manga_chapters(
|
||||
diff.new += 1;
|
||||
}
|
||||
Some((chapter_id,)) => {
|
||||
sqlx::query("UPDATE chapters SET title = $1 WHERE id = $2")
|
||||
sqlx::query(
|
||||
"UPDATE chapters SET title = $1, source_index = $2 WHERE id = $3",
|
||||
)
|
||||
.bind(c.title.as_deref())
|
||||
.bind(source_index)
|
||||
.bind(chapter_id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
use mangalord::crawler::source::{SourceChapterRef, SourceManga};
|
||||
use mangalord::repo::crawler::{self, ChapterDiff, UpsertStatus};
|
||||
use mangalord::repo::chapter as chapter_repo;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -961,3 +962,261 @@ async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
||||
assert!(dropped.0.is_none());
|
||||
assert_eq!(dropped.1, up.manga_id);
|
||||
}
|
||||
|
||||
// ---- source_index: site-order preservation ----
|
||||
//
|
||||
// The user-facing chapter list reverses the source-site order so that
|
||||
// the oldest chapter appears first. The crawler records each row's DOM
|
||||
// position in `chapters.source_index` (0 = first in source DOM = newest
|
||||
// on this site) on every sync; the list query orders by source_index
|
||||
// DESC NULLS LAST, falling through to number/created_at for rows with
|
||||
// no source row (e.g. user uploads).
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn source_index_set_on_insert_matches_dom_order(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let chapters = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "a".into(),
|
||||
number: 30,
|
||||
title: Some("Ch.30".into()),
|
||||
url: "https://x.example/foo/a".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "b".into(),
|
||||
number: 29,
|
||||
title: Some("Ch.29".into()),
|
||||
url: "https://x.example/foo/b".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "c".into(),
|
||||
number: 28,
|
||||
title: Some("Ch.28".into()),
|
||||
url: "https://x.example/foo/c".into(),
|
||||
},
|
||||
];
|
||||
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let rows: Vec<(String, Option<i32>)> = sqlx::query_as(
|
||||
"SELECT cs.source_chapter_key, c.source_index \
|
||||
FROM chapters c \
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
||||
WHERE c.manga_id = $1 \
|
||||
ORDER BY cs.source_chapter_key",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
rows,
|
||||
vec![
|
||||
("a".to_string(), Some(0)),
|
||||
("b".to_string(), Some(1)),
|
||||
("c".to_string(), Some(2)),
|
||||
],
|
||||
"source_index reflects enumerate() position in the input slice",
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn source_index_rewritten_on_resync_when_new_chapter_prepended(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let first = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "a".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1".into()),
|
||||
url: "https://x.example/foo/a".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "b".into(),
|
||||
number: 2,
|
||||
title: Some("Ch.2".into()),
|
||||
url: "https://x.example/foo/b".into(),
|
||||
},
|
||||
];
|
||||
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &first)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Second sync: a brand-new chapter appears at the top of the source
|
||||
// (newest first on the site). All existing rows must shift their
|
||||
// source_index down by one so the display order stays correct.
|
||||
let second = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "new".into(),
|
||||
number: 3,
|
||||
title: Some("Ch.3".into()),
|
||||
url: "https://x.example/foo/new".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "a".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1".into()),
|
||||
url: "https://x.example/foo/a".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "b".into(),
|
||||
number: 2,
|
||||
title: Some("Ch.2".into()),
|
||||
url: "https://x.example/foo/b".into(),
|
||||
},
|
||||
];
|
||||
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &second)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let rows: Vec<(String, Option<i32>)> = sqlx::query_as(
|
||||
"SELECT cs.source_chapter_key, c.source_index \
|
||||
FROM chapters c \
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
||||
WHERE c.manga_id = $1 \
|
||||
ORDER BY cs.source_chapter_key",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
rows,
|
||||
vec![
|
||||
("a".to_string(), Some(1)),
|
||||
("b".to_string(), Some(2)),
|
||||
("new".to_string(), Some(0)),
|
||||
],
|
||||
"new chapter takes index 0, existing rows shift down on UPDATE",
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_for_manga_returns_source_order_reversed(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Site DOM order (top-down = newest-first):
|
||||
// ch11 (number = 11)
|
||||
// notice (number = 0, non-numeric label on the site)
|
||||
// ch10 (number = 10)
|
||||
// Numbers deliberately disagree with DOM order: a number-based sort
|
||||
// would put notice first, but the site places it between ch10 and
|
||||
// ch11. Reversed-DOM display should yield [ch10, notice, ch11].
|
||||
let chapters = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "ch11".into(),
|
||||
number: 11,
|
||||
title: Some("Ch.11 : Official".into()),
|
||||
url: "https://x.example/foo/11".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "notice".into(),
|
||||
number: 0,
|
||||
title: Some("notice. : Officials".into()),
|
||||
url: "https://x.example/foo/notice".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "ch10".into(),
|
||||
number: 10,
|
||||
title: Some("Ch.10 : Official".into()),
|
||||
url: "https://x.example/foo/10".into(),
|
||||
},
|
||||
];
|
||||
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let listed = chapter_repo::list_for_manga(&pool, up.manga_id, 50, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
let keys: Vec<String> = listed
|
||||
.iter()
|
||||
.map(|c| c.title.clone().unwrap_or_default())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
keys,
|
||||
vec![
|
||||
"Ch.10 : Official".to_string(),
|
||||
"notice. : Officials".to_string(),
|
||||
"Ch.11 : Official".to_string(),
|
||||
],
|
||||
"list returns chapters in reversed source-DOM order, so the \
|
||||
oldest appears first and non-numeric entries land where the \
|
||||
site placed them",
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_for_manga_places_null_source_index_last(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Crawled chapters get source_index 0 and 1; the upload path leaves
|
||||
// it NULL. NULLS LAST plus the (number, created_at) tail means the
|
||||
// upload sits after both crawled rows even though its number is in
|
||||
// the middle.
|
||||
let crawled = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "a".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1".into()),
|
||||
url: "https://x.example/foo/a".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "b".into(),
|
||||
number: 3,
|
||||
title: Some("Ch.3".into()),
|
||||
url: "https://x.example/foo/b".into(),
|
||||
},
|
||||
];
|
||||
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &crawled)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
chapter_repo::create(&pool, up.manga_id, 2, Some("User upload Ch.2"), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let listed = chapter_repo::list_for_manga(&pool, up.manga_id, 50, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
let titles: Vec<String> = listed
|
||||
.iter()
|
||||
.map(|c| c.title.clone().unwrap_or_default())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
titles,
|
||||
vec![
|
||||
"Ch.3".to_string(),
|
||||
"Ch.1".to_string(),
|
||||
"User upload Ch.2".to_string(),
|
||||
],
|
||||
"crawled rows ordered by reversed source_index; user upload \
|
||||
(NULL source_index) falls through to the end",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "mangalord-frontend",
|
||||
"version": "0.51.2",
|
||||
"version": "0.52.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
|
||||
@@ -33,22 +33,21 @@
|
||||
);
|
||||
|
||||
// Prev/next chapter computed from the chapter list. listChapters
|
||||
// returns chapters in number ASC order; we still resolve via find
|
||||
// rather than index because the current chapter's position may
|
||||
// not be `chapter.number - 1` (sparse numbering / chapter 0.5 /
|
||||
// future skipped numbers).
|
||||
const sortedChapters = $derived(
|
||||
[...chapters].sort((a, b) => a.number - b.number)
|
||||
);
|
||||
// returns chapters in display order (reversed source-site order, so
|
||||
// oldest first — see backend repo::chapter::list_for_manga), and
|
||||
// prev/next walks that order positionally. Resolving the current
|
||||
// index via `findIndex` rather than `chapter.number - 1` matters because
|
||||
// numbers aren't a reliable index: variants share numbers, non-
|
||||
// numeric entries pin to 0, and uploads can sparse-fill.
|
||||
const currentIdx = $derived(
|
||||
sortedChapters.findIndex((c) => c.id === chapter.id)
|
||||
chapters.findIndex((c) => c.id === chapter.id)
|
||||
);
|
||||
const prevChapter = $derived(
|
||||
currentIdx > 0 ? sortedChapters[currentIdx - 1] : null
|
||||
currentIdx > 0 ? chapters[currentIdx - 1] : null
|
||||
);
|
||||
const nextChapter = $derived(
|
||||
currentIdx >= 0 && currentIdx < sortedChapters.length - 1
|
||||
? sortedChapters[currentIdx + 1]
|
||||
currentIdx >= 0 && currentIdx < chapters.length - 1
|
||||
? chapters[currentIdx + 1]
|
||||
: null
|
||||
);
|
||||
|
||||
@@ -471,7 +470,7 @@
|
||||
}}
|
||||
data-testid="reader-chapter-select"
|
||||
>
|
||||
{#each sortedChapters as c (c.id)}
|
||||
{#each chapters as c (c.id)}
|
||||
<option value={c.id}>
|
||||
{chapterLabel(c)}
|
||||
</option>
|
||||
|
||||
Reference in New Issue
Block a user