feat(chapter): preserve source-site order in chapter list (0.52.0)
Some checks failed
deploy / test-backend (push) Failing after 11m48s
deploy / test-frontend (push) Successful in 9m45s
deploy / build-and-push (push) Has been skipped
deploy / deploy (push) Has been skipped

The user-facing chapter list was ordered by (number ASC, created_at ASC),
which broke the source site's order in two ways: non-numeric entries
("notice. : Officials") parsed to number=0 and clustered at the top,
even though the site placed them mid-list, and variants sharing a
number ("Ch.14 : PH" / "Ch.14 : Official") were torn apart by the
created_at tiebreak.

Capture each chapter's position in the source DOM as `source_index`
(0 = first = newest on this site) on every crawler sync, including the
UPDATE branch so a new chapter prepended on the source shifts every
existing row down by one on the next tick. The list query reverses
this with `ORDER BY source_index DESC NULLS LAST, number ASC,
created_at ASC` so the oldest chapter appears first, variants stay
adjacent in the order the site shows them, and non-numeric entries
land where the site placed them. User-uploaded chapters and pre-
migration rows keep their NULL source_index and fall through to the
prior number/created_at tiebreak via NULLS LAST.

The reader's client-side `[...chapters].sort((a,b) => a.number - b.number)`
is dropped; prev/next now walks the server-ordered array positionally
so it traverses variants and non-numeric entries in display order.

Existing data populates on the next cron tick or via admin force-resync.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-03 07:25:09 +02:00
parent b812c6d16c
commit 679abae736
8 changed files with 315 additions and 23 deletions

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "mangalord"
version = "0.51.2"
version = "0.52.0"
dependencies = [
"anyhow",
"argon2",

View File

@@ -1,6 +1,6 @@
[package]
name = "mangalord"
version = "0.51.2"
version = "0.52.0"
edition = "2021"
default-run = "mangalord"

View File

@@ -0,0 +1,18 @@
-- Capture each chapter's position in the source site's chapter list so
-- the user-facing list can preserve site order: variants of the same
-- chapter number (e.g. "Ch.14 : PH" next to "Ch.14 : Official") stay
-- adjacent, and non-numeric entries like "notice. : Officials" land
-- where the site placed them rather than clustering at the top under
-- number = 0.
--
-- Lower source_index = closer to the top of the source DOM = newer
-- chapter on this site (it renders newest-first). The list query
-- reverses this with ORDER BY source_index DESC so the oldest chapter
-- appears first in our UI.
--
-- NULL is the sentinel for user-uploaded chapters (no source row) and
-- for crawled rows that pre-date this migration. The list query keeps
-- the existing (number, created_at) tiebreak via NULLS LAST so those
-- fall through to the prior behaviour until the next crawler tick
-- populates the column.
--
-- The column is added without a DEFAULT and without NOT NULL, so every
-- row that already exists starts out with source_index = NULL.
ALTER TABLE chapters ADD COLUMN source_index INTEGER;

View File

@@ -12,15 +12,20 @@ pub async fn list_for_manga(
limit: i64,
offset: i64,
) -> AppResult<Vec<Chapter>> {
// Secondary sort by created_at gives duplicate-numbered chapters
// (multiple uploaders/translations of the same number) a stable
// order in lists and prev/next reader navigation.
// Display order = source-site order reversed. The crawler stamps
// `source_index` = position in the source DOM (0 = first = newest
// on this site, see migration 0021), so DESC puts the oldest
// chapter first and keeps the site's variant grouping and the
// placement of non-numeric entries (e.g. "notice. : Officials")
// intact. NULLS LAST sorts user-uploaded chapters (no source row)
// and rows that pre-date the migration after every crawled row; the
// (number, created_at) tail then orders them deterministically.
let rows = sqlx::query_as::<_, Chapter>(
r#"
SELECT id, manga_id, number, title, page_count, created_at
FROM chapters
WHERE manga_id = $1
ORDER BY number ASC, created_at ASC
ORDER BY source_index DESC NULLS LAST, number ASC, created_at ASC
LIMIT $2 OFFSET $3
"#,
)

View File

@@ -352,7 +352,14 @@ pub async fn sync_manga_chapters(
.map(|c| c.source_chapter_key.clone())
.collect();
for c in chapters {
for (idx, c) in chapters.iter().enumerate() {
// `source_index` captures the chapter's position in the source
// DOM (0 = first = newest on this site) so the list query can
// reverse it for the user-facing list — see migration 0021.
// Every sync overwrites the value on both branches, so a new
// chapter inserted at the top of the source shifts every other
// row down by one on the next tick.
let source_index = idx as i32;
// Lookup is constrained by manga_id (via the chapters join) so a
// source whose chapter slugs collide across mangas (e.g.
// "chapter-1" appearing under two different mangas) attributes
@@ -382,14 +389,15 @@ pub async fn sync_manga_chapters(
// identity is the UUID, not the number.
let (chapter_id,): (Uuid,) = sqlx::query_as(
r#"
INSERT INTO chapters (manga_id, number, title, page_count)
VALUES ($1, $2, $3, 0)
INSERT INTO chapters (manga_id, number, title, page_count, source_index)
VALUES ($1, $2, $3, 0, $4)
RETURNING id
"#,
)
.bind(manga_id)
.bind(c.number)
.bind(c.title.as_deref())
.bind(source_index)
.fetch_one(&mut *tx)
.await?;
sqlx::query(
@@ -408,8 +416,11 @@ pub async fn sync_manga_chapters(
diff.new += 1;
}
Some((chapter_id,)) => {
sqlx::query("UPDATE chapters SET title = $1 WHERE id = $2")
sqlx::query(
"UPDATE chapters SET title = $1, source_index = $2 WHERE id = $3",
)
.bind(c.title.as_deref())
.bind(source_index)
.bind(chapter_id)
.execute(&mut *tx)
.await?;

View File

@@ -6,6 +6,7 @@
use mangalord::crawler::source::{SourceChapterRef, SourceManga};
use mangalord::repo::crawler::{self, ChapterDiff, UpsertStatus};
use mangalord::repo::chapter as chapter_repo;
use sqlx::PgPool;
use uuid::Uuid;
@@ -961,3 +962,261 @@ async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
assert!(dropped.0.is_none());
assert_eq!(dropped.1, up.manga_id);
}
// ---- source_index: site-order preservation ----
//
// The user-facing chapter list reverses the source-site order so that
// the oldest chapter appears first. The crawler records each row's DOM
// position in `chapters.source_index` (0 = first in source DOM = newest
// on this site) on every sync; the list query orders by source_index
// DESC NULLS LAST, falling through to number/created_at for rows with
// no source row (e.g. user uploads).
// A first sync must stamp each inserted chapter with its position in
// the slice handed to `sync_manga_chapters` (which stands in for the
// source DOM order): first element -> 0, second -> 1, and so on.
#[sqlx::test(migrations = "./migrations")]
async fn source_index_set_on_insert_matches_dom_order(pool: PgPool) {
    // Small constructor so the listing below reads as a table of
    // (key, number, title, url) rows.
    let chapter_ref =
        |key: &'static str, number, title: &'static str, url: &'static str| SourceChapterRef {
            source_chapter_key: key.into(),
            number,
            title: Some(title.into()),
            url: url.into(),
        };

    crawler::ensure_source(&pool, "target", "T", "https://x.example")
        .await
        .unwrap();
    let manga = sample_manga("foo", "Foo Manga", "hash-1");
    let upserted =
        crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &manga)
            .await
            .unwrap();

    // Top-down order as the source would list it.
    let dom_order = vec![
        chapter_ref("a", 30, "Ch.30", "https://x.example/foo/a"),
        chapter_ref("b", 29, "Ch.29", "https://x.example/foo/b"),
        chapter_ref("c", 28, "Ch.28", "https://x.example/foo/c"),
    ];
    crawler::sync_manga_chapters(&pool, "target", upserted.manga_id, &dom_order)
        .await
        .unwrap();

    // Read the stored indexes back keyed by source chapter key; the
    // ORDER BY makes the comparison independent of insertion order.
    let stored: Vec<(String, Option<i32>)> = sqlx::query_as(
        "SELECT cs.source_chapter_key, c.source_index \
         FROM chapters c \
         JOIN chapter_sources cs ON cs.chapter_id = c.id \
         WHERE c.manga_id = $1 \
         ORDER BY cs.source_chapter_key",
    )
    .bind(upserted.manga_id)
    .fetch_all(&pool)
    .await
    .unwrap();

    let expected: Vec<(String, Option<i32>)> = [("a", 0), ("b", 1), ("c", 2)]
        .iter()
        .map(|&(key, index)| (key.to_string(), Some(index)))
        .collect();
    assert_eq!(
        stored, expected,
        "source_index reflects enumerate() position in the input slice",
    );
}
// A re-sync must overwrite `source_index` on rows that already exist:
// when a new chapter shows up at the top of the source listing, every
// previously stored chapter moves one position down.
#[sqlx::test(migrations = "./migrations")]
async fn source_index_rewritten_on_resync_when_new_chapter_prepended(pool: PgPool) {
    // Small constructor so each listing reads as a table of
    // (key, number, title, url) rows.
    let chapter_ref =
        |key: &'static str, number, title: &'static str, url: &'static str| SourceChapterRef {
            source_chapter_key: key.into(),
            number,
            title: Some(title.into()),
            url: url.into(),
        };

    crawler::ensure_source(&pool, "target", "T", "https://x.example")
        .await
        .unwrap();
    let manga = sample_manga("foo", "Foo Manga", "hash-1");
    let upserted =
        crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &manga)
            .await
            .unwrap();

    // First sync: two chapters, inserted with indexes 0 and 1.
    let initial_listing = vec![
        chapter_ref("a", 1, "Ch.1", "https://x.example/foo/a"),
        chapter_ref("b", 2, "Ch.2", "https://x.example/foo/b"),
    ];
    crawler::sync_manga_chapters(&pool, "target", upserted.manga_id, &initial_listing)
        .await
        .unwrap();

    // Second sync: a brand-new chapter is prepended to the listing
    // (newest first on the site), so the two known chapters now sit at
    // positions 1 and 2 and must be updated accordingly.
    let listing_after_release = vec![
        chapter_ref("new", 3, "Ch.3", "https://x.example/foo/new"),
        chapter_ref("a", 1, "Ch.1", "https://x.example/foo/a"),
        chapter_ref("b", 2, "Ch.2", "https://x.example/foo/b"),
    ];
    crawler::sync_manga_chapters(&pool, "target", upserted.manga_id, &listing_after_release)
        .await
        .unwrap();

    // Read the stored indexes back keyed by source chapter key; the
    // ORDER BY puts "new" last alphabetically, not positionally.
    let stored: Vec<(String, Option<i32>)> = sqlx::query_as(
        "SELECT cs.source_chapter_key, c.source_index \
         FROM chapters c \
         JOIN chapter_sources cs ON cs.chapter_id = c.id \
         WHERE c.manga_id = $1 \
         ORDER BY cs.source_chapter_key",
    )
    .bind(upserted.manga_id)
    .fetch_all(&pool)
    .await
    .unwrap();

    let expected: Vec<(String, Option<i32>)> = [("a", 1), ("b", 2), ("new", 0)]
        .iter()
        .map(|&(key, index)| (key.to_string(), Some(index)))
        .collect();
    assert_eq!(
        stored, expected,
        "new chapter takes index 0, existing rows shift down on UPDATE",
    );
}
// `list_for_manga` must return crawled chapters in the reverse of the
// order they were synced in, regardless of their `number` values.
#[sqlx::test(migrations = "./migrations")]
async fn list_for_manga_returns_source_order_reversed(pool: PgPool) {
    // Small constructor so the listing below reads as a table of
    // (key, number, title, url) rows.
    let chapter_ref =
        |key: &'static str, number, title: &'static str, url: &'static str| SourceChapterRef {
            source_chapter_key: key.into(),
            number,
            title: Some(title.into()),
            url: url.into(),
        };

    crawler::ensure_source(&pool, "target", "T", "https://x.example")
        .await
        .unwrap();
    let manga = sample_manga("foo", "Foo Manga", "hash-1");
    let upserted =
        crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &manga)
            .await
            .unwrap();

    // Site DOM order, top-down (newest first):
    //   ch11    number = 11
    //   notice  number = 0   (non-numeric label on the site)
    //   ch10    number = 10
    // The numbers deliberately disagree with the DOM order: sorting by
    // number would put the notice first, whereas the site shows it
    // between ch10 and ch11. Reversing the DOM order should therefore
    // give [ch10, notice, ch11].
    let dom_order = vec![
        chapter_ref("ch11", 11, "Ch.11 : Official", "https://x.example/foo/11"),
        chapter_ref("notice", 0, "notice. : Officials", "https://x.example/foo/notice"),
        chapter_ref("ch10", 10, "Ch.10 : Official", "https://x.example/foo/10"),
    ];
    crawler::sync_manga_chapters(&pool, "target", upserted.manga_id, &dom_order)
        .await
        .unwrap();

    // Compare by title, which is unique per chapter in this fixture.
    let titles: Vec<String> = chapter_repo::list_for_manga(&pool, upserted.manga_id, 50, 0)
        .await
        .unwrap()
        .into_iter()
        .map(|chapter| chapter.title.unwrap_or_default())
        .collect();

    assert_eq!(
        titles,
        ["Ch.10 : Official", "notice. : Officials", "Ch.11 : Official"],
        "list returns chapters in reversed source-DOM order, so the \
         oldest appears first and non-numeric entries land where the \
         site placed them",
    );
}
// A chapter created through `chapter_repo::create` (never synced, so
// its source_index stays NULL) must be listed after every crawled
// chapter.
#[sqlx::test(migrations = "./migrations")]
async fn list_for_manga_places_null_source_index_last(pool: PgPool) {
    // Small constructor so the listing below reads as a table of
    // (key, number, title, url) rows.
    let chapter_ref =
        |key: &'static str, number, title: &'static str, url: &'static str| SourceChapterRef {
            source_chapter_key: key.into(),
            number,
            title: Some(title.into()),
            url: url.into(),
        };

    crawler::ensure_source(&pool, "target", "T", "https://x.example")
        .await
        .unwrap();
    let manga = sample_manga("foo", "Foo Manga", "hash-1");
    let upserted =
        crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &manga)
            .await
            .unwrap();

    // The two crawled chapters receive source_index 0 and 1. The upload
    // created afterwards has number 2, which falls between the crawled
    // numbers 1 and 3, yet NULLS LAST should still place it at the end.
    let crawled = vec![
        chapter_ref("a", 1, "Ch.1", "https://x.example/foo/a"),
        chapter_ref("b", 3, "Ch.3", "https://x.example/foo/b"),
    ];
    crawler::sync_manga_chapters(&pool, "target", upserted.manga_id, &crawled)
        .await
        .unwrap();
    chapter_repo::create(&pool, upserted.manga_id, 2, Some("User upload Ch.2"), None)
        .await
        .unwrap();

    let titles: Vec<String> = chapter_repo::list_for_manga(&pool, upserted.manga_id, 50, 0)
        .await
        .unwrap()
        .into_iter()
        .map(|chapter| chapter.title.unwrap_or_default())
        .collect();

    assert_eq!(
        titles,
        ["Ch.3", "Ch.1", "User upload Ch.2"],
        "crawled rows ordered by reversed source_index; user upload \
         (NULL source_index) falls through to the end",
    );
}

View File

@@ -1,6 +1,6 @@
{
"name": "mangalord-frontend",
"version": "0.51.2",
"version": "0.52.0",
"private": true,
"type": "module",
"scripts": {

View File

@@ -33,22 +33,21 @@
);
// Prev/next chapter computed from the chapter list. listChapters
// returns chapters in number ASC order; we still resolve via find
// rather than index because the current chapter's position may
// not be `chapter.number - 1` (sparse numbering / chapter 0.5 /
// future skipped numbers).
const sortedChapters = $derived(
[...chapters].sort((a, b) => a.number - b.number)
);
// returns chapters in display order (reversed source-site order, so
// oldest first — see backend repo::chapter::list_for_manga), and
// prev/next walks that order positionally. Resolving the current
// index via `findIndex` on the chapter id rather than
// `chapter.number - 1` matters because numbers aren't a reliable
// index: variants share numbers, non-numeric entries pin to 0, and
// uploads can fill numbers sparsely.
const currentIdx = $derived(
sortedChapters.findIndex((c) => c.id === chapter.id)
chapters.findIndex((c) => c.id === chapter.id)
);
const prevChapter = $derived(
currentIdx > 0 ? sortedChapters[currentIdx - 1] : null
currentIdx > 0 ? chapters[currentIdx - 1] : null
);
const nextChapter = $derived(
currentIdx >= 0 && currentIdx < sortedChapters.length - 1
? sortedChapters[currentIdx + 1]
currentIdx >= 0 && currentIdx < chapters.length - 1
? chapters[currentIdx + 1]
: null
);
@@ -471,7 +470,7 @@
}}
data-testid="reader-chapter-select"
>
{#each sortedChapters as c (c.id)}
{#each chapters as c (c.id)}
<option value={c.id}>
{chapterLabel(c)}
</option>