feat(chapter): preserve source-site order in chapter list (0.52.0)

The user-facing chapter list was ordered by (number ASC, created_at ASC), which broke the source site's order in two ways: non-numeric entries ("notice. : Officials") parsed to number=0 and clustered at the top, even though the site placed them mid-list, and variants sharing a number ("Ch.14 : PH" / "Ch.14 : Official") were torn apart by the created_at tiebreak. Capture each chapter's position in the source DOM as `source_index` (0 = first = newest on this site) on every crawler sync, including the UPDATE branch so a new chapter prepended on the source shifts every existing row down by one on the next tick. The list query reverses this with `ORDER BY source_index DESC NULLS LAST, number ASC, created_at ASC` so the oldest chapter appears first, variants stay adjacent in the order the site shows them, and non-numeric entries land where the site placed them. User-uploaded chapters and pre-migration rows keep their NULL source_index and fall through to the prior number/created_at tiebreak via NULLS LAST. The reader's client-side `[...chapters].sort((a,b) => a.number - b.number)` is dropped; prev/next now walks the server-ordered array positionally so it traverses variants and non-numeric entries in display order. Existing data populates on the next cron tick or via admin force-resync. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-03 07:25:09 +02:00
parent b812c6d16c
commit 679abae736
8 changed files with 315 additions and 23 deletions
--- a/backend/Cargo.lock
+++ b/backend/Cargo.lock
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"

 [[package]]
 name = "mangalord"
-version = "0.51.2"
+version = "0.52.0"
 dependencies = [
 "anyhow",
 "argon2",
--- a/backend/Cargo.toml
+++ b/backend/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mangalord"
-version = "0.51.2"
+version = "0.52.0"
 edition = "2021"
 default-run = "mangalord"

--- a/backend/migrations/0021_chapter_source_index.sql
+++ b/backend/migrations/0021_chapter_source_index.sql
@@ -0,0 +1,18 @@
+-- Capture each chapter's position in the source site's chapter list so
+-- the user-facing list can preserve site order: variants of the same
+-- chapter number (e.g. "Ch.14 : PH" next to "Ch.14 : Official") stay
+-- adjacent, and non-numeric entries like "notice. : Officials" land
+-- where the site placed them rather than clustering at the top under
+-- number = 0.
+--
+-- Lower source_index = closer to the top of the source DOM = newer
+-- chapter on this site (it renders newest-first). The list query
+-- reverses this with ORDER BY source_index DESC so the oldest chapter
+-- appears first in our UI.
+--
+-- NULL is the sentinel for user-uploaded chapters (no source row) and
+-- for crawled rows that pre-date this migration. The list query keeps
+-- the existing (number, created_at) tiebreak via NULLS LAST so those
+-- fall through to the prior behaviour until the next crawler tick
+-- populates the column.
+ALTER TABLE chapters ADD COLUMN source_index INTEGER;
--- a/backend/src/repo/chapter.rs
+++ b/backend/src/repo/chapter.rs
@@ -12,15 +12,20 @@ pub async fn list_for_manga(
    limit: i64,
    offset: i64,
 ) -> AppResult<Vec<Chapter>> {
-    // Secondary sort by created_at gives duplicate-numbered chapters
-    // (multiple uploaders/translations of the same number) a stable
-    // order in lists and prev/next reader navigation.
+    // Display order = source-site order reversed. The crawler stamps
+    // `source_index` = position in the source DOM (0 = first = newest
+    // on this site, see migration 0021), so DESC puts the oldest
+    // chapter first and keeps the site's variant grouping and the
+    // placement of non-numeric entries (e.g. "notice. : Officials")
+    // intact. NULLS LAST sorts user-uploaded chapters (no source row)
+    // and rows that pre-date the migration after every crawled row; the
+    // (number, created_at) tail then orders them deterministically.
    let rows = sqlx::query_as::<_, Chapter>(
        r#"
        SELECT id, manga_id, number, title, page_count, created_at
        FROM chapters
        WHERE manga_id = $1
-        ORDER BY number ASC, created_at ASC
+        ORDER BY source_index DESC NULLS LAST, number ASC, created_at ASC
        LIMIT $2 OFFSET $3
        "#,
    )
--- a/backend/src/repo/crawler.rs
+++ b/backend/src/repo/crawler.rs
@@ -352,7 +352,14 @@ pub async fn sync_manga_chapters(
        .map(|c| c.source_chapter_key.clone())
        .collect();

-    for c in chapters {
+    for (idx, c) in chapters.iter().enumerate() {
+        // `source_index` captures the chapter's position in the source
+        // DOM (0 = first = newest on this site) so the list query can
+        // reverse it for the user-facing list — see migration 0021.
+        // Every sync overwrites the value on both branches, so a new
+        // chapter inserted at the top of the source shifts every other
+        // row down by one on the next tick.
+        let source_index = idx as i32;
        // Lookup is constrained by manga_id (via the chapters join) so a
        // source whose chapter slugs collide across mangas (e.g.
        // "chapter-1" appearing under two different mangas) attributes
@@ -382,14 +389,15 @@ pub async fn sync_manga_chapters(
                // identity is the UUID, not the number.
                let (chapter_id,): (Uuid,) = sqlx::query_as(
                    r#"
-                    INSERT INTO chapters (manga_id, number, title, page_count)
-                    VALUES ($1, $2, $3, 0)
+                    INSERT INTO chapters (manga_id, number, title, page_count, source_index)
+                    VALUES ($1, $2, $3, 0, $4)
                    RETURNING id
                    "#,
                )
                .bind(manga_id)
                .bind(c.number)
                .bind(c.title.as_deref())
+                .bind(source_index)
                .fetch_one(&mut *tx)
                .await?;
                sqlx::query(
@@ -408,8 +416,11 @@ pub async fn sync_manga_chapters(
                diff.new += 1;
            }
            Some((chapter_id,)) => {
-                sqlx::query("UPDATE chapters SET title = $1 WHERE id = $2")
+                sqlx::query(
+                    "UPDATE chapters SET title = $1, source_index = $2 WHERE id = $3",
+                )
                    .bind(c.title.as_deref())
+                    .bind(source_index)
                    .bind(chapter_id)
                    .execute(&mut *tx)
                    .await?;
--- a/backend/tests/crawler_sync.rs
+++ b/backend/tests/crawler_sync.rs
@@ -6,6 +6,7 @@

 use mangalord::crawler::source::{SourceChapterRef, SourceManga};
 use mangalord::repo::crawler::{self, ChapterDiff, UpsertStatus};
+use mangalord::repo::chapter as chapter_repo;
 use sqlx::PgPool;
 use uuid::Uuid;

@@ -961,3 +962,261 @@ async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
    assert!(dropped.0.is_none());
    assert_eq!(dropped.1, up.manga_id);
 }
+
+// ---- source_index: site-order preservation ----
+//
+// The user-facing chapter list reverses the source-site order so that
+// the oldest chapter appears first. The crawler records each row's DOM
+// position in `chapters.source_index` (0 = first in source DOM = newest
+// on this site) on every sync; the list query orders by source_index
+// DESC NULLS LAST, falling through to number/created_at for rows with
+// no source row (e.g. user uploads).
+
+#[sqlx::test(migrations = "./migrations")]
+async fn source_index_set_on_insert_matches_dom_order(pool: PgPool) {
+    crawler::ensure_source(&pool, "target", "T", "https://x.example")
+        .await
+        .unwrap();
+    let m = sample_manga("foo", "Foo Manga", "hash-1");
+    let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
+        .await
+        .unwrap();
+
+    let chapters = vec![
+        SourceChapterRef {
+            source_chapter_key: "a".into(),
+            number: 30,
+            title: Some("Ch.30".into()),
+            url: "https://x.example/foo/a".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "b".into(),
+            number: 29,
+            title: Some("Ch.29".into()),
+            url: "https://x.example/foo/b".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "c".into(),
+            number: 28,
+            title: Some("Ch.28".into()),
+            url: "https://x.example/foo/c".into(),
+        },
+    ];
+    crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
+        .await
+        .unwrap();
+
+    let rows: Vec<(String, Option<i32>)> = sqlx::query_as(
+        "SELECT cs.source_chapter_key, c.source_index \
+           FROM chapters c \
+           JOIN chapter_sources cs ON cs.chapter_id = c.id \
+          WHERE c.manga_id = $1 \
+          ORDER BY cs.source_chapter_key",
+    )
+    .bind(up.manga_id)
+    .fetch_all(&pool)
+    .await
+    .unwrap();
+    assert_eq!(
+        rows,
+        vec![
+            ("a".to_string(), Some(0)),
+            ("b".to_string(), Some(1)),
+            ("c".to_string(), Some(2)),
+        ],
+        "source_index reflects enumerate() position in the input slice",
+    );
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn source_index_rewritten_on_resync_when_new_chapter_prepended(pool: PgPool) {
+    crawler::ensure_source(&pool, "target", "T", "https://x.example")
+        .await
+        .unwrap();
+    let m = sample_manga("foo", "Foo Manga", "hash-1");
+    let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
+        .await
+        .unwrap();
+
+    let first = vec![
+        SourceChapterRef {
+            source_chapter_key: "a".into(),
+            number: 1,
+            title: Some("Ch.1".into()),
+            url: "https://x.example/foo/a".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "b".into(),
+            number: 2,
+            title: Some("Ch.2".into()),
+            url: "https://x.example/foo/b".into(),
+        },
+    ];
+    crawler::sync_manga_chapters(&pool, "target", up.manga_id, &first)
+        .await
+        .unwrap();
+
+    // Second sync: a brand-new chapter appears at the top of the source
+    // (newest first on the site). All existing rows must shift their
+    // source_index down by one so the display order stays correct.
+    let second = vec![
+        SourceChapterRef {
+            source_chapter_key: "new".into(),
+            number: 3,
+            title: Some("Ch.3".into()),
+            url: "https://x.example/foo/new".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "a".into(),
+            number: 1,
+            title: Some("Ch.1".into()),
+            url: "https://x.example/foo/a".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "b".into(),
+            number: 2,
+            title: Some("Ch.2".into()),
+            url: "https://x.example/foo/b".into(),
+        },
+    ];
+    crawler::sync_manga_chapters(&pool, "target", up.manga_id, &second)
+        .await
+        .unwrap();
+
+    let rows: Vec<(String, Option<i32>)> = sqlx::query_as(
+        "SELECT cs.source_chapter_key, c.source_index \
+           FROM chapters c \
+           JOIN chapter_sources cs ON cs.chapter_id = c.id \
+          WHERE c.manga_id = $1 \
+          ORDER BY cs.source_chapter_key",
+    )
+    .bind(up.manga_id)
+    .fetch_all(&pool)
+    .await
+    .unwrap();
+    assert_eq!(
+        rows,
+        vec![
+            ("a".to_string(), Some(1)),
+            ("b".to_string(), Some(2)),
+            ("new".to_string(), Some(0)),
+        ],
+        "new chapter takes index 0, existing rows shift down on UPDATE",
+    );
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn list_for_manga_returns_source_order_reversed(pool: PgPool) {
+    crawler::ensure_source(&pool, "target", "T", "https://x.example")
+        .await
+        .unwrap();
+    let m = sample_manga("foo", "Foo Manga", "hash-1");
+    let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
+        .await
+        .unwrap();
+
+    // Site DOM order (top-down = newest-first):
+    //   ch11  (number = 11)
+    //   notice (number = 0, non-numeric label on the site)
+    //   ch10  (number = 10)
+    // Numbers deliberately disagree with DOM order: a number-based sort
+    // would put notice first, but the site places it between ch10 and
+    // ch11. Reversed-DOM display should yield [ch10, notice, ch11].
+    let chapters = vec![
+        SourceChapterRef {
+            source_chapter_key: "ch11".into(),
+            number: 11,
+            title: Some("Ch.11 : Official".into()),
+            url: "https://x.example/foo/11".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "notice".into(),
+            number: 0,
+            title: Some("notice. : Officials".into()),
+            url: "https://x.example/foo/notice".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "ch10".into(),
+            number: 10,
+            title: Some("Ch.10 : Official".into()),
+            url: "https://x.example/foo/10".into(),
+        },
+    ];
+    crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
+        .await
+        .unwrap();
+
+    let listed = chapter_repo::list_for_manga(&pool, up.manga_id, 50, 0)
+        .await
+        .unwrap();
+    let keys: Vec<String> = listed
+        .iter()
+        .map(|c| c.title.clone().unwrap_or_default())
+        .collect();
+    assert_eq!(
+        keys,
+        vec![
+            "Ch.10 : Official".to_string(),
+            "notice. : Officials".to_string(),
+            "Ch.11 : Official".to_string(),
+        ],
+        "list returns chapters in reversed source-DOM order, so the \
+         oldest appears first and non-numeric entries land where the \
+         site placed them",
+    );
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn list_for_manga_places_null_source_index_last(pool: PgPool) {
+    crawler::ensure_source(&pool, "target", "T", "https://x.example")
+        .await
+        .unwrap();
+    let m = sample_manga("foo", "Foo Manga", "hash-1");
+    let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
+        .await
+        .unwrap();
+
+    // Crawled chapters get source_index 0 and 1; the upload path leaves
+    // it NULL. NULLS LAST plus the (number, created_at) tail means the
+    // upload sits after both crawled rows even though its number is in
+    // the middle.
+    let crawled = vec![
+        SourceChapterRef {
+            source_chapter_key: "a".into(),
+            number: 1,
+            title: Some("Ch.1".into()),
+            url: "https://x.example/foo/a".into(),
+        },
+        SourceChapterRef {
+            source_chapter_key: "b".into(),
+            number: 3,
+            title: Some("Ch.3".into()),
+            url: "https://x.example/foo/b".into(),
+        },
+    ];
+    crawler::sync_manga_chapters(&pool, "target", up.manga_id, &crawled)
+        .await
+        .unwrap();
+
+    chapter_repo::create(&pool, up.manga_id, 2, Some("User upload Ch.2"), None)
+        .await
+        .unwrap();
+
+    let listed = chapter_repo::list_for_manga(&pool, up.manga_id, 50, 0)
+        .await
+        .unwrap();
+    let titles: Vec<String> = listed
+        .iter()
+        .map(|c| c.title.clone().unwrap_or_default())
+        .collect();
+    assert_eq!(
+        titles,
+        vec![
+            "Ch.3".to_string(),
+            "Ch.1".to_string(),
+            "User upload Ch.2".to_string(),
+        ],
+        "crawled rows ordered by reversed source_index; user upload \
+         (NULL source_index) falls through to the end",
+    );
+}
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
 {
  "name": "mangalord-frontend",
-  "version": "0.51.2",
+  "version": "0.52.0",
  "private": true,
  "type": "module",
  "scripts": {
--- a/frontend/src/routes/manga/[id]/chapter/[chapter_id]/+page.svelte
+++ b/frontend/src/routes/manga/[id]/chapter/[chapter_id]/+page.svelte
@@ -33,22 +33,21 @@
    );

    // Prev/next chapter computed from the chapter list. listChapters
-    // returns chapters in number ASC order; we still resolve via find
-    // rather than index because the current chapter's position may
-    // not be `chapter.number - 1` (sparse numbering / chapter 0.5 /
-    // future skipped numbers).
-    const sortedChapters = $derived(
-        [...chapters].sort((a, b) => a.number - b.number)
-    );
+    // returns chapters in display order (reversed source-site order, so
+    // oldest first — see backend repo::chapter::list_for_manga), and
+    // prev/next walks that order positionally. Resolving the current
+    // index via `findIndex` rather than `chapter.number - 1` matters:
+    // numbers aren't a reliable index because variants share numbers,
+    // non-numeric entries pin to 0, and uploads can sparse-fill.
    const currentIdx = $derived(
-        sortedChapters.findIndex((c) => c.id === chapter.id)
+        chapters.findIndex((c) => c.id === chapter.id)
    );
    const prevChapter = $derived(
-        currentIdx > 0 ? sortedChapters[currentIdx - 1] : null
+        currentIdx > 0 ? chapters[currentIdx - 1] : null
    );
    const nextChapter = $derived(
-        currentIdx >= 0 && currentIdx < sortedChapters.length - 1
-            ? sortedChapters[currentIdx + 1]
+        currentIdx >= 0 && currentIdx < chapters.length - 1
+            ? chapters[currentIdx + 1]
            : null
    );

@@ -471,7 +470,7 @@
                }}
                data-testid="reader-chapter-select"
            >
-                {#each sortedChapters as c (c.id)}
+                {#each chapters as c (c.id)}
                    <option value={c.id}>
                        {chapterLabel(c)}
                    </option>