fix(crawler): scope chapter_sources lookup per-manga (0.35.5)

chapter_sources's PRIMARY KEY was (source_id, source_chapter_key), and the lookup in sync_manga_chapters didn't constrain by manga_id. As a result, a source whose chapter slugs aren't globally unique (e.g. "chapter-1" appearing under multiple mangas) silently attributed every collision to the first manga that synced it. The INSERT path would have conflicted on the old PK during the second manga's sync.

Migration 0017 drops the old PK and rekeys on (source_id, chapter_id) — the natural identity of a per-source chapter attachment — and adds an index on (source_id, source_chapter_key) for the lookup path. The repo lookup now joins chapters and filters by manga_id; the UPDATE path keys on chapter_id directly (the row's natural identifier post-migration).

Test sync_chapters_isolates_colliding_keys_across_mangas pins the contract end-to-end.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-29 20:43:08 +02:00
parent 50763addcf
commit c6bb9160e3
6 changed files with 156 additions and 6 deletions
--- a/backend/src/repo/crawler.rs
+++ b/backend/src/repo/crawler.rs
@@ -335,11 +335,23 @@ pub async fn sync_manga_chapters(
        .collect();

    for c in chapters {
+        // Lookup is constrained by manga_id (via the chapters join) so a
+        // source whose chapter slugs collide across mangas (e.g.
+        // "chapter-1" appearing under two different mangas) attributes
+        // each row to the correct manga. Migration 0017 dropped the
+        // (source_id, source_chapter_key) PK in favour of
+        // (source_id, chapter_id) for exactly this reason.
        let existing: Option<(Uuid,)> = sqlx::query_as(
-            "SELECT chapter_id FROM chapter_sources WHERE source_id = $1 AND source_chapter_key = $2",
+            "SELECT cs.chapter_id \
+               FROM chapter_sources cs \
+               JOIN chapters ch ON ch.id = cs.chapter_id \
+              WHERE cs.source_id = $1 \
+                AND cs.source_chapter_key = $2 \
+                AND ch.manga_id = $3",
        )
        .bind(source_id)
        .bind(&c.source_chapter_key)
+        .bind(manga_id)
        .fetch_optional(&mut *tx)
        .await?;

@@ -383,16 +395,19 @@ pub async fn sync_manga_chapters(
                    .bind(chapter_id)
                    .execute(&mut *tx)
                    .await?;
+                // (source_id, chapter_id) is now the row's primary key —
+                // match on it directly instead of re-keying on
+                // (source_id, source_chapter_key), which may not be unique.
                sqlx::query(
                    r#"
                    UPDATE chapter_sources
                       SET source_url = $1, last_seen_at = NOW(), dropped_at = NULL
-                     WHERE source_id = $2 AND source_chapter_key = $3
+                     WHERE source_id = $2 AND chapter_id = $3
                    "#,
                )
                .bind(&c.url)
                .bind(source_id)
-                .bind(&c.source_chapter_key)
+                .bind(chapter_id)
                .execute(&mut *tx)
                .await?;
                diff.refreshed += 1;