diff --git a/backend/Cargo.lock b/backend/Cargo.lock index 08bfdd7..4b3ce7b 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "mangalord" -version = "0.35.4" +version = "0.35.5" dependencies = [ "anyhow", "argon2", diff --git a/backend/Cargo.toml b/backend/Cargo.toml index d23fdfd..41bc9d0 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.35.4" +version = "0.35.5" edition = "2021" default-run = "mangalord" diff --git a/backend/migrations/0017_chapter_sources_per_manga.sql b/backend/migrations/0017_chapter_sources_per_manga.sql new file mode 100644 index 0000000..140997b --- /dev/null +++ b/backend/migrations/0017_chapter_sources_per_manga.sql @@ -0,0 +1,20 @@ +-- chapter_sources: drop the global (source_id, source_chapter_key) PK +-- and rekey on (source_id, chapter_id). +-- +-- The old PK assumed chapter slugs are unique per source. Sources whose +-- chapter naming is per-manga (chapter-1, chapter-2, ...) instead of per- +-- catalog (br_chapter-379272 with a global counter) would collide on the +-- second manga: the INSERT would conflict on (source_id, "chapter-1") and +-- the lookup would attribute the row to the first manga's chapter_id. +-- +-- The new key is the natural identity of a source attachment: "this source +-- has this chapter". An (source_id, source_chapter_key) index preserves +-- the lookup path (find existing source row by source's identifier) but +-- no longer enforces uniqueness — the application combines it with the +-- chapters table's manga_id to scope the lookup per-manga. + +ALTER TABLE chapter_sources DROP CONSTRAINT chapter_sources_pkey; +ALTER TABLE chapter_sources ADD PRIMARY KEY (source_id, chapter_id); + +CREATE INDEX chapter_sources_source_key_idx + ON chapter_sources (source_id, source_chapter_key); diff --git a/backend/src/repo/crawler.rs b/backend/src/repo/crawler.rs index eeb5bfd..cfca00c 100644 --- a/backend/src/repo/crawler.rs +++ b/backend/src/repo/crawler.rs @@ -335,11 +335,23 @@ pub async fn sync_manga_chapters( .collect(); for c in chapters { + // Lookup is constrained by manga_id (via the chapters join) so a + // source whose chapter slugs collide across mangas (e.g. + // "chapter-1" appearing under two different mangas) attributes + // each row to the correct manga. Migration 0017 dropped the + // (source_id, source_chapter_key) PK in favour of + // (source_id, chapter_id) for exactly this reason. let existing: Option<(Uuid,)> = sqlx::query_as( - "SELECT chapter_id FROM chapter_sources WHERE source_id = $1 AND source_chapter_key = $2", + "SELECT cs.chapter_id \ + FROM chapter_sources cs \ + JOIN chapters ch ON ch.id = cs.chapter_id \ + WHERE cs.source_id = $1 \ + AND cs.source_chapter_key = $2 \ + AND ch.manga_id = $3", ) .bind(source_id) .bind(&c.source_chapter_key) + .bind(manga_id) .fetch_optional(&mut *tx) .await?; @@ -383,16 +395,19 @@ pub async fn sync_manga_chapters( .bind(chapter_id) .execute(&mut *tx) .await?; + // chapter_id is now the natural per-(source, chapter) + // identifier — use it directly instead of re-keying on + // (source_id, source_chapter_key) which may not be unique. sqlx::query( r#" UPDATE chapter_sources SET source_url = $1, last_seen_at = NOW(), dropped_at = NULL - WHERE source_id = $2 AND source_chapter_key = $3 + WHERE source_id = $2 AND chapter_id = $3 "#, ) .bind(&c.url) .bind(source_id) - .bind(&c.source_chapter_key) + .bind(chapter_id) .execute(&mut *tx) .await?; diff.refreshed += 1; diff --git a/backend/tests/crawler_sync.rs b/backend/tests/crawler_sync.rs index d6f7df2..725f2af 100644 --- a/backend/tests/crawler_sync.rs +++ b/backend/tests/crawler_sync.rs @@ -308,6 +308,121 @@ async fn sync_chapters_keeps_duplicate_numbered_chapters_as_separate_rows(pool: assert_eq!(ch52_count.0, 2, "both Ch.52 uploads survive as separate rows"); } +#[sqlx::test(migrations = "./migrations")] +async fn sync_chapters_isolates_colliding_keys_across_mangas(pool: PgPool) { + // Two mangas, both with a chapter whose source_chapter_key is + // "chapter-1". Pre-migration-0017 the PK enforced (source_id, + // source_chapter_key) globally and the lookup didn't filter by + // manga_id, so the second manga's sync would adopt the first manga's + // chapter_id (silent attribution corruption). After 0017 each manga + // owns its own row. + crawler::ensure_source(&pool, "target", "T", "https://x.example") + .await + .unwrap(); + let m1 = sample_manga("foo", "Manga Foo", "hash-foo"); + let m2 = sample_manga("bar", "Manga Bar", "hash-bar"); + let up1 = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m1) + .await + .unwrap(); + let up2 = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/bar", &m2) + .await + .unwrap(); + assert_ne!(up1.manga_id, up2.manga_id); + + let shared = vec![SourceChapterRef { + source_chapter_key: "chapter-1".into(), + number: 1, + title: Some("Ch.1".into()), + url: "https://x.example/foo/chapter-1/".into(), + }]; + let diff1 = crawler::sync_manga_chapters(&pool, "target", up1.manga_id, &shared) + .await + .unwrap(); + assert_eq!(diff1.new, 1, "manga foo: chapter inserted fresh"); + + // Manga bar now syncs *the same key*. Under the old schema this would + // either fail on PK conflict or attribute the chapter to foo. Under + // the new schema bar gets its own chapter row. + let bar_chapters = vec![SourceChapterRef { + source_chapter_key: "chapter-1".into(), + number: 1, + title: Some("Ch.1 (bar)".into()), + url: "https://x.example/bar/chapter-1/".into(), + }]; + let diff2 = crawler::sync_manga_chapters(&pool, "target", up2.manga_id, &bar_chapters) + .await + .unwrap(); + assert_eq!( + diff2.new, 1, + "manga bar: same key resolved per-manga to a fresh row" + ); + + let foo_count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM chapters WHERE manga_id = $1", + ) + .bind(up1.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + let bar_count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM chapters WHERE manga_id = $1", + ) + .bind(up2.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(foo_count.0, 1); + assert_eq!(bar_count.0, 1); + + let bar_title: (Option,) = sqlx::query_as( + "SELECT title FROM chapters WHERE manga_id = $1 AND number = 1", + ) + .bind(up2.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!( + bar_title.0.as_deref(), + Some("Ch.1 (bar)"), + "bar's chapter has bar's title, not foo's" + ); + + // A subsequent re-sync of foo with the same key correctly refreshes + // foo's row, not bar's. + let foo_resync = vec![SourceChapterRef { + source_chapter_key: "chapter-1".into(), + number: 1, + title: Some("Ch.1 (foo updated)".into()), + url: "https://x.example/foo/chapter-1/".into(), + }]; + let diff_refresh = crawler::sync_manga_chapters(&pool, "target", up1.manga_id, &foo_resync) + .await + .unwrap(); + assert_eq!(diff_refresh.refreshed, 1); + assert_eq!(diff_refresh.new, 0); + + let foo_title: (Option,) = sqlx::query_as( + "SELECT title FROM chapters WHERE manga_id = $1 AND number = 1", + ) + .bind(up1.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(foo_title.0.as_deref(), Some("Ch.1 (foo updated)")); + let bar_title_after: (Option,) = sqlx::query_as( + "SELECT title FROM chapters WHERE manga_id = $1 AND number = 1", + ) + .bind(up2.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!( + bar_title_after.0.as_deref(), + Some("Ch.1 (bar)"), + "bar's row is untouched by foo's refresh" + ); +} + #[sqlx::test(migrations = "./migrations")] async fn mark_dropped_mangas_only_drops_unseen(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") diff --git a/frontend/package.json b/frontend/package.json index 1aa4475..14ff174 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.35.4", + "version": "0.35.5", "private": true, "type": "module", "scripts": {