From 79eb4d24779179a32da8fb2e80f32d48013e8e0a Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Thu, 28 May 2026 07:36:09 +0200 Subject: [PATCH] bugfix: preserve user-attached tags across crawler upserts (0.34.1) repo::crawler::sync_tags ran an unconditional DELETE FROM manga_tags WHERE manga_id = $1, wiping every user attachment on every metadata pass (including Unchanged ticks). Scope the delete to crawler-owned rows (added_by IS NULL); user-attached tags now survive. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/Cargo.toml | 2 +- backend/src/repo/crawler.rs | 15 +++- backend/tests/crawler_sync.rs | 164 ++++++++++++++++++++++++++++++++++ frontend/package.json | 2 +- 4 files changed, 180 insertions(+), 3 deletions(-) diff --git a/backend/Cargo.toml b/backend/Cargo.toml index c091570..a126262 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.34.0" +version = "0.34.1" edition = "2021" default-run = "mangalord" diff --git a/backend/src/repo/crawler.rs b/backend/src/repo/crawler.rs index eb83877..eeb5bfd 100644 --- a/backend/src/repo/crawler.rs +++ b/backend/src/repo/crawler.rs @@ -274,7 +274,20 @@ async fn sync_tags( manga_id: Uuid, tags: &[String], ) -> sqlx::Result<()> { - sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1") + // Only clear crawler-owned attachments (added_by IS NULL). User- + // attached tags are owned by the attaching user and must survive + // the recurring metadata pass — see manga_tags.added_by in + // migration 0009. + // + // Note on orphans: `manga_tags.added_by` is `ON DELETE SET NULL`, + // so an attachment whose user was deleted becomes + // indistinguishable from a crawler-owned row and is cleaned up + // here. That mirrors how `api::mangas::detach_tag` already treats + // orphans ("nobody owns it, refuse to let anyone but admin clear + // them") — the crawler now becomes the eventual reaper. Tracked + // by `sync_tags_garbage_collects_orphan_user_attachments` in + // backend/tests/crawler_sync.rs. + sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1 AND added_by IS NULL") .bind(manga_id) .execute(&mut **tx) .await?; diff --git a/backend/tests/crawler_sync.rs b/backend/tests/crawler_sync.rs index 27f6f3b..d6f7df2 100644 --- a/backend/tests/crawler_sync.rs +++ b/backend/tests/crawler_sync.rs @@ -440,6 +440,170 @@ async fn arbitrary_genres_from_source_get_inserted(pool: PgPool) { assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row"); } +/// User-attached tags (rows with non-NULL `added_by` in `manga_tags`) +/// must survive a crawler upsert. The crawler owns source-attached tags +/// (added_by IS NULL); user attachments are owned by the user who made +/// them and the recurring metadata pass must not delete them. +#[sqlx::test(migrations = "./migrations")] +async fn sync_tags_preserves_user_attached_tags(pool: PgPool) { + crawler::ensure_source(&pool, "target", "T", "https://x.example") + .await + .unwrap(); + let m = sample_manga("foo", "Foo Manga", "hash-1"); + let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) + .await + .unwrap(); + + // A real user attaches a personal tag. + let user = mangalord::repo::user::create(&pool, "alice", "phc-stub") + .await + .unwrap(); + let outcome = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id) + .await + .unwrap(); + assert!(outcome.created_attachment); + + // Second crawler pass. Use a different metadata_hash so the upsert + // takes the Updated branch, but the bug also fires on Unchanged + // ticks since sync_tags runs unconditionally. + let mut m2 = m.clone(); + m2.metadata_hash = "hash-2".into(); + m2.tags = vec!["popular".into(), "weekly".into()]; + let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2) + .await + .unwrap(); + + // The user tag must still be attached. + let user_tag_rows: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM manga_tags mt \ + JOIN tags t ON t.id = mt.tag_id \ + WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \ + AND mt.added_by = $2", + ) + .bind(up.manga_id) + .bind(user.id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!( + user_tag_rows.0, 1, + "user-attached tag must survive a crawler upsert" + ); + + // The source's tags should still attach as well, as crawler-owned. + let source_tag_rows: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM manga_tags mt \ + JOIN tags t ON t.id = mt.tag_id \ + WHERE mt.manga_id = $1 \ + AND mt.added_by IS NULL \ + AND lower(t.name) IN ('popular', 'weekly')", + ) + .bind(up.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(source_tag_rows.0, 2, "source tags re-attach on each pass"); + + // A subsequent pass where the source drops a previously-seen tag + // must clear that crawler-owned attachment (otherwise crawler-tags + // would only ever accumulate). + let mut m3 = m2.clone(); + m3.metadata_hash = "hash-3".into(); + m3.tags = vec!["popular".into()]; + let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m3) + .await + .unwrap(); + let weekly_rows: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM manga_tags mt \ + JOIN tags t ON t.id = mt.tag_id \ + WHERE mt.manga_id = $1 AND lower(t.name) = 'weekly'", + ) + .bind(up.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(weekly_rows.0, 0, "source-owned tag dropped by source goes away"); + + // And the user tag still survives that third pass. + let user_tag_rows: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM manga_tags mt \ + JOIN tags t ON t.id = mt.tag_id \ + WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \ + AND mt.added_by = $2", + ) + .bind(up.manga_id) + .bind(user.id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(user_tag_rows.0, 1); +} + +/// `manga_tags.added_by` is `ON DELETE SET NULL` on the user FK. When +/// the attaching user is deleted, their attachments become orphans +/// indistinguishable from crawler-owned rows — and the crawler should +/// reap them on the next pass. Pins the semantic so a future change +/// can't quietly leave orphan rows lying around. +#[sqlx::test(migrations = "./migrations")] +async fn sync_tags_garbage_collects_orphan_user_attachments(pool: PgPool) { + crawler::ensure_source(&pool, "target", "T", "https://x.example") + .await + .unwrap(); + let m = sample_manga("foo", "Foo", "hash-1"); + let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) + .await + .unwrap(); + + // A user attaches "personal", then the user gets deleted. The + // attachment row stays (manga_tags.manga_id FK is CASCADE on + // mangas only; we never CASCADE-delete user attachments). The FK + // on added_by is `ON DELETE SET NULL`, so the row's owner column + // goes NULL — same shape as a crawler-owned row. + let user = mangalord::repo::user::create(&pool, "bob", "phc-stub") + .await + .unwrap(); + let _ = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id) + .await + .unwrap(); + sqlx::query("DELETE FROM users WHERE id = $1") + .bind(user.id) + .execute(&pool) + .await + .unwrap(); + + // Sanity: the orphan still exists post-user-delete with added_by NULL. + let (orphan_rows,): (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM manga_tags mt \ + JOIN tags t ON t.id = mt.tag_id \ + WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \ + AND mt.added_by IS NULL", + ) + .bind(up.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(orphan_rows, 1); + + // Next crawler pass — orphan should be reaped along with any + // other source-owned rows that aren't in the new tag list. + let mut m2 = m.clone(); + m2.metadata_hash = "hash-2".into(); + m2.tags = vec!["popular".into()]; + let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2) + .await + .unwrap(); + let (orphan_rows,): (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM manga_tags mt \ + JOIN tags t ON t.id = mt.tag_id \ + WHERE mt.manga_id = $1 AND lower(t.name) = 'personal'", + ) + .bind(up.manga_id) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(orphan_rows, 0, "orphan user-attached tag should be reaped"); +} + #[sqlx::test(migrations = "./migrations")] async fn re_appearing_manga_clears_dropped_at(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") diff --git a/frontend/package.json b/frontend/package.json index 159dad9..72a32cd 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.34.0", + "version": "0.34.1", "private": true, "type": "module", "scripts": {