bugfix: preserve user-attached tags across crawler upserts (0.34.1)
repo::crawler::sync_tags ran an unconditional DELETE FROM manga_tags WHERE manga_id = $1, wiping every user attachment on every metadata pass (including Unchanged ticks). Scope the delete to crawler-owned rows (added_by IS NULL); user-attached tags now survive. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.34.0"
|
||||
version = "0.34.1"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
|
||||
@@ -274,7 +274,20 @@ async fn sync_tags(
|
||||
manga_id: Uuid,
|
||||
tags: &[String],
|
||||
) -> sqlx::Result<()> {
|
||||
sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1")
|
||||
// Only clear crawler-owned attachments (added_by IS NULL). User-
|
||||
// attached tags are owned by the attaching user and must survive
|
||||
// the recurring metadata pass — see manga_tags.added_by in
|
||||
// migration 0009.
|
||||
//
|
||||
// Note on orphans: `manga_tags.added_by` is `ON DELETE SET NULL`,
|
||||
// so an attachment whose user was deleted becomes
|
||||
// indistinguishable from a crawler-owned row and is cleaned up
|
||||
// here. That mirrors how `api::mangas::detach_tag` already treats
|
||||
// orphans ("nobody owns it, refuse to let anyone but admin clear
|
||||
// them") — the crawler now becomes the eventual reaper. Tracked
|
||||
// by `sync_tags_garbage_collects_orphan_user_attachments` in
|
||||
// backend/tests/crawler_sync.rs.
|
||||
sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1 AND added_by IS NULL")
|
||||
.bind(manga_id)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
|
||||
@@ -440,6 +440,170 @@ async fn arbitrary_genres_from_source_get_inserted(pool: PgPool) {
|
||||
assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row");
|
||||
}
|
||||
|
||||
/// User-attached tags (rows with non-NULL `added_by` in `manga_tags`)
|
||||
/// must survive a crawler upsert. The crawler owns source-attached tags
|
||||
/// (added_by IS NULL); user attachments are owned by the user who made
|
||||
/// them and the recurring metadata pass must not delete them.
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn sync_tags_preserves_user_attached_tags(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// A real user attaches a personal tag.
|
||||
let user = mangalord::repo::user::create(&pool, "alice", "phc-stub")
|
||||
.await
|
||||
.unwrap();
|
||||
let outcome = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(outcome.created_attachment);
|
||||
|
||||
// Second crawler pass. Use a different metadata_hash so the upsert
|
||||
// takes the Updated branch, but the bug also fires on Unchanged
|
||||
// ticks since sync_tags runs unconditionally.
|
||||
let mut m2 = m.clone();
|
||||
m2.metadata_hash = "hash-2".into();
|
||||
m2.tags = vec!["popular".into(), "weekly".into()];
|
||||
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// The user tag must still be attached.
|
||||
let user_tag_rows: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM manga_tags mt \
|
||||
JOIN tags t ON t.id = mt.tag_id \
|
||||
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
||||
AND mt.added_by = $2",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.bind(user.id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
user_tag_rows.0, 1,
|
||||
"user-attached tag must survive a crawler upsert"
|
||||
);
|
||||
|
||||
// The source's tags should still attach as well, as crawler-owned.
|
||||
let source_tag_rows: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM manga_tags mt \
|
||||
JOIN tags t ON t.id = mt.tag_id \
|
||||
WHERE mt.manga_id = $1 \
|
||||
AND mt.added_by IS NULL \
|
||||
AND lower(t.name) IN ('popular', 'weekly')",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(source_tag_rows.0, 2, "source tags re-attach on each pass");
|
||||
|
||||
// A subsequent pass where the source drops a previously-seen tag
|
||||
// must clear that crawler-owned attachment (otherwise crawler-tags
|
||||
// would only ever accumulate).
|
||||
let mut m3 = m2.clone();
|
||||
m3.metadata_hash = "hash-3".into();
|
||||
m3.tags = vec!["popular".into()];
|
||||
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m3)
|
||||
.await
|
||||
.unwrap();
|
||||
let weekly_rows: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM manga_tags mt \
|
||||
JOIN tags t ON t.id = mt.tag_id \
|
||||
WHERE mt.manga_id = $1 AND lower(t.name) = 'weekly'",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(weekly_rows.0, 0, "source-owned tag dropped by source goes away");
|
||||
|
||||
// And the user tag still survives that third pass.
|
||||
let user_tag_rows: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM manga_tags mt \
|
||||
JOIN tags t ON t.id = mt.tag_id \
|
||||
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
||||
AND mt.added_by = $2",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.bind(user.id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(user_tag_rows.0, 1);
|
||||
}
|
||||
|
||||
/// `manga_tags.added_by` is `ON DELETE SET NULL` on the user FK. When
|
||||
/// the attaching user is deleted, their attachments become orphans
|
||||
/// indistinguishable from crawler-owned rows — and the crawler should
|
||||
/// reap them on the next pass. Pins the semantic so a future change
|
||||
/// can't quietly leave orphan rows lying around.
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn sync_tags_garbage_collects_orphan_user_attachments(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// A user attaches "personal", then the user gets deleted. The
|
||||
// attachment row stays (manga_tags.manga_id FK is CASCADE on
|
||||
// mangas only; we never CASCADE-delete user attachments). The FK
|
||||
// on added_by is `ON DELETE SET NULL`, so the row's owner column
|
||||
// goes NULL — same shape as a crawler-owned row.
|
||||
let user = mangalord::repo::user::create(&pool, "bob", "phc-stub")
|
||||
.await
|
||||
.unwrap();
|
||||
let _ = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("DELETE FROM users WHERE id = $1")
|
||||
.bind(user.id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Sanity: the orphan still exists post-user-delete with added_by NULL.
|
||||
let (orphan_rows,): (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM manga_tags mt \
|
||||
JOIN tags t ON t.id = mt.tag_id \
|
||||
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
||||
AND mt.added_by IS NULL",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(orphan_rows, 1);
|
||||
|
||||
// Next crawler pass — orphan should be reaped along with any
|
||||
// other source-owned rows that aren't in the new tag list.
|
||||
let mut m2 = m.clone();
|
||||
m2.metadata_hash = "hash-2".into();
|
||||
m2.tags = vec!["popular".into()];
|
||||
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2)
|
||||
.await
|
||||
.unwrap();
|
||||
let (orphan_rows,): (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM manga_tags mt \
|
||||
JOIN tags t ON t.id = mt.tag_id \
|
||||
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal'",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(orphan_rows, 0, "orphan user-attached tag should be reaped");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
|
||||
Reference in New Issue
Block a user