chapter_sources's PRIMARY KEY was (source_id, source_chapter_key) and the lookup in sync_manga_chapters didn't constrain by manga_id, so a source whose chapter slugs aren't globally unique (e.g. "chapter-1" appearing under multiple mangas) silently attributed every collision to the first manga that synced it. The INSERT path would have conflicted on the second manga's sync. Migration 0017 drops the old PK and rekeys on (source_id, chapter_id) — the natural identity of a per-source chapter attachment — and adds an index on (source_id, source_chapter_key) for the lookup path. The repo lookup now joins chapters and filters by manga_id; the UPDATE path keys on chapter_id directly (the row's natural identifier post-migration). Test sync_chapters_isolates_colliding_keys_across_mangas pins the contract end-to-end. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
753 lines
26 KiB
Rust
753 lines
26 KiB
Rust
//! Integration tests for `repo::crawler`.
//!
//! Each test runs against a fresh, migrated DB via `#[sqlx::test]`.
//! `DATABASE_URL` must point to a Postgres instance where the test
//! user has the `CREATEDB` privilege.
|
|
|
|
use mangalord::crawler::source::{SourceChapterRef, SourceManga};
|
|
use mangalord::repo::crawler::{self, ChapterDiff, UpsertStatus};
|
|
use sqlx::PgPool;
|
|
use uuid::Uuid;
|
|
|
|
/// Helper to spin up a `SourceManga` fixture with a stable shape so
|
|
/// each test can tweak just the fields it cares about.
|
|
fn sample_manga(key: &str, title: &str, hash: &str) -> SourceManga {
|
|
SourceManga {
|
|
source_manga_key: key.to_string(),
|
|
title: title.to_string(),
|
|
alternative_titles: vec!["Alt 1".into()],
|
|
authors: vec!["Author One".into()],
|
|
// Action is in the seeded `genres` table; Fantasy is too.
|
|
genres: vec!["Action".into(), "Fantasy".into()],
|
|
tags: vec!["popular".into()],
|
|
status: Some("ongoing".into()),
|
|
summary: Some("Sample summary.".into()),
|
|
cover_url: Some("/cover.jpg".into()),
|
|
chapters: vec![],
|
|
metadata_hash: hash.to_string(),
|
|
}
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn ensure_source_is_idempotent(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "Target Site", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
crawler::ensure_source(&pool, "target", "Target Site v2", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM sources WHERE id = 'target'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(count.0, 1);
|
|
let name: (String,) = sqlx::query_as("SELECT name FROM sources WHERE id = 'target'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(name.0, "Target Site v2", "name updates on re-call");
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn first_upsert_inserts_manga_and_links_metadata(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
|
|
let res = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(res.status, UpsertStatus::New);
|
|
|
|
// mangas row created
|
|
let row: (String, String, Vec<String>) =
|
|
sqlx::query_as("SELECT title, status, alt_titles FROM mangas WHERE id = $1")
|
|
.bind(res.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(row.0, "Foo Manga");
|
|
assert_eq!(row.1, "ongoing");
|
|
assert_eq!(row.2, vec!["Alt 1"]);
|
|
|
|
// manga_sources row links the two
|
|
let link: (String, Uuid, Option<String>) = sqlx::query_as(
|
|
"SELECT source_id, manga_id, metadata_hash FROM manga_sources WHERE source_manga_key = $1",
|
|
)
|
|
.bind("foo")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(link.0, "target");
|
|
assert_eq!(link.1, res.manga_id);
|
|
assert_eq!(link.2.as_deref(), Some("hash-1"));
|
|
|
|
// Authors, genres, tags M2M populated
|
|
let n_authors: (i64,) =
|
|
sqlx::query_as("SELECT COUNT(*) FROM manga_authors WHERE manga_id = $1")
|
|
.bind(res.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(n_authors.0, 1);
|
|
let n_genres: (i64,) =
|
|
sqlx::query_as("SELECT COUNT(*) FROM manga_genres WHERE manga_id = $1")
|
|
.bind(res.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(n_genres.0, 2, "Action + Fantasy");
|
|
let n_tags: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM manga_tags WHERE manga_id = $1")
|
|
.bind(res.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(n_tags.0, 1);
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn second_upsert_with_same_hash_reports_unchanged(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(second.status, UpsertStatus::Unchanged);
|
|
assert_eq!(second.manga_id, first.manga_id);
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn upsert_with_changed_hash_updates_fields(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let mut m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
m.title = "Foo Manga (Revised)".into();
|
|
m.status = Some("completed".into());
|
|
m.metadata_hash = "hash-2".into();
|
|
let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
assert_eq!(second.status, UpsertStatus::Updated);
|
|
assert_eq!(second.manga_id, first.manga_id);
|
|
|
|
let row: (String, String) =
|
|
sqlx::query_as("SELECT title, status FROM mangas WHERE id = $1")
|
|
.bind(first.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(row.0, "Foo Manga (Revised)");
|
|
assert_eq!(row.1, "completed");
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
let initial = vec![
|
|
SourceChapterRef {
|
|
source_chapter_key: "1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1".into()),
|
|
url: "https://x.example/foo/1".into(),
|
|
},
|
|
SourceChapterRef {
|
|
source_chapter_key: "2".into(),
|
|
number: 2,
|
|
title: Some("Ch.2".into()),
|
|
url: "https://x.example/foo/2".into(),
|
|
},
|
|
];
|
|
let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &initial)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
diff,
|
|
ChapterDiff {
|
|
new: 2,
|
|
refreshed: 0,
|
|
dropped: 0
|
|
}
|
|
);
|
|
|
|
// Second run: keep ch1, replace ch2 with ch3 — ch2 should be dropped.
|
|
let second = vec![
|
|
SourceChapterRef {
|
|
source_chapter_key: "1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1 (renamed)".into()),
|
|
url: "https://x.example/foo/1".into(),
|
|
},
|
|
SourceChapterRef {
|
|
source_chapter_key: "3".into(),
|
|
number: 3,
|
|
title: Some("Ch.3".into()),
|
|
url: "https://x.example/foo/3".into(),
|
|
},
|
|
];
|
|
let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &second)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
diff,
|
|
ChapterDiff {
|
|
new: 1,
|
|
refreshed: 1,
|
|
dropped: 1
|
|
}
|
|
);
|
|
|
|
// Renamed title propagated to chapters.title
|
|
let title: (Option<String>,) =
|
|
sqlx::query_as("SELECT c.title FROM chapters c JOIN chapter_sources cs ON cs.chapter_id = c.id WHERE cs.source_chapter_key = '1'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(title.0.as_deref(), Some("Ch.1 (renamed)"));
|
|
|
|
// Vanished chapter is soft-dropped (row still exists, dropped_at set).
|
|
let dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
|
sqlx::query_as("SELECT dropped_at FROM chapter_sources WHERE source_chapter_key = '2'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
|
}
|
|
|
|
/// Real-world sources publish multiple chapters at the same number
|
|
/// (different uploaders, translator notes, re-releases). After the
|
|
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
|
/// becomes its own `chapters` row even when the parsed number matches
|
|
/// — chapter identity is now the chapter id, not the number.
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn sync_chapters_keeps_duplicate_numbered_chapters_as_separate_rows(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
// Two distinct uploads of Ch.52 (different uploaders → different
|
|
// URLs/keys, same parsed number) plus a notice/hiatus row that
|
|
// parses to number=0 alongside a real chapter at number 1.
|
|
let chapters = vec![
|
|
SourceChapterRef {
|
|
source_chapter_key: "br_chapter-A".into(),
|
|
number: 52,
|
|
title: Some("Ch.52 : Official".into()),
|
|
url: "https://x.example/foo/A/pg-1/".into(),
|
|
},
|
|
SourceChapterRef {
|
|
source_chapter_key: "br_chapter-B".into(),
|
|
number: 52,
|
|
title: Some("Ch.52 : Official (alt)".into()),
|
|
url: "https://x.example/foo/B/pg-1/".into(),
|
|
},
|
|
SourceChapterRef {
|
|
source_chapter_key: "br_chapter-NOTICE".into(),
|
|
number: 0,
|
|
title: Some("hitaus.".into()),
|
|
url: "https://x.example/foo/notice/pg-1/".into(),
|
|
},
|
|
SourceChapterRef {
|
|
source_chapter_key: "br_chapter-1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1 : Official".into()),
|
|
url: "https://x.example/foo/1/pg-1/".into(),
|
|
},
|
|
];
|
|
|
|
let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
diff,
|
|
ChapterDiff {
|
|
new: 4,
|
|
refreshed: 0,
|
|
dropped: 0
|
|
},
|
|
"every source ref yields a new chapter row"
|
|
);
|
|
|
|
let rows: (i64,) =
|
|
sqlx::query_as("SELECT COUNT(*) FROM chapters WHERE manga_id = $1")
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(rows.0, 4, "4 distinct chapter rows even with duplicate numbers");
|
|
|
|
let ch52_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM chapters WHERE manga_id = $1 AND number = 52",
|
|
)
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(ch52_count.0, 2, "both Ch.52 uploads survive as separate rows");
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn sync_chapters_isolates_colliding_keys_across_mangas(pool: PgPool) {
|
|
// Two mangas, both with a chapter whose source_chapter_key is
|
|
// "chapter-1". Pre-migration-0017 the PK enforced (source_id,
|
|
// source_chapter_key) globally and the lookup didn't filter by
|
|
// manga_id, so the second manga's sync would adopt the first manga's
|
|
// chapter_id (silent attribution corruption). After 0017 each manga
|
|
// owns its own row.
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m1 = sample_manga("foo", "Manga Foo", "hash-foo");
|
|
let m2 = sample_manga("bar", "Manga Bar", "hash-bar");
|
|
let up1 = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m1)
|
|
.await
|
|
.unwrap();
|
|
let up2 = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/bar", &m2)
|
|
.await
|
|
.unwrap();
|
|
assert_ne!(up1.manga_id, up2.manga_id);
|
|
|
|
let shared = vec![SourceChapterRef {
|
|
source_chapter_key: "chapter-1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1".into()),
|
|
url: "https://x.example/foo/chapter-1/".into(),
|
|
}];
|
|
let diff1 = crawler::sync_manga_chapters(&pool, "target", up1.manga_id, &shared)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(diff1.new, 1, "manga foo: chapter inserted fresh");
|
|
|
|
// Manga bar now syncs *the same key*. Under the old schema this would
|
|
// either fail on PK conflict or attribute the chapter to foo. Under
|
|
// the new schema bar gets its own chapter row.
|
|
let bar_chapters = vec![SourceChapterRef {
|
|
source_chapter_key: "chapter-1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1 (bar)".into()),
|
|
url: "https://x.example/bar/chapter-1/".into(),
|
|
}];
|
|
let diff2 = crawler::sync_manga_chapters(&pool, "target", up2.manga_id, &bar_chapters)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
diff2.new, 1,
|
|
"manga bar: same key resolved per-manga to a fresh row"
|
|
);
|
|
|
|
let foo_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM chapters WHERE manga_id = $1",
|
|
)
|
|
.bind(up1.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
let bar_count: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM chapters WHERE manga_id = $1",
|
|
)
|
|
.bind(up2.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(foo_count.0, 1);
|
|
assert_eq!(bar_count.0, 1);
|
|
|
|
let bar_title: (Option<String>,) = sqlx::query_as(
|
|
"SELECT title FROM chapters WHERE manga_id = $1 AND number = 1",
|
|
)
|
|
.bind(up2.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
bar_title.0.as_deref(),
|
|
Some("Ch.1 (bar)"),
|
|
"bar's chapter has bar's title, not foo's"
|
|
);
|
|
|
|
// A subsequent re-sync of foo with the same key correctly refreshes
|
|
// foo's row, not bar's.
|
|
let foo_resync = vec![SourceChapterRef {
|
|
source_chapter_key: "chapter-1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1 (foo updated)".into()),
|
|
url: "https://x.example/foo/chapter-1/".into(),
|
|
}];
|
|
let diff_refresh = crawler::sync_manga_chapters(&pool, "target", up1.manga_id, &foo_resync)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(diff_refresh.refreshed, 1);
|
|
assert_eq!(diff_refresh.new, 0);
|
|
|
|
let foo_title: (Option<String>,) = sqlx::query_as(
|
|
"SELECT title FROM chapters WHERE manga_id = $1 AND number = 1",
|
|
)
|
|
.bind(up1.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(foo_title.0.as_deref(), Some("Ch.1 (foo updated)"));
|
|
let bar_title_after: (Option<String>,) = sqlx::query_as(
|
|
"SELECT title FROM chapters WHERE manga_id = $1 AND number = 1",
|
|
)
|
|
.bind(up2.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
bar_title_after.0.as_deref(),
|
|
Some("Ch.1 (bar)"),
|
|
"bar's row is untouched by foo's refresh"
|
|
);
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn mark_dropped_mangas_only_drops_unseen(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
// Seed two mangas before "now" so a later run_started_at sees them as stale.
|
|
let _ = crawler::upsert_manga_from_source(
|
|
&pool,
|
|
"target",
|
|
"https://x.example/foo",
|
|
&sample_manga("foo", "Foo", "hf"),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
let _ = crawler::upsert_manga_from_source(
|
|
&pool,
|
|
"target",
|
|
"https://x.example/bar",
|
|
&sample_manga("bar", "Bar", "hb"),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
|
|
// Now mark a new "run" beginning. Re-upsert only `foo` — `bar`
|
|
// should be the one flagged dropped.
|
|
let run_started = chrono::Utc::now();
|
|
// Sleep briefly so the second upsert's NOW() > run_started_at.
|
|
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
|
let _ = crawler::upsert_manga_from_source(
|
|
&pool,
|
|
"target",
|
|
"https://x.example/foo",
|
|
&sample_manga("foo", "Foo", "hf"),
|
|
)
|
|
.await
|
|
.unwrap();
|
|
|
|
let n = crawler::mark_dropped_mangas(&pool, "target", run_started)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(n, 1, "only bar should have been dropped");
|
|
|
|
let foo_dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
|
sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'foo'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert!(foo_dropped.0.is_none(), "foo seen this run, must not be dropped");
|
|
let bar_dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
|
sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'bar'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert!(bar_dropped.0.is_some());
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn upsert_surfaces_cover_image_path_for_backfill_decisions(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo", "h1");
|
|
|
|
// First upsert: row is brand new, no cover stored yet.
|
|
let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
assert!(first.cover_image_path.is_none(), "new manga has no cover yet");
|
|
|
|
// Simulate cover landing in storage post-upsert.
|
|
sqlx::query("UPDATE mangas SET cover_image_path = $1 WHERE id = $2")
|
|
.bind("mangas/foo/cover.jpg")
|
|
.bind(first.manga_id)
|
|
.execute(&pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
// Second upsert with same hash → Unchanged, but cover path is now
|
|
// surfaced so the caller knows the backfill is done.
|
|
let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(second.status, UpsertStatus::Unchanged);
|
|
assert_eq!(
|
|
second.cover_image_path.as_deref(),
|
|
Some("mangas/foo/cover.jpg")
|
|
);
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn arbitrary_genres_from_source_get_inserted(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let mut m = sample_manga("foo", "Foo", "h");
|
|
// "Action" is seeded by migration 0009. "Webtoons" is not.
|
|
m.genres = vec!["Action".into(), "Webtoons".into()];
|
|
|
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
let n_genre_links: (i64,) =
|
|
sqlx::query_as("SELECT COUNT(*) FROM manga_genres WHERE manga_id = $1")
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(n_genre_links.0, 2, "both seeded and source-added genres attach");
|
|
|
|
let webtoons: (i64,) =
|
|
sqlx::query_as("SELECT COUNT(*) FROM genres WHERE name = 'Webtoons'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(webtoons.0, 1, "non-seeded genre was inserted");
|
|
|
|
// Case-insensitive de-dup: a second sync with the genre re-cased
|
|
// attaches the existing row, not a new one.
|
|
let mut m2 = sample_manga("bar", "Bar", "h2");
|
|
m2.genres = vec!["webtoons".into()];
|
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/bar", &m2)
|
|
.await
|
|
.unwrap();
|
|
let webtoons_count: (i64,) =
|
|
sqlx::query_as("SELECT COUNT(*) FROM genres WHERE lower(name) = 'webtoons'")
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row");
|
|
}
|
|
|
|
/// User-attached tags (rows with non-NULL `added_by` in `manga_tags`)
|
|
/// must survive a crawler upsert. The crawler owns source-attached tags
|
|
/// (added_by IS NULL); user attachments are owned by the user who made
|
|
/// them and the recurring metadata pass must not delete them.
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn sync_tags_preserves_user_attached_tags(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
// A real user attaches a personal tag.
|
|
let user = mangalord::repo::user::create(&pool, "alice", "phc-stub")
|
|
.await
|
|
.unwrap();
|
|
let outcome = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id)
|
|
.await
|
|
.unwrap();
|
|
assert!(outcome.created_attachment);
|
|
|
|
// Second crawler pass. Use a different metadata_hash so the upsert
|
|
// takes the Updated branch, but the bug also fires on Unchanged
|
|
// ticks since sync_tags runs unconditionally.
|
|
let mut m2 = m.clone();
|
|
m2.metadata_hash = "hash-2".into();
|
|
m2.tags = vec!["popular".into(), "weekly".into()];
|
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2)
|
|
.await
|
|
.unwrap();
|
|
|
|
// The user tag must still be attached.
|
|
let user_tag_rows: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM manga_tags mt \
|
|
JOIN tags t ON t.id = mt.tag_id \
|
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
|
AND mt.added_by = $2",
|
|
)
|
|
.bind(up.manga_id)
|
|
.bind(user.id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(
|
|
user_tag_rows.0, 1,
|
|
"user-attached tag must survive a crawler upsert"
|
|
);
|
|
|
|
// The source's tags should still attach as well, as crawler-owned.
|
|
let source_tag_rows: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM manga_tags mt \
|
|
JOIN tags t ON t.id = mt.tag_id \
|
|
WHERE mt.manga_id = $1 \
|
|
AND mt.added_by IS NULL \
|
|
AND lower(t.name) IN ('popular', 'weekly')",
|
|
)
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(source_tag_rows.0, 2, "source tags re-attach on each pass");
|
|
|
|
// A subsequent pass where the source drops a previously-seen tag
|
|
// must clear that crawler-owned attachment (otherwise crawler-tags
|
|
// would only ever accumulate).
|
|
let mut m3 = m2.clone();
|
|
m3.metadata_hash = "hash-3".into();
|
|
m3.tags = vec!["popular".into()];
|
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m3)
|
|
.await
|
|
.unwrap();
|
|
let weekly_rows: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM manga_tags mt \
|
|
JOIN tags t ON t.id = mt.tag_id \
|
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'weekly'",
|
|
)
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(weekly_rows.0, 0, "source-owned tag dropped by source goes away");
|
|
|
|
// And the user tag still survives that third pass.
|
|
let user_tag_rows: (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM manga_tags mt \
|
|
JOIN tags t ON t.id = mt.tag_id \
|
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
|
AND mt.added_by = $2",
|
|
)
|
|
.bind(up.manga_id)
|
|
.bind(user.id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(user_tag_rows.0, 1);
|
|
}
|
|
|
|
/// `manga_tags.added_by` is `ON DELETE SET NULL` on the user FK. When
|
|
/// the attaching user is deleted, their attachments become orphans
|
|
/// indistinguishable from crawler-owned rows — and the crawler should
|
|
/// reap them on the next pass. Pins the semantic so a future change
|
|
/// can't quietly leave orphan rows lying around.
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn sync_tags_garbage_collects_orphan_user_attachments(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo", "hash-1");
|
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
// A user attaches "personal", then the user gets deleted. The
|
|
// attachment row stays (manga_tags.manga_id FK is CASCADE on
|
|
// mangas only; we never CASCADE-delete user attachments). The FK
|
|
// on added_by is `ON DELETE SET NULL`, so the row's owner column
|
|
// goes NULL — same shape as a crawler-owned row.
|
|
let user = mangalord::repo::user::create(&pool, "bob", "phc-stub")
|
|
.await
|
|
.unwrap();
|
|
let _ = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id)
|
|
.await
|
|
.unwrap();
|
|
sqlx::query("DELETE FROM users WHERE id = $1")
|
|
.bind(user.id)
|
|
.execute(&pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
// Sanity: the orphan still exists post-user-delete with added_by NULL.
|
|
let (orphan_rows,): (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM manga_tags mt \
|
|
JOIN tags t ON t.id = mt.tag_id \
|
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
|
AND mt.added_by IS NULL",
|
|
)
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(orphan_rows, 1);
|
|
|
|
// Next crawler pass — orphan should be reaped along with any
|
|
// other source-owned rows that aren't in the new tag list.
|
|
let mut m2 = m.clone();
|
|
m2.metadata_hash = "hash-2".into();
|
|
m2.tags = vec!["popular".into()];
|
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2)
|
|
.await
|
|
.unwrap();
|
|
let (orphan_rows,): (i64,) = sqlx::query_as(
|
|
"SELECT COUNT(*) FROM manga_tags mt \
|
|
JOIN tags t ON t.id = mt.tag_id \
|
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal'",
|
|
)
|
|
.bind(up.manga_id)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert_eq!(orphan_rows, 0, "orphan user-attached tag should be reaped");
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo", "h1");
|
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
|
|
// Drop it manually.
|
|
sqlx::query(
|
|
"UPDATE manga_sources SET dropped_at = NOW() WHERE source_manga_key = 'foo'",
|
|
)
|
|
.execute(&pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
// Re-upsert: the link should un-drop.
|
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
let dropped: (Option<chrono::DateTime<chrono::Utc>>, Uuid) = sqlx::query_as(
|
|
"SELECT dropped_at, manga_id FROM manga_sources WHERE source_manga_key = 'foo'",
|
|
)
|
|
.fetch_one(&pool)
|
|
.await
|
|
.unwrap();
|
|
assert!(dropped.0.is_none());
|
|
assert_eq!(dropped.1, up.manga_id);
|
|
}
|