//! Integration tests for `repo::crawler`. //! //! Each test runs against a fresh, migrated DB via `#[sqlx::test]`. //! `DATABASE_URL` must point to a Postgres where the test user can //! `CREATEDB`. use mangalord::crawler::source::{SourceChapterRef, SourceManga}; use mangalord::repo::crawler::{self, ChapterDiff, UpsertStatus}; use sqlx::PgPool; use uuid::Uuid; /// Helper to spin up a `SourceManga` fixture with a stable shape so /// each test can tweak just the fields it cares about. fn sample_manga(key: &str, title: &str, hash: &str) -> SourceManga { SourceManga { source_manga_key: key.to_string(), title: title.to_string(), alternative_titles: vec!["Alt 1".into()], authors: vec!["Author One".into()], // Action is in the seeded `genres` table; Fantasy is too. genres: vec!["Action".into(), "Fantasy".into()], tags: vec!["popular".into()], status: Some("ongoing".into()), summary: Some("Sample summary.".into()), cover_url: Some("/cover.jpg".into()), chapters: vec![], metadata_hash: hash.to_string(), } } #[sqlx::test(migrations = "./migrations")] async fn ensure_source_is_idempotent(pool: PgPool) { crawler::ensure_source(&pool, "target", "Target Site", "https://x.example") .await .unwrap(); crawler::ensure_source(&pool, "target", "Target Site v2", "https://x.example") .await .unwrap(); let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM sources WHERE id = 'target'") .fetch_one(&pool) .await .unwrap(); assert_eq!(count.0, 1); let name: (String,) = sqlx::query_as("SELECT name FROM sources WHERE id = 'target'") .fetch_one(&pool) .await .unwrap(); assert_eq!(name.0, "Target Site v2", "name updates on re-call"); } #[sqlx::test(migrations = "./migrations")] async fn first_upsert_inserts_manga_and_links_metadata(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let m = sample_manga("foo", "Foo Manga", "hash-1"); let res = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); assert_eq!(res.status, UpsertStatus::New); // mangas row created let row: (String, String, Vec) = sqlx::query_as("SELECT title, status, alt_titles FROM mangas WHERE id = $1") .bind(res.manga_id) .fetch_one(&pool) .await .unwrap(); assert_eq!(row.0, "Foo Manga"); assert_eq!(row.1, "ongoing"); assert_eq!(row.2, vec!["Alt 1"]); // manga_sources row links the two let link: (String, Uuid, Option) = sqlx::query_as( "SELECT source_id, manga_id, metadata_hash FROM manga_sources WHERE source_manga_key = $1", ) .bind("foo") .fetch_one(&pool) .await .unwrap(); assert_eq!(link.0, "target"); assert_eq!(link.1, res.manga_id); assert_eq!(link.2.as_deref(), Some("hash-1")); // Authors, genres, tags M2M populated let n_authors: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM manga_authors WHERE manga_id = $1") .bind(res.manga_id) .fetch_one(&pool) .await .unwrap(); assert_eq!(n_authors.0, 1); let n_genres: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM manga_genres WHERE manga_id = $1") .bind(res.manga_id) .fetch_one(&pool) .await .unwrap(); assert_eq!(n_genres.0, 2, "Action + Fantasy"); let n_tags: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM manga_tags WHERE manga_id = $1") .bind(res.manga_id) .fetch_one(&pool) .await .unwrap(); assert_eq!(n_tags.0, 1); } #[sqlx::test(migrations = "./migrations")] async fn second_upsert_with_same_hash_reports_unchanged(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let m = sample_manga("foo", "Foo Manga", "hash-1"); let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); assert_eq!(second.status, UpsertStatus::Unchanged); assert_eq!(second.manga_id, first.manga_id); } #[sqlx::test(migrations = "./migrations")] async fn upsert_with_changed_hash_updates_fields(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let mut m = sample_manga("foo", "Foo Manga", "hash-1"); let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); m.title = "Foo Manga (Revised)".into(); m.status = Some("completed".into()); m.metadata_hash = "hash-2".into(); let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); assert_eq!(second.status, UpsertStatus::Updated); assert_eq!(second.manga_id, first.manga_id); let row: (String, String) = sqlx::query_as("SELECT title, status FROM mangas WHERE id = $1") .bind(first.manga_id) .fetch_one(&pool) .await .unwrap(); assert_eq!(row.0, "Foo Manga (Revised)"); assert_eq!(row.1, "completed"); } #[sqlx::test(migrations = "./migrations")] async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let m = sample_manga("foo", "Foo Manga", "hash-1"); let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); let initial = vec![ SourceChapterRef { source_chapter_key: "1".into(), number: 1, title: Some("Ch.1".into()), url: "https://x.example/foo/1".into(), }, SourceChapterRef { source_chapter_key: "2".into(), number: 2, title: Some("Ch.2".into()), url: "https://x.example/foo/2".into(), }, ]; let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &initial) .await .unwrap(); assert_eq!( diff, ChapterDiff { new: 2, refreshed: 0, dropped: 0 } ); // Second run: keep ch1, replace ch2 with ch3 — ch2 should be dropped. let second = vec![ SourceChapterRef { source_chapter_key: "1".into(), number: 1, title: Some("Ch.1 (renamed)".into()), url: "https://x.example/foo/1".into(), }, SourceChapterRef { source_chapter_key: "3".into(), number: 3, title: Some("Ch.3".into()), url: "https://x.example/foo/3".into(), }, ]; let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &second) .await .unwrap(); assert_eq!( diff, ChapterDiff { new: 1, refreshed: 1, dropped: 1 } ); // Renamed title propagated to chapters.title let title: (Option,) = sqlx::query_as("SELECT c.title FROM chapters c JOIN chapter_sources cs ON cs.chapter_id = c.id WHERE cs.source_chapter_key = '1'") .fetch_one(&pool) .await .unwrap(); assert_eq!(title.0.as_deref(), Some("Ch.1 (renamed)")); // Vanished chapter is soft-dropped (row still exists, dropped_at set). let dropped: (Option>,) = sqlx::query_as("SELECT dropped_at FROM chapter_sources WHERE source_chapter_key = '2'") .fetch_one(&pool) .await .unwrap(); assert!(dropped.0.is_some(), "ch2 should be soft-dropped"); } #[sqlx::test(migrations = "./migrations")] async fn mark_dropped_mangas_only_drops_unseen(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); // Seed two mangas before "now" so a later run_started_at sees them as stale. let _ = crawler::upsert_manga_from_source( &pool, "target", "https://x.example/foo", &sample_manga("foo", "Foo", "hf"), ) .await .unwrap(); let _ = crawler::upsert_manga_from_source( &pool, "target", "https://x.example/bar", &sample_manga("bar", "Bar", "hb"), ) .await .unwrap(); // Now mark a new "run" beginning. Re-upsert only `foo` — `bar` // should be the one flagged dropped. let run_started = chrono::Utc::now(); // Sleep briefly so the second upsert's NOW() > run_started_at. tokio::time::sleep(std::time::Duration::from_millis(20)).await; let _ = crawler::upsert_manga_from_source( &pool, "target", "https://x.example/foo", &sample_manga("foo", "Foo", "hf"), ) .await .unwrap(); let n = crawler::mark_dropped_mangas(&pool, "target", run_started) .await .unwrap(); assert_eq!(n, 1, "only bar should have been dropped"); let foo_dropped: (Option>,) = sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'foo'") .fetch_one(&pool) .await .unwrap(); assert!(foo_dropped.0.is_none(), "foo seen this run, must not be dropped"); let bar_dropped: (Option>,) = sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'bar'") .fetch_one(&pool) .await .unwrap(); assert!(bar_dropped.0.is_some()); } #[sqlx::test(migrations = "./migrations")] async fn upsert_surfaces_cover_image_path_for_backfill_decisions(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let m = sample_manga("foo", "Foo", "h1"); // First upsert: row is brand new, no cover stored yet. let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); assert!(first.cover_image_path.is_none(), "new manga has no cover yet"); // Simulate cover landing in storage post-upsert. sqlx::query("UPDATE mangas SET cover_image_path = $1 WHERE id = $2") .bind("mangas/foo/cover.jpg") .bind(first.manga_id) .execute(&pool) .await .unwrap(); // Second upsert with same hash → Unchanged, but cover path is now // surfaced so the caller knows the backfill is done. let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); assert_eq!(second.status, UpsertStatus::Unchanged); assert_eq!( second.cover_image_path.as_deref(), Some("mangas/foo/cover.jpg") ); } #[sqlx::test(migrations = "./migrations")] async fn arbitrary_genres_from_source_get_inserted(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let mut m = sample_manga("foo", "Foo", "h"); // "Action" is seeded by migration 0009. "Webtoons" is not. m.genres = vec!["Action".into(), "Webtoons".into()]; let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); let n_genre_links: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM manga_genres WHERE manga_id = $1") .bind(up.manga_id) .fetch_one(&pool) .await .unwrap(); assert_eq!(n_genre_links.0, 2, "both seeded and source-added genres attach"); let webtoons: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM genres WHERE name = 'Webtoons'") .fetch_one(&pool) .await .unwrap(); assert_eq!(webtoons.0, 1, "non-seeded genre was inserted"); // Case-insensitive de-dup: a second sync with the genre re-cased // attaches the existing row, not a new one. let mut m2 = sample_manga("bar", "Bar", "h2"); m2.genres = vec!["webtoons".into()]; let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/bar", &m2) .await .unwrap(); let webtoons_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM genres WHERE lower(name) = 'webtoons'") .fetch_one(&pool) .await .unwrap(); assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row"); } #[sqlx::test(migrations = "./migrations")] async fn re_appearing_manga_clears_dropped_at(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let m = sample_manga("foo", "Foo", "h1"); let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); // Drop it manually. sqlx::query( "UPDATE manga_sources SET dropped_at = NOW() WHERE source_manga_key = 'foo'", ) .execute(&pool) .await .unwrap(); // Re-upsert: the link should un-drop. let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m) .await .unwrap(); let dropped: (Option>, Uuid) = sqlx::query_as( "SELECT dropped_at, manga_id FROM manga_sources WHERE source_manga_key = 'foo'", ) .fetch_one(&pool) .await .unwrap(); assert!(dropped.0.is_none()); assert_eq!(dropped.1, up.manga_id); }