//! Integration tests for the incremental-mode coordination state: //! `mark_seed_completed` / `seed_completed_at` round-trip via the //! `crawler_state` table. //! //! End-to-end pipeline behavior (walker + stop-on-Unchanged) requires //! a real `chromiumoxide::Browser` to construct a `FetchContext`, so //! the live integration of that path is covered by //! `crawler_browser_smoke.rs` instead. The pure stop logic itself is //! unit-tested in `crawler::pipeline::tests`. use chrono::Utc; use mangalord::repo::crawler; use sqlx::PgPool; #[sqlx::test(migrations = "./migrations")] async fn seed_completed_at_none_before_any_run(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let res = crawler::seed_completed_at(&pool, "target").await.unwrap(); assert!(res.is_none(), "fresh source has no seed marker"); } #[sqlx::test(migrations = "./migrations")] async fn mark_seed_completed_then_read_round_trips_timestamp(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let at = Utc::now(); crawler::mark_seed_completed(&pool, "target", at) .await .unwrap(); let read = crawler::seed_completed_at(&pool, "target") .await .unwrap() .expect("marker present after mark"); // RFC3339 round-trip is millisecond-precise on chrono::Utc; allow a // 1ms tolerance to absorb postgres jsonb whitespace canonicalization. let drift = (read - at).num_milliseconds().abs(); assert!(drift <= 1, "round-trip drift: {drift}ms (at={at}, read={read})"); } #[sqlx::test(migrations = "./migrations")] async fn mark_seed_completed_overwrites_previous_value(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let first = Utc::now() - chrono::Duration::hours(1); let second = Utc::now(); crawler::mark_seed_completed(&pool, "target", first) .await .unwrap(); crawler::mark_seed_completed(&pool, "target", second) .await .unwrap(); let read = crawler::seed_completed_at(&pool, "target") .await .unwrap() .expect("marker present"); let drift = (read - second).num_milliseconds().abs(); assert!(drift <= 1, "should reflect the latest mark, not the first"); } #[sqlx::test(migrations = "./migrations")] async fn seed_completed_is_per_source(pool: PgPool) { // Two sources, only one is marked complete. The other must still // report None — the key is namespaced by source_id. crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); crawler::ensure_source(&pool, "other", "O", "https://y.example") .await .unwrap(); crawler::mark_seed_completed(&pool, "target", Utc::now()) .await .unwrap(); assert!(crawler::seed_completed_at(&pool, "target") .await .unwrap() .is_some()); assert!(crawler::seed_completed_at(&pool, "other") .await .unwrap() .is_none()); }