//! Integration tests for the per-source recovery flag: //! `mark_run_started` / `mark_run_completed` / `last_run_completed_cleanly` //! round-trip via the `crawler_state` table. //! //! End-to-end pipeline behavior (a crashed run forcing a recovery sweep //! on the next tick) requires a real `chromiumoxide::Browser` to drive //! the walker, so that path is covered by `crawler_browser_smoke.rs`. //! The pure stop-condition logic itself is unit-tested in //! `crawler::pipeline::tests`. use mangalord::repo::crawler; use sqlx::PgPool; #[sqlx::test(migrations = "./migrations")] async fn defaults_to_clean_when_no_marker(pool: PgPool) { // First-ever run semantics: absence of the key must NOT trigger a // recovery walk on a virgin DB. Treat missing as "previous run // completed cleanly" so the first tick can take the early-stop path. crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); let clean = crawler::last_run_completed_cleanly(&pool, "target") .await .unwrap(); assert!(clean, "absent marker must read as clean"); } #[sqlx::test(migrations = "./migrations")] async fn mark_run_started_flips_to_false(pool: PgPool) { crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); crawler::mark_run_started(&pool, "target").await.unwrap(); let clean = crawler::last_run_completed_cleanly(&pool, "target") .await .unwrap(); assert!(!clean, "after mark_run_started, flag must read false"); } #[sqlx::test(migrations = "./migrations")] async fn started_then_completed_round_trips_to_clean(pool: PgPool) { // Steady-state: a run starts (flag → false) and exits cleanly // (flag → true). The next tick should see "clean" and apply the // normal stop condition. crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); crawler::mark_run_started(&pool, "target").await.unwrap(); crawler::mark_run_completed(&pool, "target").await.unwrap(); let clean = crawler::last_run_completed_cleanly(&pool, "target") .await .unwrap(); assert!( clean, "after start → complete the flag must round-trip to clean" ); } #[sqlx::test(migrations = "./migrations")] async fn flag_is_per_source(pool: PgPool) { // Two sources, only one is mid-run. The other must still report // clean — the crawler_state key is namespaced by source_id. crawler::ensure_source(&pool, "target", "T", "https://x.example") .await .unwrap(); crawler::ensure_source(&pool, "other", "O", "https://y.example") .await .unwrap(); crawler::mark_run_started(&pool, "target").await.unwrap(); assert!( !crawler::last_run_completed_cleanly(&pool, "target") .await .unwrap(), "target is mid-run" ); assert!( crawler::last_run_completed_cleanly(&pool, "other") .await .unwrap(), "other source is untouched and reads clean" ); }