Mangalord/backend/tests/crawler_recovery_flag.rs

//! Integration tests for the per-source recovery flag:
//! `mark_run_started` / `mark_run_completed` / `last_run_completed_cleanly`
//! round-trip via the `crawler_state` table.
//!
//! End-to-end pipeline behavior (a crashed run forcing a recovery sweep
//! on the next tick) requires a real `chromiumoxide::Browser` to drive
//! the walker, so that path is covered by `crawler_browser_smoke.rs`.
//! The pure stop-condition logic itself is unit-tested in
//! `crawler::pipeline::tests`.

use mangalord::repo::crawler;
use sqlx::PgPool;

/// A source with no recorded run marker must read as "clean".
///
/// First-ever run semantics: on a virgin DB the missing key must not
/// force a recovery walk, so absence is treated the same as "previous
/// run completed cleanly" and the first tick can take the early-stop path.
#[sqlx::test(migrations = "./migrations")]
async fn defaults_to_clean_when_no_marker(pool: PgPool) {
    let source_id = "target";

    // Register the source only; no start/complete marker is ever written.
    crawler::ensure_source(&pool, source_id, "T", "https://x.example")
        .await
        .unwrap();

    assert!(
        crawler::last_run_completed_cleanly(&pool, source_id)
            .await
            .unwrap(),
        "absent marker must read as clean"
    );
}

/// Marking a run as started must make the flag read "not clean" until
/// something marks it completed.
#[sqlx::test(migrations = "./migrations")]
async fn mark_run_started_flips_to_false(pool: PgPool) {
    let source_id = "target";

    crawler::ensure_source(&pool, source_id, "T", "https://x.example")
        .await
        .unwrap();

    // Start a run and never complete it.
    crawler::mark_run_started(&pool, source_id).await.unwrap();

    assert!(
        !crawler::last_run_completed_cleanly(&pool, source_id)
            .await
            .unwrap(),
        "after mark_run_started, flag must read false"
    );
}

/// Steady-state round trip: start (flag → false) followed by a clean
/// exit (flag → true) must leave the flag reading "clean", so the next
/// tick applies the normal stop condition.
#[sqlx::test(migrations = "./migrations")]
async fn started_then_completed_round_trips_to_clean(pool: PgPool) {
    let source_id = "target";

    crawler::ensure_source(&pool, source_id, "T", "https://x.example")
        .await
        .unwrap();

    // Full lifecycle of one well-behaved run.
    crawler::mark_run_started(&pool, source_id).await.unwrap();
    crawler::mark_run_completed(&pool, source_id).await.unwrap();

    let flag_after_round_trip = crawler::last_run_completed_cleanly(&pool, source_id)
        .await
        .unwrap();
    assert!(flag_after_round_trip, "after start → complete the flag must round-trip to clean");
}

/// The flag is tracked independently per source: with two sources and
/// only one of them mid-run, the other must still report clean (the
/// crawler_state key is namespaced by source_id).
#[sqlx::test(migrations = "./migrations")]
async fn flag_is_per_source(pool: PgPool) {
    let running = "target";
    let idle = "other";

    crawler::ensure_source(&pool, running, "T", "https://x.example")
        .await
        .unwrap();
    crawler::ensure_source(&pool, idle, "O", "https://y.example")
        .await
        .unwrap();

    // Only the first source starts a run.
    crawler::mark_run_started(&pool, running).await.unwrap();

    let running_is_clean = crawler::last_run_completed_cleanly(&pool, running)
        .await
        .unwrap();
    assert!(!running_is_clean, "target is mid-run");

    let idle_is_clean = crawler::last_run_completed_cleanly(&pool, idle)
        .await
        .unwrap();
    assert!(idle_is_clean, "other source is untouched and reads clean");
}