Mangalord/backend/tests/repo_chapter.rs

//! Integration tests for `repo::chapter` — focused on
//! `dispatch_target`, the resolver the daemon's chapter dispatcher
//! uses to look up the URL it needs to hand to
//! `content::sync_chapter_content`.
//!
//! The query must:
//! 1. Skip `chapter_sources` rows where `dropped_at IS NOT NULL` —
//!    otherwise a soft-dropped source URL is dispatched as if live and
//!    burns the chapter's retry budget against guaranteed transients.
//! 2. Order the remaining rows by `last_seen_at DESC` so the freshest
//!    surviving source is the one we'll fetch from.
//!
//! The fix lives in `backend/src/repo/chapter.rs:dispatch_target`. The
//! enqueue queries at `pipeline.rs:381` and `:435` already filter on
//! `cs.dropped_at IS NULL`; this brings the resolver into line.

use mangalord::crawler::source::{SourceChapterRef, SourceManga};
use mangalord::repo::{
    chapter::dispatch_target,
    crawler::{ensure_source, sync_manga_chapters, upsert_manga_from_source},
};
use sqlx::PgPool;
use uuid::Uuid;

/// Build a bare-bones [`SourceManga`] fixture.
///
/// Only the source key, title and metadata hash come from the arguments;
/// every list field is empty and every optional field is `None`.
fn sample_manga(key: &str, title: &str, hash: &str) -> SourceManga {
    let source_manga_key = key.to_owned();
    let title = title.to_owned();
    let metadata_hash = hash.to_owned();

    SourceManga {
        source_manga_key,
        title,
        metadata_hash,
        // Nothing below is needed by the tests, so leave it all empty.
        alternative_titles: Vec::new(),
        authors: Vec::new(),
        genres: Vec::new(),
        tags: Vec::new(),
        chapters: Vec::new(),
        status: None,
        summary: None,
        cover_url: None,
    }
}

/// Create the two-source fixture shared by the tests in this file.
///
/// Registers the `target` and `mirror` sources, upserts one manga under
/// `target`, and syncs a single chapter for it. The chapter's `target`
/// row in `chapter_sources` is then backdated by two days and a `mirror`
/// row stamped `NOW()` is inserted for the same chapter, so the two rows
/// differ in `last_seen_at` and an `ORDER BY cs.last_seen_at DESC` can
/// tell "freshest live source" apart from "any live source."
///
/// Returns `(chapter_id, old_url, new_url)`: the chapter id, the URL
/// synced through `target`, and the URL of the fresher `mirror` row.
async fn seed_chapter_with_two_live_sources(pool: &PgPool) -> (Uuid, String, String) {
    const TARGET_URL: &str = "https://x.example/foo/1/old";
    const MIRROR_URL: &str = "https://m.example/foo/1/mirror";

    // Two separate sources attached to one chapter mirrors the real
    // multi-source layout: after migration 0017 each source row is keyed
    // by (source_id, chapter_id).
    for (source_id, name, base_url) in [
        ("target", "T", "https://x.example"),
        ("mirror", "Mirror", "https://m.example"),
    ] {
        ensure_source(pool, source_id, name, base_url)
            .await
            .unwrap();
    }

    let manga = sample_manga("foo", "Foo Manga", "hash-1");
    let upserted = upsert_manga_from_source(pool, "target", "https://x.example/foo", &manga)
        .await
        .unwrap();

    let chapter_refs = vec![SourceChapterRef {
        source_chapter_key: "1".into(),
        number: 1,
        title: Some("Ch.1".into()),
        url: TARGET_URL.into(),
    }];
    sync_manga_chapters(pool, "target", upserted.manga_id, &chapter_refs)
        .await
        .unwrap();

    // Look up the id of the chapter that the sync above just created.
    let chapter_row: (Uuid,) = sqlx::query_as(
        "SELECT c.id FROM chapters c \
           JOIN chapter_sources cs ON cs.chapter_id = c.id \
          WHERE cs.source_chapter_key = '1' AND cs.source_id = 'target'",
    )
    .fetch_one(pool)
    .await
    .unwrap();
    let chapter_id = chapter_row.0;

    // Age the `target` row so it is clearly the staler of the two.
    sqlx::query(
        "UPDATE chapter_sources \
            SET last_seen_at = NOW() - INTERVAL '2 days' \
          WHERE chapter_id = $1 AND source_id = 'target'",
    )
    .bind(chapter_id)
    .execute(pool)
    .await
    .unwrap();

    // Attach the fresher `mirror` row to the same chapter.
    let mirror_url = MIRROR_URL.to_string();
    sqlx::query(
        "INSERT INTO chapter_sources \
            (source_id, chapter_id, source_chapter_key, source_url, last_seen_at) \
         VALUES ('mirror', $1, '1', $2, NOW())",
    )
    .bind(chapter_id)
    .bind(&mirror_url)
    .execute(pool)
    .await
    .unwrap();

    (chapter_id, TARGET_URL.to_string(), mirror_url)
}

/// With both seeded sources live, the resolver must hand back the URL of
/// the row with the newest `last_seen_at` (the `mirror` row).
#[sqlx::test(migrations = "./migrations")]
async fn dispatch_target_prefers_most_recent_live_source(pool: PgPool) {
    let (chapter_id, _stale_url, freshest_url) = seed_chapter_with_two_live_sources(&pool).await;

    let target = dispatch_target(&pool, chapter_id)
        .await
        .unwrap()
        .expect("two live sources should yield a dispatch target");

    // Only the URL matters here; the other tuple fields are ignored.
    let (_manga_id, source_url, _title, _number) = target;
    assert_eq!(
        source_url, freshest_url,
        "ORDER BY last_seen_at DESC LIMIT 1 must return the freshest source"
    );
}

/// When the freshest source is soft-dropped, the resolver must fall back
/// to the older source that is still live.
///
/// Checks three things: the soft-drop touched exactly one row (so the
/// test cannot pass vacuously), the returned URL is not the dropped one,
/// and the returned URL is exactly the surviving `target` URL.
#[sqlx::test(migrations = "./migrations")]
async fn dispatch_target_skips_dropped_sources(pool: PgPool) {
    let (chapter_id, old_url, new_url) =
        seed_chapter_with_two_live_sources(&pool).await;

    // Soft-drop the fresher row. The dispatcher must now return the
    // *older* still-live row instead of the dropped one.
    let dropped = sqlx::query(
        "UPDATE chapter_sources SET dropped_at = NOW() WHERE source_url = $1",
    )
    .bind(&new_url)
    .execute(&pool)
    .await
    .unwrap();
    // Guard against a silent no-op: if the UPDATE matched nothing, the
    // rest of the test would be exercising the two-live-sources case.
    assert_eq!(
        dropped.rows_affected(),
        1,
        "exactly one chapter_sources row should have been soft-dropped"
    );

    let row = dispatch_target(&pool, chapter_id).await.unwrap();
    let (_manga_id, source_url, _title, _number) =
        row.expect("a single live source should still yield a dispatch target");
    assert!(
        source_url != new_url,
        "dispatch_target must not return a dropped source"
    );
    // Not returning the dropped URL is necessary but not sufficient:
    // pin the result to the one source that is still live.
    assert_eq!(
        source_url, old_url,
        "dispatch_target must fall back to the surviving live source"
    );
}

/// Once every `chapter_sources` row for the chapter is soft-dropped,
/// there is nothing left to dispatch and the resolver must yield `None`.
#[sqlx::test(migrations = "./migrations")]
async fn dispatch_target_returns_none_when_only_dropped_sources_remain(
    pool: PgPool,
) {
    // The URLs are irrelevant here; only the chapter id is needed.
    let (chapter_id, ..) = seed_chapter_with_two_live_sources(&pool).await;

    // Soft-drop both seeded rows in a single statement.
    let drop_every_source =
        sqlx::query("UPDATE chapter_sources SET dropped_at = NOW() WHERE chapter_id = $1")
            .bind(chapter_id);
    drop_every_source.execute(&pool).await.unwrap();

    let resolved = dispatch_target(&pool, chapter_id).await.unwrap();
    assert!(
        resolved.is_none(),
        "every source is dropped — dispatch_target must return None"
    );
}