Files
Mangalord/backend/tests/crawler_dead_jobs.rs
MechaCat02 e02d125f51 feat(crawler): live cover + chapter-content observability with realtime page counts
Extends the live dashboard so an operator can see exactly what's being
fetched, in realtime:

- Chapters being crawled now are tracked in the status as `active_chapters`
  (manga title · ch.N) with a live page counter that climbs per stored page
  (set_chapter_pages, pushed via the existing watch→SSE). The dispatcher
  registers each via an RAII ChapterGuard (sync Mutex) that removes the
  entry on completion, panic, or timeout-drop — replacing the old per-worker
  slot model.
- Covers: status now carries the cover being fetched now (`current_cover`,
  set around download_and_store_cover in both the metadata pass and backfill)
  and a `covers_queued` backlog count; CoverBackfill phase gains index/total.
- Two paginated backlog endpoints (fetched on demand, auto-refreshed when the
  live counts change): GET /admin/crawler/active-jobs (which chapters of which
  mangas are queued/running) and GET /admin/crawler/covers (mangas missing a
  cover). repo: list_active_jobs, list_missing_cover_mangas, count_missing_covers.
- dispatch_target now also returns manga title + chapter number.

Frontend: the crawler page replaces the Workers table with an Active-chapters
table (live page bars), adds a current-cover line + covers-queued figure, and
two backlog sections (Queued chapters / Queued covers) with search + Pager,
auto-refetched via $effect on the live counts.

Tests: status guard/page + cover unit tests; repo list/count tests; endpoint
tests; frontend api tests. Version 0.53.1 -> 0.54.0.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 20:41:51 +02:00

305 lines
11 KiB
Rust

//! Integration tests for the admin dashboard queries in `repo::crawler`:
//! listing dead jobs with manga/chapter context, the scoped requeue
//! (all / per-manga / per-chapter / single job), the active-job and
//! missing-cover backlog listings, and the per-state job counts.
use mangalord::repo::crawler::{self, RequeueScope};
use serde_json::json;
use sqlx::PgPool;
use uuid::Uuid;
/// Creates a cover-less manga that is linked to the `target` source and
/// returns the new manga's id.
///
/// Three rows are written: the manga itself (`cover_image_path` is NULL),
/// the `target` source (left alone when it already exists), and a
/// `manga_sources` row linking the two under the key `k-<manga id>`.
async fn seed_missing_cover(pool: &PgPool, title: &str) -> Uuid {
    let id = Uuid::new_v4();
    let source_key = format!("k-{id}");

    let insert_manga =
        sqlx::query("INSERT INTO mangas (id, title, cover_image_path) VALUES ($1, $2, NULL)")
            .bind(id)
            .bind(title);
    insert_manga.execute(pool).await.unwrap();

    // Several calls within one test share the same source row, hence the
    // ON CONFLICT clause.
    let insert_source = sqlx::query("INSERT INTO sources (id, name, base_url) VALUES ('target', 'T', 'http://x') ON CONFLICT DO NOTHING");
    insert_source.execute(pool).await.unwrap();

    let insert_link = sqlx::query(
        "INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url) \
         VALUES ('target', $1, $2, 'http://x/m')",
    )
    .bind(source_key)
    .bind(id);
    insert_link.execute(pool).await.unwrap();

    id
}
/// Inserts a fresh manga called `title` together with one chapter numbered
/// `number`.
///
/// Returns `(manga_id, chapter_id)`; both ids are newly generated UUIDs, so
/// every call creates a new manga even when the title repeats.
async fn seed_chapter(pool: &PgPool, title: &str, number: i32) -> (Uuid, Uuid) {
    let ids = (Uuid::new_v4(), Uuid::new_v4());
    let (manga, chapter) = ids;

    // Manga first: the chapter row stores its id.
    let insert_manga = sqlx::query("INSERT INTO mangas (id, title) VALUES ($1, $2)")
        .bind(manga)
        .bind(title);
    insert_manga.execute(pool).await.unwrap();

    let insert_chapter =
        sqlx::query("INSERT INTO chapters (id, manga_id, number) VALUES ($1, $2, $3)")
            .bind(chapter)
            .bind(manga)
            .bind(number);
    insert_chapter.execute(pool).await.unwrap();

    ids
}
/// Adds a `sync_chapter_content` job for `chapter_id` to `crawler_jobs` and
/// returns the generated job id.
///
/// `state` and `attempts` are stored as given; `last_error` is always the
/// literal `'boom'`, whatever the state.
async fn insert_job(pool: &PgPool, chapter_id: Uuid, state: &str, attempts: i32) -> Uuid {
    let job_id = Uuid::new_v4();

    let insert = sqlx::query(
        "INSERT INTO crawler_jobs (id, payload, state, attempts, last_error) \
         VALUES ($1, $2, $3, $4, 'boom')",
    )
    .bind(job_id)
    .bind(json!({
        "kind": "sync_chapter_content",
        "source_id": "target",
        "chapter_id": chapter_id,
        "source_chapter_key": "k",
    }))
    .bind(state)
    .bind(attempts);
    insert.execute(pool).await.unwrap();

    job_id
}
/// Reads back the current `state` column of the job with the given id.
///
/// Panics when the query fails or no row with that id exists.
async fn state_of(pool: &PgPool, id: Uuid) -> String {
    let state: String = sqlx::query_scalar("SELECT state FROM crawler_jobs WHERE id = $1")
        .bind(id)
        .fetch_one(pool)
        .await
        .unwrap();
    state
}
/// `list_dead_jobs` reports only jobs in the `dead` state, together with the
/// manga title, chapter number, attempt count and last error of each.
#[sqlx::test(migrations = "./migrations")]
async fn list_dead_jobs_returns_context_and_total(pool: PgPool) {
    let (_, dead_chapter) = seed_chapter(&pool, "Naruto", 700).await;
    insert_job(&pool, dead_chapter, "dead", 5).await;

    // Control: a pending job has to be left out of both the rows and the total.
    let (_, pending_chapter) = seed_chapter(&pool, "Bleach", 1).await;
    insert_job(&pool, pending_chapter, "pending", 0).await;

    let listing = crawler::list_dead_jobs(&pool, None, 50, 0).await;
    let (items, total) = listing.unwrap();

    assert_eq!(total, 1);
    assert_eq!(items.len(), 1);

    let only = &items[0];
    assert_eq!(only.manga_title.as_deref(), Some("Naruto"));
    assert_eq!(only.chapter_number, Some(700));
    assert_eq!(only.attempts, 5);
    assert_eq!(only.last_error.as_deref(), Some("boom"));
}
/// The search argument of `list_dead_jobs` narrows the result by manga
/// title; the lowercase needle `piece` matches the title "One Piece".
#[sqlx::test(migrations = "./migrations")]
async fn list_dead_jobs_filters_by_title_search(pool: PgPool) {
    // Two dead jobs, only one of which belongs to a matching title.
    let (_, naruto_chapter) = seed_chapter(&pool, "Naruto", 700).await;
    insert_job(&pool, naruto_chapter, "dead", 5).await;
    let (_, one_piece_chapter) = seed_chapter(&pool, "One Piece", 1).await;
    insert_job(&pool, one_piece_chapter, "dead", 5).await;

    let listing = crawler::list_dead_jobs(&pool, Some("piece"), 50, 0).await;
    let (items, total) = listing.unwrap();

    assert_eq!(total, 1);
    assert_eq!(items[0].manga_title.as_deref(), Some("One Piece"));
}
/// `RequeueScope::All` flips every dead job back to `pending` and zeroes
/// the attempt counter.
#[sqlx::test(migrations = "./migrations")]
async fn requeue_all_resets_dead_jobs_to_pending(pool: PgPool) {
    let (_, first_chapter) = seed_chapter(&pool, "A", 1).await;
    let (_, second_chapter) = seed_chapter(&pool, "B", 1).await;
    let first_job = insert_job(&pool, first_chapter, "dead", 5).await;
    let second_job = insert_job(&pool, second_chapter, "dead", 5).await;

    let revived = crawler::requeue_dead_jobs(&pool, RequeueScope::All).await;
    assert_eq!(revived.unwrap(), 2);

    for job in [first_job, second_job] {
        assert_eq!(state_of(&pool, job).await, "pending");
    }

    // Seeded with 5 attempts; the counter is checked on the first job only.
    let attempts: i32 = sqlx::query_scalar("SELECT attempts FROM crawler_jobs WHERE id = $1")
        .bind(first_job)
        .fetch_one(&pool)
        .await
        .unwrap();
    assert_eq!(attempts, 0, "attempts reset on requeue");
}
/// `RequeueScope::Manga` revives the dead jobs of the named manga and leaves
/// the dead job of another manga alone.
#[sqlx::test(migrations = "./migrations")]
async fn requeue_by_manga_scopes_to_that_manga(pool: PgPool) {
    let (target_manga, target_chapter) = seed_chapter(&pool, "A", 1).await;
    let (_, other_chapter) = seed_chapter(&pool, "B", 1).await;
    let target_job = insert_job(&pool, target_chapter, "dead", 5).await;
    let other_job = insert_job(&pool, other_chapter, "dead", 5).await;

    let revived = crawler::requeue_dead_jobs(&pool, RequeueScope::Manga(target_manga)).await;
    assert_eq!(revived.unwrap(), 1);

    assert_eq!(state_of(&pool, target_job).await, "pending");
    assert_eq!(state_of(&pool, other_job).await, "dead", "other manga untouched");
}
#[sqlx::test(migrations = "./migrations")]
async fn requeue_by_chapter_scopes_to_that_chapter(pool: PgPool) {
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
let (_m2, c2) = seed_chapter(&pool, "A", 2).await;
let j1 = insert_job(&pool, c1, "dead", 5).await;
let j2 = insert_job(&pool, c2, "dead", 5).await;
let n = crawler::requeue_dead_jobs(&pool, RequeueScope::Chapter(c1))
.await
.unwrap();
assert_eq!(n, 1);
assert_eq!(state_of(&pool, j1).await, "pending");
assert_eq!(state_of(&pool, j2).await, "dead", "other chapter untouched");
}
/// `RequeueScope::Job` revives exactly the job whose id is given.
#[sqlx::test(migrations = "./migrations")]
async fn requeue_single_job(pool: PgPool) {
    let (_, chosen_chapter) = seed_chapter(&pool, "A", 1).await;
    let (_, bystander_chapter) = seed_chapter(&pool, "B", 1).await;
    let chosen = insert_job(&pool, chosen_chapter, "dead", 5).await;
    let bystander = insert_job(&pool, bystander_chapter, "dead", 5).await;

    let revived = crawler::requeue_dead_jobs(&pool, RequeueScope::Job(chosen)).await;
    assert_eq!(revived.unwrap(), 1);

    assert_eq!(state_of(&pool, chosen).await, "pending");
    assert_eq!(state_of(&pool, bystander).await, "dead");
}
/// A dead job is left dead when its chapter already has a pending job.
#[sqlx::test(migrations = "./migrations")]
async fn requeue_skips_dead_when_live_job_exists_for_same_chapter(pool: PgPool) {
    let (_, chapter) = seed_chapter(&pool, "A", 1).await;
    let dead_job = insert_job(&pool, chapter, "dead", 5).await;
    // The live counterpart: a pending job for the very same chapter.
    insert_job(&pool, chapter, "pending", 0).await;

    let revived = crawler::requeue_dead_jobs(&pool, RequeueScope::All).await;

    assert_eq!(
        revived.unwrap(),
        0,
        "must not resurrect a dead job that has a live counterpart"
    );
    assert_eq!(state_of(&pool, dead_job).await, "dead");
}
/// Regression test: when one chapter has two dead jobs, a requeue must
/// revive only one of them instead of failing.
///
/// Flipping both to `pending` in a single statement would violate the
/// partial unique dedup index and abort the whole requeue. The check is
/// repeated for the `All`, `Manga` and `Chapter` scopes.
#[sqlx::test(migrations = "./migrations")]
async fn requeue_with_two_dead_jobs_for_one_chapter_revives_one_not_500(pool: PgPool) {
    let (manga_id, chapter) = seed_chapter(&pool, "A", 1).await;
    let older = insert_job(&pool, chapter, "dead", 5).await;
    let newer = insert_job(&pool, chapter, "dead", 5).await;

    // Back-date `older` by an hour so that `newer` is unambiguously newer.
    let backdate =
        sqlx::query("UPDATE crawler_jobs SET updated_at = now() - interval '1 hour' WHERE id = $1")
            .bind(older);
    backdate.execute(&pool).await.unwrap();

    let scopes = [
        RequeueScope::All,
        RequeueScope::Manga(manga_id),
        RequeueScope::Chapter(chapter),
    ];
    for scope in scopes {
        // The previous round left `newer` pending; put both jobs back to dead.
        let reset = sqlx::query("UPDATE crawler_jobs SET state = 'dead' WHERE id = ANY($1)")
            .bind(vec![older, newer]);
        reset.execute(&pool).await.unwrap();

        let revived = crawler::requeue_dead_jobs(&pool, scope)
            .await
            .expect("requeue must not error on duplicate dead jobs");
        assert_eq!(revived, 1, "exactly one dead job per chapter is revived");

        // The newest job is the one that comes back; the other stays dead.
        assert_eq!(state_of(&pool, newer).await, "pending");
        assert_eq!(state_of(&pool, older).await, "dead");
    }
}
/// `list_active_jobs` returns pending and running jobs, with the running
/// job ahead of the pending one, and leaves dead jobs out.
#[sqlx::test(migrations = "./migrations")]
async fn list_active_jobs_returns_pending_and_running_running_first(pool: PgPool) {
    let (_, pending_chapter) = seed_chapter(&pool, "Naruto", 700).await;
    let (_, running_chapter) = seed_chapter(&pool, "Bleach", 10).await;
    insert_job(&pool, pending_chapter, "pending", 0).await;
    insert_job(&pool, running_chapter, "running", 1).await;

    // Control: a dead job must NOT appear. (Only a dead job is seeded here;
    // no completed-job row is inserted.)
    let (_, dead_chapter) = seed_chapter(&pool, "Gone", 1).await;
    insert_job(&pool, dead_chapter, "dead", 5).await;

    let listing = crawler::list_active_jobs(&pool, None, 50, 0).await;
    let (items, total) = listing.unwrap();

    assert_eq!(total, 2);
    assert_eq!(items.len(), 2);

    // The running job was inserted second but is listed first.
    let (head, tail) = (&items[0], &items[1]);
    assert_eq!(head.state, "running");
    assert_eq!(head.manga_title.as_deref(), Some("Bleach"));
    assert_eq!(tail.state, "pending");
    assert_eq!(tail.chapter_number, Some(700));
}
/// The search argument of `list_active_jobs` narrows the result by manga
/// title; the lowercase needle `piece` matches the title "One Piece".
#[sqlx::test(migrations = "./migrations")]
async fn list_active_jobs_filters_by_title(pool: PgPool) {
    // Two pending jobs, only one of which belongs to a matching title.
    let (_, naruto_chapter) = seed_chapter(&pool, "Naruto", 1).await;
    let (_, one_piece_chapter) = seed_chapter(&pool, "One Piece", 1).await;
    insert_job(&pool, naruto_chapter, "pending", 0).await;
    insert_job(&pool, one_piece_chapter, "pending", 0).await;

    let listing = crawler::list_active_jobs(&pool, Some("piece"), 50, 0).await;
    let (items, total) = listing.unwrap();

    assert_eq!(total, 1);
    assert_eq!(items[0].manga_title.as_deref(), Some("One Piece"));
}
/// `count_missing_covers` and `list_missing_cover_mangas` report exactly the
/// mangas seeded without a cover, and the list honours the title filter.
#[sqlx::test(migrations = "./migrations")]
async fn missing_covers_count_and_list(pool: PgPool) {
    seed_missing_cover(&pool, "Naruto").await;
    seed_missing_cover(&pool, "Bleach").await;

    // A manga WITH a cover must not be counted.
    let with_cover = Uuid::new_v4();
    sqlx::query("INSERT INTO mangas (id, title, cover_image_path) VALUES ($1, 'Done', 'k.jpg')")
        .bind(with_cover)
        .execute(&pool)
        .await
        .unwrap();
    // Give the covered manga the same `target` source link as the cover-less
    // ones (the source row itself was created by the `seed_missing_cover`
    // calls above). The cover is then the only difference, so its exclusion
    // cannot be explained by a missing source link.
    sqlx::query(
        "INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url) \
         VALUES ('target', $1, $2, 'http://x/m')",
    )
    .bind(format!("k-{with_cover}"))
    .bind(with_cover)
    .execute(&pool)
    .await
    .unwrap();

    assert_eq!(crawler::count_missing_covers(&pool).await.unwrap(), 2);

    // Unfiltered listing: both cover-less mangas.
    let (items, total) = crawler::list_missing_cover_mangas(&pool, None, 50, 0)
        .await
        .unwrap();
    assert_eq!(total, 2);
    assert_eq!(items.len(), 2);

    // Filtered listing: the lowercase prefix `naru` matches "Naruto" only.
    let (items, total) = crawler::list_missing_cover_mangas(&pool, Some("naru"), 50, 0)
        .await
        .unwrap();
    assert_eq!(total, 1);
    assert_eq!(items[0].manga_title, "Naruto");
}
/// `job_state_counts` returns the number of pending, running and dead jobs,
/// in that order; a state with no jobs is reported as zero.
#[sqlx::test(migrations = "./migrations")]
async fn job_state_counts_groups_by_state(pool: PgPool) {
    // One pending job and two dead ones, each on its own chapter; nothing
    // is running.
    let (_, pending_chapter) = seed_chapter(&pool, "A", 1).await;
    let (_, dead_chapter_one) = seed_chapter(&pool, "B", 1).await;
    let (_, dead_chapter_two) = seed_chapter(&pool, "C", 1).await;
    insert_job(&pool, pending_chapter, "pending", 0).await;
    insert_job(&pool, dead_chapter_one, "dead", 5).await;
    insert_job(&pool, dead_chapter_two, "dead", 5).await;

    let counts = crawler::job_state_counts(&pool).await;
    let (n_pending, n_running, n_dead) = counts.unwrap();

    assert_eq!(n_pending, 1);
    assert_eq!(n_running, 0);
    assert_eq!(n_dead, 2);
}