Files
Mangalord/backend/tests/api_admin_crawler.rs
MechaCat02 e02d125f51 feat(crawler): live cover + chapter-content observability with realtime page counts
Extends the live dashboard so an operator can see exactly what's being
fetched, in realtime:

- Chapters being crawled now are tracked in the status as `active_chapters`
  (manga title · ch.N) with a live page counter that climbs per stored page
  (set_chapter_pages, pushed via the existing watch→SSE). The dispatcher
  registers each via an RAII ChapterGuard (sync Mutex) that removes the
  entry on completion, panic, or timeout-drop — replacing the old per-worker
  slot model.
- Covers: status now carries the cover being fetched now (`current_cover`,
  set around download_and_store_cover in both the metadata pass and backfill)
  and a `covers_queued` backlog count; CoverBackfill phase gains index/total.
- Two paginated backlog endpoints (fetched on demand, auto-refreshed when the
  live counts change): GET /admin/crawler/active-jobs (which chapters of which
  mangas are queued/running) and GET /admin/crawler/covers (mangas missing a
  cover). repo: list_active_jobs, list_missing_cover_mangas, count_missing_covers.
- dispatch_target now also returns manga title + chapter number.

Frontend: the crawler page replaces the Workers table with an Active-chapters
table (live page bars), adds a current-cover line + covers-queued figure, and
two backlog sections (Queued chapters / Queued covers) with search + Pager,
auto-refetched via $effect on the live counts.

Tests: status guard/page + cover unit tests; repo list/count tests; endpoint
tests; frontend api tests. Version 0.53.1 -> 0.54.0.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 20:41:51 +02:00

345 lines
11 KiB
Rust

//! Integration tests for the admin crawler observability/control API.
//!
//! The default test harness wires `AppState.crawler = None` (no daemon),
//! so the *control* endpoints return 503 while the endpoints that work
//! purely off the DB (status shell, active-jobs and covers backlog lists,
//! dead-jobs list/requeue) still function.
//! This is exactly the production "daemon disabled" posture.
mod common;
use std::time::Duration;
use axum::http::StatusCode;
use axum::Router;
use http_body_util::BodyExt;
use serde_json::json;
use sqlx::PgPool;
use tower::ServiceExt;
use uuid::Uuid;
use common::{body_json, get, get_with_cookie, post_json_with_cookie, register_user, harness};
/// Register a fresh user through the app, flag that account as admin
/// directly in the database, and hand back the cookie returned by
/// `register_user`.
async fn seed_admin(pool: &PgPool, app: &Router) -> String {
    let (name, session_cookie) = register_user(app).await;

    // The user was just registered, so both the query and the lookup must succeed.
    let lookup = mangalord::repo::user::find_by_username(pool, &name).await;
    let account = lookup.unwrap().unwrap();

    let promoted = mangalord::repo::user::set_is_admin_unchecked(pool, account.id, true).await;
    promoted.unwrap();

    session_cookie
}
/// Insert a manga titled `title` with a single chapter (number 1) and a
/// `dead` chapter-content crawler job (5 attempts, last error `boom`)
/// pointing at that chapter. Returns the id of the inserted job.
async fn seed_dead_job(pool: &PgPool, title: &str) -> Uuid {
    let manga = Uuid::new_v4();
    let chapter = Uuid::new_v4();

    let insert_manga = sqlx::query("INSERT INTO mangas (id, title) VALUES ($1, $2)")
        .bind(manga)
        .bind(title);
    insert_manga.execute(pool).await.unwrap();

    let insert_chapter =
        sqlx::query("INSERT INTO chapters (id, manga_id, number) VALUES ($1, $2, 1)")
            .bind(chapter)
            .bind(manga);
    insert_chapter.execute(pool).await.unwrap();

    let dead_job = Uuid::new_v4();
    // Job payload referencing the chapter seeded above.
    let payload = json!({
        "kind": "sync_chapter_content",
        "source_id": "target",
        "chapter_id": chapter,
        "source_chapter_key": "k",
    });
    let insert_job = sqlx::query(
        "INSERT INTO crawler_jobs (id, payload, state, attempts, last_error) \
         VALUES ($1, $2, 'dead', 5, 'boom')",
    )
    .bind(dead_job)
    .bind(payload);
    insert_job.execute(pool).await.unwrap();

    dead_job
}
/// Insert a manga titled `title`, one chapter (number 1) and a
/// chapter-content crawler job for that chapter in the given `state`
/// (the tests use `'pending'` and `'running'`).
async fn seed_job(pool: &PgPool, title: &str, state: &str) {
    let manga = Uuid::new_v4();
    let chapter = Uuid::new_v4();

    let insert_manga = sqlx::query("INSERT INTO mangas (id, title) VALUES ($1, $2)")
        .bind(manga)
        .bind(title);
    insert_manga.execute(pool).await.unwrap();

    let insert_chapter =
        sqlx::query("INSERT INTO chapters (id, manga_id, number) VALUES ($1, $2, 1)")
            .bind(chapter)
            .bind(manga);
    insert_chapter.execute(pool).await.unwrap();

    // Job payload referencing the chapter seeded above.
    let payload = json!({
        "kind": "sync_chapter_content",
        "source_id": "target",
        "chapter_id": chapter,
        "source_chapter_key": "k",
    });
    let insert_job =
        sqlx::query("INSERT INTO crawler_jobs (id, payload, state) VALUES ($1, $2, $3)")
            .bind(Uuid::new_v4())
            .bind(payload)
            .bind(state);
    insert_job.execute(pool).await.unwrap();
}
/// Insert a manga titled `title` whose `cover_image_path` is NULL, make sure
/// the `target` source row exists, and link the manga to that source — i.e.
/// a manga that is queued for a cover fetch.
async fn seed_missing_cover(pool: &PgPool, title: &str) {
    let manga = Uuid::new_v4();

    let insert_manga =
        sqlx::query("INSERT INTO mangas (id, title, cover_image_path) VALUES ($1, $2, NULL)")
            .bind(manga)
            .bind(title);
    insert_manga.execute(pool).await.unwrap();

    // The source row is shared between calls, hence the conflict-tolerant insert.
    let ensure_source = sqlx::query("INSERT INTO sources (id, name, base_url) VALUES ('target','T','http://x') ON CONFLICT DO NOTHING");
    ensure_source.execute(pool).await.unwrap();

    // Derive the per-source key from the manga id.
    let source_key = format!("k-{manga}");
    let link = sqlx::query(
        "INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url) \
         VALUES ('target', $1, $2, 'http://x/m')",
    )
    .bind(source_key)
    .bind(manga);
    link.execute(pool).await.unwrap();
}
// Both backlog endpoints (`/active-jobs`, `/covers`) list the seeded rows for
// an admin and reject a logged-in non-admin with 403.
#[sqlx::test(migrations = "./migrations")]
async fn active_jobs_and_covers_lists_over_http(pool: PgPool) {
    seed_job(&pool, "Naruto", "pending").await;
    seed_job(&pool, "Bleach", "running").await;
    seed_missing_cover(&pool, "One Piece").await;
    let h = harness(pool.clone());
    let cookie = seed_admin(&pool, &h.app).await;

    // Queued/active chapters: one pending + one running job were seeded.
    let resp = h
        .app
        .clone()
        .oneshot(get_with_cookie("/api/v1/admin/crawler/active-jobs", &cookie))
        .await
        .unwrap();
    assert_eq!(resp.status(), StatusCode::OK);
    let body = body_json(resp).await;
    assert_eq!(body["page"]["total"], 2);

    // Queued covers: exactly the one manga seeded without a cover.
    let resp = h
        .app
        .clone()
        .oneshot(get_with_cookie("/api/v1/admin/crawler/covers", &cookie))
        .await
        .unwrap();
    assert_eq!(resp.status(), StatusCode::OK);
    let body = body_json(resp).await;
    assert_eq!(body["page"]["total"], 1);
    assert_eq!(body["items"][0]["manga_title"], "One Piece");

    // Both are admin-gated: check *each* endpoint with a non-admin cookie
    // (previously only active-jobs was verified).
    let (_u, plain) = register_user(&h.app).await;
    for uri in [
        "/api/v1/admin/crawler/active-jobs",
        "/api/v1/admin/crawler/covers",
    ] {
        let resp = h
            .app
            .clone()
            .oneshot(get_with_cookie(uri, &plain))
            .await
            .unwrap();
        assert_eq!(
            resp.status(),
            StatusCode::FORBIDDEN,
            "{uri} should be 403 for a non-admin"
        );
    }
}
// The status endpoint answers 401 without a session and 403 for a
// logged-in user who is not an admin.
#[sqlx::test(migrations = "./migrations")]
async fn get_status_requires_admin(pool: PgPool) {
    let h = harness(pool);

    // No cookie at all → 401.
    let anonymous = get("/api/v1/admin/crawler");
    let resp = h.app.clone().oneshot(anonymous).await.unwrap();
    assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);

    // Valid session, but not an admin → 403.
    let (_u, cookie) = register_user(&h.app).await;
    let as_user = get_with_cookie("/api/v1/admin/crawler", &cookie);
    let resp = h.app.clone().oneshot(as_user).await.unwrap();
    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
}
// With no daemon wired into the harness, the status endpoint still answers
// for an admin: daemon "disabled", browser "down", and the queue counts
// reflect the single dead job seeded into the DB.
#[sqlx::test(migrations = "./migrations")]
async fn get_status_reports_disabled_daemon_with_queue_counts(pool: PgPool) {
    seed_dead_job(&pool, "Naruto").await;
    let h = harness(pool.clone());
    let cookie = seed_admin(&pool, &h.app).await;

    let request = get_with_cookie("/api/v1/admin/crawler", &cookie);
    let resp = h.app.clone().oneshot(request).await.unwrap();
    assert_eq!(resp.status(), StatusCode::OK);

    let status = body_json(resp).await;
    assert_eq!(status["daemon"], "disabled");
    assert_eq!(status["queue"]["dead"], 1);
    assert_eq!(status["browser"], "down");
}
// Every daemon control endpoint must answer 503 for an admin when the
// harness has no crawler daemon.
#[sqlx::test(migrations = "./migrations")]
async fn control_endpoints_return_503_when_daemon_disabled(pool: PgPool) {
    let h = harness(pool.clone());
    let cookie = seed_admin(&pool, &h.app).await;

    let control_uris = [
        "/api/v1/admin/crawler/run",
        "/api/v1/admin/crawler/browser/restart",
        "/api/v1/admin/crawler/session/clear-expired",
    ];
    for uri in control_uris {
        // Each control call is a POST with an empty JSON object body.
        let request = post_json_with_cookie(uri, json!({}), &cookie);
        let resp = h.app.clone().oneshot(request).await.unwrap();
        assert_eq!(
            resp.status(),
            StatusCode::SERVICE_UNAVAILABLE,
            "{uri} should be 503 when daemon disabled"
        );
    }
}
// The SSE status stream answers 401 without a session and 403 for a
// logged-in non-admin.
#[sqlx::test(migrations = "./migrations")]
async fn status_stream_requires_admin(pool: PgPool) {
    let h = harness(pool);

    // No cookie at all → 401.
    let anonymous = get("/api/v1/admin/crawler/stream");
    let resp = h.app.clone().oneshot(anonymous).await.unwrap();
    assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);

    // Valid session, but not an admin → 403.
    let (_u, cookie) = register_user(&h.app).await;
    let as_user = get_with_cookie("/api/v1/admin/crawler/stream", &cookie);
    let resp = h.app.clone().oneshot(as_user).await.unwrap();
    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
}
// An admin opening the status stream gets a `text/event-stream` response
// whose body carries a status payload (a `"daemon"` key) within 5 seconds.
#[sqlx::test(migrations = "./migrations")]
async fn status_stream_emits_initial_event(pool: PgPool) {
    let h = harness(pool.clone());
    let cookie = seed_admin(&pool, &h.app).await;

    let request = get_with_cookie("/api/v1/admin/crawler/stream", &cookie);
    let resp = h.app.clone().oneshot(request).await.unwrap();
    assert_eq!(resp.status(), StatusCode::OK);

    // A missing or non-UTF-8 header degrades to "" so the assertion below
    // reports it instead of panicking here.
    let ct = resp
        .headers()
        .get(axum::http::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .map(str::to_owned)
        .unwrap_or_default();
    assert!(ct.starts_with("text/event-stream"), "content-type was {ct:?}");

    // The first snapshot may be split over several frames, so keep appending
    // data frames until the status payload shows up. The stream never ends
    // on its own; the surrounding timeout keeps the test from hanging.
    let mut body = resp.into_body();
    let mut acc = String::new();
    let read_until_status = async {
        while let Some(frame) = body.frame().await {
            // Non-data frames are skipped; a frame error fails the test.
            let Ok(data) = frame.expect("frame ok").into_data() else {
                continue;
            };
            acc.push_str(&String::from_utf8_lossy(&data));
            if acc.contains("\"daemon\"") {
                break;
            }
        }
    };
    let deadline = tokio::time::timeout(Duration::from_secs(5), read_until_status).await;

    assert!(deadline.is_ok(), "did not receive status within 5s; got: {acc:?}");
    assert!(acc.contains("\"daemon\""), "missing status payload: {acc}");
    assert!(acc.contains("status"), "missing SSE event name: {acc}");
}
// A logged-in user without the admin flag must get 403 from a mutating
// endpoint (dead-jobs requeue is used as the representative).
#[sqlx::test(migrations = "./migrations")]
async fn mutating_endpoints_reject_non_admin(pool: PgPool) {
    let h = harness(pool);
    let (_u, cookie) = register_user(&h.app).await;

    let requeue_all = post_json_with_cookie(
        "/api/v1/admin/crawler/dead-jobs/requeue",
        json!({ "scope": "all" }),
        &cookie,
    );
    let resp = h.app.clone().oneshot(requeue_all).await.unwrap();

    assert_eq!(resp.status(), StatusCode::FORBIDDEN);
}
// End-to-end over HTTP: an admin can list the seeded dead job, requeue it by
// id, and the job row ends up in the `pending` state.
#[sqlx::test(migrations = "./migrations")]
async fn dead_jobs_list_and_requeue_over_http(pool: PgPool) {
    let job_id = seed_dead_job(&pool, "Bleach").await;
    let h = harness(pool.clone());
    let cookie = seed_admin(&pool, &h.app).await;

    // Step 1: the listing shows exactly the seeded job.
    let list_request = get_with_cookie("/api/v1/admin/crawler/dead-jobs", &cookie);
    let resp = h.app.clone().oneshot(list_request).await.unwrap();
    assert_eq!(resp.status(), StatusCode::OK);
    let listing = body_json(resp).await;
    assert_eq!(listing["page"]["total"], 1);
    assert_eq!(listing["items"][0]["manga_title"], "Bleach");

    // Step 2: requeue that one job by id.
    let requeue_request = post_json_with_cookie(
        "/api/v1/admin/crawler/dead-jobs/requeue",
        json!({ "scope": "job", "job_id": job_id }),
        &cookie,
    );
    let resp = h.app.clone().oneshot(requeue_request).await.unwrap();
    assert_eq!(resp.status(), StatusCode::OK);
    let outcome = body_json(resp).await;
    assert_eq!(outcome["requeued"], 1);

    // Step 3: confirm the state change in the database itself.
    let lookup = sqlx::query_scalar("SELECT state FROM crawler_jobs WHERE id = $1").bind(job_id);
    let state: String = lookup.fetch_one(&pool).await.unwrap();
    assert_eq!(state, "pending");
}