feat(crawler): live cover + chapter-content observability with realtime page counts
Extends the live dashboard so an operator can see exactly what's being fetched, in realtime:

- Chapters being crawled now are tracked in the status as `active_chapters` (manga title · ch.N) with a live page counter that climbs per stored page (set_chapter_pages, pushed via the existing watch→SSE). The dispatcher registers each via an RAII ChapterGuard (sync Mutex) that removes the entry on completion, panic, or timeout-drop — replacing the old per-worker slot model.
- Covers: status now carries the cover being fetched now (`current_cover`, set around download_and_store_cover in both the metadata pass and backfill) and a `covers_queued` backlog count; CoverBackfill phase gains index/total.
- Two paginated backlog endpoints (fetched on demand, auto-refreshed when the live counts change): GET /admin/crawler/active-jobs (which chapters of which mangas are queued/running) and GET /admin/crawler/covers (mangas missing a cover). repo: list_active_jobs, list_missing_cover_mangas, count_missing_covers.
- dispatch_target now also returns manga title + chapter number.

Frontend: the crawler page replaces the Workers table with an Active-chapters table (live page bars), adds a current-cover line + covers-queued figure, and two backlog sections (Queued chapters / Queued covers) with search + Pager, auto-refetched via $effect on the live counts.

Tests: status guard/page + cover unit tests; repo list/count tests; endpoint tests; frontend api tests.

Version 0.53.1 -> 0.54.0.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -64,6 +64,102 @@ async fn seed_dead_job(pool: &PgPool, title: &str) -> Uuid {
|
||||
job_id
|
||||
}
|
||||
|
||||
/// Seed a chapter-content job in a given state ('pending'/'running').
|
||||
async fn seed_job(pool: &PgPool, title: &str, state: &str) {
|
||||
let manga_id = Uuid::new_v4();
|
||||
let chapter_id = Uuid::new_v4();
|
||||
sqlx::query("INSERT INTO mangas (id, title) VALUES ($1, $2)")
|
||||
.bind(manga_id)
|
||||
.bind(title)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO chapters (id, manga_id, number) VALUES ($1, $2, 1)")
|
||||
.bind(chapter_id)
|
||||
.bind(manga_id)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO crawler_jobs (id, payload, state) VALUES ($1, $2, $3)")
|
||||
.bind(Uuid::new_v4())
|
||||
.bind(json!({
|
||||
"kind": "sync_chapter_content",
|
||||
"source_id": "target",
|
||||
"chapter_id": chapter_id,
|
||||
"source_chapter_key": "k",
|
||||
}))
|
||||
.bind(state)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Seed a manga with no cover + a live source row (queued for cover fetch).
|
||||
async fn seed_missing_cover(pool: &PgPool, title: &str) {
|
||||
let manga_id = Uuid::new_v4();
|
||||
sqlx::query("INSERT INTO mangas (id, title, cover_image_path) VALUES ($1, $2, NULL)")
|
||||
.bind(manga_id)
|
||||
.bind(title)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO sources (id, name, base_url) VALUES ('target','T','http://x') ON CONFLICT DO NOTHING")
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url) \
|
||||
VALUES ('target', $1, $2, 'http://x/m')",
|
||||
)
|
||||
.bind(format!("k-{manga_id}"))
|
||||
.bind(manga_id)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn active_jobs_and_covers_lists_over_http(pool: PgPool) {
|
||||
seed_job(&pool, "Naruto", "pending").await;
|
||||
seed_job(&pool, "Bleach", "running").await;
|
||||
seed_missing_cover(&pool, "One Piece").await;
|
||||
let h = harness(pool.clone());
|
||||
let cookie = seed_admin(&pool, &h.app).await;
|
||||
|
||||
// Queued/active chapters.
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get_with_cookie("/api/v1/admin/crawler/active-jobs", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
assert_eq!(body["page"]["total"], 2);
|
||||
|
||||
// Queued covers.
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get_with_cookie("/api/v1/admin/crawler/covers", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
assert_eq!(body["page"]["total"], 1);
|
||||
assert_eq!(body["items"][0]["manga_title"], "One Piece");
|
||||
|
||||
// Both are admin-gated.
|
||||
let (_u, plain) = register_user(&h.app).await;
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get_with_cookie("/api/v1/admin/crawler/active-jobs", &plain))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_status_requires_admin(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
|
||||
@@ -7,6 +7,32 @@ use serde_json::json;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Seed a manga with no cover + a live source row (so it's "queued for a
|
||||
/// cover fetch"). Returns the manga id.
|
||||
async fn seed_missing_cover(pool: &PgPool, title: &str) -> Uuid {
|
||||
let manga_id = Uuid::new_v4();
|
||||
sqlx::query("INSERT INTO mangas (id, title, cover_image_path) VALUES ($1, $2, NULL)")
|
||||
.bind(manga_id)
|
||||
.bind(title)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO sources (id, name, base_url) VALUES ('target', 'T', 'http://x') ON CONFLICT DO NOTHING")
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url) \
|
||||
VALUES ('target', $1, $2, 'http://x/m')",
|
||||
)
|
||||
.bind(format!("k-{manga_id}"))
|
||||
.bind(manga_id)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
manga_id
|
||||
}
|
||||
|
||||
/// Seed a manga + chapter and return their ids.
|
||||
async fn seed_chapter(pool: &PgPool, title: &str, number: i32) -> (Uuid, Uuid) {
|
||||
let manga_id = Uuid::new_v4();
|
||||
@@ -202,6 +228,66 @@ async fn requeue_with_two_dead_jobs_for_one_chapter_revives_one_not_500(pool: Pg
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_active_jobs_returns_pending_and_running_running_first(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "Naruto", 700).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "Bleach", 10).await;
|
||||
insert_job(&pool, c1, "pending", 0).await;
|
||||
insert_job(&pool, c2, "running", 1).await;
|
||||
// A dead + a done job must NOT appear.
|
||||
let (_m3, c3) = seed_chapter(&pool, "Gone", 1).await;
|
||||
insert_job(&pool, c3, "dead", 5).await;
|
||||
|
||||
let (items, total) = crawler::list_active_jobs(&pool, None, 50, 0).await.unwrap();
|
||||
assert_eq!(total, 2);
|
||||
assert_eq!(items.len(), 2);
|
||||
// Running first.
|
||||
assert_eq!(items[0].state, "running");
|
||||
assert_eq!(items[0].manga_title.as_deref(), Some("Bleach"));
|
||||
assert_eq!(items[1].state, "pending");
|
||||
assert_eq!(items[1].chapter_number, Some(700));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_active_jobs_filters_by_title(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "Naruto", 1).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "One Piece", 1).await;
|
||||
insert_job(&pool, c1, "pending", 0).await;
|
||||
insert_job(&pool, c2, "pending", 0).await;
|
||||
let (items, total) = crawler::list_active_jobs(&pool, Some("piece"), 50, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(total, 1);
|
||||
assert_eq!(items[0].manga_title.as_deref(), Some("One Piece"));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn missing_covers_count_and_list(pool: PgPool) {
|
||||
seed_missing_cover(&pool, "Naruto").await;
|
||||
seed_missing_cover(&pool, "Bleach").await;
|
||||
// A manga WITH a cover must not be counted.
|
||||
let with_cover = Uuid::new_v4();
|
||||
sqlx::query("INSERT INTO mangas (id, title, cover_image_path) VALUES ($1, 'Done', 'k.jpg')")
|
||||
.bind(with_cover)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(crawler::count_missing_covers(&pool).await.unwrap(), 2);
|
||||
|
||||
let (items, total) = crawler::list_missing_cover_mangas(&pool, None, 50, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(total, 2);
|
||||
assert_eq!(items.len(), 2);
|
||||
|
||||
let (items, total) = crawler::list_missing_cover_mangas(&pool, Some("naru"), 50, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(total, 1);
|
||||
assert_eq!(items[0].manga_title, "Naruto");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn job_state_counts_groups_by_state(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
|
||||
@@ -109,7 +109,7 @@ async fn dispatch_target_prefers_most_recent_live_source(pool: PgPool) {
|
||||
seed_chapter_with_two_live_sources(&pool).await;
|
||||
|
||||
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
||||
let (_manga_id, source_url) =
|
||||
let (_manga_id, source_url, _title, _number) =
|
||||
row.expect("two live sources should yield a dispatch target");
|
||||
assert_eq!(
|
||||
source_url, new_url,
|
||||
@@ -133,7 +133,7 @@ async fn dispatch_target_skips_dropped_sources(pool: PgPool) {
|
||||
.unwrap();
|
||||
|
||||
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
||||
let (_manga_id, source_url) =
|
||||
let (_manga_id, source_url, _title, _number) =
|
||||
row.expect("a single live source should still yield a dispatch target");
|
||||
assert!(
|
||||
source_url != new_url,
|
||||
|
||||
Reference in New Issue
Block a user