feat(crawler): live status surface, runtime session, dead-job repo, auto-restart
Adds the in-process observability + control infrastructure the admin dashboard consumes:

- status.rs: CrawlerStatus/Phase/WorkerState + StatusHandle. The daemon publishes its current phase (idle/walking/fetching-metadata/cover-backfill), per-worker activity, and last-pass summary. Wired through the cron, run_metadata_pass, and the worker loop.
- session_control.rs: SessionController refreshes PHPSESSID at runtime — rewrites the shared reqwest cookie jar, updates the value on_launch reads, persists to crawler_state (survives restart), and clears the expired flag. on_launch now reads the live session instead of a startup snapshot.
- RealChapterDispatcher auto-triggers a coordinated browser restart after CRAWLER_BROWSER_RESTART_THRESHOLD consecutive transient failures.
- repo::crawler: list_dead_jobs, requeue_dead_jobs (all/manga/job, bypassing the quarantine, skipping live duplicates), job_state_counts.
- AppState gains CrawlerControl bundling browser_manager + session + status + metadata_pass for the admin endpoints.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
171
backend/tests/crawler_dead_jobs.rs
Normal file
171
backend/tests/crawler_dead_jobs.rs
Normal file
@@ -0,0 +1,171 @@
|
||||
//! Integration tests for the dead-letter admin queries in
//! `repo::crawler`: listing dead jobs with manga/chapter context and the
//! scoped requeue (all / per-manga / single) used by the admin dashboard.
|
||||
|
||||
use mangalord::repo::crawler::{self, RequeueScope};
|
||||
use serde_json::json;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Seed a manga + chapter and return their ids.
|
||||
async fn seed_chapter(pool: &PgPool, title: &str, number: i32) -> (Uuid, Uuid) {
|
||||
let manga_id = Uuid::new_v4();
|
||||
let chapter_id = Uuid::new_v4();
|
||||
sqlx::query("INSERT INTO mangas (id, title) VALUES ($1, $2)")
|
||||
.bind(manga_id)
|
||||
.bind(title)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO chapters (id, manga_id, number) VALUES ($1, $2, $3)")
|
||||
.bind(chapter_id)
|
||||
.bind(manga_id)
|
||||
.bind(number)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
(manga_id, chapter_id)
|
||||
}
|
||||
|
||||
/// Insert a crawler_jobs row in a given state for a chapter-content job.
|
||||
async fn insert_job(pool: &PgPool, chapter_id: Uuid, state: &str, attempts: i32) -> Uuid {
|
||||
let id = Uuid::new_v4();
|
||||
let payload = json!({
|
||||
"kind": "sync_chapter_content",
|
||||
"source_id": "target",
|
||||
"chapter_id": chapter_id,
|
||||
"source_chapter_key": "k",
|
||||
});
|
||||
sqlx::query(
|
||||
"INSERT INTO crawler_jobs (id, payload, state, attempts, last_error) \
|
||||
VALUES ($1, $2, $3, $4, 'boom')",
|
||||
)
|
||||
.bind(id)
|
||||
.bind(payload)
|
||||
.bind(state)
|
||||
.bind(attempts)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
id
|
||||
}
|
||||
|
||||
async fn state_of(pool: &PgPool, id: Uuid) -> String {
|
||||
sqlx::query_scalar::<_, String>("SELECT state FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_dead_jobs_returns_context_and_total(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "Naruto", 700).await;
|
||||
insert_job(&pool, c1, "dead", 5).await;
|
||||
// A non-dead job must not appear.
|
||||
let (_m2, c2) = seed_chapter(&pool, "Bleach", 1).await;
|
||||
insert_job(&pool, c2, "pending", 0).await;
|
||||
|
||||
let (items, total) = crawler::list_dead_jobs(&pool, None, 50, 0).await.unwrap();
|
||||
assert_eq!(total, 1);
|
||||
assert_eq!(items.len(), 1);
|
||||
let row = &items[0];
|
||||
assert_eq!(row.manga_title.as_deref(), Some("Naruto"));
|
||||
assert_eq!(row.chapter_number, Some(700));
|
||||
assert_eq!(row.attempts, 5);
|
||||
assert_eq!(row.last_error.as_deref(), Some("boom"));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_dead_jobs_filters_by_title_search(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "Naruto", 700).await;
|
||||
insert_job(&pool, c1, "dead", 5).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "One Piece", 1).await;
|
||||
insert_job(&pool, c2, "dead", 5).await;
|
||||
|
||||
let (items, total) = crawler::list_dead_jobs(&pool, Some("piece"), 50, 0)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(total, 1);
|
||||
assert_eq!(items[0].manga_title.as_deref(), Some("One Piece"));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn requeue_all_resets_dead_jobs_to_pending(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "B", 1).await;
|
||||
let j1 = insert_job(&pool, c1, "dead", 5).await;
|
||||
let j2 = insert_job(&pool, c2, "dead", 5).await;
|
||||
|
||||
let n = crawler::requeue_dead_jobs(&pool, RequeueScope::All)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n, 2);
|
||||
assert_eq!(state_of(&pool, j1).await, "pending");
|
||||
assert_eq!(state_of(&pool, j2).await, "pending");
|
||||
let attempts: i32 = sqlx::query_scalar("SELECT attempts FROM crawler_jobs WHERE id = $1")
|
||||
.bind(j1)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(attempts, 0, "attempts reset on requeue");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn requeue_by_manga_scopes_to_that_manga(pool: PgPool) {
|
||||
let (m1, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "B", 1).await;
|
||||
let j1 = insert_job(&pool, c1, "dead", 5).await;
|
||||
let j2 = insert_job(&pool, c2, "dead", 5).await;
|
||||
|
||||
let n = crawler::requeue_dead_jobs(&pool, RequeueScope::Manga(m1))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n, 1);
|
||||
assert_eq!(state_of(&pool, j1).await, "pending");
|
||||
assert_eq!(state_of(&pool, j2).await, "dead", "other manga untouched");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn requeue_single_job(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "B", 1).await;
|
||||
let j1 = insert_job(&pool, c1, "dead", 5).await;
|
||||
let j2 = insert_job(&pool, c2, "dead", 5).await;
|
||||
|
||||
let n = crawler::requeue_dead_jobs(&pool, RequeueScope::Job(j1))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n, 1);
|
||||
assert_eq!(state_of(&pool, j1).await, "pending");
|
||||
assert_eq!(state_of(&pool, j2).await, "dead");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn requeue_skips_dead_when_live_job_exists_for_same_chapter(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
let dead = insert_job(&pool, c1, "dead", 5).await;
|
||||
// A live pending job for the SAME chapter already exists.
|
||||
insert_job(&pool, c1, "pending", 0).await;
|
||||
|
||||
let n = crawler::requeue_dead_jobs(&pool, RequeueScope::All)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n, 0, "must not resurrect a dead job that has a live counterpart");
|
||||
assert_eq!(state_of(&pool, dead).await, "dead");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn job_state_counts_groups_by_state(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
let (_m2, c2) = seed_chapter(&pool, "B", 1).await;
|
||||
let (_m3, c3) = seed_chapter(&pool, "C", 1).await;
|
||||
insert_job(&pool, c1, "pending", 0).await;
|
||||
insert_job(&pool, c2, "dead", 5).await;
|
||||
insert_job(&pool, c3, "dead", 5).await;
|
||||
|
||||
let (pending, running, dead) = crawler::job_state_counts(&pool).await.unwrap();
|
||||
assert_eq!(pending, 1);
|
||||
assert_eq!(running, 0);
|
||||
assert_eq!(dead, 2);
|
||||
}
|
||||
Reference in New Issue
Block a user