feat(crawler): live status surface, runtime session, dead-job repo, auto-restart
Adds the in-process observability + control infrastructure the admin dashboard consumes:

- status.rs: CrawlerStatus/Phase/WorkerState + StatusHandle. The daemon publishes its current phase (idle/walking/fetching-metadata/cover-backfill), per-worker activity, and last-pass summary. Wired through the cron, run_metadata_pass, and the worker loop.
- session_control.rs: SessionController refreshes PHPSESSID at runtime — rewrites the shared reqwest cookie jar, updates the value on_launch reads, persists to crawler_state (survives restart), and clears the expired flag. on_launch now reads the live session instead of a startup snapshot.
- RealChapterDispatcher auto-triggers a coordinated browser restart after CRAWLER_BROWSER_RESTART_THRESHOLD consecutive transient failures.
- repo::crawler: list_dead_jobs, requeue_dead_jobs (all/manga/job, bypassing the quarantine, skipping live duplicates), job_state_counts.
- AppState gains CrawlerControl bundling browser_manager + session + status + metadata_pass for the admin endpoints.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -17,8 +17,9 @@
|
||||
//! Each public function is a transaction boundary so a partial failure
|
||||
//! mid-call leaves the DB in its pre-call state.
|
||||
|
||||
use chrono::Utc;
|
||||
use sqlx::{PgPool, Postgres, Transaction};
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::Serialize;
|
||||
use sqlx::{FromRow, PgPool, Postgres, Transaction};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::crawler::source::{SourceChapterRef, SourceManga};
|
||||
@@ -618,3 +619,169 @@ pub async fn last_run_completed_cleanly(
|
||||
.unwrap_or(true))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dead-letter jobs: admin observability + requeue.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A `dead` crawler job joined to its chapter/manga context for the admin
|
||||
/// dead-letter view. Chapter columns are `Option` because the join is
|
||||
/// best-effort (the chapter may have been removed since the job died, or
|
||||
/// the job may be a non-chapter kind).
|
||||
#[derive(Debug, Clone, Serialize, FromRow)]
|
||||
pub struct DeadJob {
|
||||
pub id: Uuid,
|
||||
pub kind: String,
|
||||
pub chapter_id: Option<Uuid>,
|
||||
pub manga_id: Option<Uuid>,
|
||||
pub manga_title: Option<String>,
|
||||
pub chapter_number: Option<i32>,
|
||||
pub attempts: i32,
|
||||
pub max_attempts: i32,
|
||||
pub last_error: Option<String>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Paginated list of `dead` jobs, newest-failed first, joined to chapter +
|
||||
/// manga context. `search` filters on manga title (case-insensitive
|
||||
/// substring). Returns the page slice plus the unfiltered-by-page total.
|
||||
pub async fn list_dead_jobs(
|
||||
pool: &PgPool,
|
||||
search: Option<&str>,
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> sqlx::Result<(Vec<DeadJob>, i64)> {
|
||||
let search_pat = search
|
||||
.map(|s| format!("%{}%", s.trim()))
|
||||
.filter(|p| p.len() > 2);
|
||||
|
||||
let items: Vec<DeadJob> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT
|
||||
cj.id,
|
||||
cj.payload->>'kind' AS kind,
|
||||
(cj.payload->>'chapter_id')::uuid AS chapter_id,
|
||||
c.manga_id AS manga_id,
|
||||
m.title AS manga_title,
|
||||
c.number AS chapter_number,
|
||||
cj.attempts,
|
||||
cj.max_attempts,
|
||||
cj.last_error,
|
||||
cj.updated_at
|
||||
FROM crawler_jobs cj
|
||||
LEFT JOIN chapters c ON c.id = (cj.payload->>'chapter_id')::uuid
|
||||
LEFT JOIN mangas m ON m.id = c.manga_id
|
||||
WHERE cj.state = 'dead'
|
||||
AND ($1::text IS NULL OR m.title ILIKE $1)
|
||||
ORDER BY cj.updated_at DESC
|
||||
LIMIT $2 OFFSET $3
|
||||
"#,
|
||||
)
|
||||
.bind(&search_pat)
|
||||
.bind(limit)
|
||||
.bind(offset)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let total: i64 = sqlx::query_scalar(
|
||||
r#"
|
||||
SELECT COUNT(*)
|
||||
FROM crawler_jobs cj
|
||||
LEFT JOIN chapters c ON c.id = (cj.payload->>'chapter_id')::uuid
|
||||
LEFT JOIN mangas m ON m.id = c.manga_id
|
||||
WHERE cj.state = 'dead'
|
||||
AND ($1::text IS NULL OR m.title ILIKE $1)
|
||||
"#,
|
||||
)
|
||||
.bind(&search_pat)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
|
||||
Ok((items, total))
|
||||
}
|
||||
|
||||
/// Scope of a dead-job requeue.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum RequeueScope {
|
||||
/// Every dead job.
|
||||
All,
|
||||
/// Dead jobs whose chapter belongs to this manga.
|
||||
Manga(Uuid),
|
||||
/// A single dead job by its id.
|
||||
Job(Uuid),
|
||||
}
|
||||
|
||||
/// Requeue dead jobs back to `pending` with a fresh attempt budget. This is
|
||||
/// an explicit operator override, so it bypasses the dead-letter quarantine
|
||||
/// the enqueue helpers honour (we act directly on the row). Skips any dead
|
||||
/// job whose chapter already has a `pending`/`running` job so the partial
|
||||
/// dedup index is never violated. Returns the number of rows requeued.
|
||||
pub async fn requeue_dead_jobs(pool: &PgPool, scope: RequeueScope) -> sqlx::Result<u64> {
|
||||
// Guard against resurrecting a dead job when a live one already covers
|
||||
// the same chapter (would otherwise hit the dedup unique index).
|
||||
const NO_LIVE_DUP: &str = r#"
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM crawler_jobs live
|
||||
WHERE live.payload->>'kind' = 'sync_chapter_content'
|
||||
AND live.payload->>'chapter_id' = crawler_jobs.payload->>'chapter_id'
|
||||
AND live.state IN ('pending','running')
|
||||
)
|
||||
"#;
|
||||
const SET: &str = "SET state = 'pending', attempts = 0, leased_until = NULL, \
|
||||
last_error = NULL, scheduled_at = now(), updated_at = now()";
|
||||
|
||||
let affected = match scope {
|
||||
RequeueScope::All => {
|
||||
sqlx::query(&format!(
|
||||
"UPDATE crawler_jobs {SET} WHERE state = 'dead' {NO_LIVE_DUP}"
|
||||
))
|
||||
.execute(pool)
|
||||
.await?
|
||||
.rows_affected()
|
||||
}
|
||||
RequeueScope::Manga(manga_id) => {
|
||||
sqlx::query(&format!(
|
||||
"UPDATE crawler_jobs {SET} \
|
||||
WHERE state = 'dead' \
|
||||
AND (payload->>'chapter_id')::uuid IN \
|
||||
(SELECT id FROM chapters WHERE manga_id = $1) \
|
||||
{NO_LIVE_DUP}"
|
||||
))
|
||||
.bind(manga_id)
|
||||
.execute(pool)
|
||||
.await?
|
||||
.rows_affected()
|
||||
}
|
||||
RequeueScope::Job(job_id) => {
|
||||
sqlx::query(&format!(
|
||||
"UPDATE crawler_jobs {SET} WHERE state = 'dead' AND id = $1 {NO_LIVE_DUP}"
|
||||
))
|
||||
.bind(job_id)
|
||||
.execute(pool)
|
||||
.await?
|
||||
.rows_affected()
|
||||
}
|
||||
};
|
||||
Ok(affected)
|
||||
}
|
||||
|
||||
/// Count crawler jobs grouped by state — drives the dashboard queue
|
||||
/// gauges. Returns `(pending, running, dead)`.
|
||||
pub async fn job_state_counts(pool: &PgPool) -> sqlx::Result<(i64, i64, i64)> {
|
||||
let rows: Vec<(String, i64)> =
|
||||
sqlx::query_as("SELECT state, COUNT(*) FROM crawler_jobs GROUP BY state")
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
let mut pending = 0;
|
||||
let mut running = 0;
|
||||
let mut dead = 0;
|
||||
for (state, n) in rows {
|
||||
match state.as_str() {
|
||||
"pending" => pending = n,
|
||||
"running" => running = n,
|
||||
"dead" => dead = n,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok((pending, running, dead))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user