Files
Mangalord/backend/src/repo/crawler.rs
MechaCat02 6f0a8d88c9 feat(api): add per-chapter requeue scope for dead jobs
Lets the admin manga page requeue a single failed chapter's dead job(s)
inline, without a job id. Adds RequeueScope::Chapter + the matching
request variant and a repo test.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:48:13 +02:00

802 lines
27 KiB
Rust

//! Persistence for crawled mangas.
//!
//! High-level operations:
//! - [`ensure_source`]: idempotent registration of a source row.
//! - [`upsert_manga_from_source`]: end-to-end "I saw this manga" —
//! creates or updates the `mangas` row, threads `manga_sources`, and
//! refreshes authors/genres/tags. Returns whether the manga is new,
//! updated (metadata_hash changed), or unchanged.
//! - [`sync_manga_chapters`]: per-manga chapter reconciliation. Adds
//! new ones, refreshes URLs on existing ones, soft-drops vanished.
//! - [`mark_run_started`] / [`mark_run_completed`] /
//! [`last_run_completed_cleanly`]: per-source recovery flag in
//! `crawler_state`. A `false` flag on tick start means the previous
//! run did not exit cleanly and the next walk should ignore the
//! early-stop condition.
//!
//! Each public function is a transaction boundary so a partial failure
//! mid-call leaves the DB in its pre-call state.
use chrono::{DateTime, Utc};
use serde::Serialize;
use sqlx::{FromRow, PgPool, Postgres, Transaction};
use uuid::Uuid;
use crate::crawler::source::{SourceChapterRef, SourceManga};
/// Outcome reported by [`upsert_manga_from_source`] for one manga.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpsertStatus {
/// No `manga_sources` row existed for the (source, key) pair, so a fresh
/// `mangas` row was inserted.
New,
/// A `manga_sources` row existed but its stored `metadata_hash` differed
/// from the incoming one; the `mangas` row was rewritten.
Updated,
/// The stored `metadata_hash` equals the incoming one; the `mangas` row
/// itself was left untouched.
Unchanged,
}
/// Result of [`upsert_manga_from_source`]: which manga row was touched,
/// what happened to it, and its current cover path.
#[derive(Debug, Clone)]
pub struct UpsertedManga {
/// Id of the `mangas` row that was inserted or matched.
pub manga_id: Uuid,
/// Whether the manga was new, updated, or unchanged.
pub status: UpsertStatus,
/// Current value of `mangas.cover_image_path` after the upsert.
/// `None` means the cover hasn't been downloaded yet — the caller
/// uses this to backfill covers for mangas that were synced before
/// cover-download support existed.
pub cover_image_path: Option<String>,
}
/// Per-call tally returned by [`sync_manga_chapters`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct ChapterDiff {
/// Chapters that had no matching `chapter_sources` row and were inserted.
pub new: usize,
/// Chapters that already existed and had their title, source index and
/// source URL refreshed.
pub refreshed: usize,
/// `chapter_sources` rows soft-dropped because their key was absent from
/// the supplied list.
pub dropped: usize,
}
/// Idempotently registers a crawl source.
///
/// Inserts a `sources` row with `enabled = true`. When a row with the same
/// `id` already exists, only `name` and `base_url` are overwritten; the
/// stored `enabled` value is not part of the conflict update.
///
/// # Errors
/// Propagates any database error from the upsert.
pub async fn ensure_source(
    pool: &PgPool,
    id: &str,
    name: &str,
    base_url: &str,
) -> sqlx::Result<()> {
    const UPSERT_SOURCE: &str = r#"
        INSERT INTO sources (id, name, base_url, enabled)
        VALUES ($1, $2, $3, true)
        ON CONFLICT (id) DO UPDATE
        SET name = EXCLUDED.name,
            base_url = EXCLUDED.base_url
    "#;

    let upsert = sqlx::query(UPSERT_SOURCE)
        .bind(id)
        .bind(name)
        .bind(base_url);
    upsert.execute(pool).await.map(|_| ())
}
/// Records one sighting of a manga on a source, inside a single transaction.
///
/// Steps, in order:
/// 1. Look up the `manga_sources` link for `(source_id, sm.source_manga_key)`.
/// 2. Insert a new `mangas` row (no link yet), leave it alone (stored
///    `metadata_hash` equals the incoming one), or rewrite its title,
///    description, status and alt titles (hash differs).
/// 3. Upsert the `manga_sources` link, refreshing URL, hash and
///    `last_seen_at`, and clearing `dropped_at`.
/// 4. Re-sync authors, genres and tags — this happens for every status,
///    including [`UpsertStatus::Unchanged`].
/// 5. Read back the current `cover_image_path` and commit.
///
/// A missing `sm.status` is stored as `"ongoing"`.
///
/// # Errors
/// Any database error aborts the call; the transaction is dropped without
/// being committed.
pub async fn upsert_manga_from_source(
    pool: &PgPool,
    source_id: &str,
    source_url: &str,
    sm: &SourceManga,
) -> sqlx::Result<UpsertedManga> {
    const FIND_LINK: &str = r#"
        SELECT manga_id, metadata_hash
        FROM manga_sources
        WHERE source_id = $1 AND source_manga_key = $2
    "#;
    const INSERT_MANGA: &str = r#"
        INSERT INTO mangas (title, description, status, alt_titles)
        VALUES ($1, $2, $3, $4)
        RETURNING id
    "#;
    const UPDATE_MANGA: &str = r#"
        UPDATE mangas
        SET title = $1,
            description = $2,
            status = $3,
            alt_titles = $4,
            updated_at = NOW()
        WHERE id = $5
    "#;
    const UPSERT_LINK: &str = r#"
        INSERT INTO manga_sources
            (source_id, source_manga_key, manga_id, source_url, metadata_hash, last_seen_at, dropped_at)
        VALUES ($1, $2, $3, $4, $5, NOW(), NULL)
        ON CONFLICT (source_id, source_manga_key) DO UPDATE
        SET source_url = EXCLUDED.source_url,
            metadata_hash = EXCLUDED.metadata_hash,
            last_seen_at = NOW(),
            dropped_at = NULL
    "#;

    let mut tx = pool.begin().await?;

    let linked: Option<(Uuid, Option<String>)> = sqlx::query_as(FIND_LINK)
        .bind(source_id)
        .bind(&sm.source_manga_key)
        .fetch_optional(&mut *tx)
        .await?;

    let status_db = sm.status.as_deref().unwrap_or("ongoing");

    // Note: `cover_image_path` is intentionally not written here.
    // The repo layer doesn't know about the storage backend, so the
    // caller (crawler binary) downloads the cover via the `Storage`
    // trait and sets the path with `repo::manga::set_cover_image_path`
    // once the bytes have landed.
    let (manga_id, status) = if let Some((id, prev_hash)) = linked {
        if prev_hash.as_deref() == Some(&sm.metadata_hash) {
            (id, UpsertStatus::Unchanged)
        } else {
            sqlx::query(UPDATE_MANGA)
                .bind(&sm.title)
                .bind(sm.summary.as_deref())
                .bind(status_db)
                .bind(&sm.alternative_titles)
                .bind(id)
                .execute(&mut *tx)
                .await?;
            (id, UpsertStatus::Updated)
        }
    } else {
        let id: Uuid = sqlx::query_scalar(INSERT_MANGA)
            .bind(&sm.title)
            .bind(sm.summary.as_deref())
            .bind(status_db)
            .bind(&sm.alternative_titles)
            .fetch_one(&mut *tx)
            .await?;
        (id, UpsertStatus::New)
    };

    sqlx::query(UPSERT_LINK)
        .bind(source_id)
        .bind(&sm.source_manga_key)
        .bind(manga_id)
        .bind(source_url)
        .bind(&sm.metadata_hash)
        .execute(&mut *tx)
        .await?;

    sync_authors(&mut tx, manga_id, &sm.authors).await?;
    sync_genres(&mut tx, manga_id, &sm.genres).await?;
    sync_tags(&mut tx, manga_id, &sm.tags).await?;

    // Read the cover path inside the same transaction so the caller sees
    // the value that belongs to the state being committed.
    let cover_image_path: Option<String> =
        sqlx::query_scalar("SELECT cover_image_path FROM mangas WHERE id = $1")
            .bind(manga_id)
            .fetch_one(&mut *tx)
            .await?;

    tx.commit().await?;

    Ok(UpsertedManga {
        manga_id,
        status,
        cover_image_path,
    })
}
/// Replaces the author attachments of `manga_id` with `authors`.
///
/// All existing `manga_authors` rows for the manga are deleted first. Each
/// non-blank name (after trimming) is then upserted into `authors` and
/// attached with `position` set to its index in the *original* slice, so
/// skipped blank entries leave gaps rather than shifting later positions.
async fn sync_authors(
    tx: &mut Transaction<'_, Postgres>,
    manga_id: Uuid,
    authors: &[String],
) -> sqlx::Result<()> {
    const UPSERT_AUTHOR: &str = r#"
        INSERT INTO authors (name) VALUES ($1)
        ON CONFLICT (lower(name)) DO UPDATE SET name = authors.name
        RETURNING id
    "#;
    const ATTACH_AUTHOR: &str = r#"
        INSERT INTO manga_authors (manga_id, author_id, position)
        VALUES ($1, $2, $3)
        ON CONFLICT DO NOTHING
    "#;

    sqlx::query("DELETE FROM manga_authors WHERE manga_id = $1")
        .bind(manga_id)
        .execute(&mut **tx)
        .await?;

    // Enumerate before filtering so positions refer to the source order.
    let usable = authors
        .iter()
        .map(|raw| raw.trim())
        .enumerate()
        .filter(|(_, author)| !author.is_empty());

    for (position, author) in usable {
        // Self-update on conflict so the row id is always returned —
        // can't use DO NOTHING because that suppresses RETURNING.
        let author_id: Uuid = sqlx::query_scalar(UPSERT_AUTHOR)
            .bind(author)
            .fetch_one(&mut **tx)
            .await?;

        sqlx::query(ATTACH_AUTHOR)
            .bind(manga_id)
            .bind(author_id)
            .bind(position as i32)
            .execute(&mut **tx)
            .await?;
    }

    Ok(())
}
/// Replaces the genre attachments of `manga_id` with `genres`.
///
/// Existing `manga_genres` rows for the manga are deleted, then every
/// non-blank trimmed name is resolved to a genre id — by case-insensitive
/// lookup first, by insert otherwise — and attached. Creating a new genre
/// emits an `info` log line.
async fn sync_genres(
    tx: &mut Transaction<'_, Postgres>,
    manga_id: Uuid,
    genres: &[String],
) -> sqlx::Result<()> {
    const INSERT_GENRE: &str = r#"
        INSERT INTO genres (name) VALUES ($1)
        ON CONFLICT (name) DO UPDATE SET name = genres.name
        RETURNING id
    "#;

    sqlx::query("DELETE FROM manga_genres WHERE manga_id = $1")
        .bind(manga_id)
        .execute(&mut **tx)
        .await?;

    for label in genres.iter().map(|raw| raw.trim()).filter(|g| !g.is_empty()) {
        // Case-insensitive lookup so a source-supplied "action"
        // attaches to the seeded "Action" rather than creating a
        // second row.
        let found: Option<Uuid> =
            sqlx::query_scalar("SELECT id FROM genres WHERE lower(name) = lower($1)")
                .bind(label)
                .fetch_optional(&mut **tx)
                .await?;

        let genre_id = if let Some(id) = found {
            id
        } else {
            let id: Uuid = sqlx::query_scalar(INSERT_GENRE)
                .bind(label)
                .fetch_one(&mut **tx)
                .await?;
            tracing::info!(genre = label, "added new genre from source");
            id
        };

        sqlx::query(
            "INSERT INTO manga_genres (manga_id, genre_id) VALUES ($1, $2) ON CONFLICT DO NOTHING",
        )
        .bind(manga_id)
        .bind(genre_id)
        .execute(&mut **tx)
        .await?;
    }

    Ok(())
}
/// Replaces the crawler-owned tag attachments of `manga_id` with `tags`.
///
/// Only rows with `added_by IS NULL` are removed; attachments that carry a
/// user id are left in place. Each non-blank trimmed tag name is upserted
/// into `tags` and attached with `added_by = NULL`.
async fn sync_tags(
    tx: &mut Transaction<'_, Postgres>,
    manga_id: Uuid,
    tags: &[String],
) -> sqlx::Result<()> {
    const UPSERT_TAG: &str = r#"
        INSERT INTO tags (name) VALUES ($1)
        ON CONFLICT (lower(name)) DO UPDATE SET name = tags.name
        RETURNING id
    "#;
    const ATTACH_TAG: &str = r#"
        INSERT INTO manga_tags (manga_id, tag_id, added_by)
        VALUES ($1, $2, NULL)
        ON CONFLICT DO NOTHING
    "#;

    // Only clear crawler-owned attachments (added_by IS NULL). User-
    // attached tags are owned by the attaching user and must survive
    // the recurring metadata pass — see manga_tags.added_by in
    // migration 0009.
    //
    // Note on orphans: `manga_tags.added_by` is `ON DELETE SET NULL`,
    // so an attachment whose user was deleted becomes
    // indistinguishable from a crawler-owned row and is cleaned up
    // here. That mirrors how `api::mangas::detach_tag` already treats
    // orphans ("nobody owns it, refuse to let anyone but admin clear
    // them") — the crawler now becomes the eventual reaper. Tracked
    // by `sync_tags_garbage_collects_orphan_user_attachments` in
    // backend/tests/crawler_sync.rs.
    sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1 AND added_by IS NULL")
        .bind(manga_id)
        .execute(&mut **tx)
        .await?;

    for label in tags.iter().map(|raw| raw.trim()).filter(|t| !t.is_empty()) {
        let tag_id: Uuid = sqlx::query_scalar(UPSERT_TAG)
            .bind(label)
            .fetch_one(&mut **tx)
            .await?;

        sqlx::query(ATTACH_TAG)
            .bind(manga_id)
            .bind(tag_id)
            .execute(&mut **tx)
            .await?;
    }

    Ok(())
}
pub async fn sync_manga_chapters(
pool: &PgPool,
source_id: &str,
manga_id: Uuid,
chapters: &[SourceChapterRef],
) -> sqlx::Result<ChapterDiff> {
let mut tx = pool.begin().await?;
// Per-manga advisory lock. Two concurrent calls for the same manga
// would otherwise both read `seen_keys`, both run the drop UPDATE
// filtered on `NOT (key = ANY $3)`, and the later commit could soft-
// drop a chapter the earlier commit had just inserted (lost-update
// shape under MVCC). `pg_advisory_xact_lock` is scoped to this
// transaction: it auto-releases on COMMIT/ROLLBACK so a Rust-side
// panic mid-call doesn't strand the lock. The single-arg int8 form
// keyed by `hashtextextended(manga_id::text, 0)` shares Postgres'
// global advisory-lock namespace with `CRON_LOCK_KEY`, but collision
// is 2^-64 per pair (a UUID-derived hash hitting the fixed cron key
// is effectively impossible).
sqlx::query("SELECT pg_advisory_xact_lock(hashtextextended($1::text, 0))")
.bind(manga_id)
.execute(&mut *tx)
.await?;
let mut diff = ChapterDiff::default();
let seen_keys: Vec<String> = chapters
.iter()
.map(|c| c.source_chapter_key.clone())
.collect();
for (idx, c) in chapters.iter().enumerate() {
// `source_index` captures the chapter's position in the source
// DOM (0 = first = newest on this site) so the list query can
// reverse it for the user-facing list — see migration 0021.
// Every sync overwrites the value on both branches, so a new
// chapter inserted at the top of the source shifts every other
// row down by one on the next tick.
let source_index = idx as i32;
// Lookup is constrained by manga_id (via the chapters join) so a
// source whose chapter slugs collide across mangas (e.g.
// "chapter-1" appearing under two different mangas) attributes
// each row to the correct manga. Migration 0017 dropped the
// (source_id, source_chapter_key) PK in favour of
// (source_id, chapter_id) for exactly this reason.
let existing: Option<(Uuid,)> = sqlx::query_as(
"SELECT cs.chapter_id \
FROM chapter_sources cs \
JOIN chapters ch ON ch.id = cs.chapter_id \
WHERE cs.source_id = $1 \
AND cs.source_chapter_key = $2 \
AND ch.manga_id = $3",
)
.bind(source_id)
.bind(&c.source_chapter_key)
.bind(manga_id)
.fetch_optional(&mut *tx)
.await?;
match existing {
None => {
// New chapter row. As of 0013 there's no (manga_id,
// number) UNIQUE, so duplicate-numbered chapters from
// the source (different uploaders, notices, alt
// translations) each get their own row — chapter
// identity is the UUID, not the number.
let (chapter_id,): (Uuid,) = sqlx::query_as(
r#"
INSERT INTO chapters (manga_id, number, title, page_count, source_index)
VALUES ($1, $2, $3, 0, $4)
RETURNING id
"#,
)
.bind(manga_id)
.bind(c.number)
.bind(c.title.as_deref())
.bind(source_index)
.fetch_one(&mut *tx)
.await?;
sqlx::query(
r#"
INSERT INTO chapter_sources
(source_id, source_chapter_key, chapter_id, source_url, last_seen_at, dropped_at)
VALUES ($1, $2, $3, $4, NOW(), NULL)
"#,
)
.bind(source_id)
.bind(&c.source_chapter_key)
.bind(chapter_id)
.bind(&c.url)
.execute(&mut *tx)
.await?;
diff.new += 1;
}
Some((chapter_id,)) => {
sqlx::query(
"UPDATE chapters SET title = $1, source_index = $2 WHERE id = $3",
)
.bind(c.title.as_deref())
.bind(source_index)
.bind(chapter_id)
.execute(&mut *tx)
.await?;
// chapter_id is now the natural per-(source, chapter)
// identifier — use it directly instead of re-keying on
// (source_id, source_chapter_key) which may not be unique.
sqlx::query(
r#"
UPDATE chapter_sources
SET source_url = $1, last_seen_at = NOW(), dropped_at = NULL
WHERE source_id = $2 AND chapter_id = $3
"#,
)
.bind(&c.url)
.bind(source_id)
.bind(chapter_id)
.execute(&mut *tx)
.await?;
diff.refreshed += 1;
}
}
}
// Soft-drop any chapter previously seen from this source for this
// manga that's not in the current list.
let result = sqlx::query(
r#"
UPDATE chapter_sources cs
SET dropped_at = NOW()
FROM chapters ch
WHERE cs.chapter_id = ch.id
AND ch.manga_id = $1
AND cs.source_id = $2
AND cs.dropped_at IS NULL
AND NOT (cs.source_chapter_key = ANY($3))
"#,
)
.bind(manga_id)
.bind(source_id)
.bind(&seen_keys)
.execute(&mut *tx)
.await?;
diff.dropped = result.rows_affected() as usize;
tx.commit().await?;
Ok(diff)
}
/// Number of live (`dropped_at IS NULL`) `chapter_sources` rows that
/// belong to the manga linked to `(source_id, source_manga_key)`.
///
/// The metadata pass uses this as a partial-render guard: when
/// `fetch_manga` hands back an empty `chapters` Vec although the source
/// previously surfaced chapters, the likeliest cause is a chromium
/// snapshot taken after the `#chapter_table` wrapper rendered but before
/// its rows did. Skipping `sync_manga_chapters` in that case keeps the
/// soft-drop branch from flipping every existing chapter to `dropped_at`.
///
/// Yields `Ok(0)` for a brand-new manga (no `manga_sources` row yet) —
/// the legitimate "no chapters yet" case, which must NOT be flagged.
pub async fn live_chapter_count_for_source_manga(
    pool: &PgPool,
    source_id: &str,
    source_manga_key: &str,
) -> sqlx::Result<i64> {
    const COUNT_LIVE: &str = "SELECT COUNT(*) \
        FROM chapter_sources cs \
        JOIN chapters c ON c.id = cs.chapter_id \
        JOIN manga_sources ms \
        ON ms.manga_id = c.manga_id \
        AND ms.source_id = cs.source_id \
        WHERE ms.source_id = $1 \
        AND ms.source_manga_key = $2 \
        AND cs.dropped_at IS NULL";

    let live: Option<i64> = sqlx::query_scalar(COUNT_LIVE)
        .bind(source_id)
        .bind(source_manga_key)
        .fetch_optional(pool)
        .await?;

    // Fall back to 0 should the query yield no row at all.
    Ok(live.unwrap_or(0))
}
/// Upserts the recovery flag row `last_run_completed:<source_id>` in
/// `crawler_state` with `{"completed": <completed>, "at": <now, RFC 3339>}`.
///
/// Shared by [`mark_run_started`] and [`mark_run_completed`] so the key
/// format and the JSON shape are defined in exactly one place.
async fn stamp_run_flag(pool: &PgPool, source_id: &str, completed: bool) -> sqlx::Result<()> {
    let key = format!("last_run_completed:{source_id}");
    sqlx::query(
        "INSERT INTO crawler_state (key, value, updated_at) \
         VALUES ($1, $2, now()) \
         ON CONFLICT (key) DO UPDATE \
         SET value = EXCLUDED.value, updated_at = now()",
    )
    .bind(&key)
    .bind(serde_json::json!({
        "completed": completed,
        "at": Utc::now().to_rfc3339(),
    }))
    .execute(pool)
    .await?;
    Ok(())
}

/// Mark a metadata pass as in-flight for `source_id`. Stamps
/// `last_run_completed:<source_id>` in `crawler_state` with
/// `{"completed": false, "at": now}`. A crash, panic, or SIGKILL after
/// this point leaves the flag at `false`, which the next tick reads as
/// "previous run did not exit cleanly — walk the full catalog this
/// time" (recovery sweep).
///
/// # Errors
/// Propagates any database error from the upsert.
pub async fn mark_run_started(pool: &PgPool, source_id: &str) -> sqlx::Result<()> {
    stamp_run_flag(pool, source_id, false).await
}

/// Mark a metadata pass as completed cleanly for `source_id`. Called
/// from the same place a run decides it reached end-of-walk or hit the
/// intentional stop. The next tick reads `true` and applies the normal
/// stop condition.
///
/// # Errors
/// Propagates any database error from the upsert.
pub async fn mark_run_completed(pool: &PgPool, source_id: &str) -> sqlx::Result<()> {
    stamp_run_flag(pool, source_id, true).await
}
/// Lists up to `max` mangas that have no cover (`cover_image_path IS
/// NULL`) yet are still attached to a source through a live
/// (`dropped_at IS NULL`) `manga_sources` row. The result feeds the
/// cover-backfill pass in [`crate::crawler::pipeline`].
///
/// `DISTINCT ON (m.id)` together with `ORDER BY m.id, ms.last_seen_at
/// DESC` keeps a single row per manga — the one with the newest
/// `last_seen_at` — so a manga surfaced by several sources appears once.
/// Rows come back ordered by manga id.
pub async fn list_missing_covers(
    pool: &PgPool,
    max: i64,
) -> sqlx::Result<Vec<MissingCoverEntry>> {
    const SELECT_MISSING: &str = r#"
        SELECT DISTINCT ON (m.id) m.id, ms.source_manga_key, ms.source_url
        FROM mangas m
        JOIN manga_sources ms ON ms.manga_id = m.id
        WHERE m.cover_image_path IS NULL
          AND ms.dropped_at IS NULL
        ORDER BY m.id, ms.last_seen_at DESC
        LIMIT $1
    "#;

    let fetched: Vec<(Uuid, String, String)> = sqlx::query_as(SELECT_MISSING)
        .bind(max)
        .fetch_all(pool)
        .await?;

    let mut entries = Vec::with_capacity(fetched.len());
    for (manga_id, source_manga_key, source_url) in fetched {
        entries.push(MissingCoverEntry {
            manga_id,
            source_manga_key,
            source_url,
        });
    }
    Ok(entries)
}
/// One row produced by [`list_missing_covers`]: a cover-less manga paired
/// with the source row selected for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MissingCoverEntry {
/// Id of the `mangas` row whose `cover_image_path` is NULL.
pub manga_id: Uuid,
/// `manga_sources.source_manga_key` of the selected source row.
pub source_manga_key: String,
/// `manga_sources.source_url` of the selected source row.
pub source_url: String,
}
/// Reads the recovery flag stored under `last_run_completed:<source_id>`.
///
/// Returns the boolean found at `"completed"` in the stored JSON. Both a
/// missing row and a value without a boolean `"completed"` field read as
/// `true` ("clean"): the first covers the very first run on a virgin DB
/// (no recovery needed), the second is forward-compat against future
/// schema changes. Either way the fail-safe direction is to spare the
/// operator an unnecessary full sweep.
pub async fn last_run_completed_cleanly(
    pool: &PgPool,
    source_id: &str,
) -> sqlx::Result<bool> {
    let key = format!("last_run_completed:{source_id}");

    let stored: Option<serde_json::Value> =
        sqlx::query_scalar("SELECT value FROM crawler_state WHERE key = $1")
            .bind(&key)
            .fetch_optional(pool)
            .await?;

    let flag = match stored {
        Some(value) => value.get("completed").and_then(serde_json::Value::as_bool),
        None => None,
    };
    Ok(flag.unwrap_or(true))
}
// ---------------------------------------------------------------------------
// Dead-letter jobs: admin observability + requeue.
// ---------------------------------------------------------------------------
/// A `dead` crawler job joined to its chapter/manga context for the admin
/// dead-letter view. Chapter columns are `Option` because the join is
/// best-effort (the chapter may have been removed since the job died, or
/// the job may be a non-chapter kind).
#[derive(Debug, Clone, Serialize, FromRow)]
pub struct DeadJob {
/// `crawler_jobs.id`.
pub id: Uuid,
/// The `kind` field of the job's JSON payload.
pub kind: String,
/// The payload's `chapter_id` field cast to a UUID, if present.
pub chapter_id: Option<Uuid>,
/// `chapters.manga_id` of the joined chapter, if the join matched.
pub manga_id: Option<Uuid>,
/// `mangas.title` of the joined manga, if the join matched.
pub manga_title: Option<String>,
/// `chapters.number` of the joined chapter, if the join matched.
pub chapter_number: Option<i32>,
/// `crawler_jobs.attempts`.
pub attempts: i32,
/// `crawler_jobs.max_attempts`.
pub max_attempts: i32,
/// `crawler_jobs.last_error`.
pub last_error: Option<String>,
/// `crawler_jobs.updated_at`; the list query sorts on it, newest first.
pub updated_at: DateTime<Utc>,
}
/// Escapes the characters that are special inside a Postgres `LIKE` /
/// `ILIKE` pattern (`\`, `%`, `_`) with a leading backslash — the default
/// escape character — so the text matches literally.
fn escape_like_literal(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

/// Paginated list of `dead` jobs, newest-failed first, joined to chapter +
/// manga context. `search` filters on manga title (case-insensitive
/// substring). Returns the page slice plus the unfiltered-by-page total.
///
/// The search term is trimmed; a blank term disables the filter. `%`, `_`
/// and `\` in the term are escaped so they match literally instead of
/// acting as `ILIKE` wildcards or escape characters. While a filter is
/// active, jobs without a joined manga title are excluded, because
/// `NULL ILIKE …` is not true.
///
/// # Errors
/// Propagates any database error from either query.
pub async fn list_dead_jobs(
    pool: &PgPool,
    search: Option<&str>,
    limit: i64,
    offset: i64,
) -> sqlx::Result<(Vec<DeadJob>, i64)> {
    let search_pat: Option<String> = search
        .map(str::trim)
        .filter(|term| !term.is_empty())
        .map(|term| format!("%{}%", escape_like_literal(term)));

    let items: Vec<DeadJob> = sqlx::query_as(
        r#"
        SELECT
            cj.id,
            cj.payload->>'kind' AS kind,
            (cj.payload->>'chapter_id')::uuid AS chapter_id,
            c.manga_id AS manga_id,
            m.title AS manga_title,
            c.number AS chapter_number,
            cj.attempts,
            cj.max_attempts,
            cj.last_error,
            cj.updated_at
        FROM crawler_jobs cj
        LEFT JOIN chapters c ON c.id = (cj.payload->>'chapter_id')::uuid
        LEFT JOIN mangas m ON m.id = c.manga_id
        WHERE cj.state = 'dead'
          AND ($1::text IS NULL OR m.title ILIKE $1)
        ORDER BY cj.updated_at DESC
        LIMIT $2 OFFSET $3
        "#,
    )
    .bind(&search_pat)
    .bind(limit)
    .bind(offset)
    .fetch_all(pool)
    .await?;

    // Same joins and filter as above, without paging, for the total.
    let total: i64 = sqlx::query_scalar(
        r#"
        SELECT COUNT(*)
        FROM crawler_jobs cj
        LEFT JOIN chapters c ON c.id = (cj.payload->>'chapter_id')::uuid
        LEFT JOIN mangas m ON m.id = c.manga_id
        WHERE cj.state = 'dead'
          AND ($1::text IS NULL OR m.title ILIKE $1)
        "#,
    )
    .bind(&search_pat)
    .fetch_one(pool)
    .await?;

    Ok((items, total))
}
/// Scope of a dead-job requeue, consumed by [`requeue_dead_jobs`].
#[derive(Debug, Clone)]
pub enum RequeueScope {
/// Every dead job.
All,
/// Dead jobs whose chapter belongs to this manga (the payload's
/// `chapter_id` is matched against the manga's `chapters` rows).
Manga(Uuid),
/// Dead jobs for a single chapter (matched on the payload's `chapter_id`).
Chapter(Uuid),
/// A single dead job by its id.
Job(Uuid),
}
/// Requeue dead jobs back to `pending` with a fresh attempt budget. This is
/// an explicit operator override, so it bypasses the dead-letter quarantine
/// the enqueue helpers honour (we act directly on the row). Skips any dead
/// job whose chapter already has a `pending`/`running` job so the partial
/// dedup index is never violated. Returns the number of rows requeued.
pub async fn requeue_dead_jobs(pool: &PgPool, scope: RequeueScope) -> sqlx::Result<u64> {
// Guard against resurrecting a dead job when a live one already covers
// the same chapter (would otherwise hit the dedup unique index).
const NO_LIVE_DUP: &str = r#"
AND NOT EXISTS (
SELECT 1 FROM crawler_jobs live
WHERE live.payload->>'kind' = 'sync_chapter_content'
AND live.payload->>'chapter_id' = crawler_jobs.payload->>'chapter_id'
AND live.state IN ('pending','running')
)
"#;
const SET: &str = "SET state = 'pending', attempts = 0, leased_until = NULL, \
last_error = NULL, scheduled_at = now(), updated_at = now()";
let affected = match scope {
RequeueScope::All => {
sqlx::query(&format!(
"UPDATE crawler_jobs {SET} WHERE state = 'dead' {NO_LIVE_DUP}"
))
.execute(pool)
.await?
.rows_affected()
}
RequeueScope::Manga(manga_id) => {
sqlx::query(&format!(
"UPDATE crawler_jobs {SET} \
WHERE state = 'dead' \
AND (payload->>'chapter_id')::uuid IN \
(SELECT id FROM chapters WHERE manga_id = $1) \
{NO_LIVE_DUP}"
))
.bind(manga_id)
.execute(pool)
.await?
.rows_affected()
}
RequeueScope::Chapter(chapter_id) => {
sqlx::query(&format!(
"UPDATE crawler_jobs {SET} \
WHERE state = 'dead' \
AND (payload->>'chapter_id')::uuid = $1 \
{NO_LIVE_DUP}"
))
.bind(chapter_id)
.execute(pool)
.await?
.rows_affected()
}
RequeueScope::Job(job_id) => {
sqlx::query(&format!(
"UPDATE crawler_jobs {SET} WHERE state = 'dead' AND id = $1 {NO_LIVE_DUP}"
))
.bind(job_id)
.execute(pool)
.await?
.rows_affected()
}
};
Ok(affected)
}
/// Tallies crawler jobs per state for the dashboard queue gauges.
///
/// Returns `(pending, running, dead)`. A state with no rows counts as 0,
/// and any other state value returned by the query is ignored.
pub async fn job_state_counts(pool: &PgPool) -> sqlx::Result<(i64, i64, i64)> {
    let tallies: Vec<(String, i64)> =
        sqlx::query_as("SELECT state, COUNT(*) FROM crawler_jobs GROUP BY state")
            .fetch_all(pool)
            .await?;

    let (mut pending, mut running, mut dead) = (0, 0, 0);
    for (state, count) in tallies {
        let gauge = match state.as_str() {
            "pending" => &mut pending,
            "running" => &mut running,
            "dead" => &mut dead,
            _ => continue,
        };
        *gauge = count;
    }

    Ok((pending, running, dead))
}