fix(crawler): queue chapter content in ascending number order (0.51.1)

Both enqueue paths now order by chapters.number so the cron tick and the
bookmark hook insert jobs from chapter 1 upward instead of source-discovery
or random-UUID order. The lease query tiebreaks on created_at so jobs
sharing a batch's scheduled_at come off the queue in insertion order,
propagating the enqueue intent through to dequeue. Concurrent workers
and per-CDN latency can still drift actual completion order.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-02 21:13:51 +02:00
parent 8818c890c5
commit e93eec89e5
7 changed files with 227 additions and 8 deletions

View File

@@ -104,6 +104,12 @@ pub async fn enqueue(pool: &PgPool, payload: &JobPayload) -> sqlx::Result<Enqueu
///
/// `kind_filter` matches against `payload->>'kind'`; `None` means
/// any kind.
///
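/// Ties on `scheduled_at` (the common case: a cron batch enqueues
/// everything with the same default `now()`) are broken by `created_at`,
/// so jobs come off the queue in insertion order. The enqueue paths insert
/// chapter-content jobs in ascending `chapters.number` order, so this
/// tiebreaker is what propagates that intent through to dequeue.
///
/// NOTE(review): this relies on `created_at` differing between rows of a
/// batch. PostgreSQL's `now()` is fixed at transaction start, so if
/// `created_at` also defaults to `now()` and a batch is inserted inside a
/// single transaction, those rows tie on both columns and their relative
/// order is unspecified — confirm how `created_at` is populated.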
pub async fn lease(
pool: &PgPool,
kind_filter: Option<&str>,
@@ -118,7 +124,7 @@ pub async fn lease(
WHERE (state = 'pending' OR (state = 'running' AND leased_until < now()))
AND scheduled_at <= now()
AND ($1::text IS NULL OR payload->>'kind' = $1)
ORDER BY scheduled_at
ORDER BY scheduled_at, created_at
LIMIT $2
FOR UPDATE SKIP LOCKED
)

View File

@@ -429,8 +429,8 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
AND cj.state = 'dead'
AND cj.updated_at > now() - ($1::bigint || ' days')::interval
)
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at
ORDER BY c.manga_id, c.created_at ASC
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.number, c.created_at
ORDER BY c.manga_id, c.number ASC, c.created_at ASC
"#,
)
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
@@ -471,7 +471,7 @@ pub async fn enqueue_pending_for_manga(
) -> anyhow::Result<EnqueueSummary> {
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
r#"
SELECT DISTINCT cs.source_id, c.id AS chapter_id, cs.source_chapter_key
SELECT cs.source_id, c.id AS chapter_id, cs.source_chapter_key
FROM chapters c
JOIN chapter_sources cs ON cs.chapter_id = c.id
WHERE c.manga_id = $1
@@ -484,7 +484,8 @@ pub async fn enqueue_pending_for_manga(
AND cj.state = 'dead'
AND cj.updated_at > now() - ($2::bigint || ' days')::interval
)
ORDER BY cs.source_id, c.id
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.number, c.created_at
ORDER BY c.number ASC, c.created_at ASC, cs.source_id
"#,
)
.bind(manga_id)