fix(crawler): queue chapter content in ascending number order (0.51.1)
Both enqueue paths now order by chapters.number, so the cron tick and the bookmark hook insert jobs from chapter 1 upward instead of in source-discovery or random-UUID order. The lease query breaks ties on created_at, so jobs that share a batch's scheduled_at come off the queue in insertion order, carrying the enqueue intent through to dequeue. Concurrent workers and per-CDN latency can still cause the actual completion order to drift.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -531,6 +531,89 @@ async fn reap_done_deletes_old_rows_keeps_fresh(pool: PgPool) {
|
||||
assert_eq!(remaining, vec![fresh_id], "only fresh row remains");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn lease_ties_on_scheduled_at_break_by_created_at(pool: PgPool) {
|
||||
// Locks in the tiebreaker that lets enqueue order survive the lease
|
||||
// step: when many jobs share `scheduled_at` (the common cron-batch
|
||||
// case), the worker must pick the earliest-inserted row, not whatever
|
||||
// Postgres returns in heap order. The enqueue path inserts chapters
|
||||
// in chapter-number order, so this tiebreaker is what makes "queue
|
||||
// in rising order" observable at the dequeue side too.
|
||||
let a = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let b = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let c = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// Pin `scheduled_at` to a single literal instant (shared across all
|
||||
// three rows — `now()` would yield a different microsecond per UPDATE
|
||||
// and make scheduled_at the actual sort key). Reverse `created_at`
|
||||
// against insertion order so heap order would give the wrong answer.
|
||||
let shared_scheduled = chrono::Utc::now() - chrono::Duration::hours(1);
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET scheduled_at = $2, \
|
||||
created_at = $3 \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(a)
|
||||
.bind(shared_scheduled)
|
||||
.bind(chrono::Utc::now() - chrono::Duration::seconds(10))
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET scheduled_at = $2, \
|
||||
created_at = $3 \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(b)
|
||||
.bind(shared_scheduled)
|
||||
.bind(chrono::Utc::now() - chrono::Duration::seconds(20))
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET scheduled_at = $2, \
|
||||
created_at = $3 \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(c)
|
||||
.bind(shared_scheduled)
|
||||
.bind(chrono::Utc::now() - chrono::Duration::seconds(30))
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let leases = jobs::lease(&pool, None, 10, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
let order: Vec<Uuid> = leases.iter().map(|l| l.id).collect();
|
||||
assert_eq!(
|
||||
order,
|
||||
vec![c, b, a],
|
||||
"lease must return jobs in created_at order when scheduled_at ties"
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn reap_done_zero_is_a_no_op(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
|
||||
Reference in New Issue
Block a user