fix(crawler): queue chapter content in ascending number order (0.51.1)
Both enqueue paths now order by chapters.number so the cron tick and the bookmark hook insert jobs from chapter 1 upward instead of source-discovery or random-UUID order. The lease query tiebreaks on created_at so jobs sharing a batch's scheduled_at come off the queue in insertion order, propagating the enqueue intent through to dequeue. Concurrent workers and per-CDN latency can still drift actual completion order. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -517,3 +517,132 @@ async fn enqueue_bookmarked_pending_resumes_after_quarantine_expires(pool: PgPoo
|
||||
);
|
||||
}
|
||||
|
||||
/// Helper: insert a chapter with the given `number` and a non-dropped
|
||||
/// source row, returning the chapter id. Used by the ordering tests so
|
||||
/// the setup boilerplate doesn't drown the assertion.
|
||||
async fn insert_pending_chapter(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
number: i32,
|
||||
source_chapter_key: &str,
|
||||
) -> Uuid {
|
||||
let chapter_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, $2, 0) RETURNING id",
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(number)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url) \
|
||||
VALUES ($1, $2, $3, $4)",
|
||||
)
|
||||
.bind("target")
|
||||
.bind(source_chapter_key)
|
||||
.bind(chapter_id)
|
||||
.bind(format!("https://example.com/{source_chapter_key}"))
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
chapter_id
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn enqueue_bookmarked_pending_queues_chapters_in_ascending_number_order(pool: PgPool) {
|
||||
// Insert chapters with `number` values 3, 1, 2 in that insertion
|
||||
// order — so `created_at` order (the previous tiebreaker) does NOT
|
||||
// match number order. After enqueue + lease, the worker should see
|
||||
// chapters 1, 2, 3 in that sequence.
|
||||
let user_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO users (username, password_hash) VALUES ($1, $2) RETURNING id",
|
||||
)
|
||||
.bind("alice")
|
||||
.bind("not-a-real-hash")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let manga_id: Uuid = sqlx::query_scalar("INSERT INTO mangas (title) VALUES ($1) RETURNING id")
|
||||
.bind("Test")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
|
||||
)
|
||||
.bind("target")
|
||||
.bind("Target")
|
||||
.bind("https://example.com")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let c3 = insert_pending_chapter(&pool, manga_id, 3, "ch3").await;
|
||||
let c1 = insert_pending_chapter(&pool, manga_id, 1, "ch1").await;
|
||||
let c2 = insert_pending_chapter(&pool, manga_id, 2, "ch2").await;
|
||||
sqlx::query("INSERT INTO bookmarks (user_id, manga_id) VALUES ($1, $2)")
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let summary = pipeline::enqueue_bookmarked_pending(&pool).await.unwrap();
|
||||
assert_eq!(summary.inserted, 3);
|
||||
|
||||
let leases = jobs::lease(&pool, None, 10, std::time::Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
let leased_chapter_ids: Vec<Uuid> = leases
|
||||
.iter()
|
||||
.map(|l| match &l.payload {
|
||||
JobPayload::SyncChapterContent { chapter_id, .. } => *chapter_id,
|
||||
other => panic!("unexpected payload kind: {other:?}"),
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
leased_chapter_ids,
|
||||
vec![c1, c2, c3],
|
||||
"chapters must be leased in ascending chapter-number order, not insertion order"
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn enqueue_pending_for_manga_queues_chapters_in_ascending_number_order(pool: PgPool) {
|
||||
// Same scenario as above but exercising the bookmark-create hook path
|
||||
// (`enqueue_pending_for_manga`) which has its own ORDER BY.
|
||||
let manga_id: Uuid = sqlx::query_scalar("INSERT INTO mangas (title) VALUES ($1) RETURNING id")
|
||||
.bind("Test")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
|
||||
)
|
||||
.bind("target")
|
||||
.bind("Target")
|
||||
.bind("https://example.com")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let c3 = insert_pending_chapter(&pool, manga_id, 3, "ch3").await;
|
||||
let c1 = insert_pending_chapter(&pool, manga_id, 1, "ch1").await;
|
||||
let c2 = insert_pending_chapter(&pool, manga_id, 2, "ch2").await;
|
||||
|
||||
let summary = pipeline::enqueue_pending_for_manga(&pool, manga_id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(summary.inserted, 3);
|
||||
|
||||
let leases = jobs::lease(&pool, None, 10, std::time::Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
let leased_chapter_ids: Vec<Uuid> = leases
|
||||
.iter()
|
||||
.map(|l| match &l.payload {
|
||||
JobPayload::SyncChapterContent { chapter_id, .. } => *chapter_id,
|
||||
other => panic!("unexpected payload kind: {other:?}"),
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(leased_chapter_ids, vec![c1, c2, c3]);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user