fix(crawler): review findings — requeue dedup, restart result, session validation
- requeue_dead_jobs: when a chapter has multiple dead jobs, revive only the newest (DISTINCT ON the chapter key) so a single UPDATE can't flip two dead rows for one chapter to pending and violate the partial unique dedup index (was a 500 that requeued nothing). Non-chapter jobs fall back to row id. Regression test added. (critical) - coordinated_restart: a caller that coalesces into an in-progress restart now reports that restart's real outcome instead of a blind success, so the session-update "valid" / restart "ok" signal can't be falsely positive. - SessionController::update: reject control chars / ';' / ',' in PHPSESSID before it reaches the cookie string + CDP cookie. Test added. - Add non-admin 403 test on a mutating crawler endpoint; fix stale stream-to-storage doc comment. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -121,6 +121,24 @@ async fn control_endpoints_return_503_when_daemon_disabled(pool: PgPool) {
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn mutating_endpoints_reject_non_admin(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
// A logged-in non-admin must be forbidden from a mutating endpoint.
|
||||
let (_u, cookie) = register_user(&h.app).await;
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(post_json_with_cookie(
|
||||
"/api/v1/admin/crawler/dead-jobs/requeue",
|
||||
json!({ "scope": "all" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn dead_jobs_list_and_requeue_over_http(pool: PgPool) {
|
||||
let job_id = seed_dead_job(&pool, "Bleach").await;
|
||||
|
||||
@@ -170,6 +170,38 @@ async fn requeue_skips_dead_when_live_job_exists_for_same_chapter(pool: PgPool)
|
||||
assert_eq!(state_of(&pool, dead).await, "dead");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn requeue_with_two_dead_jobs_for_one_chapter_revives_one_not_500(pool: PgPool) {
|
||||
// Regression: two dead jobs for the SAME chapter must not both flip to
|
||||
// pending in one statement — that would violate the partial unique
|
||||
// dedup index and abort the whole requeue.
|
||||
let (manga_id, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
let older = insert_job(&pool, c1, "dead", 5).await;
|
||||
let newer = insert_job(&pool, c1, "dead", 5).await;
|
||||
// Make `newer` unambiguously newer.
|
||||
sqlx::query("UPDATE crawler_jobs SET updated_at = now() - interval '1 hour' WHERE id = $1")
|
||||
.bind(older)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for scope in [RequeueScope::All, RequeueScope::Manga(manga_id), RequeueScope::Chapter(c1)] {
|
||||
// Reset to two-dead before each scope variant.
|
||||
sqlx::query("UPDATE crawler_jobs SET state = 'dead' WHERE id = ANY($1)")
|
||||
.bind(vec![older, newer])
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let n = crawler::requeue_dead_jobs(&pool, scope)
|
||||
.await
|
||||
.expect("requeue must not error on duplicate dead jobs");
|
||||
assert_eq!(n, 1, "exactly one dead job per chapter is revived");
|
||||
// The newest one is the survivor; the other stays dead.
|
||||
assert_eq!(state_of(&pool, newer).await, "pending");
|
||||
assert_eq!(state_of(&pool, older).await, "dead");
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn job_state_counts_groups_by_state(pool: PgPool) {
|
||||
let (_m, c1) = seed_chapter(&pool, "A", 1).await;
|
||||
|
||||
Reference in New Issue
Block a user