fix(crawler): review findings — requeue dedup, restart result, session validation

- requeue_dead_jobs: when a chapter has multiple dead jobs, revive only the
  newest (DISTINCT ON the chapter key) so a single UPDATE can't flip two
  dead rows for one chapter to pending and violate the partial unique dedup
  index (previously the request failed with a 500 and requeued nothing).
  Non-chapter jobs fall back to row id. Regression test added. (critical)
- coordinated_restart: a caller that coalesces into an in-progress restart
  now reports that restart's real outcome instead of a blind success, so the
  session-update "valid" / restart "ok" signal can't be falsely positive.
- SessionController::update: reject control chars / ';' / ',' in PHPSESSID
  before it reaches the cookie string + CDP cookie. Test added.
- Add non-admin 403 test on a mutating crawler endpoint; fix stale
  stream-to-storage doc comment.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-03 21:07:10 +02:00
parent ec0a8f2b5d
commit 832042d2b7
6 changed files with 169 additions and 67 deletions

View File

@@ -121,6 +121,24 @@ async fn control_endpoints_return_503_when_daemon_disabled(pool: PgPool) {
}
}
// Authorization guard: a signed-in account without admin rights must get a
// 403 when it calls a mutating crawler endpoint.
#[sqlx::test(migrations = "./migrations")]
async fn mutating_endpoints_reject_non_admin(pool: PgPool) {
    let ctx = harness(pool);

    // Register a regular user; only the session cookie is needed to
    // authenticate the request below.
    let (_user, session_cookie) = register_user(&ctx.app).await;

    // Build the mutating request up front so the dispatch reads linearly.
    let request = post_json_with_cookie(
        "/api/v1/admin/crawler/dead-jobs/requeue",
        json!({ "scope": "all" }),
        &session_cookie,
    );

    let response = ctx.app.clone().oneshot(request).await.unwrap();

    assert_eq!(response.status(), StatusCode::FORBIDDEN);
}
#[sqlx::test(migrations = "./migrations")]
async fn dead_jobs_list_and_requeue_over_http(pool: PgPool) {
let job_id = seed_dead_job(&pool, "Bleach").await;