feat(crawler): single-mode walker gated by recovery flag (0.36.0)
Collapses the crawler to a single newest-first walker and replaces the N-consecutive-unchanged streak with a per-manga rule: stop on the first manga where metadata is Unchanged AND chapter sync reports zero new chapters.

The early stop is gated by a per-source recovery flag stored in `crawler_state` — set to `false` when a run starts, back to `true` only on a clean exit (end-of-walk or intentional stop). A crashed run leaves the flag `false` automatically (no shutdown code runs), so the next tick walks the full catalog instead of bailing at the first caught-up manga. This means a crashed mid-walk run self-heals on the next tick: the flag stays `false`, the next walk visits every page (recovering anything the crash missed past its crash point), and steady state resumes once the recovery sweep reaches end-of-walk.

Removed:
- DiscoverMode enum, Backfill mode, the boundary re-check + displaced-refs machinery in TargetSourceWalker.
- Drop-pass (mark_dropped_mangas) and seed-completion plumbing (mark_seed_completed / seed_completed_at). The recovery flag subsumes the seed-completion signal; drop detection was explicitly opted out.
- JobPayload::Discover (no production callers).
- CRAWLER_MODE / CRAWLER_INCREMENTAL_STOP_AFTER env vars and the CrawlerModePref config type.

`should_mark_clean_exit(walked_to_completion, hit_stop_condition)` encodes the clean-exit truth table in its signature — `hit_limit` is deliberately absent so a future edit cannot accidentally count a caller-imposed cap as a clean exit.

Net -501 lines, 261 backend tests passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,7 +9,6 @@ use std::time::Duration;
|
||||
use mangalord::crawler::jobs::{
|
||||
self, EnqueueResult, JobPayload, KIND_SYNC_CHAPTER_CONTENT,
|
||||
};
|
||||
use mangalord::crawler::source::DiscoverMode;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -21,10 +20,13 @@ fn chapter_content_payload(chapter_id: Uuid) -> JobPayload {
|
||||
}
|
||||
}
|
||||
|
||||
fn discover_payload() -> JobPayload {
|
||||
JobPayload::Discover {
|
||||
/// A non-`SyncChapterContent` payload, used to assert that only the
|
||||
/// chapter-content kind is deduplicated by the partial index and that
|
||||
/// `lease`'s kind filter correctly excludes other kinds.
|
||||
fn sync_manga_payload(key: &str) -> JobPayload {
|
||||
JobPayload::SyncManga {
|
||||
source_id: "target".into(),
|
||||
mode: DiscoverMode::Backfill,
|
||||
source_manga_key: key.into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,7 +143,7 @@ async fn different_chapter_ids_can_coexist(pool: PgPool) {
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn non_chapter_content_payloads_are_never_deduped(pool: PgPool) {
|
||||
let p = discover_payload();
|
||||
let p = sync_manga_payload("foo");
|
||||
assert!(matches!(
|
||||
jobs::enqueue(&pool, &p).await.unwrap(),
|
||||
EnqueueResult::Inserted(_)
|
||||
@@ -185,7 +187,10 @@ async fn lease_marks_running_and_bumps_attempts_and_sets_leased_until(pool: PgPo
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
||||
let discover_id = match jobs::enqueue(&pool, &discover_payload()).await.unwrap() {
|
||||
let manga_id = match jobs::enqueue(&pool, &sync_manga_payload("foo"))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
@@ -207,8 +212,8 @@ async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
||||
.unwrap();
|
||||
assert_eq!(leases.len(), 1, "only chapter content payload leases");
|
||||
assert_eq!(leases[0].id, chapter_id);
|
||||
// discover is still pending
|
||||
assert_eq!(job_state(&pool, discover_id).await, "pending");
|
||||
// sync_manga is still pending
|
||||
assert_eq!(job_state(&pool, manga_id).await, "pending");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
|
||||
Reference in New Issue
Block a user