feat: crawler scaffold with chromium launcher (0.22.0)
- crawler module (browser, source trait, jobs, diff) + binary
- chromiumoxide launcher with fetcher feature (auto-downloads Chromium on first run, caches under ~/.cache/mangalord/chromium)
- LaunchOptions struct with extra_args, parseable from CRAWLER_BROWSER_MODE and CRAWLER_BROWSER_ARGS
- migration 0012 introduces sources, manga_sources, chapter_sources, crawler_jobs
- integration tests for headed + headless launch, ipify load+parse, and extra-args propagation (all #[ignore], opt-in)
This commit is contained in:
55
backend/src/crawler/jobs.rs
Normal file
55
backend/src/crawler/jobs.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
//! Persistent job queue and the four job kinds.
|
||||
//!
|
||||
//! Backed by Postgres (the `crawler_jobs` table). Workers lease rows
|
||||
//! with `SELECT ... FOR UPDATE SKIP LOCKED`, heartbeat via
|
||||
//! `leased_until`, and ack by transitioning to `done` (or backoff /
|
||||
//! `dead`). Handlers are idempotent so a crash mid-run is recoverable
|
||||
//! by replay.
|
||||
//!
|
||||
//! Scaffold only — the actual queue wrapper and handler dispatch land
|
||||
//! once we have the first `Source` impl exercising the pipeline.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::source::DiscoverMode;
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||
pub enum JobPayload {
|
||||
/// Walk the source index and enqueue `SyncManga` jobs.
|
||||
Discover {
|
||||
source_id: String,
|
||||
mode: DiscoverMode,
|
||||
},
|
||||
/// Fetch one manga's detail page, upsert metadata, enqueue
|
||||
/// `SyncChapterList`.
|
||||
SyncManga {
|
||||
source_id: String,
|
||||
source_manga_key: String,
|
||||
},
|
||||
/// Diff the chapter list, enqueue `SyncChapterContent` for new
|
||||
/// chapters, soft-drop vanished ones.
|
||||
SyncChapterList {
|
||||
source_id: String,
|
||||
manga_id: Uuid,
|
||||
source_manga_key: String,
|
||||
},
|
||||
/// Download a single chapter's page images into storage.
|
||||
SyncChapterContent {
|
||||
source_id: String,
|
||||
chapter_id: Uuid,
|
||||
source_chapter_key: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, sqlx::Type, Serialize, Deserialize)]
|
||||
#[sqlx(type_name = "text", rename_all = "snake_case")]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum JobState {
|
||||
Pending,
|
||||
Running,
|
||||
Done,
|
||||
Failed,
|
||||
Dead,
|
||||
}
|
||||
Reference in New Issue
Block a user