feat(crawler): live cover + chapter-content observability with realtime page counts
Extends the live dashboard so an operator can see exactly what is being fetched, in real time:

- Chapters: chapters currently being crawled are tracked in the status as `active_chapters` (manga title · ch.N), each with a live page counter that climbs per stored page (`set_chapter_pages`, pushed via the existing watch→SSE). The dispatcher registers each chapter via an RAII `ChapterGuard` (sync Mutex) that removes the entry on completion, panic, or timeout-drop — replacing the old per-worker slot model.
- Covers: the status now carries the cover currently being fetched (`current_cover`, set around `download_and_store_cover` in both the metadata pass and the backfill) and a `covers_queued` backlog count; the `CoverBackfill` phase gains `index`/`total`.
- Backlog endpoints: two paginated endpoints (fetched on demand, auto-refreshed when the live counts change): `GET /admin/crawler/active-jobs` (which chapters of which mangas are queued/running) and `GET /admin/crawler/covers` (mangas missing a cover). repo: `list_active_jobs`, `list_missing_cover_mangas`, `count_missing_covers`.
- `dispatch_target` now also returns the manga title and chapter number.

Frontend: the crawler page replaces the Workers table with an Active-chapters table (live page bars), adds a current-cover line and a covers-queued figure, and adds two backlog sections (Queued chapters / Queued covers) with search + Pager, auto-refetched via `$effect` on the live counts.

Tests: status guard/page + cover unit tests; repo list/count tests; endpoint tests; frontend api tests.

Version 0.53.1 -> 0.54.0.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -349,7 +349,14 @@ pub async fn run_metadata_pass(
|
||||
|| matches!(upsert.status, repo::crawler::UpsertStatus::Updated);
|
||||
if needs_cover {
|
||||
if let Some(cover_url) = manga.cover_url.as_deref() {
|
||||
match download_and_store_cover(
|
||||
if let Some(s) = status {
|
||||
s.set_current_cover(Some(crate::crawler::status::CoverTarget {
|
||||
manga_id: upsert.manga_id,
|
||||
manga_title: manga.title.clone(),
|
||||
}))
|
||||
.await;
|
||||
}
|
||||
let cover_result = download_and_store_cover(
|
||||
db,
|
||||
storage,
|
||||
http,
|
||||
@@ -360,8 +367,11 @@ pub async fn run_metadata_pass(
|
||||
allowlist,
|
||||
max_image_bytes,
|
||||
)
|
||||
.await
|
||||
{
|
||||
.await;
|
||||
if let Some(s) = status {
|
||||
s.set_current_cover(None).await;
|
||||
}
|
||||
match cover_result {
|
||||
Ok(()) => stats.covers_fetched += 1,
|
||||
Err(e) => tracing::warn!(
|
||||
manga_id = %upsert.manga_id,
|
||||
@@ -615,6 +625,7 @@ pub async fn backfill_missing_covers(
|
||||
max_mangas: usize,
|
||||
allowlist: &DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
status: Option<&crate::crawler::status::StatusHandle>,
|
||||
tor: Option<&crate::crawler::tor::TorController>,
|
||||
) -> anyhow::Result<CoverBackfillStats> {
|
||||
let mut stats = CoverBackfillStats::default();
|
||||
@@ -637,8 +648,13 @@ pub async fn backfill_missing_covers(
|
||||
let browser_ref: &chromiumoxide::Browser = &lease;
|
||||
let ctx = FetchContext { browser: browser_ref, rate, tor };
|
||||
|
||||
for entry in entries {
|
||||
let total = entries.len();
|
||||
for (index, entry) in entries.into_iter().enumerate() {
|
||||
stats.considered += 1;
|
||||
if let Some(s) = status {
|
||||
s.set_phase(crate::crawler::status::Phase::CoverBackfill { index, total })
|
||||
.await;
|
||||
}
|
||||
// Metadata-only TargetSource: skip chapter-list parsing so a
|
||||
// missing-cover refetch doesn't soft-drop chapters on a partial
|
||||
// render. Cover URL alone is what we need.
|
||||
@@ -648,8 +664,8 @@ pub async fn backfill_missing_covers(
|
||||
title: String::new(),
|
||||
url: entry.source_url.clone(),
|
||||
};
|
||||
let cover_url = match source.fetch_manga(&ctx, &r).await {
|
||||
Ok(manga) => manga.cover_url,
|
||||
let manga = match source.fetch_manga(&ctx, &r).await {
|
||||
Ok(manga) => manga,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
manga_id = %entry.manga_id,
|
||||
@@ -661,7 +677,7 @@ pub async fn backfill_missing_covers(
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let Some(cover_url) = cover_url else {
|
||||
let Some(cover_url) = manga.cover_url.clone() else {
|
||||
tracing::warn!(
|
||||
manga_id = %entry.manga_id,
|
||||
url = %entry.source_url,
|
||||
@@ -670,7 +686,14 @@ pub async fn backfill_missing_covers(
|
||||
stats.failed += 1;
|
||||
continue;
|
||||
};
|
||||
match download_and_store_cover(
|
||||
if let Some(s) = status {
|
||||
s.set_current_cover(Some(crate::crawler::status::CoverTarget {
|
||||
manga_id: entry.manga_id,
|
||||
manga_title: manga.title.clone(),
|
||||
}))
|
||||
.await;
|
||||
}
|
||||
let cover_result = download_and_store_cover(
|
||||
db,
|
||||
storage,
|
||||
http,
|
||||
@@ -681,8 +704,11 @@ pub async fn backfill_missing_covers(
|
||||
allowlist,
|
||||
max_image_bytes,
|
||||
)
|
||||
.await
|
||||
{
|
||||
.await;
|
||||
if let Some(s) = status {
|
||||
s.set_current_cover(None).await;
|
||||
}
|
||||
match cover_result {
|
||||
Ok(()) => stats.fetched += 1,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
|
||||
Reference in New Issue
Block a user