feat: cover retry backfill + admin force-resync for manga & chapter (0.50.0)

Adds a per-tick cover-backfill pass to the crawler daemon so mangas whose
cover download failed on first attempt get retried — the metadata pass's
early-stop optimisation otherwise prevents the walk from revisiting them.

Adds admin-only POST /admin/mangas/:id/resync and POST /admin/chapters/:id/resync
that refetch metadata + cover (or chapter content with force_refetch) from the
crawler source synchronously and return the refreshed row. Surfaced in the
UI as "Force resync" buttons on the manga detail and reader pages,
admin-only via session.user.is_admin.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-01 22:00:09 +02:00
parent 5c22dfdb41
commit c134bdbbde
19 changed files with 1505 additions and 17 deletions

View File

@@ -13,7 +13,7 @@ use crate::crawler::jobs::{self, EnqueueResult, JobPayload};
use crate::crawler::rate_limit::HostRateLimiters;
use crate::crawler::safety::{fetch_bytes_capped, looks_like_image, DownloadAllowlist};
use crate::crawler::source::target::TargetSource;
use crate::crawler::source::{FetchContext, Source};
use crate::crawler::source::{FetchContext, Source, SourceMangaRef};
use crate::repo;
use crate::repo::crawler::UpsertStatus;
use crate::storage::Storage;
@@ -523,12 +523,133 @@ pub struct EnqueueSummary {
pub failed: usize,
}
/// Per-run counters reported by [`backfill_missing_covers`].
///
/// All counters start at zero via `Default`; the type is `Copy` so a
/// caller can log or aggregate it without giving up ownership.
#[derive(Clone, Copy, Debug, Default)]
pub struct CoverBackfillStats {
    /// Number of missing-cover entries the run iterated over.
    pub considered: usize,
    /// Number of entries whose cover was downloaded and stored.
    pub fetched: usize,
    /// Number of entries that did not end with a stored cover: the source
    /// detail fetch failed, the source returned no cover URL, or the
    /// download itself failed.
    pub failed: usize,
}
/// Default per-tick cap for [`backfill_missing_covers`], intended as the
/// value of its `max_mangas` argument (a count of mangas, not bytes or
/// requests).
///
/// The metadata pass already retries covers when its walk reaches the
/// affected manga; this backfill exists to catch the residual case where
/// the early-stop optimisation prevents the walk from reaching mangas
/// whose cover failed on first attempt. A small cap is enough because the
/// backlog only grows from sporadic download failures, not from
/// systematic misses.
pub const COVER_BACKFILL_DEFAULT_MAX: usize = 10;
/// Re-attempt cover downloads for mangas where `cover_image_path IS NULL`
/// but a live `manga_sources` row exists. Refetches the source detail
/// page (which is where the cover URL lives) and downloads the cover.
///
/// Bounded by `max_mangas` per call so a steady stream of failing covers
/// — e.g. a CDN host that's persistently 502 — can't monopolise a cron
/// tick. Candidate selection and ordering are delegated to
/// `repo::crawler::list_missing_covers` (documented as
/// `manga_sources.last_seen_at DESC`, so the freshest missing-cover
/// mangas are addressed first).
///
/// Per-manga failures are logged and counted, not raised: a single bad
/// cover URL must not stall every other backfill behind it.
///
/// Returns early with zeroed stats — without acquiring a browser lease —
/// when `max_mangas` is `0` or when there is nothing to backfill.
///
/// # Errors
///
/// Fails only when the candidate list cannot be loaded or the browser
/// lease cannot be acquired; both happen before any manga is processed.
// One argument per injected dependency; grouping them into a struct would
// change the call sites, so the lint is silenced instead.
#[allow(clippy::too_many_arguments)]
pub async fn backfill_missing_covers(
    browser_manager: &BrowserManager,
    db: &PgPool,
    storage: &dyn Storage,
    http: &reqwest::Client,
    rate: &HostRateLimiters,
    max_mangas: usize,
    allowlist: &DownloadAllowlist,
    max_image_bytes: usize,
    tor: Option<&crate::crawler::tor::TorController>,
) -> anyhow::Result<CoverBackfillStats> {
    let mut stats = CoverBackfillStats::default();
    if max_mangas == 0 {
        return Ok(stats);
    }

    // Saturate instead of `as`-casting: a `usize` above `i64::MAX` would
    // otherwise wrap to a negative row limit.
    let limit = i64::try_from(max_mangas).unwrap_or(i64::MAX);
    let entries = repo::crawler::list_missing_covers(db, limit)
        .await
        .context("list_missing_covers")?;
    if entries.is_empty() {
        // Nothing to do: skip the browser lease entirely.
        return Ok(stats);
    }

    // A single lease is shared by every refetch in this pass.
    let lease = browser_manager
        .acquire()
        .await
        .context("acquire browser lease for cover backfill")?;
    let browser_ref: &chromiumoxide::Browser = &lease;
    let ctx = FetchContext { browser: browser_ref, rate, tor };

    for entry in entries {
        stats.considered += 1;

        // Metadata-only TargetSource: skip chapter-list parsing so a
        // missing-cover refetch doesn't soft-drop chapters on a partial
        // render. Cover URL alone is what we need.
        let source = TargetSource::new(entry.source_url.clone()).without_chapter_parsing();
        let r = SourceMangaRef {
            source_manga_key: entry.source_manga_key.clone(),
            // Left empty: this pass only consumes `cover_url` from the result.
            title: String::new(),
            url: entry.source_url.clone(),
        };

        let cover_url = match source.fetch_manga(&ctx, &r).await {
            Ok(manga) => manga.cover_url,
            Err(e) => {
                tracing::warn!(
                    manga_id = %entry.manga_id,
                    url = %entry.source_url,
                    error = ?e,
                    "cover backfill: fetch_manga failed"
                );
                stats.failed += 1;
                continue;
            }
        };

        // A detail page without a cover counts as a failure so the gap
        // stays visible in the stats.
        let Some(cover_url) = cover_url else {
            tracing::warn!(
                manga_id = %entry.manga_id,
                url = %entry.source_url,
                "cover backfill: source returned no cover_url"
            );
            stats.failed += 1;
            continue;
        };

        match download_and_store_cover(
            db,
            storage,
            http,
            rate,
            &entry.source_url,
            entry.manga_id,
            &cover_url,
            allowlist,
            max_image_bytes,
        )
        .await
        {
            Ok(()) => stats.fetched += 1,
            Err(e) => {
                tracing::warn!(
                    manga_id = %entry.manga_id,
                    url = %entry.source_url,
                    error = ?e,
                    "cover backfill: download failed"
                );
                stats.failed += 1;
            }
        }
    }

    // Explicit release: the lease must outlive every use of `ctx` above.
    drop(lease);
    Ok(stats)
}
/// Download a cover image and persist its storage path. Local to the
/// pipeline because the CLI still calls it from its inline chapter-content
/// loop; once the worker pool fully replaces that path we can fold this
/// into `pipeline` proper.
#[allow(clippy::too_many_arguments)]
async fn download_and_store_cover(
pub(crate) async fn download_and_store_cover(
db: &PgPool,
storage: &dyn Storage,
http: &reqwest::Client,