Extends the live dashboard so an operator can see exactly what's being fetched, in realtime: - Chapters being crawled now are tracked in the status as `active_chapters` (manga title · ch.N) with a live page counter that climbs per stored page (set_chapter_pages, pushed via the existing watch→SSE). The dispatcher registers each via an RAII ChapterGuard (sync Mutex) that removes the entry on completion, panic, or timeout-drop — replacing the old per-worker slot model. - Covers: status now carries the cover being fetched now (`current_cover`, set around download_and_store_cover in both the metadata pass and backfill) and a `covers_queued` backlog count; CoverBackfill phase gains index/total. - Two paginated backlog endpoints (fetched on demand, auto-refreshed when the live counts change): GET /admin/crawler/active-jobs (which chapters of which mangas are queued/running) and GET /admin/crawler/covers (mangas missing a cover). repo: list_active_jobs, list_missing_cover_mangas, count_missing_covers. - dispatch_target now also returns manga title + chapter number. Frontend: the crawler page replaces the Workers table with an Active-chapters table (live page bars), adds a current-cover line + covers-queued figure, and two backlog sections (Queued chapters / Queued covers) with search + Pager, auto-refetched via $effect on the live counts. Tests: status guard/page + cover unit tests; repo list/count tests; endpoint tests; frontend api tests. Version 0.53.1 -> 0.54.0. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
163 lines
5.6 KiB
Rust
163 lines
5.6 KiB
Rust
//! Integration tests for `repo::chapter` — focused on
//! `dispatch_target`, the resolver the daemon's chapter dispatcher
//! uses to look up the URL it needs to hand to
//! `content::sync_chapter_content`.
//!
//! The query must:
//!   1. Skip `chapter_sources` rows where `dropped_at IS NOT NULL` —
//!      otherwise a soft-dropped source URL is dispatched as if live and
//!      burns the chapter's retry budget against guaranteed transients.
//!   2. Order the remaining rows by `last_seen_at DESC` so the freshest
//!      surviving source is the one we'll fetch from.
//!
//! The fix lives in `backend/src/repo/chapter.rs:dispatch_target`. The
//! enqueue queries at `pipeline.rs:381` and `:435` already filter on
//! `cs.dropped_at IS NULL`; this brings the resolver into line.
|
|
|
|
use mangalord::crawler::source::{SourceChapterRef, SourceManga};
|
|
use mangalord::repo::{
|
|
chapter::dispatch_target,
|
|
crawler::{ensure_source, sync_manga_chapters, upsert_manga_from_source},
|
|
};
|
|
use sqlx::PgPool;
|
|
use uuid::Uuid;
|
|
|
|
fn sample_manga(key: &str, title: &str, hash: &str) -> SourceManga {
|
|
SourceManga {
|
|
source_manga_key: key.to_string(),
|
|
title: title.to_string(),
|
|
alternative_titles: vec![],
|
|
authors: vec![],
|
|
genres: vec![],
|
|
tags: vec![],
|
|
status: None,
|
|
summary: None,
|
|
cover_url: None,
|
|
chapters: vec![],
|
|
metadata_hash: hash.to_string(),
|
|
}
|
|
}
|
|
|
|
/// Seed a manga with one chapter, plus a second `chapter_sources` row
|
|
/// pointing at the same chapter with a *newer* `last_seen_at` so the
|
|
/// `ORDER BY cs.last_seen_at DESC` branch of the fixed query can
|
|
/// distinguish "freshest live source" from "any live source."
|
|
async fn seed_chapter_with_two_live_sources(pool: &PgPool) -> (Uuid, String, String) {
|
|
// Two distinct sources both pointing at the same chapter is the
|
|
// realistic shape of the multi-source state — each source row is
|
|
// keyed (source_id, chapter_id) after migration 0017.
|
|
ensure_source(pool, "target", "T", "https://x.example")
|
|
.await
|
|
.unwrap();
|
|
ensure_source(pool, "mirror", "Mirror", "https://m.example")
|
|
.await
|
|
.unwrap();
|
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
|
let up = upsert_manga_from_source(pool, "target", "https://x.example/foo", &m)
|
|
.await
|
|
.unwrap();
|
|
let initial = vec![SourceChapterRef {
|
|
source_chapter_key: "1".into(),
|
|
number: 1,
|
|
title: Some("Ch.1".into()),
|
|
url: "https://x.example/foo/1/old".into(),
|
|
}];
|
|
sync_manga_chapters(pool, "target", up.manga_id, &initial)
|
|
.await
|
|
.unwrap();
|
|
|
|
let (chapter_id,): (Uuid,) = sqlx::query_as(
|
|
"SELECT c.id FROM chapters c \
|
|
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
|
WHERE cs.source_chapter_key = '1' AND cs.source_id = 'target'",
|
|
)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let old_url = "https://x.example/foo/1/old".to_string();
|
|
let new_url = "https://m.example/foo/1/mirror".to_string();
|
|
// Backdate the existing (old/target) source row and add a fresher
|
|
// row from the mirror source. The fix uses `last_seen_at DESC` to
|
|
// break the tie deterministically.
|
|
sqlx::query(
|
|
"UPDATE chapter_sources \
|
|
SET last_seen_at = NOW() - INTERVAL '2 days' \
|
|
WHERE chapter_id = $1 AND source_id = 'target'",
|
|
)
|
|
.bind(chapter_id)
|
|
.execute(pool)
|
|
.await
|
|
.unwrap();
|
|
sqlx::query(
|
|
"INSERT INTO chapter_sources \
|
|
(source_id, chapter_id, source_chapter_key, source_url, last_seen_at) \
|
|
VALUES ('mirror', $1, '1', $2, NOW())",
|
|
)
|
|
.bind(chapter_id)
|
|
.bind(&new_url)
|
|
.execute(pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
(chapter_id, old_url, new_url)
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn dispatch_target_prefers_most_recent_live_source(pool: PgPool) {
|
|
let (chapter_id, _old_url, new_url) =
|
|
seed_chapter_with_two_live_sources(&pool).await;
|
|
|
|
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
|
let (_manga_id, source_url, _title, _number) =
|
|
row.expect("two live sources should yield a dispatch target");
|
|
assert_eq!(
|
|
source_url, new_url,
|
|
"ORDER BY last_seen_at DESC LIMIT 1 must return the freshest source"
|
|
);
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn dispatch_target_skips_dropped_sources(pool: PgPool) {
|
|
let (chapter_id, _old_url, new_url) =
|
|
seed_chapter_with_two_live_sources(&pool).await;
|
|
|
|
// Soft-drop the fresher row. The dispatcher must now return the
|
|
// *older* still-live row instead of the dropped one.
|
|
sqlx::query(
|
|
"UPDATE chapter_sources SET dropped_at = NOW() WHERE source_url = $1",
|
|
)
|
|
.bind(&new_url)
|
|
.execute(&pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
|
let (_manga_id, source_url, _title, _number) =
|
|
row.expect("a single live source should still yield a dispatch target");
|
|
assert!(
|
|
source_url != new_url,
|
|
"dispatch_target must not return a dropped source"
|
|
);
|
|
}
|
|
|
|
#[sqlx::test(migrations = "./migrations")]
|
|
async fn dispatch_target_returns_none_when_only_dropped_sources_remain(
|
|
pool: PgPool,
|
|
) {
|
|
let (chapter_id, _old_url, _new_url) =
|
|
seed_chapter_with_two_live_sources(&pool).await;
|
|
|
|
sqlx::query("UPDATE chapter_sources SET dropped_at = NOW() WHERE chapter_id = $1")
|
|
.bind(chapter_id)
|
|
.execute(&pool)
|
|
.await
|
|
.unwrap();
|
|
|
|
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
|
assert!(
|
|
row.is_none(),
|
|
"every source is dropped — dispatch_target must return None"
|
|
);
|
|
}
|