Files
Mangalord/backend/src/repo/chapter.rs
MechaCat02 655ea42731 fix(crawler): scope dispatch_target to live sources, newest first (0.36.4)
The chapter dispatcher's URL resolver had no dropped_at filter and no
ORDER BY — a chapter whose only chapter_sources row had been soft-
dropped was still dispatched against the stale URL, eating retry
budget on guaranteed transients. With multiple live sources the LIMIT
1 winner was nondeterministic.

Add `AND cs.dropped_at IS NULL` and `ORDER BY cs.last_seen_at DESC`
to dispatch_target, bringing it in lockstep with the enqueue queries
in pipeline.rs that already filter on dropped_at. Returns None when
all sources are dropped — callers in daemon.rs already treat None
as "ack the job, skip the work."

Tests in tests/repo_chapter.rs cover the three branches (freshest
live wins, dropped sources skipped, all-dropped returns None).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-30 20:03:45 +02:00

167 lines
5.1 KiB
Rust

//! Chapter persistence.
use sqlx::{PgExecutor, PgPool};
use uuid::Uuid;
use crate::domain::Chapter;
use crate::error::AppResult;
/// Return one page of a manga's chapters in reading order.
///
/// Rows are sorted by `number`, then `created_at`, then `id`:
///
/// * `created_at` gives duplicate-numbered chapters (multiple
///   uploaders/translations of the same number) a stable order in
///   lists and prev/next reader navigation.
/// * `id` is the final tiebreaker. Without it, two chapters that share
///   both a number and a timestamp have no defined relative order, and
///   `LIMIT`/`OFFSET` paging may skip or repeat them between pages.
///
/// `limit` and `offset` are bound as-is into the `LIMIT` / `OFFSET`
/// clauses; this function performs no validation of its own.
///
/// # Errors
///
/// Any database error is propagated through `AppResult`.
pub async fn list_for_manga(
    pool: &PgPool,
    manga_id: Uuid,
    limit: i64,
    offset: i64,
) -> AppResult<Vec<Chapter>> {
    let rows = sqlx::query_as::<_, Chapter>(
        r#"
SELECT id, manga_id, number, title, page_count, created_at
FROM chapters
WHERE manga_id = $1
ORDER BY number ASC, created_at ASC, id ASC
LIMIT $2 OFFSET $3
"#,
    )
    .bind(manga_id)
    .bind(limit)
    .bind(offset)
    .fetch_all(pool)
    .await?;
    Ok(rows)
}
/// Fetch a single chapter by UUID, but only if it belongs to `manga_id`.
///
/// The manga scope is part of the `WHERE` clause, so a chapter UUID
/// guessed from a different manga's URL doesn't accidentally resolve.
/// Returns `Ok(None)` when no row satisfies both conditions.
pub async fn find_by_id_in_manga(
    pool: &PgPool,
    manga_id: Uuid,
    chapter_id: Uuid,
) -> AppResult<Option<Chapter>> {
    const SQL: &str = r#"
SELECT id, manga_id, number, title, page_count, created_at
FROM chapters
WHERE manga_id = $1 AND id = $2
"#;

    let lookup = sqlx::query_as::<_, Chapter>(SQL)
        .bind(manga_id)
        .bind(chapter_id);
    let found = lookup.fetch_optional(pool).await?;
    Ok(found)
}
/// Insert a new chapter row and return it as stored.
///
/// Generic over `PgExecutor` so the upload handler can run this inside
/// a transaction with the per-page inserts.
///
/// `uploaded_by` records who uploaded the chapter and feeds the
/// per-user upload history. It stays nullable: `None` stands for
/// "historical / API token with no associated user".
///
/// A chapter is identified by its row UUID, not by (manga_id, number):
/// that pair may repeat (multiple translations, re-uploads). Migration
/// 0013 dropped the (manga_id, number) UNIQUE, so duplicate inserts
/// succeed by design. Should a later migration re-add any uniqueness,
/// add a unique-violation arm here to surface a 409.
///
/// The returned `Chapter` is built from the statement's `RETURNING`
/// clause.
pub async fn create<'e, E: PgExecutor<'e>>(
    executor: E,
    manga_id: Uuid,
    number: i32,
    title: Option<&str>,
    uploaded_by: Option<Uuid>,
) -> AppResult<Chapter> {
    const SQL: &str = r#"
INSERT INTO chapters (manga_id, number, title, uploaded_by)
VALUES ($1, $2, $3, $4)
RETURNING id, manga_id, number, title, page_count, created_at
"#;

    let insert = sqlx::query_as::<_, Chapter>(SQL)
        .bind(manga_id)
        .bind(number)
        .bind(title)
        .bind(uploaded_by);
    let inserted = insert.fetch_one(executor).await?;
    Ok(inserted)
}
/// Report whether `chapter_id` is a chapter of `manga_id`.
///
/// This is the cross-link guard for `POST /bookmarks`. The bookmarks
/// FK accepts any valid chapter id, but a chapter must belong to the
/// bookmark's manga or the bookmark would dangle on a foreign manga.
/// Handlers call this before the insert and surface `NotFound` when
/// the answer is `false`.
pub async fn belongs_to_manga(
    pool: &PgPool,
    chapter_id: Uuid,
    manga_id: Uuid,
) -> AppResult<bool> {
    // `SELECT EXISTS(...)` always yields exactly one boolean row, so
    // the single column is decoded directly as a scalar.
    let check = sqlx::query_scalar::<_, bool>(
        "SELECT EXISTS(SELECT 1 FROM chapters WHERE id = $1 AND manga_id = $2)",
    )
    .bind(chapter_id)
    .bind(manga_id);
    let exists = check.fetch_one(pool).await?;
    Ok(exists)
}
/// Fetch only the `page_count` column of one chapter.
///
/// The crawler daemon's consumer-side dedup safety net uses this to
/// ack-done a job whose chapter has already been fetched by a racing
/// worker. Returns `Ok(None)` when no chapter has the given `id`.
pub async fn page_count(pool: &PgPool, id: Uuid) -> sqlx::Result<Option<i32>> {
    let lookup =
        sqlx::query_scalar::<_, i32>("SELECT page_count FROM chapters WHERE id = $1").bind(id);
    lookup.fetch_optional(pool).await
}
/// Resolve a chapter to its `manga_id` and the most recently seen live
/// `source_url`.
///
/// The daemon's chapter dispatcher calls this to obtain the URL it
/// hands to `content::sync_chapter_content`.
///
/// Only sources with `cs.dropped_at IS NULL` are considered, i.e.
/// soft-dropped sources are skipped. When several live sources remain,
/// `ORDER BY cs.last_seen_at DESC` picks the freshest still-attached
/// URL. The result is `None` when the chapter is gone or every one of
/// its source rows is dropped; callers in the dispatcher treat `None`
/// as "ack the job, skip the work."
///
/// The enqueue queries (`pipeline::enqueue_bookmarked_pending` and
/// `enqueue_pending_for_manga`) apply the same `dropped_at IS NULL`
/// filter. Keeping this resolver in lockstep means a chapter that was
/// dropped between enqueue and lease isn't dispatched against a stale
/// URL.
pub async fn dispatch_target(
    pool: &PgPool,
    chapter_id: Uuid,
) -> sqlx::Result<Option<(Uuid, String)>> {
    const SQL: &str = "SELECT c.manga_id, cs.source_url \
         FROM chapters c \
         JOIN chapter_sources cs ON cs.chapter_id = c.id \
         WHERE c.id = $1 \
           AND cs.dropped_at IS NULL \
         ORDER BY cs.last_seen_at DESC \
         LIMIT 1";

    let resolver = sqlx::query_as::<_, (Uuid, String)>(SQL).bind(chapter_id);
    resolver.fetch_optional(pool).await
}
/// Overwrite the stored `page_count` of the chapter identified by `id`.
///
/// Generic over `PgExecutor`, so it can run on a pool or inside a
/// caller-owned transaction. The outcome of the `UPDATE` is discarded,
/// so an `id` that matches no row is not reported to the caller.
pub async fn set_page_count<'e, E: PgExecutor<'e>>(
    executor: E,
    id: Uuid,
    page_count: i32,
) -> AppResult<()> {
    let update = sqlx::query("UPDATE chapters SET page_count = $1 WHERE id = $2")
        .bind(page_count)
        .bind(id);
    // Only success or failure matters here; the affected-row count is
    // intentionally ignored.
    let _outcome = update.execute(executor).await?;
    Ok(())
}