Compare commits
18 Commits
bugfix/cra
...
feat/cover
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee070d8878 | ||
|
|
5c22dfdb41 | ||
|
|
e50fc093c3 | ||
|
|
72756cfef2 | ||
|
|
4e20350645 | ||
|
|
713ca139c4 | ||
|
|
e3cff9d874 | ||
|
|
d47e832613 | ||
|
|
c30c7a546f | ||
|
|
a0db7beb81 | ||
|
|
ecbbebafc4 | ||
|
|
8c6378b877 | ||
|
|
8557e432a2 | ||
|
|
d6d84dedcb | ||
|
|
d37b94871e | ||
| 8e39fadd21 | |||
| 3b3d13a0f6 | |||
| 0f90af80cb |
41
.env.example
41
.env.example
@@ -74,6 +74,10 @@ CRAWLER_DOWNLOAD_ALLOWLIST=
|
||||
CRAWLER_ALLOW_ANY_HOST=false
|
||||
# Hard cap on a single image body. Default 32 MiB.
|
||||
CRAWLER_MAX_IMAGE_BYTES=33554432
|
||||
# Max manga detail fetches per metadata pass (both the in-process daemon
|
||||
# and the `bin/crawler` CLI). 0 means no cap — let the source walker run
|
||||
# to completion. Useful for capped test runs against a new source.
|
||||
CRAWLER_LIMIT=0
|
||||
# Path to a system Chromium binary. When set, the crawler skips the
|
||||
# bundled-fetcher download. Required on platforms without a usable
|
||||
# upstream Chromium build (notably Linux_arm64 / Raspberry Pi). On
|
||||
@@ -83,6 +87,43 @@ CRAWLER_MAX_IMAGE_BYTES=33554432
|
||||
# the image actually contains the binary.
|
||||
CRAWLER_CHROMIUM_BINARY=
|
||||
|
||||
# ----- Crawler TOR proxy + recircuit -----
|
||||
# The compose stack ships a `tor` service (dockurr/tor) and defaults
|
||||
# CRAWLER_PROXY to it, so by default all crawler traffic exits via the
|
||||
# TOR network. To opt out, set CRAWLER_PROXY= (empty) AND
|
||||
# CRAWLER_TOR_CONTROL_URL= (empty) below — the tor service can stay
|
||||
# running, it just won't be used.
|
||||
#
|
||||
# Going through TOR adds latency to every fetch; image downloads in
|
||||
# particular slow noticeably. The win is on sites that rate-limit or
|
||||
# fingerprint by exit IP — NEWNYM recirculation makes a fresh exit
|
||||
# cheap to reach for.
|
||||
#
|
||||
# CRAWLER_PROXY: SOCKS5(h) URL. Use `socks5h://` (not `socks5://`) so
|
||||
# DNS resolution also goes through TOR, avoiding leaks via the host's
|
||||
# resolver. Leave unset to talk to the upstream directly.
|
||||
CRAWLER_PROXY=socks5h://tor:9050
|
||||
# Control-port URL for SIGNAL NEWNYM ("get a fresh circuit"). Triggered
|
||||
# automatically on bad pages (broken-page body, missing #logo) and on
|
||||
# the Unauthenticated session probe outcome. Leave unset to disable
|
||||
# the recircuit feature (the SOCKS proxy still works).
|
||||
CRAWLER_TOR_CONTROL_URL=tcp://tor:9051
|
||||
# Max NEWNYM-and-retry cycles per recircuit-eligible failure. Default 3.
|
||||
CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS=3
|
||||
|
||||
# ----- TOR control-port password -----
|
||||
# Shared between the bundled dockurr/tor service (which hashes it into
|
||||
# its HashedControlPassword) and the backend's
|
||||
# CRAWLER_TOR_CONTROL_PASSWORD. REQUIRED — docker-compose.yml fails
|
||||
# fast if absent. Generate a strong random string; rotate by setting
|
||||
# a new value and restarting both `tor` and `backend`.
|
||||
#
|
||||
# Operators running their own non-dockurr tor daemon with cookie-file
|
||||
# auth can ignore this var and instead set
|
||||
# CRAWLER_TOR_CONTROL_COOKIE_PATH on the backend — the TorController
|
||||
# prefers cookie when both are present.
|
||||
TOR_CONTROL_PASSWORD=change-me-to-a-strong-random-string
|
||||
|
||||
# ----- Frontend -----
|
||||
# The frontend container runs SvelteKit's Node adapter on :3000 and
|
||||
# proxies /api/* to BACKEND_URL via src/hooks.server.ts. In compose the
|
||||
|
||||
@@ -10,8 +10,6 @@ on:
|
||||
jobs:
|
||||
test-backend:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: rust:1-slim
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
@@ -28,10 +26,18 @@ jobs:
|
||||
DATABASE_URL: postgres://mangalord:mangalord@postgres:5432/mangalord
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install build deps
|
||||
# ubuntu-latest has node (so JS actions like checkout/cache run) but no
|
||||
# Rust. We intentionally avoid `container: rust:1-slim` because act_runner
|
||||
# runs JS actions with node *inside* the job container, and the slim Rust
|
||||
# image ships no node (checkout would fail with exit 127).
|
||||
- name: Install Rust + build deps
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates
|
||||
set -eu
|
||||
SUDO=""; [ "$(id -u)" = "0" ] || SUDO="sudo"
|
||||
$SUDO apt-get update
|
||||
$SUDO apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates curl
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
- name: Cache cargo registry and target
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
@@ -66,9 +72,17 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-backend, test-frontend]
|
||||
# PRs only run the test jobs; build + deploy are reserved for
|
||||
# post-merge pushes to main. Without this gate every PR would push
|
||||
# a tagged image to the registry and SSH-deploy to prod.
|
||||
# post-merge pushes to main.
|
||||
if: github.event_name != 'pull_request'
|
||||
# Build on the host docker daemon directly (docker-outside-of-docker):
|
||||
# the runner shares the deploy host's daemon, so a plain `docker build`
|
||||
# reuses the host's layer cache and avoids buildx's docker-container
|
||||
# driver + the gha cache exporter — neither works against this single-host
|
||||
# act_runner, and there is no in-job daemon socket unless we mount it.
|
||||
container:
|
||||
image: docker.gitea.com/runner-images:ubuntu-latest
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
outputs:
|
||||
image_tag: ${{ steps.meta.outputs.image_tag }}
|
||||
version: ${{ steps.meta.outputs.version }}
|
||||
@@ -87,48 +101,32 @@ jobs:
|
||||
echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "version=${version}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: docker login
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ secrets.REGISTRY_URL }}
|
||||
username: ${{ secrets.REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Build & push backend
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./backend
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-backend:latest
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.image_tag }}
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.version }}
|
||||
cache-from: type=gha,scope=backend
|
||||
cache-to: type=gha,mode=max,scope=backend
|
||||
|
||||
- name: Build & push frontend
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./frontend
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-frontend:latest
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.image_tag }}
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.version }}
|
||||
cache-from: type=gha,scope=frontend
|
||||
cache-to: type=gha,mode=max,scope=frontend
|
||||
- name: Build & push backend + frontend
|
||||
env:
|
||||
REGISTRY_URL: ${{ secrets.REGISTRY_URL }}
|
||||
REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
|
||||
REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
IMAGE_TAG: ${{ steps.meta.outputs.image_tag }}
|
||||
VERSION: ${{ steps.meta.outputs.version }}
|
||||
run: |
|
||||
set -eu
|
||||
echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" -u "$REGISTRY_USERNAME" --password-stdin
|
||||
for svc in backend frontend; do
|
||||
img="$REGISTRY_URL/mangalord-$svc"
|
||||
docker build -t "$img:$IMAGE_TAG" -t "$img:latest" -t "$img:$VERSION" "./$svc"
|
||||
for tag in "$IMAGE_TAG" latest "$VERSION"; do docker push "$img:$tag"; done
|
||||
done
|
||||
docker logout "$REGISTRY_URL"
|
||||
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-and-push
|
||||
if: github.event_name != 'pull_request'
|
||||
# Single-host deploy: the runner lives on the same box as the stack, so we
|
||||
# drive the host docker daemon directly (act_runner shares its socket via
|
||||
# `docker_host: "-"`) instead of SSHing out. The compose dir is bind-mounted
|
||||
# at its REAL host path so compose's relative bind-mounts (./mangalord/...,
|
||||
# ./Caddyfile) resolve; this requires `/mnt/ssd/docker-data` in the runner's
|
||||
# drive the host docker daemon directly (the job mounts the host docker
|
||||
# socket) instead of SSHing out. The compose dir is bind-mounted at its
|
||||
# REAL host path so compose's relative bind-mounts (./mangalord/...,
|
||||
# ./Caddyfile) resolve; both paths must be in the runner's
|
||||
# container.valid_volumes. The central compose references the images as
|
||||
# registry.mc02.dev/mangalord-*:${MANGALORD_TAG:-latest}, so we only pull
|
||||
# and recreate the two mangalord services at the freshly built SHA.
|
||||
@@ -136,6 +134,7 @@ jobs:
|
||||
image: docker:cli
|
||||
volumes:
|
||||
- /mnt/ssd/docker-data:/mnt/ssd/docker-data
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
steps:
|
||||
- name: Deploy to the local stack
|
||||
working-directory: /mnt/ssd/docker-data
|
||||
|
||||
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mangalord"
|
||||
version = "0.45.0"
|
||||
version = "0.50.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.45.0"
|
||||
version = "0.50.0"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
@@ -57,3 +57,13 @@ http-body-util = "0.1"
|
||||
mime = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1", features = ["test-util"] }
|
||||
|
||||
# Trim debug builds: keep line numbers in panics / backtraces but drop the
|
||||
# full DWARF info (variable-level inspection in gdb/lldb). With a sqlx +
|
||||
# axum + tokio dep tree the default ("full") leaves backend/target on the
|
||||
# order of tens of GiB; this typically cuts ~50–70% off that.
|
||||
[profile.dev]
|
||||
debug = "line-tables-only"
|
||||
|
||||
[profile.test]
|
||||
debug = "line-tables-only"
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
//! `crate::auth::extractor::RequireAdmin`).
|
||||
|
||||
pub mod mangas;
|
||||
pub mod resync;
|
||||
pub mod system;
|
||||
pub mod users;
|
||||
|
||||
@@ -16,5 +17,6 @@ pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.merge(users::routes())
|
||||
.merge(mangas::routes())
|
||||
.merge(resync::routes())
|
||||
.merge(system::routes())
|
||||
}
|
||||
|
||||
176
backend/src/api/admin/resync.rs
Normal file
176
backend/src/api/admin/resync.rs
Normal file
@@ -0,0 +1,176 @@
|
||||
//! Admin-triggered force resync of a single manga's metadata + cover,
|
||||
//! or a single chapter's content.
|
||||
//!
|
||||
//! Both endpoints are admin-only (`RequireAdmin`, cookie-only) and run
|
||||
//! synchronously with the request — the response carries the refreshed
|
||||
//! resource so the UI can swap it in without a follow-up GET. The work
|
||||
//! itself is delegated to [`ResyncService`] (set on AppState by
|
||||
//! `app::build` when the crawler daemon is enabled); when the daemon
|
||||
//! is disabled, both handlers return 503.
|
||||
|
||||
use axum::extract::{Path, State};
|
||||
use axum::routing::post;
|
||||
use axum::{Json, Router};
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::app::AppState;
|
||||
use crate::auth::extractor::RequireAdmin;
|
||||
use crate::crawler::resync::{ChapterResyncOutcome, ResyncError};
|
||||
use crate::domain::manga::MangaDetail;
|
||||
use crate::domain::Chapter;
|
||||
use crate::error::{AppError, AppResult};
|
||||
use crate::repo;
|
||||
use crate::repo::crawler::UpsertStatus;
|
||||
|
||||
pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/admin/mangas/:id/resync", post(resync_manga))
|
||||
.route("/admin/chapters/:id/resync", post(resync_chapter))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct MangaResyncResponse {
|
||||
pub manga: MangaDetail,
|
||||
/// `"new" | "updated" | "unchanged"` — mirrors [`UpsertStatus`].
|
||||
pub metadata_status: &'static str,
|
||||
pub cover_fetched: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ChapterResyncResponse {
|
||||
pub chapter: Chapter,
|
||||
/// `"fetched" | "skipped"` — whether new pages landed or the
|
||||
/// service short-circuited (e.g. chapter already had pages and the
|
||||
/// session was lost so force was downgraded).
|
||||
pub outcome: &'static str,
|
||||
/// Page count when `outcome == "fetched"`. `None` for `skipped`.
|
||||
pub pages: Option<usize>,
|
||||
}
|
||||
|
||||
async fn resync_manga(
|
||||
State(state): State<AppState>,
|
||||
admin: RequireAdmin,
|
||||
Path(manga_id): Path<Uuid>,
|
||||
) -> AppResult<Json<MangaResyncResponse>> {
|
||||
if !repo::manga::exists(&state.db, manga_id).await? {
|
||||
return Err(AppError::NotFound);
|
||||
}
|
||||
let resync = state
|
||||
.resync
|
||||
.as_ref()
|
||||
.ok_or_else(|| AppError::ServiceUnavailable(
|
||||
"crawler daemon is disabled; force resync unavailable".into(),
|
||||
))?;
|
||||
|
||||
let outcome = resync.resync_manga(manga_id).await.map_err(map_resync_err)?;
|
||||
|
||||
// Audit the action with the actor + the resync outcome so an
|
||||
// operator-of-operators can answer "who refetched this manga, and
|
||||
// did the cover land?" from the log alone.
|
||||
repo::admin_audit::insert(
|
||||
&state.db,
|
||||
admin.0.id,
|
||||
"manga_resync",
|
||||
"manga",
|
||||
Some(manga_id),
|
||||
json!({
|
||||
"metadata_status": status_str(outcome.metadata_status),
|
||||
"cover_fetched": outcome.cover_fetched,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let manga = repo::manga::get_detail(&state.db, manga_id).await?;
|
||||
Ok(Json(MangaResyncResponse {
|
||||
manga,
|
||||
metadata_status: status_str(outcome.metadata_status),
|
||||
cover_fetched: outcome.cover_fetched,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn resync_chapter(
|
||||
State(state): State<AppState>,
|
||||
admin: RequireAdmin,
|
||||
Path(chapter_id): Path<Uuid>,
|
||||
) -> AppResult<Json<ChapterResyncResponse>> {
|
||||
let resync = state
|
||||
.resync
|
||||
.as_ref()
|
||||
.ok_or_else(|| AppError::ServiceUnavailable(
|
||||
"crawler daemon is disabled; force resync unavailable".into(),
|
||||
))?;
|
||||
|
||||
// Look up the manga the chapter belongs to so we can return the
|
||||
// refreshed chapter row in the response and 404 for unknown ids.
|
||||
let manga_id: Option<Uuid> =
|
||||
sqlx::query_scalar("SELECT manga_id FROM chapters WHERE id = $1")
|
||||
.bind(chapter_id)
|
||||
.fetch_optional(&state.db)
|
||||
.await?;
|
||||
let Some(manga_id) = manga_id else {
|
||||
return Err(AppError::NotFound);
|
||||
};
|
||||
|
||||
let outcome = resync
|
||||
.resync_chapter(chapter_id)
|
||||
.await
|
||||
.map_err(map_resync_err)?;
|
||||
|
||||
let (outcome_str, pages) = match &outcome {
|
||||
ChapterResyncOutcome::Fetched { pages, .. } => ("fetched", Some(*pages)),
|
||||
ChapterResyncOutcome::Skipped { .. } => ("skipped", None),
|
||||
};
|
||||
|
||||
repo::admin_audit::insert(
|
||||
&state.db,
|
||||
admin.0.id,
|
||||
"chapter_resync",
|
||||
"chapter",
|
||||
Some(chapter_id),
|
||||
json!({
|
||||
"outcome": outcome_str,
|
||||
"pages": pages,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let chapter = repo::chapter::find_by_id_in_manga(&state.db, manga_id, chapter_id)
|
||||
.await?
|
||||
.ok_or(AppError::NotFound)?;
|
||||
Ok(Json(ChapterResyncResponse {
|
||||
chapter,
|
||||
outcome: outcome_str,
|
||||
pages,
|
||||
}))
|
||||
}
|
||||
|
||||
fn status_str(s: UpsertStatus) -> &'static str {
|
||||
match s {
|
||||
UpsertStatus::New => "new",
|
||||
UpsertStatus::Updated => "updated",
|
||||
UpsertStatus::Unchanged => "unchanged",
|
||||
}
|
||||
}
|
||||
|
||||
/// Map [`ResyncError`] (and the anyhow envelopes wrapping it) onto the
|
||||
/// right [`AppError`]. Anything else surfaces as a generic 500 via the
|
||||
/// `Other` arm — the operator sees the underlying anyhow chain in
|
||||
/// server logs, the client sees a clean envelope.
|
||||
fn map_resync_err(err: anyhow::Error) -> AppError {
|
||||
if let Some(rerr) = err.downcast_ref::<ResyncError>() {
|
||||
match rerr {
|
||||
ResyncError::NoMangaSource => AppError::ValidationFailed {
|
||||
message: "manga has no live crawler source — cannot resync".into(),
|
||||
details: json!({ "manga": "no_source" }),
|
||||
},
|
||||
ResyncError::NoChapterSource => AppError::ValidationFailed {
|
||||
message: "chapter has no live crawler source — cannot resync".into(),
|
||||
details: json!({ "chapter": "no_source" }),
|
||||
},
|
||||
}
|
||||
} else {
|
||||
AppError::Other(err)
|
||||
}
|
||||
}
|
||||
@@ -42,18 +42,22 @@ pub fn routes() -> Router<AppState> {
|
||||
.route("/auth/tokens/:id", delete(delete_token))
|
||||
}
|
||||
|
||||
/// Public, unauthenticated. Exposes anonymous-relevant auth policy
|
||||
/// (currently just whether self-registration is open) so the frontend
|
||||
/// can render its login / register affordances correctly without a
|
||||
/// probe request that would conflate "disabled" with "rate-limited".
|
||||
/// Public, unauthenticated. Exposes anonymous-relevant auth policy so
|
||||
/// the frontend can render its login / register affordances correctly
|
||||
/// without a probe request that would conflate "disabled" with
|
||||
/// "rate-limited". `self_register_enabled` is the *effective* value
|
||||
/// (`allow_self_register && !private_mode`), so a private-mode
|
||||
/// instance reports `false` even if the raw flag is on.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct AuthConfigResponse {
|
||||
pub self_register_enabled: bool,
|
||||
pub private_mode: bool,
|
||||
}
|
||||
|
||||
async fn auth_config(State(state): State<AppState>) -> Json<AuthConfigResponse> {
|
||||
Json(AuthConfigResponse {
|
||||
self_register_enabled: state.auth.allow_self_register,
|
||||
self_register_enabled: state.auth.allow_self_register && !state.auth.private_mode,
|
||||
private_mode: state.auth.private_mode,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -103,7 +107,10 @@ async fn register(
|
||||
// disabled and enabled paths both consume a token, and disabled
|
||||
// returns 403 instead of running argon2.
|
||||
check_auth_rate_limit(&state, "register")?;
|
||||
if !state.auth.allow_self_register {
|
||||
// Private mode force-blocks self-registration regardless of
|
||||
// ALLOW_SELF_REGISTER — operators of locked-down instances mint
|
||||
// accounts via `POST /admin/users` instead.
|
||||
if !state.auth.allow_self_register || state.auth.private_mode {
|
||||
return Err(AppError::Forbidden);
|
||||
}
|
||||
let username = input.username.trim();
|
||||
|
||||
@@ -3,8 +3,10 @@ use std::sync::atomic::AtomicBool;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use axum::extract::DefaultBodyLimit;
|
||||
use axum::extract::{DefaultBodyLimit, FromRequestParts, Request, State};
|
||||
use axum::http::{HeaderName, HeaderValue, Method};
|
||||
use axum::middleware::{self, Next};
|
||||
use axum::response::Response;
|
||||
use axum::Router;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use sqlx::PgPool;
|
||||
@@ -12,7 +14,9 @@ use tokio_util::sync::CancellationToken;
|
||||
use tower_http::cors::{AllowOrigin, CorsLayer};
|
||||
use tower_http::trace::TraceLayer;
|
||||
|
||||
use crate::auth::extractor::CurrentUser;
|
||||
use crate::auth::rate_limit::AuthRateLimiter;
|
||||
use crate::error::AppError;
|
||||
use crate::config::{AuthConfig, Config, CrawlerConfig, UploadConfig};
|
||||
use crate::crawler::browser_manager::{self, BrowserManager};
|
||||
use crate::crawler::content::{self, SyncOutcome};
|
||||
@@ -20,6 +24,7 @@ use crate::crawler::daemon::{self, ChapterDispatcher, DaemonConfig, MetadataPass
|
||||
use crate::crawler::jobs::JobPayload;
|
||||
use crate::crawler::pipeline::{self, MetadataStats};
|
||||
use crate::crawler::rate_limit::HostRateLimiters;
|
||||
use crate::crawler::resync::{RealResyncService, ResyncService};
|
||||
use crate::crawler::safety::DownloadAllowlist;
|
||||
use crate::crawler::session;
|
||||
use crate::repo;
|
||||
@@ -35,6 +40,12 @@ pub struct AppState {
|
||||
/// One instance per AppState so tests stay isolated across the
|
||||
/// same process.
|
||||
pub auth_limiter: Arc<AuthRateLimiter>,
|
||||
/// Admin-triggered force resync. `None` when the crawler daemon
|
||||
/// is disabled (`CRAWLER_DAEMON=false`); admin handlers gate on
|
||||
/// `.is_some()` and return 503 otherwise. Set by [`build`] from the
|
||||
/// same wiring that builds the daemon's chapter dispatcher, so a
|
||||
/// force resync uses the daemon's BrowserManager + rate limiters.
|
||||
pub resync: Option<Arc<dyn ResyncService>>,
|
||||
}
|
||||
|
||||
/// Bundle returned by [`build`]. The router is what `axum::serve` consumes;
|
||||
@@ -69,11 +80,12 @@ pub async fn build(config: Config) -> anyhow::Result<AppHandle> {
|
||||
|
||||
let storage: Arc<dyn Storage> = Arc::new(LocalStorage::new(config.storage_dir.clone()));
|
||||
|
||||
let daemon = if config.crawler.daemon_enabled {
|
||||
Some(spawn_crawler_daemon(db.clone(), Arc::clone(&storage), &config.crawler).await?)
|
||||
let (daemon, resync) = if config.crawler.daemon_enabled {
|
||||
let spawned = spawn_crawler_daemon(db.clone(), Arc::clone(&storage), &config.crawler).await?;
|
||||
(Some(spawned.handle), Some(spawned.resync))
|
||||
} else {
|
||||
tracing::info!("crawler daemon disabled (CRAWLER_DAEMON=false)");
|
||||
None
|
||||
(None, None)
|
||||
};
|
||||
|
||||
let auth_limiter = Arc::new(AuthRateLimiter::new(config.auth.rate_limit));
|
||||
@@ -83,16 +95,26 @@ pub async fn build(config: Config) -> anyhow::Result<AppHandle> {
|
||||
auth: config.auth.clone(),
|
||||
upload: config.upload.clone(),
|
||||
auth_limiter,
|
||||
resync,
|
||||
};
|
||||
let router = router(state).layer(cors_layer(&config.cors_allowed_origins));
|
||||
Ok(AppHandle { router, daemon })
|
||||
}
|
||||
|
||||
/// Bundle returned by [`spawn_crawler_daemon`]. The handle owns the
|
||||
/// daemon's tasks; `resync` is the operator-trigger service shared with
|
||||
/// `AppState` so admin endpoints can call into the same browser /
|
||||
/// rate-limit machinery.
|
||||
struct SpawnedDaemon {
|
||||
handle: daemon::DaemonHandle,
|
||||
resync: Arc<dyn ResyncService>,
|
||||
}
|
||||
|
||||
async fn spawn_crawler_daemon(
|
||||
db: PgPool,
|
||||
storage: Arc<dyn Storage>,
|
||||
cfg: &CrawlerConfig,
|
||||
) -> anyhow::Result<daemon::DaemonHandle> {
|
||||
) -> anyhow::Result<SpawnedDaemon> {
|
||||
// Reqwest client with cookie jar pre-seeded so CDN image fetches
|
||||
// include PHPSESSID. Same shape as bin/crawler.rs main().
|
||||
let cookie_jar = Arc::new(reqwest::cookie::Jar::default());
|
||||
@@ -123,29 +145,49 @@ async fn spawn_crawler_daemon(
|
||||
}
|
||||
let rate = Arc::new(rate);
|
||||
|
||||
let tor = crate::crawler::tor::TorController::from_parts(
|
||||
cfg.tor_control_url.as_deref(),
|
||||
cfg.tor_control_password.as_deref(),
|
||||
cfg.tor_control_cookie_path.as_deref(),
|
||||
)
|
||||
.context("build TorController from CRAWLER_TOR_CONTROL_* env")?
|
||||
.map(Arc::new);
|
||||
if let Some(t) = &tor {
|
||||
tracing::info!(?t, "TOR control configured; transient pages will trigger NEWNYM");
|
||||
}
|
||||
let tor_recircuit_max = cfg.tor_recircuit_max_attempts;
|
||||
|
||||
// Browser manager. on_launch re-injects PHPSESSID on every fresh
|
||||
// chromium spawn so an idle teardown followed by re-launch stays
|
||||
// authenticated without operator action.
|
||||
let mut launch_opts = cfg.browser.clone();
|
||||
if let Some(proxy) = &cfg.proxy {
|
||||
launch_opts.extra_args.push(format!("--proxy-server={proxy}"));
|
||||
let chromium_proxy = crate::crawler::url_utils::chromium_proxy_arg(proxy);
|
||||
launch_opts.extra_args.push(format!("--proxy-server={chromium_proxy}"));
|
||||
}
|
||||
let on_launch = match (&cfg.phpsessid, &cfg.cookie_domain, &cfg.start_url) {
|
||||
(Some(sid), Some(domain), Some(start_url)) => {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url = start_url.clone();
|
||||
let tor_for_launch = tor.as_ref().map(Arc::clone);
|
||||
let on_launch: browser_manager::OnLaunch = Arc::new(move |browser| {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url = start_url.clone();
|
||||
let tor_for_launch = tor_for_launch.as_ref().map(Arc::clone);
|
||||
Box::pin(async move {
|
||||
session::inject_phpsessid(&browser, &sid, &domain)
|
||||
.await
|
||||
.context("on_launch: inject_phpsessid")?;
|
||||
session::verify_session(&browser, &start_url)
|
||||
.await
|
||||
.context("on_launch: verify_session")?;
|
||||
session::verify_session_with_recircuit(
|
||||
&browser,
|
||||
&start_url,
|
||||
tor_for_launch.as_deref(),
|
||||
tor_recircuit_max,
|
||||
)
|
||||
.await
|
||||
.context("on_launch: verify_session")?;
|
||||
Ok(())
|
||||
})
|
||||
});
|
||||
@@ -165,13 +207,26 @@ async fn spawn_crawler_daemon(
|
||||
http: http.clone(),
|
||||
rate: Arc::clone(&rate),
|
||||
start_url: url.clone(),
|
||||
manga_limit: cfg.manga_limit,
|
||||
download_allowlist: cfg.download_allowlist.clone(),
|
||||
max_image_bytes: cfg.max_image_bytes,
|
||||
tor: tor.as_ref().map(Arc::clone),
|
||||
});
|
||||
m
|
||||
});
|
||||
|
||||
let dispatcher: Arc<dyn ChapterDispatcher> = Arc::new(RealChapterDispatcher {
|
||||
browser_manager: Arc::clone(&browser_manager),
|
||||
db: db.clone(),
|
||||
storage: Arc::clone(&storage),
|
||||
http: http.clone(),
|
||||
rate: Arc::clone(&rate),
|
||||
download_allowlist: cfg.download_allowlist.clone(),
|
||||
max_image_bytes: cfg.max_image_bytes,
|
||||
tor: tor.as_ref().map(Arc::clone),
|
||||
});
|
||||
|
||||
let resync: Arc<dyn ResyncService> = Arc::new(RealResyncService {
|
||||
browser_manager: Arc::clone(&browser_manager),
|
||||
db: db.clone(),
|
||||
storage: Arc::clone(&storage),
|
||||
@@ -179,6 +234,7 @@ async fn spawn_crawler_daemon(
|
||||
rate: Arc::clone(&rate),
|
||||
download_allowlist: cfg.download_allowlist.clone(),
|
||||
max_image_bytes: cfg.max_image_bytes,
|
||||
tor: tor.as_ref().map(Arc::clone),
|
||||
});
|
||||
|
||||
// Shared cancellation: daemon shutdown cancels the BrowserManager's
|
||||
@@ -215,7 +271,10 @@ async fn spawn_crawler_daemon(
|
||||
},
|
||||
);
|
||||
|
||||
Ok(daemon_handle)
|
||||
Ok(SpawnedDaemon {
|
||||
handle: daemon_handle,
|
||||
resync,
|
||||
})
|
||||
}
|
||||
|
||||
// Real impls of the daemon traits, owning the browser manager + I/O. Kept
|
||||
@@ -230,8 +289,10 @@ struct RealMetadataPass {
|
||||
http: reqwest::Client,
|
||||
rate: Arc<HostRateLimiters>,
|
||||
start_url: String,
|
||||
manga_limit: usize,
|
||||
download_allowlist: DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
tor: Option<Arc<crate::crawler::tor::TorController>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -244,10 +305,11 @@ impl MetadataPass for RealMetadataPass {
|
||||
&self.http,
|
||||
&self.rate,
|
||||
&self.start_url,
|
||||
0,
|
||||
self.manga_limit,
|
||||
false,
|
||||
&self.download_allowlist,
|
||||
self.max_image_bytes,
|
||||
self.tor.as_deref(),
|
||||
)
|
||||
.await;
|
||||
if let Err(e) = &result {
|
||||
@@ -255,6 +317,36 @@ impl MetadataPass for RealMetadataPass {
|
||||
self.browser_manager.invalidate().await;
|
||||
}
|
||||
}
|
||||
// Cover backfill follows the metadata pass even when the pass
|
||||
// errored — the early-stop walk can complete its work and bail
|
||||
// late, and a transient browser failure shouldn't cancel the
|
||||
// residual cover backlog. The backfill has its own per-call cap
|
||||
// so a runaway error stream can't monopolise the tick.
|
||||
match pipeline::backfill_missing_covers(
|
||||
&self.browser_manager,
|
||||
&self.db,
|
||||
self.storage.as_ref(),
|
||||
&self.http,
|
||||
&self.rate,
|
||||
pipeline::COVER_BACKFILL_DEFAULT_MAX,
|
||||
&self.download_allowlist,
|
||||
self.max_image_bytes,
|
||||
self.tor.as_deref(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(stats) => {
|
||||
if stats.considered > 0 {
|
||||
tracing::info!(?stats, "cover backfill complete");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = ?e, "cover backfill failed");
|
||||
if crate::crawler::nav::anyhow_looks_browser_dead(&e) {
|
||||
self.browser_manager.invalidate().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
@@ -267,6 +359,7 @@ struct RealChapterDispatcher {
|
||||
rate: Arc<HostRateLimiters>,
|
||||
download_allowlist: DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
tor: Option<Arc<crate::crawler::tor::TorController>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -298,6 +391,7 @@ impl ChapterDispatcher for RealChapterDispatcher {
|
||||
false,
|
||||
&self.download_allowlist,
|
||||
self.max_image_bytes,
|
||||
self.tor.as_deref(),
|
||||
)
|
||||
.await;
|
||||
drop(lease);
|
||||
@@ -325,11 +419,62 @@ pub fn router(state: AppState) -> Router {
|
||||
let max_request_bytes = state.upload.max_request_bytes;
|
||||
Router::new()
|
||||
.nest("/api/v1", crate::api::routes())
|
||||
.layer(middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
private_mode_guard,
|
||||
))
|
||||
.layer(DefaultBodyLimit::max(max_request_bytes))
|
||||
.with_state(state)
|
||||
.layer(TraceLayer::new_for_http())
|
||||
}
|
||||
|
||||
/// Paths reachable anonymously even when `PRIVATE_MODE=true`. Login and
|
||||
/// logout are needed for the auth flow itself; `/health` is reserved
|
||||
/// for load-balancer probes; `/auth/config` lets the frontend decide
|
||||
/// whether to render the login form or its anonymous alternatives;
|
||||
/// `/auth/register` is exempted from the gate so the handler can
|
||||
/// return its informative `registration_disabled` 403 (the same code
|
||||
/// public-mode deployments use when `ALLOW_SELF_REGISTER=false`) —
|
||||
/// the handler itself force-blocks the request body in private mode,
|
||||
/// so no account ever gets created here. Everything else demands a
|
||||
/// valid session cookie or bearer token.
|
||||
fn is_public_in_private_mode(path: &str) -> bool {
|
||||
matches!(
|
||||
path,
|
||||
"/api/v1/health"
|
||||
| "/api/v1/auth/config"
|
||||
| "/api/v1/auth/login"
|
||||
| "/api/v1/auth/logout"
|
||||
| "/api/v1/auth/register"
|
||||
)
|
||||
}
|
||||
|
||||
/// Site-wide auth gate for `PRIVATE_MODE=true`. With the flag off this
|
||||
/// is a no-op pass-through, so public deployments take no extra DB
|
||||
/// hit. With it on, the guard reuses [`CurrentUser`] — the same
|
||||
/// session-cookie-then-bearer-token logic the per-handler extractor
|
||||
/// uses — so the two paths can never drift.
|
||||
async fn private_mode_guard(
|
||||
State(state): State<AppState>,
|
||||
req: Request,
|
||||
next: Next,
|
||||
) -> Result<Response, AppError> {
|
||||
if !state.auth.private_mode {
|
||||
return Ok(next.run(req).await);
|
||||
}
|
||||
if is_public_in_private_mode(req.uri().path()) {
|
||||
return Ok(next.run(req).await);
|
||||
}
|
||||
let (mut parts, body) = req.into_parts();
|
||||
match CurrentUser::from_request_parts(&mut parts, &state).await {
|
||||
Ok(_) => {
|
||||
let req = Request::from_parts(parts, body);
|
||||
Ok(next.run(req).await)
|
||||
}
|
||||
Err(_) => Err(AppError::Unauthenticated),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn cors_layer(allowed_origins: &[String]) -> CorsLayer {
|
||||
if allowed_origins.is_empty() {
|
||||
// Same-origin only — no CORS headers emitted.
|
||||
|
||||
@@ -78,6 +78,21 @@ async fn main() -> anyhow::Result<()> {
|
||||
let proxy_url = std::env::var("CRAWLER_PROXY")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let tor_control_url = std::env::var("CRAWLER_TOR_CONTROL_URL")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let tor_control_password = std::env::var("CRAWLER_TOR_CONTROL_PASSWORD")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let tor_control_cookie_path = std::env::var("CRAWLER_TOR_CONTROL_COOKIE_PATH")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.map(std::path::PathBuf::from);
|
||||
let tor_recircuit_max_attempts: u32 = std::env::var("CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(3)
|
||||
.max(1);
|
||||
let keep_browser_open = env_bool("CRAWLER_KEEP_BROWSER_OPEN", false);
|
||||
|
||||
let db = PgPoolOptions::new()
|
||||
@@ -112,7 +127,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let mut options = LaunchOptions::from_env();
|
||||
if let Some(proxy) = &proxy_url {
|
||||
options.extra_args.push(format!("--proxy-server={proxy}"));
|
||||
let chromium_proxy = mangalord::crawler::url_utils::chromium_proxy_arg(proxy);
|
||||
options.extra_args.push(format!("--proxy-server={chromium_proxy}"));
|
||||
}
|
||||
let keep_open = match (keep_browser_open, options.mode) {
|
||||
(true, BrowserMode::Headed) => true,
|
||||
@@ -144,6 +160,17 @@ async fn main() -> anyhow::Result<()> {
|
||||
"starting crawler"
|
||||
);
|
||||
|
||||
let tor = mangalord::crawler::tor::TorController::from_parts(
|
||||
tor_control_url.as_deref(),
|
||||
tor_control_password.as_deref(),
|
||||
tor_control_cookie_path.as_deref(),
|
||||
)
|
||||
.context("build TorController from CRAWLER_TOR_CONTROL_* env")?
|
||||
.map(Arc::new);
|
||||
if let Some(t) = &tor {
|
||||
tracing::info!(?t, "TOR control configured");
|
||||
}
|
||||
|
||||
// BrowserManager with idle_timeout = ZERO so the CLI keeps Chromium
|
||||
// alive for the entire run — same lifecycle as the old direct
|
||||
// `browser::launch()` flow. on_launch re-injects PHPSESSID + runs the
|
||||
@@ -153,17 +180,24 @@ async fn main() -> anyhow::Result<()> {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url_clone = start_url.clone();
|
||||
let tor_for_launch = tor.as_ref().map(Arc::clone);
|
||||
Arc::new(move |browser| {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url = start_url_clone.clone();
|
||||
let tor_for_launch = tor_for_launch.as_ref().map(Arc::clone);
|
||||
Box::pin(async move {
|
||||
session::inject_phpsessid(&browser, &sid, &domain)
|
||||
.await
|
||||
.context("inject_phpsessid")?;
|
||||
session::verify_session(&browser, &start_url)
|
||||
.await
|
||||
.context("verify_session")?;
|
||||
session::verify_session_with_recircuit(
|
||||
&browser,
|
||||
&start_url,
|
||||
tor_for_launch.as_deref(),
|
||||
tor_recircuit_max_attempts,
|
||||
)
|
||||
.await
|
||||
.context("verify_session")?;
|
||||
Ok(())
|
||||
})
|
||||
})
|
||||
@@ -187,6 +221,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
skip_chapter_content || !session_ready,
|
||||
chapter_workers,
|
||||
force_refetch_chapters,
|
||||
tor.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -216,6 +251,7 @@ async fn run(
|
||||
skip_chapter_content: bool,
|
||||
chapter_workers: usize,
|
||||
force_refetch_chapters: bool,
|
||||
tor: Option<Arc<mangalord::crawler::tor::TorController>>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut rate = HostRateLimiters::new(Duration::from_millis(rate_ms));
|
||||
if let Some(host) = cdn_host {
|
||||
@@ -267,6 +303,7 @@ async fn run(
|
||||
skip_chapters,
|
||||
allowlist.as_ref(),
|
||||
max_image_bytes,
|
||||
tor.as_deref(),
|
||||
)
|
||||
.await?;
|
||||
tracing::info!(?stats, "metadata pass complete");
|
||||
@@ -283,6 +320,7 @@ async fn run(
|
||||
force_refetch_chapters,
|
||||
Arc::clone(&allowlist),
|
||||
max_image_bytes,
|
||||
tor.clone(),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
@@ -308,6 +346,7 @@ async fn sync_bookmarked_chapter_content(
|
||||
force_refetch: bool,
|
||||
allowlist: Arc<mangalord::crawler::safety::DownloadAllowlist>,
|
||||
max_image_bytes: usize,
|
||||
tor: Option<Arc<mangalord::crawler::tor::TorController>>,
|
||||
) -> anyhow::Result<()> {
|
||||
let pending: Vec<(Uuid, Uuid, String)> = sqlx::query_as(
|
||||
r#"
|
||||
@@ -345,6 +384,7 @@ async fn sync_bookmarked_chapter_content(
|
||||
let rate = Arc::clone(&rate);
|
||||
let manager = Arc::clone(&manager);
|
||||
let allowlist = Arc::clone(&allowlist);
|
||||
let tor = tor.clone();
|
||||
let stats = &stats;
|
||||
async move {
|
||||
if session_expired.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
@@ -371,6 +411,7 @@ async fn sync_bookmarked_chapter_content(
|
||||
force_refetch,
|
||||
allowlist.as_ref(),
|
||||
max_image_bytes,
|
||||
tor.as_deref(),
|
||||
)
|
||||
.await;
|
||||
drop(lease);
|
||||
|
||||
@@ -19,6 +19,14 @@ pub struct AuthConfig {
|
||||
/// `POST /admin/users`. Defaults to `true` (open registration)
|
||||
/// for backward compatibility.
|
||||
pub allow_self_register: bool,
|
||||
/// When `true`, every API path except a small allowlist
|
||||
/// (`/health`, `/auth/config`, `/auth/login`, `/auth/logout`)
|
||||
/// requires a valid session cookie or bearer token — anonymous
|
||||
/// reads are rejected with 401. Self-registration is also
|
||||
/// force-disabled regardless of [`Self::allow_self_register`]
|
||||
/// so a private instance is locked down with a single switch.
|
||||
/// Defaults to `false` (current public behaviour).
|
||||
pub private_mode: bool,
|
||||
}
|
||||
|
||||
impl Default for AuthConfig {
|
||||
@@ -33,6 +41,7 @@ impl Default for AuthConfig {
|
||||
// defaults.
|
||||
rate_limit: crate::auth::rate_limit::RateLimitConfig::default(),
|
||||
allow_self_register: true,
|
||||
private_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -97,6 +106,20 @@ pub struct CrawlerConfig {
|
||||
pub cookie_domain: Option<String>,
|
||||
pub user_agent: Option<String>,
|
||||
pub proxy: Option<String>,
|
||||
/// `tcp://host:port`, `host:port`, or bare `host` (default port
|
||||
/// 9051). When `None`, TOR-recircuit-on-transient is disabled and
|
||||
/// the crawler behaves identically to pre-TOR releases.
|
||||
pub tor_control_url: Option<String>,
|
||||
/// HashedControlPassword auth. Used only when
|
||||
/// `tor_control_cookie_path` is `None`.
|
||||
pub tor_control_password: Option<String>,
|
||||
/// Cookie-file auth path (e.g.
|
||||
/// `/var/lib/tor/control_auth_cookie`). Takes precedence over
|
||||
/// password when both are set.
|
||||
pub tor_control_cookie_path: Option<PathBuf>,
|
||||
/// Maximum NEWNYM-and-retry cycles per recircuit-eligible failure.
|
||||
/// Defaults to 3.
|
||||
pub tor_recircuit_max_attempts: u32,
|
||||
pub browser: LaunchOptions,
|
||||
/// Hosts the crawler is allowed to download images / covers from.
|
||||
/// Always seeded with the host of `start_url` and (when set) the
|
||||
@@ -105,6 +128,10 @@ pub struct CrawlerConfig {
|
||||
pub download_allowlist: DownloadAllowlist,
|
||||
/// Hard upper bound on a single image download. Defaults to 32 MiB.
|
||||
pub max_image_bytes: usize,
|
||||
/// Max manga detail fetches per metadata pass. `0` means no cap
|
||||
/// (full sweep up to the source's own bound). Sourced from
|
||||
/// `CRAWLER_LIMIT`, mirroring the CLI binary.
|
||||
pub manga_limit: usize,
|
||||
}
|
||||
|
||||
impl Default for CrawlerConfig {
|
||||
@@ -124,9 +151,14 @@ impl Default for CrawlerConfig {
|
||||
cookie_domain: None,
|
||||
user_agent: None,
|
||||
proxy: None,
|
||||
tor_control_url: None,
|
||||
tor_control_password: None,
|
||||
tor_control_cookie_path: None,
|
||||
tor_recircuit_max_attempts: 3,
|
||||
browser: LaunchOptions::headless(),
|
||||
download_allowlist: DownloadAllowlist::new(),
|
||||
max_image_bytes: DEFAULT_MAX_IMAGE_BYTES,
|
||||
manga_limit: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -158,6 +190,7 @@ impl Config {
|
||||
) as u32,
|
||||
},
|
||||
allow_self_register: env_bool("ALLOW_SELF_REGISTER", true),
|
||||
private_mode: env_bool("PRIVATE_MODE", false),
|
||||
},
|
||||
upload: UploadConfig {
|
||||
max_request_bytes: env_usize("MAX_REQUEST_BYTES", 200 * 1024 * 1024),
|
||||
@@ -234,9 +267,22 @@ impl CrawlerConfig {
|
||||
proxy: std::env::var("CRAWLER_PROXY")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
tor_control_url: std::env::var("CRAWLER_TOR_CONTROL_URL")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
tor_control_password: std::env::var("CRAWLER_TOR_CONTROL_PASSWORD")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
tor_control_cookie_path: std::env::var("CRAWLER_TOR_CONTROL_COOKIE_PATH")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.map(PathBuf::from),
|
||||
tor_recircuit_max_attempts: env_u64("CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS", 3)
|
||||
.max(1) as u32,
|
||||
browser: LaunchOptions::from_env(),
|
||||
download_allowlist,
|
||||
max_image_bytes: env_usize("CRAWLER_MAX_IMAGE_BYTES", DEFAULT_MAX_IMAGE_BYTES),
|
||||
manga_limit: env_usize("CRAWLER_LIMIT", 0),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -310,3 +356,64 @@ fn env_usize(name: &str, default: usize) -> usize {
|
||||
.unwrap_or(default)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::Mutex;
|
||||
|
||||
// Serialise env-touching tests so concurrent cargo-test threads don't
|
||||
// race on the process-global env. Re-acquire on poison since a
|
||||
// panicking test still leaves the env in a consistent state for us
|
||||
// (we set/unset within each guard region).
|
||||
static ENV_GUARD: Mutex<()> = Mutex::new(());
|
||||
|
||||
#[test]
|
||||
fn crawler_limit_env_populates_manga_limit() {
|
||||
let _g = ENV_GUARD.lock().unwrap_or_else(|p| p.into_inner());
|
||||
std::env::set_var("CRAWLER_LIMIT", "96");
|
||||
let cfg = CrawlerConfig::from_env().expect("from_env");
|
||||
std::env::remove_var("CRAWLER_LIMIT");
|
||||
assert_eq!(cfg.manga_limit, 96);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn crawler_limit_unset_defaults_to_zero() {
|
||||
let _g = ENV_GUARD.lock().unwrap_or_else(|p| p.into_inner());
|
||||
std::env::remove_var("CRAWLER_LIMIT");
|
||||
let cfg = CrawlerConfig::from_env().expect("from_env");
|
||||
assert_eq!(cfg.manga_limit, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn private_mode_env_parses_true() {
|
||||
let _g = ENV_GUARD.lock().unwrap_or_else(|p| p.into_inner());
|
||||
std::env::set_var("PRIVATE_MODE", "true");
|
||||
std::env::set_var("DATABASE_URL", "postgres://test");
|
||||
let cfg = Config::from_env().expect("from_env");
|
||||
std::env::remove_var("PRIVATE_MODE");
|
||||
std::env::remove_var("DATABASE_URL");
|
||||
assert!(cfg.auth.private_mode);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn private_mode_env_parses_false() {
|
||||
let _g = ENV_GUARD.lock().unwrap_or_else(|p| p.into_inner());
|
||||
std::env::set_var("PRIVATE_MODE", "false");
|
||||
std::env::set_var("DATABASE_URL", "postgres://test");
|
||||
let cfg = Config::from_env().expect("from_env");
|
||||
std::env::remove_var("PRIVATE_MODE");
|
||||
std::env::remove_var("DATABASE_URL");
|
||||
assert!(!cfg.auth.private_mode);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn private_mode_defaults_to_false() {
|
||||
let _g = ENV_GUARD.lock().unwrap_or_else(|p| p.into_inner());
|
||||
std::env::remove_var("PRIVATE_MODE");
|
||||
std::env::set_var("DATABASE_URL", "postgres://test");
|
||||
let cfg = Config::from_env().expect("from_env");
|
||||
std::env::remove_var("DATABASE_URL");
|
||||
assert!(!cfg.auth.private_mode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -73,39 +73,36 @@ pub enum SyncOutcome {
|
||||
SessionExpired,
|
||||
}
|
||||
|
||||
/// Fetch all images for one chapter and persist them atomically. On
|
||||
/// any error after the first storage put, the DB transaction rolls
|
||||
/// back so the chapter stays at `page_count = 0` and is retried on the
|
||||
/// next run. Bytes already written to storage become orphans; a future
|
||||
/// reaper sweeps them.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn sync_chapter_content(
|
||||
browser: &chromiumoxide::Browser,
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
rate: &HostRateLimiters,
|
||||
chapter_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
source_url: &str,
|
||||
force_refetch: bool,
|
||||
allowlist: &DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
) -> anyhow::Result<SyncOutcome> {
|
||||
// Skip if already fetched, unless caller explicitly forces.
|
||||
if !force_refetch {
|
||||
let (page_count,): (i32,) =
|
||||
sqlx::query_as("SELECT page_count FROM chapters WHERE id = $1")
|
||||
.bind(chapter_id)
|
||||
.fetch_one(db)
|
||||
.await
|
||||
.context("read chapter page_count")?;
|
||||
if page_count > 0 {
|
||||
return Ok(SyncOutcome::Skipped);
|
||||
}
|
||||
}
|
||||
/// Per-chapter max fetch attempts when TOR is configured. `N = 3` means
|
||||
/// up to 3 total page fetches with 2 NEWNYM signals between them. When
|
||||
/// TOR is not configured the effective budget collapses to 1 (single
|
||||
/// attempt, no retry, no recircuit — bit-for-bit pre-TOR behavior).
|
||||
const CHAPTER_RECIRCUIT_MAX_ATTEMPTS: u32 = 3;
|
||||
|
||||
// Nav to chapter page (rate-limited per host).
|
||||
/// Outcome of [`fetch_chapter_html_with_recircuit`]. `Ok` carries the
|
||||
/// final reader HTML; the other two map to `sync_chapter_content`'s
|
||||
/// existing failure modes.
|
||||
#[derive(Debug)]
|
||||
enum ChapterFetchOutcome {
|
||||
Ok(String),
|
||||
/// `ChapterProbe::Unauthenticated` after exhausting recircuit
|
||||
/// budget (or with budget=0). Caller returns
|
||||
/// `SyncOutcome::SessionExpired`.
|
||||
SessionExpired,
|
||||
/// `ChapterProbe::Transient` after exhausting recircuit budget
|
||||
/// (or with budget=0). Caller bails so the dispatcher does
|
||||
/// exponential backoff.
|
||||
PersistentTransient,
|
||||
}
|
||||
|
||||
/// Single rate-limited Chromium navigation to the chapter URL,
|
||||
/// returning the page HTML. Extracted from `sync_chapter_content` so
|
||||
/// the recircuit loop can call it once per attempt.
|
||||
async fn fetch_chapter_html_once(
|
||||
browser: &chromiumoxide::Browser,
|
||||
rate: &HostRateLimiters,
|
||||
source_url: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
rate.wait_for(source_url).await?;
|
||||
let page = browser
|
||||
.new_page(source_url)
|
||||
@@ -124,28 +121,135 @@ pub async fn sync_chapter_content(
|
||||
crate::crawler::nav::SELECTOR_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let html = page.content().await.context("read chapter html")?;
|
||||
page.close().await.ok();
|
||||
Ok(html)
|
||||
}
|
||||
|
||||
// Three-way session classification: distinguishes a transient
|
||||
// hiccup (broken-page body or logged-in-but-no-reader) from a
|
||||
// genuine PHPSESSID expiry (no reader and no avatar widget). The
|
||||
// earlier binary `#avatar_menu` check conflated both and froze
|
||||
// every worker on a layout shift.
|
||||
match session::classify_chapter_probe(&html) {
|
||||
ChapterProbe::Unauthenticated => return Ok(SyncOutcome::SessionExpired),
|
||||
ChapterProbe::Transient => {
|
||||
/// Pure-over-IO loop: fetch + classify, up to `max_attempts` total
|
||||
/// fetches. Between attempts, `recircuit` is invoked (a no-op when
|
||||
/// TOR isn't configured). `max_attempts = 1` collapses to the
|
||||
/// original single-shot behavior — `Unauthenticated` →
|
||||
/// `SessionExpired`, `Transient` → `PersistentTransient` on the first
|
||||
/// hit, no recircuit.
|
||||
///
|
||||
/// Semantics match [`crate::crawler::detect::retry_on_transient`] and
|
||||
/// [`run_session_probe_loop`]: `N` is **total attempts including the
|
||||
/// first**, so `N = 3` means 3 fetches and up to 2 NEWNYM calls.
|
||||
/// `Unauthenticated` and `Transient` share the budget — the loop
|
||||
/// doesn't distinguish, so a sequence like Transient → Unauth → Ok
|
||||
/// counts as 3 attempts.
|
||||
async fn fetch_chapter_html_with_recircuit<F, Fut, R, RFut>(
|
||||
mut fetch: F,
|
||||
mut recircuit: R,
|
||||
max_attempts: u32,
|
||||
source_url_for_msg: &str,
|
||||
) -> anyhow::Result<ChapterFetchOutcome>
|
||||
where
|
||||
F: FnMut() -> Fut,
|
||||
Fut: std::future::Future<Output = anyhow::Result<String>>,
|
||||
R: FnMut() -> RFut,
|
||||
RFut: std::future::Future<Output = ()>,
|
||||
{
|
||||
debug_assert!(max_attempts >= 1, "max_attempts must be at least 1");
|
||||
let mut attempt = 0u32;
|
||||
loop {
|
||||
attempt += 1;
|
||||
let html = fetch().await?;
|
||||
match session::classify_chapter_probe(&html) {
|
||||
ChapterProbe::Ok => return Ok(ChapterFetchOutcome::Ok(html)),
|
||||
ChapterProbe::Unauthenticated => {
|
||||
if attempt >= max_attempts {
|
||||
return Ok(ChapterFetchOutcome::SessionExpired);
|
||||
}
|
||||
tracing::warn!(
|
||||
attempt,
|
||||
max = max_attempts,
|
||||
url = source_url_for_msg,
|
||||
"chapter probe Unauthenticated; signaling TOR NEWNYM and retrying"
|
||||
);
|
||||
recircuit().await;
|
||||
}
|
||||
ChapterProbe::Transient => {
|
||||
if attempt >= max_attempts {
|
||||
return Ok(ChapterFetchOutcome::PersistentTransient);
|
||||
}
|
||||
tracing::warn!(
|
||||
attempt,
|
||||
max = max_attempts,
|
||||
url = source_url_for_msg,
|
||||
"chapter probe Transient; signaling TOR NEWNYM and retrying"
|
||||
);
|
||||
recircuit().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch all images for one chapter and persist them atomically. On
|
||||
/// any error after the first storage put, the DB transaction rolls
|
||||
/// back so the chapter stays at `page_count = 0` and is retried on the
|
||||
/// next run. Bytes already written to storage become orphans; a future
|
||||
/// reaper sweeps them.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn sync_chapter_content(
|
||||
browser: &chromiumoxide::Browser,
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
rate: &HostRateLimiters,
|
||||
chapter_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
source_url: &str,
|
||||
force_refetch: bool,
|
||||
allowlist: &DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
tor: Option<&crate::crawler::tor::TorController>,
|
||||
) -> anyhow::Result<SyncOutcome> {
|
||||
// Skip if already fetched, unless caller explicitly forces.
|
||||
if !force_refetch {
|
||||
let (page_count,): (i32,) =
|
||||
sqlx::query_as("SELECT page_count FROM chapters WHERE id = $1")
|
||||
.bind(chapter_id)
|
||||
.fetch_one(db)
|
||||
.await
|
||||
.context("read chapter page_count")?;
|
||||
if page_count > 0 {
|
||||
return Ok(SyncOutcome::Skipped);
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch + classify. With TOR configured, allow up to
|
||||
// CHAPTER_RECIRCUIT_MAX_ATTEMPTS total page fetches with NEWNYM
|
||||
// between each. Without TOR, collapse to 1 attempt (no retry, no
|
||||
// recircuit) — matches the pre-TOR single-shot behavior bit-for-bit.
|
||||
let max_attempts = if tor.is_some() { CHAPTER_RECIRCUIT_MAX_ATTEMPTS } else { 1 };
|
||||
let html = match fetch_chapter_html_with_recircuit(
|
||||
|| fetch_chapter_html_once(browser, rate, source_url),
|
||||
|| async {
|
||||
if let Some(t) = tor {
|
||||
if let Err(e) = t.new_identity().await {
|
||||
tracing::warn!(error = %e, "TOR NEWNYM failed; continuing with same circuit");
|
||||
}
|
||||
}
|
||||
},
|
||||
max_attempts,
|
||||
source_url,
|
||||
)
|
||||
.await?
|
||||
{
|
||||
ChapterFetchOutcome::Ok(html) => html,
|
||||
ChapterFetchOutcome::SessionExpired => return Ok(SyncOutcome::SessionExpired),
|
||||
ChapterFetchOutcome::PersistentTransient => {
|
||||
// Surface as a typed Err so the dispatcher path runs
|
||||
// ack_failed with exponential backoff (rather than the
|
||||
// session-expired sticky flag).
|
||||
anyhow::bail!(
|
||||
"chapter page at {source_url} returned a transient response \
|
||||
(broken-page body or reader didn't render); will retry"
|
||||
"chapter page at {source_url} returned a transient response after \
|
||||
{max_attempts} attempt(s); will retry"
|
||||
);
|
||||
}
|
||||
ChapterProbe::Ok => {}
|
||||
}
|
||||
};
|
||||
|
||||
let images = parse_chapter_pages(&html)
|
||||
.with_context(|| format!("parse chapter pages at {source_url}"))?;
|
||||
@@ -304,4 +408,214 @@ mod tests {
|
||||
let err = parse_chapter_pages(html).expect_err("expected Transient");
|
||||
assert!(err.is_transient(), "got non-transient: {err}");
|
||||
}
|
||||
|
||||
// --- fetch_chapter_html_with_recircuit -------------------------------
|
||||
|
||||
const OK_HTML: &str = r#"<html><body><a id="pic_container"><img id="page1" src="x"/></a></body></html>"#;
|
||||
const UNAUTH_HTML: &str = r#"<html><body><header><div id="logo">x</div></header><main>please log in</main></body></html>"#;
|
||||
const TRANSIENT_HTML: &str = "<html><body><p>we're sorry, the request file are not found.</p></body></html>";
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_ok_first_attempt() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetches = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetches += 1;
|
||||
async { Ok(OK_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::Ok(_)));
|
||||
assert_eq!(fetches, 1);
|
||||
assert_eq!(recircuits, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_unauth_with_single_attempt_returns_session_expired() {
|
||||
// max_attempts=1 = TOR disabled, fail-fast on first Unauthenticated.
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetches = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetches += 1;
|
||||
async { Ok(UNAUTH_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
1,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok-result");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::SessionExpired));
|
||||
assert_eq!(fetches, 1);
|
||||
assert_eq!(recircuits, 0, "no recircuit when budget is 1 (TOR disabled)");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_unauth_then_ok_within_budget() {
|
||||
// max_attempts=3 = up to 3 fetches with 2 recircuits between.
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetch_n = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetch_n += 1;
|
||||
let n = fetch_n;
|
||||
async move {
|
||||
if n == 1 {
|
||||
Ok(UNAUTH_HTML.to_string())
|
||||
} else {
|
||||
Ok(OK_HTML.to_string())
|
||||
}
|
||||
}
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::Ok(_)));
|
||||
assert_eq!(fetch_n, 2);
|
||||
assert_eq!(recircuits, 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_unauth_exhausts_budget_returns_session_expired() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetch_n = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetch_n += 1;
|
||||
async { Ok(UNAUTH_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok-result");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::SessionExpired));
|
||||
assert_eq!(fetch_n, 3, "max_attempts=3 → 3 fetches total");
|
||||
assert_eq!(recircuits, 2, "2 recircuits between 3 fetches");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_transient_then_ok_within_budget() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetch_n = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetch_n += 1;
|
||||
let n = fetch_n;
|
||||
async move {
|
||||
if n < 3 {
|
||||
Ok(TRANSIENT_HTML.to_string())
|
||||
} else {
|
||||
Ok(OK_HTML.to_string())
|
||||
}
|
||||
}
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::Ok(_)));
|
||||
assert_eq!(fetch_n, 3);
|
||||
assert_eq!(recircuits, 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_transient_exhausts_budget_returns_persistent() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetch_n = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetch_n += 1;
|
||||
async { Ok(TRANSIENT_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok-result");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::PersistentTransient));
|
||||
assert_eq!(fetch_n, 3, "max_attempts=3 → 3 fetches total");
|
||||
assert_eq!(recircuits, 2, "2 recircuits between 3 fetches");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_mixed_transient_then_unauth_then_ok_shares_budget() {
|
||||
// Audit-prompted regression: outcomes share the attempt counter.
|
||||
// Sequence: Transient (attempt 1) → Unauth (attempt 2) → Ok (3).
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetch_n = 0u32;
|
||||
let outcome = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetch_n += 1;
|
||||
let n = fetch_n;
|
||||
async move {
|
||||
match n {
|
||||
1 => Ok(TRANSIENT_HTML.to_string()),
|
||||
2 => Ok(UNAUTH_HTML.to_string()),
|
||||
_ => Ok(OK_HTML.to_string()),
|
||||
}
|
||||
}
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert!(matches!(outcome, ChapterFetchOutcome::Ok(_)));
|
||||
assert_eq!(fetch_n, 3);
|
||||
assert_eq!(recircuits, 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn recircuit_loop_propagates_fetch_errors() {
|
||||
let mut fetch_n = 0u32;
|
||||
let err = fetch_chapter_html_with_recircuit(
|
||||
|| {
|
||||
fetch_n += 1;
|
||||
async { Err(anyhow::anyhow!("nav timeout")) }
|
||||
},
|
||||
|| async {},
|
||||
3,
|
||||
"https://example/c",
|
||||
)
|
||||
.await
|
||||
.expect_err("fetch error bubbles");
|
||||
assert_eq!(fetch_n, 1);
|
||||
assert!(format!("{err:#}").contains("nav timeout"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,13 +80,36 @@ pub fn has_logo_sentinel(doc: &scraper::Html) -> bool {
|
||||
/// caller can fall back on the job system's retry/backoff once the
|
||||
/// inline budget is exhausted.
|
||||
pub async fn retry_on_transient<F, Fut, T>(
|
||||
mut op: F,
|
||||
op: F,
|
||||
max_attempts: u32,
|
||||
delay: Duration,
|
||||
) -> Result<T, PageError>
|
||||
where
|
||||
F: FnMut() -> Fut,
|
||||
Fut: Future<Output = Result<T, PageError>>,
|
||||
{
|
||||
retry_on_transient_with_hook(op, max_attempts, delay, || async {}).await
|
||||
}
|
||||
|
||||
/// Like [`retry_on_transient`] but invokes `on_retry` between a
|
||||
/// transient failure and the subsequent sleep+retry. The hook does
|
||||
/// **not** fire on the first attempt, after a non-transient error, or
|
||||
/// after the final attempt (no retry follows). Hook failures are not
|
||||
/// propagated — return `()` from the future and log inside if needed.
|
||||
///
|
||||
/// Wire the TOR controller's `new_identity` here to rotate circuits
|
||||
/// between page-fetch retries; see [`crate::crawler::tor`].
|
||||
pub async fn retry_on_transient_with_hook<F, Fut, T, H, HFut>(
|
||||
mut op: F,
|
||||
max_attempts: u32,
|
||||
delay: Duration,
|
||||
mut on_retry: H,
|
||||
) -> Result<T, PageError>
|
||||
where
|
||||
F: FnMut() -> Fut,
|
||||
Fut: Future<Output = Result<T, PageError>>,
|
||||
H: FnMut() -> HFut,
|
||||
HFut: Future<Output = ()>,
|
||||
{
|
||||
debug_assert!(max_attempts >= 1, "max_attempts must be at least 1");
|
||||
let mut attempt = 0u32;
|
||||
@@ -101,8 +124,9 @@ where
|
||||
attempt,
|
||||
max_attempts,
|
||||
error = %e,
|
||||
"transient error; sleeping before retry"
|
||||
"transient error; running on-retry hook and sleeping before retry"
|
||||
);
|
||||
on_retry().await;
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
}
|
||||
@@ -247,4 +271,92 @@ mod tests {
|
||||
assert_eq!(result.unwrap(), 7);
|
||||
assert_eq!(attempt, 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn hook_fires_once_between_transient_and_success() {
|
||||
let mut attempt = 0u32;
|
||||
let mut hook_calls = 0u32;
|
||||
let result: Result<i32, PageError> = retry_on_transient_with_hook(
|
||||
|| {
|
||||
attempt += 1;
|
||||
let n = attempt;
|
||||
async move {
|
||||
if n < 2 {
|
||||
Err(PageError::transient("once"))
|
||||
} else {
|
||||
Ok(99)
|
||||
}
|
||||
}
|
||||
},
|
||||
5,
|
||||
Duration::from_millis(0),
|
||||
|| {
|
||||
hook_calls += 1;
|
||||
async {}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
assert_eq!(result.unwrap(), 99);
|
||||
assert_eq!(attempt, 2);
|
||||
assert_eq!(hook_calls, 1, "hook fires exactly once between attempts");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn hook_does_not_fire_when_first_attempt_succeeds() {
|
||||
let mut hook_calls = 0u32;
|
||||
let result: Result<i32, PageError> = retry_on_transient_with_hook(
|
||||
|| async { Ok(1) },
|
||||
5,
|
||||
Duration::from_millis(0),
|
||||
|| {
|
||||
hook_calls += 1;
|
||||
async {}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(hook_calls, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn hook_does_not_fire_after_non_transient_error() {
|
||||
let mut hook_calls = 0u32;
|
||||
let result: Result<i32, PageError> = retry_on_transient_with_hook(
|
||||
|| async { Err(PageError::Other(anyhow::anyhow!("permanent"))) },
|
||||
5,
|
||||
Duration::from_millis(0),
|
||||
|| {
|
||||
hook_calls += 1;
|
||||
async {}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
assert_eq!(hook_calls, 0, "non-transient must short-circuit before hook");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn hook_does_not_fire_after_final_failed_attempt() {
|
||||
// With max_attempts=3 and three persistent transients, the hook
|
||||
// should run twice (between 1→2 and 2→3) — never a third time,
|
||||
// because no retry follows attempt 3.
|
||||
let mut attempt = 0u32;
|
||||
let mut hook_calls = 0u32;
|
||||
let result: Result<i32, PageError> = retry_on_transient_with_hook(
|
||||
|| {
|
||||
attempt += 1;
|
||||
async { Err(PageError::transient("always")) }
|
||||
},
|
||||
3,
|
||||
Duration::from_millis(0),
|
||||
|| {
|
||||
hook_calls += 1;
|
||||
async {}
|
||||
},
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
assert_eq!(attempt, 3);
|
||||
assert_eq!(hook_calls, 2, "hook fires N-1 times for N attempts that all fail transient");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,9 @@ pub mod jobs;
|
||||
pub mod nav;
|
||||
pub mod pipeline;
|
||||
pub mod rate_limit;
|
||||
pub mod resync;
|
||||
pub mod safety;
|
||||
pub mod session;
|
||||
pub mod source;
|
||||
pub mod tor;
|
||||
pub mod url_utils;
|
||||
|
||||
@@ -13,7 +13,7 @@ use crate::crawler::jobs::{self, EnqueueResult, JobPayload};
|
||||
use crate::crawler::rate_limit::HostRateLimiters;
|
||||
use crate::crawler::safety::{fetch_bytes_capped, looks_like_image, DownloadAllowlist};
|
||||
use crate::crawler::source::target::TargetSource;
|
||||
use crate::crawler::source::{FetchContext, Source};
|
||||
use crate::crawler::source::{FetchContext, Source, SourceMangaRef};
|
||||
use crate::repo;
|
||||
use crate::repo::crawler::UpsertStatus;
|
||||
use crate::storage::Storage;
|
||||
@@ -103,6 +103,7 @@ pub async fn run_metadata_pass(
|
||||
skip_chapters: bool,
|
||||
allowlist: &DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
tor: Option<&crate::crawler::tor::TorController>,
|
||||
) -> anyhow::Result<MetadataStats> {
|
||||
let lease = browser_manager
|
||||
.acquire()
|
||||
@@ -121,6 +122,7 @@ pub async fn run_metadata_pass(
|
||||
let ctx = FetchContext {
|
||||
browser: browser_ref,
|
||||
rate,
|
||||
tor,
|
||||
};
|
||||
|
||||
let source_id = source.id();
|
||||
@@ -521,12 +523,133 @@ pub struct EnqueueSummary {
|
||||
pub failed: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy)]
|
||||
pub struct CoverBackfillStats {
|
||||
pub considered: usize,
|
||||
pub fetched: usize,
|
||||
pub failed: usize,
|
||||
}
|
||||
|
||||
/// Default per-tick cap for [`backfill_missing_covers`]. The metadata pass
|
||||
/// already retries covers when its walk reaches the affected manga; this
|
||||
/// backfill exists to catch the residual case where the early-stop
|
||||
/// optimisation prevents the walk from reaching mangas whose cover failed
|
||||
/// on first attempt. A small cap is enough because the backlog only grows
|
||||
/// from sporadic download failures, not from systematic misses.
|
||||
pub const COVER_BACKFILL_DEFAULT_MAX: usize = 10;
|
||||
|
||||
/// Re-attempt cover downloads for mangas where `cover_image_path IS NULL`
|
||||
/// but a live `manga_sources` row exists. Refetches the source detail
|
||||
/// page (which is where the cover URL lives) and downloads the cover.
|
||||
///
|
||||
/// Bounded by `max_mangas` per call so a steady stream of failing covers
|
||||
/// — e.g. a CDN host that's persistently 502 — can't monopolise a cron
|
||||
/// tick. Orders by `manga_sources.last_seen_at DESC` so the freshest
|
||||
/// missing-cover mangas are addressed first.
|
||||
///
|
||||
/// Failures are logged and counted, not raised: a single bad cover URL
|
||||
/// must not stall every other backfill behind it.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn backfill_missing_covers(
|
||||
browser_manager: &BrowserManager,
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
rate: &HostRateLimiters,
|
||||
max_mangas: usize,
|
||||
allowlist: &DownloadAllowlist,
|
||||
max_image_bytes: usize,
|
||||
tor: Option<&crate::crawler::tor::TorController>,
|
||||
) -> anyhow::Result<CoverBackfillStats> {
|
||||
let mut stats = CoverBackfillStats::default();
|
||||
if max_mangas == 0 {
|
||||
return Ok(stats);
|
||||
}
|
||||
|
||||
let entries = repo::crawler::list_missing_covers(db, max_mangas as i64)
|
||||
.await
|
||||
.context("list_missing_covers")?;
|
||||
|
||||
if entries.is_empty() {
|
||||
return Ok(stats);
|
||||
}
|
||||
|
||||
let lease = browser_manager
|
||||
.acquire()
|
||||
.await
|
||||
.context("acquire browser lease for cover backfill")?;
|
||||
let browser_ref: &chromiumoxide::Browser = &lease;
|
||||
let ctx = FetchContext { browser: browser_ref, rate, tor };
|
||||
|
||||
for entry in entries {
|
||||
stats.considered += 1;
|
||||
// Metadata-only TargetSource: skip chapter-list parsing so a
|
||||
// missing-cover refetch doesn't soft-drop chapters on a partial
|
||||
// render. Cover URL alone is what we need.
|
||||
let source = TargetSource::new(entry.source_url.clone()).without_chapter_parsing();
|
||||
let r = SourceMangaRef {
|
||||
source_manga_key: entry.source_manga_key.clone(),
|
||||
title: String::new(),
|
||||
url: entry.source_url.clone(),
|
||||
};
|
||||
let cover_url = match source.fetch_manga(&ctx, &r).await {
|
||||
Ok(manga) => manga.cover_url,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
manga_id = %entry.manga_id,
|
||||
url = %entry.source_url,
|
||||
error = ?e,
|
||||
"cover backfill: fetch_manga failed"
|
||||
);
|
||||
stats.failed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let Some(cover_url) = cover_url else {
|
||||
tracing::warn!(
|
||||
manga_id = %entry.manga_id,
|
||||
url = %entry.source_url,
|
||||
"cover backfill: source returned no cover_url"
|
||||
);
|
||||
stats.failed += 1;
|
||||
continue;
|
||||
};
|
||||
match download_and_store_cover(
|
||||
db,
|
||||
storage,
|
||||
http,
|
||||
rate,
|
||||
&entry.source_url,
|
||||
entry.manga_id,
|
||||
&cover_url,
|
||||
allowlist,
|
||||
max_image_bytes,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => stats.fetched += 1,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
manga_id = %entry.manga_id,
|
||||
url = %entry.source_url,
|
||||
error = ?e,
|
||||
"cover backfill: download failed"
|
||||
);
|
||||
stats.failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drop(lease);
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Download a cover image and persist its storage path. Local to the
|
||||
/// pipeline because the CLI still calls it from its inline chapter-content
|
||||
/// loop; once the worker pool fully replaces that path we can fold this
|
||||
/// into `pipeline` proper.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn download_and_store_cover(
|
||||
pub(crate) async fn download_and_store_cover(
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
|
||||
277
backend/src/crawler/resync.rs
Normal file
277
backend/src/crawler/resync.rs
Normal file
@@ -0,0 +1,277 @@
|
||||
//! Admin-triggered resync of a single manga's metadata + cover, or a
|
||||
//! single chapter's content.
|
||||
//!
|
||||
//! The cron tick already retries covers and chapter content on its own
|
||||
//! schedule. This module exists for the operator-controlled path:
|
||||
//! "this manga's metadata is stale / its cover never landed / this
|
||||
//! chapter is broken — pull from source now, not at the next daily
|
||||
//! tick." Wired into the admin API, never into the queue, so the work
|
||||
//! happens synchronously with the HTTP request and the admin sees the
|
||||
//! refreshed row in the response.
|
||||
//!
|
||||
//! Shares the daemon's [`BrowserManager`], rate limiter, HTTP client,
|
||||
//! and TOR controller so a force resync respects the same per-host
|
||||
//! pacing and recircuit budget the daily crawl uses — admin actions
|
||||
//! must not let an operator accidentally hammer the source.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::crawler::browser_manager::BrowserManager;
|
||||
use crate::crawler::content::{self, SyncOutcome};
|
||||
use crate::crawler::pipeline;
|
||||
use crate::crawler::rate_limit::HostRateLimiters;
|
||||
use crate::crawler::safety::DownloadAllowlist;
|
||||
use crate::crawler::source::target::TargetSource;
|
||||
use crate::crawler::source::{FetchContext, Source, SourceMangaRef};
|
||||
use crate::crawler::tor::TorController;
|
||||
use crate::repo;
|
||||
use crate::repo::crawler::UpsertStatus;
|
||||
use crate::storage::Storage;
|
||||
|
||||
/// Outcome of [`ResyncService::resync_manga`]. Mirrors the bits the
|
||||
/// admin UI cares about — was the row actually re-upserted, did the
|
||||
/// cover land — so the response can show "metadata refreshed, cover
|
||||
/// re-downloaded" or "metadata unchanged" without a second round-trip.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct MangaResyncOutcome {
|
||||
pub manga_id: Uuid,
|
||||
pub metadata_status: UpsertStatus,
|
||||
pub cover_fetched: bool,
|
||||
}
|
||||
|
||||
/// Outcome of [`ResyncService::resync_chapter`]. `Fetched(pages)` is the
|
||||
/// success case; `Skipped` means the source row was already gone or the
|
||||
/// chapter had no live source.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ChapterResyncOutcome {
|
||||
Fetched { chapter_id: Uuid, pages: usize },
|
||||
Skipped { chapter_id: Uuid, reason: String },
|
||||
}
|
||||
|
||||
/// Service exposed by the daemon to the admin API. Optional on
|
||||
/// [`AppState`] — `None` when the crawler daemon is disabled
|
||||
/// (`CRAWLER_DAEMON=false`), in which case admin handlers return 503.
|
||||
#[async_trait]
|
||||
pub trait ResyncService: Send + Sync {
|
||||
async fn resync_manga(&self, manga_id: Uuid) -> anyhow::Result<MangaResyncOutcome>;
|
||||
async fn resync_chapter(&self, chapter_id: Uuid) -> anyhow::Result<ChapterResyncOutcome>;
|
||||
}
|
||||
|
||||
/// Errors with a stable shape so the API layer can map them to the
|
||||
/// right HTTP status (404 vs 422 vs 5xx). Anything else surfaces as a
|
||||
/// generic 500.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ResyncError {
|
||||
#[error("manga has no source to resync from")]
|
||||
NoMangaSource,
|
||||
#[error("chapter has no source to resync from")]
|
||||
NoChapterSource,
|
||||
}
|
||||
|
||||
pub struct RealResyncService {
|
||||
pub browser_manager: Arc<BrowserManager>,
|
||||
pub db: PgPool,
|
||||
pub storage: Arc<dyn Storage>,
|
||||
pub http: reqwest::Client,
|
||||
pub rate: Arc<HostRateLimiters>,
|
||||
pub download_allowlist: DownloadAllowlist,
|
||||
pub max_image_bytes: usize,
|
||||
pub tor: Option<Arc<TorController>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ResyncService for RealResyncService {
|
||||
async fn resync_manga(&self, manga_id: Uuid) -> anyhow::Result<MangaResyncOutcome> {
|
||||
// Pick the freshest live source row. Multi-source mangas
|
||||
// (theoretical — only one Source impl today) get the row whose
|
||||
// `last_seen_at` is newest; soft-dropped rows are skipped.
|
||||
let row: Option<(String, String, String)> = sqlx::query_as(
|
||||
"SELECT source_id, source_manga_key, source_url \
|
||||
FROM manga_sources \
|
||||
WHERE manga_id = $1 AND dropped_at IS NULL \
|
||||
ORDER BY last_seen_at DESC \
|
||||
LIMIT 1",
|
||||
)
|
||||
.bind(manga_id)
|
||||
.fetch_optional(&self.db)
|
||||
.await
|
||||
.context("look up manga_sources for resync")?;
|
||||
let Some((_source_id, source_manga_key, source_url)) = row else {
|
||||
return Err(ResyncError::NoMangaSource.into());
|
||||
};
|
||||
|
||||
let lease = self
|
||||
.browser_manager
|
||||
.acquire()
|
||||
.await
|
||||
.context("acquire browser lease for manga resync")?;
|
||||
let browser_ref: &chromiumoxide::Browser = &lease;
|
||||
let ctx = FetchContext {
|
||||
browser: browser_ref,
|
||||
rate: &self.rate,
|
||||
tor: self.tor.as_deref(),
|
||||
};
|
||||
|
||||
// Parse chapters too — a force resync is "make this manga fully
|
||||
// current," not just metadata. The full pipeline handles the
|
||||
// partial-render guard for us; we replicate the same caution
|
||||
// here by skipping the chapter sync when the parser returned
|
||||
// empty but the manga previously had chapters.
|
||||
let source = TargetSource::new(source_url.clone());
|
||||
let r = SourceMangaRef {
|
||||
source_manga_key: source_manga_key.clone(),
|
||||
title: String::new(),
|
||||
url: source_url.clone(),
|
||||
};
|
||||
let manga = source
|
||||
.fetch_manga(&ctx, &r)
|
||||
.await
|
||||
.with_context(|| format!("fetch_manga during resync of {manga_id}"))?;
|
||||
|
||||
// Partial-render guard: same logic as run_metadata_pass.
|
||||
let source_id = source.id();
|
||||
if !manga.chapters.is_empty() || {
|
||||
let prior = repo::crawler::live_chapter_count_for_source_manga(
|
||||
&self.db,
|
||||
source_id,
|
||||
&source_manga_key,
|
||||
)
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
prior == 0
|
||||
} {
|
||||
// Either the new fetch surfaced chapters, or there were
|
||||
// none before either — chapter sync is safe to run.
|
||||
} else {
|
||||
tracing::warn!(
|
||||
%manga_id,
|
||||
source_url = %source_url,
|
||||
"resync_manga: fetch returned empty chapters but prior count > 0; skipping chapter sync to avoid soft-drop"
|
||||
);
|
||||
}
|
||||
|
||||
let upsert = repo::crawler::upsert_manga_from_source(
|
||||
&self.db,
|
||||
source_id,
|
||||
&source_url,
|
||||
&manga,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("upsert_manga_from_source during resync of {manga_id}"))?;
|
||||
|
||||
// Cover refetch: force-download regardless of UpsertStatus.
|
||||
// Admin clicked "resync" because they want the cover too.
|
||||
let mut cover_fetched = false;
|
||||
if let Some(cover_url) = manga.cover_url.as_deref() {
|
||||
match pipeline::download_and_store_cover(
|
||||
&self.db,
|
||||
self.storage.as_ref(),
|
||||
&self.http,
|
||||
&self.rate,
|
||||
&source_url,
|
||||
upsert.manga_id,
|
||||
cover_url,
|
||||
&self.download_allowlist,
|
||||
self.max_image_bytes,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => cover_fetched = true,
|
||||
Err(e) => tracing::warn!(
|
||||
%manga_id,
|
||||
error = ?e,
|
||||
"resync_manga: cover download failed"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// Chapter sync — only when the partial-render guard above
|
||||
// didn't bail.
|
||||
let prior_chapter_count = repo::crawler::live_chapter_count_for_source_manga(
|
||||
&self.db,
|
||||
source_id,
|
||||
&source_manga_key,
|
||||
)
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
if !manga.chapters.is_empty() || prior_chapter_count == 0 {
|
||||
match repo::crawler::sync_manga_chapters(
|
||||
&self.db,
|
||||
source_id,
|
||||
upsert.manga_id,
|
||||
&manga.chapters,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(diff) => tracing::info!(
|
||||
%manga_id,
|
||||
new = diff.new,
|
||||
refreshed = diff.refreshed,
|
||||
dropped = diff.dropped,
|
||||
"resync_manga: chapters synced"
|
||||
),
|
||||
Err(e) => tracing::warn!(
|
||||
%manga_id,
|
||||
error = ?e,
|
||||
"resync_manga: chapter sync failed"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
drop(lease);
|
||||
Ok(MangaResyncOutcome {
|
||||
manga_id: upsert.manga_id,
|
||||
metadata_status: upsert.status,
|
||||
cover_fetched,
|
||||
})
|
||||
}
|
||||
|
||||
async fn resync_chapter(&self, chapter_id: Uuid) -> anyhow::Result<ChapterResyncOutcome> {
|
||||
let row = repo::chapter::dispatch_target(&self.db, chapter_id)
|
||||
.await
|
||||
.context("look up chapter_sources for resync")?;
|
||||
let Some((manga_id, source_url)) = row else {
|
||||
return Err(ResyncError::NoChapterSource.into());
|
||||
};
|
||||
|
||||
let lease = self
|
||||
.browser_manager
|
||||
.acquire()
|
||||
.await
|
||||
.context("acquire browser lease for chapter resync")?;
|
||||
let result = content::sync_chapter_content(
|
||||
&lease,
|
||||
&self.db,
|
||||
self.storage.as_ref(),
|
||||
&self.http,
|
||||
&self.rate,
|
||||
chapter_id,
|
||||
manga_id,
|
||||
&source_url,
|
||||
true,
|
||||
&self.download_allowlist,
|
||||
self.max_image_bytes,
|
||||
self.tor.as_deref(),
|
||||
)
|
||||
.await;
|
||||
drop(lease);
|
||||
|
||||
match result? {
|
||||
SyncOutcome::Fetched { pages } => {
|
||||
Ok(ChapterResyncOutcome::Fetched { chapter_id, pages })
|
||||
}
|
||||
SyncOutcome::Skipped => Ok(ChapterResyncOutcome::Skipped {
|
||||
chapter_id,
|
||||
reason: "chapter already had pages on disk".to_string(),
|
||||
}),
|
||||
SyncOutcome::SessionExpired => {
|
||||
anyhow::bail!("source session expired — operator must refresh PHPSESSID")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -162,37 +162,123 @@ const PROBE_RETRY_DELAY: Duration = Duration::from_secs(2);
|
||||
/// limiter. The trade is worth it — failing here costs ~1s; failing 30
|
||||
/// minutes into a backfill costs 30 minutes.
|
||||
pub async fn verify_session(browser: &Browser, probe_url: &str) -> anyhow::Result<()> {
|
||||
let mut attempt = 0u32;
|
||||
verify_session_with_recircuit(browser, probe_url, None, 0).await
|
||||
}
|
||||
|
||||
/// Like [`verify_session`] but, when `tor` is `Some`, signals
|
||||
/// `SIGNAL NEWNYM` between retries on transient pages AND treats
|
||||
/// `Unauthenticated` as recoverable (up to `tor_max_attempts` total
|
||||
/// probes, calling NEWNYM between each).
|
||||
///
|
||||
/// `verify_session` is `verify_session_with_recircuit(..., None, _)`,
|
||||
/// which collapses the `Unauthenticated` budget to 1 attempt — i.e.
|
||||
/// fail-fast, exactly the pre-TOR behavior.
|
||||
pub async fn verify_session_with_recircuit(
|
||||
browser: &Browser,
|
||||
probe_url: &str,
|
||||
tor: Option<&crate::crawler::tor::TorController>,
|
||||
tor_max_attempts: u32,
|
||||
) -> anyhow::Result<()> {
|
||||
let unauth_max_attempts = if tor.is_some() { tor_max_attempts.max(1) } else { 1 };
|
||||
run_session_probe_loop(
|
||||
|| fetch_probe_html(browser, probe_url),
|
||||
|| async {
|
||||
if let Some(t) = tor {
|
||||
if let Err(e) = t.new_identity().await {
|
||||
tracing::warn!(error = %e, "TOR NEWNYM failed; continuing with same circuit");
|
||||
}
|
||||
}
|
||||
},
|
||||
PROBE_MAX_ATTEMPTS,
|
||||
unauth_max_attempts,
|
||||
PROBE_RETRY_DELAY,
|
||||
probe_url,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Pure-over-IO loop body for the session probe. Generic over the
|
||||
/// fetch and recircuit closures so it can be unit-tested without a
|
||||
/// real browser or TOR daemon.
|
||||
///
|
||||
/// Both budgets count **total attempts**, including the first — so
|
||||
/// `transient_max_attempts = 3` allows 3 fetches and 2 recircuits
|
||||
/// between them, and `unauth_max_attempts = 1` means "fail-fast, no
|
||||
/// retry". This matches [`crate::crawler::detect::retry_on_transient`]
|
||||
/// and the content-path recircuit loop.
|
||||
///
|
||||
/// Outcomes:
|
||||
/// - `SessionProbe::Ok` → return `Ok(())`.
|
||||
/// - `SessionProbe::Unauthenticated` → recircuit + retry while
|
||||
/// under the unauth budget. After the cap, bail with the
|
||||
/// "PHPSESSID expired" diagnostic, mentioning the attempt count so
|
||||
/// a TOR-misconfig diagnosis is easier.
|
||||
/// - `SessionProbe::Transient` → same shape against the transient
|
||||
/// budget; bails with "site down or rate-limiting" after the cap.
|
||||
async fn run_session_probe_loop<F, Fut, R, RFut>(
|
||||
mut fetch_html: F,
|
||||
mut recircuit: R,
|
||||
transient_max_attempts: u32,
|
||||
unauth_max_attempts: u32,
|
||||
retry_delay: Duration,
|
||||
probe_url_for_msg: &str,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
F: FnMut() -> Fut,
|
||||
Fut: std::future::Future<Output = anyhow::Result<String>>,
|
||||
R: FnMut() -> RFut,
|
||||
RFut: std::future::Future<Output = ()>,
|
||||
{
|
||||
debug_assert!(transient_max_attempts >= 1);
|
||||
debug_assert!(unauth_max_attempts >= 1);
|
||||
let mut transient_attempts = 0u32;
|
||||
let mut unauth_attempts = 0u32;
|
||||
loop {
|
||||
attempt += 1;
|
||||
let html = fetch_probe_html(browser, probe_url).await?;
|
||||
let html = fetch_html().await?;
|
||||
match classify_probe(&html) {
|
||||
SessionProbe::Ok => {
|
||||
tracing::info!(attempt, "session probe ok — #logo + #avatar_menu present");
|
||||
tracing::info!(
|
||||
transient_attempts,
|
||||
unauth_attempts,
|
||||
"session probe ok — #logo + #avatar_menu present"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
SessionProbe::Unauthenticated => {
|
||||
return Err(anyhow!(
|
||||
"session probe failed — #avatar_menu not present at {probe_url} \
|
||||
(page rendered the normal layout); PHPSESSID is missing, expired, \
|
||||
or revoked. Refresh CRAWLER_PHPSESSID and re-run."
|
||||
));
|
||||
}
|
||||
SessionProbe::Transient if attempt < PROBE_MAX_ATTEMPTS => {
|
||||
unauth_attempts += 1;
|
||||
if unauth_attempts >= unauth_max_attempts {
|
||||
return Err(anyhow!(
|
||||
"session probe failed — #avatar_menu not present at {probe_url_for_msg} \
|
||||
after {unauth_attempts} attempt(s); PHPSESSID is missing, \
|
||||
expired, or revoked. Refresh CRAWLER_PHPSESSID and re-run."
|
||||
));
|
||||
}
|
||||
tracing::warn!(
|
||||
attempt,
|
||||
max_attempts = PROBE_MAX_ATTEMPTS,
|
||||
"session probe got a transient page; retrying"
|
||||
attempt = unauth_attempts,
|
||||
max_attempts = unauth_max_attempts,
|
||||
"session probe Unauthenticated despite PHPSESSID; signaling TOR \
|
||||
NEWNYM and retrying"
|
||||
);
|
||||
tokio::time::sleep(PROBE_RETRY_DELAY).await;
|
||||
recircuit().await;
|
||||
tokio::time::sleep(retry_delay).await;
|
||||
}
|
||||
SessionProbe::Transient => {
|
||||
return Err(anyhow!(
|
||||
"session probe failed — probe page at {probe_url} returned a \
|
||||
broken-page response after {PROBE_MAX_ATTEMPTS} attempts. \
|
||||
The site appears to be down or rate-limiting us; try again \
|
||||
later before refreshing CRAWLER_PHPSESSID."
|
||||
));
|
||||
transient_attempts += 1;
|
||||
if transient_attempts >= transient_max_attempts {
|
||||
return Err(anyhow!(
|
||||
"session probe failed — probe page at {probe_url_for_msg} returned \
|
||||
a broken-page response after {transient_max_attempts} attempts. \
|
||||
The site appears to be down or rate-limiting us; try again \
|
||||
later before refreshing CRAWLER_PHPSESSID."
|
||||
));
|
||||
}
|
||||
tracing::warn!(
|
||||
attempt = transient_attempts,
|
||||
max_attempts = transient_max_attempts,
|
||||
"session probe got a transient page; recircuit + retry"
|
||||
);
|
||||
recircuit().await;
|
||||
tokio::time::sleep(retry_delay).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -336,6 +422,204 @@ mod tests {
|
||||
assert_eq!(classify_chapter_probe(html), ChapterProbe::Ok);
|
||||
}
|
||||
|
||||
// --- run_session_probe_loop -----------------------------------------
|
||||
//
|
||||
// These tests exercise the recircuit-aware loop without a real
|
||||
// browser. The fetch and recircuit closures are mocked over Vecs of
|
||||
// canned outcomes / counters.
|
||||
|
||||
const OK_HTML: &str = r#"<html><body><div id="logo"></div><div id="avatar_menu"></div></body></html>"#;
|
||||
const UNAUTH_HTML: &str = r#"<html><body><div id="logo"></div></body></html>"#;
|
||||
const TRANSIENT_HTML: &str = "<html><body><p>we're sorry, the request file are not found.</p></body></html>";
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_ok_on_first_attempt_does_not_recircuit() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut fetched = 0u32;
|
||||
run_session_probe_loop(
|
||||
|| {
|
||||
fetched += 1;
|
||||
async { Ok(OK_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
3,
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect("ok on first attempt");
|
||||
assert_eq!(fetched, 1);
|
||||
assert_eq!(recircuits, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_unauth_then_ok_when_attempt_budget_available() {
|
||||
// Budget = 3 total attempts. Unauth on call 1, ok on call 2.
|
||||
let mut recircuits = 0u32;
|
||||
let mut call = 0u32;
|
||||
run_session_probe_loop(
|
||||
|| {
|
||||
call += 1;
|
||||
let n = call;
|
||||
async move {
|
||||
if n == 1 {
|
||||
Ok(UNAUTH_HTML.to_string())
|
||||
} else {
|
||||
Ok(OK_HTML.to_string())
|
||||
}
|
||||
}
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
3,
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect("recovers after one recircuit");
|
||||
assert_eq!(call, 2);
|
||||
assert_eq!(recircuits, 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_unauth_with_single_attempt_budget_fails_fast() {
|
||||
// Budget = 1 total attempt = no retry (matches no-TOR behavior).
|
||||
let mut recircuits = 0u32;
|
||||
let mut call = 0u32;
|
||||
let err = run_session_probe_loop(
|
||||
|| {
|
||||
call += 1;
|
||||
async { Ok(UNAUTH_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
1,
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect_err("budget=1 → fail-fast");
|
||||
assert_eq!(call, 1, "no retry when budget is 1");
|
||||
assert_eq!(recircuits, 0);
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("Refresh CRAWLER_PHPSESSID"), "msg: {msg}");
|
||||
assert!(msg.contains("after 1 attempt"), "expected attempt count in msg: {msg}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_unauth_after_exhausting_budget_emits_attempt_count() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut call = 0u32;
|
||||
let err = run_session_probe_loop(
|
||||
|| {
|
||||
call += 1;
|
||||
async { Ok(UNAUTH_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
10, // transient budget irrelevant here
|
||||
3, // 3 attempts total, 2 recircuits between
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect_err("exhausts unauth budget");
|
||||
assert_eq!(call, 3);
|
||||
assert_eq!(recircuits, 2);
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("after 3 attempt"), "expected attempt count in error, got: {msg}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_transient_repeats_until_max_then_errors() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut call = 0u32;
|
||||
let err = run_session_probe_loop(
|
||||
|| {
|
||||
call += 1;
|
||||
async { Ok(TRANSIENT_HTML.to_string()) }
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
1,
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect_err("transient until max → fail");
|
||||
assert_eq!(call, 3);
|
||||
// Recircuit fires between attempts: 3 attempts → 2 recircuits.
|
||||
assert_eq!(recircuits, 2);
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("broken-page response after 3 attempts"), "msg: {msg}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_transient_then_ok_returns_ok_after_one_recircuit() {
|
||||
let mut recircuits = 0u32;
|
||||
let mut call = 0u32;
|
||||
run_session_probe_loop(
|
||||
|| {
|
||||
call += 1;
|
||||
let n = call;
|
||||
async move {
|
||||
if n == 1 {
|
||||
Ok(TRANSIENT_HTML.to_string())
|
||||
} else {
|
||||
Ok(OK_HTML.to_string())
|
||||
}
|
||||
}
|
||||
},
|
||||
|| {
|
||||
recircuits += 1;
|
||||
async {}
|
||||
},
|
||||
3,
|
||||
1,
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect("ok on second try");
|
||||
assert_eq!(call, 2);
|
||||
assert_eq!(recircuits, 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn probe_loop_propagates_fetch_errors_immediately() {
|
||||
let mut call = 0u32;
|
||||
let err = run_session_probe_loop(
|
||||
|| {
|
||||
call += 1;
|
||||
async { Err(anyhow!("nav timeout")) }
|
||||
},
|
||||
|| async {},
|
||||
5,
|
||||
5,
|
||||
Duration::from_millis(0),
|
||||
"https://example/probe",
|
||||
)
|
||||
.await
|
||||
.expect_err("fetch error bubbles");
|
||||
assert_eq!(call, 1);
|
||||
assert!(format!("{err:#}").contains("nav timeout"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classify_probe_trusts_broken_body_over_stray_avatar_match() {
|
||||
// Defensive: if a broken-page body somehow contains an
|
||||
|
||||
@@ -67,6 +67,10 @@ pub struct SourceChapter {
|
||||
pub struct FetchContext<'a> {
|
||||
pub browser: &'a Browser,
|
||||
pub rate: &'a crate::crawler::rate_limit::HostRateLimiters,
|
||||
/// Optional TOR control-port client. When `Some`, retry helpers
|
||||
/// signal `NEWNYM` between transient-page attempts so the next try
|
||||
/// draws a fresh exit. `None` keeps pre-TOR behavior.
|
||||
pub tor: Option<&'a crate::crawler::tor::TorController>,
|
||||
}
|
||||
|
||||
/// Lazy iterator over discovered manga refs. The caller drives the
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
//! (`td:has(label:contains("Author:"))`) are implemented by walking
|
||||
//! the parsed tree.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
@@ -19,7 +18,7 @@ use super::{
|
||||
SourceMangaRef,
|
||||
};
|
||||
use crate::crawler::detect::{
|
||||
has_logo_sentinel, is_broken_page_body, retry_on_transient, PageError,
|
||||
has_logo_sentinel, is_broken_page_body, retry_on_transient_with_hook, PageError,
|
||||
};
|
||||
use crate::crawler::nav::{wait_for_nav, wait_for_selector, NavError, SELECTOR_TIMEOUT};
|
||||
|
||||
@@ -75,33 +74,24 @@ impl Source for TargetSource {
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>> {
|
||||
// Always visit page 1 first because that's the only way to
|
||||
// discover `last_page`. Retry it on transient — a broken first
|
||||
// page would otherwise abort the whole walk before we've even
|
||||
// started.
|
||||
let first_html = retry_on_transient(
|
||||
// Probe page 1 up front (with transient retry) for two reasons:
|
||||
// a broken first page should abort cleanly rather than mid-walk,
|
||||
// and the HTML is handed straight to the first `next_batch` call
|
||||
// so the walker doesn't re-fetch it. Page count is discovered
|
||||
// incrementally — see `TargetSourceWalker::next_batch`.
|
||||
let first_html = retry_on_transient_with_hook(
|
||||
|| async {
|
||||
navigate(ctx, self.base_url.as_str(), LIST_PAGE_MARKER).await
|
||||
},
|
||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||
PAGE_TRANSIENT_RETRY_DELAY,
|
||||
|| async { recircuit_if_configured(ctx.tor).await },
|
||||
)
|
||||
.await?;
|
||||
let last_page = {
|
||||
let doc = scraper::Html::parse_document(&first_html);
|
||||
parse_last_page(&doc)
|
||||
};
|
||||
|
||||
let order = build_page_order(last_page);
|
||||
tracing::info!(
|
||||
last_page = ?last_page,
|
||||
page_count = order.len(),
|
||||
"walking pagination"
|
||||
);
|
||||
|
||||
Ok(Box::new(TargetSourceWalker {
|
||||
base_url: self.base_url.clone(),
|
||||
pages_remaining: order,
|
||||
next_page: 1,
|
||||
first_page_html: Some(first_html),
|
||||
}))
|
||||
}
|
||||
@@ -147,24 +137,19 @@ impl Source for TargetSource {
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the queue of page numbers `TargetSource::discover` will walk.
|
||||
/// The site orders by `update_date DESC`, so newest-first is just the
|
||||
/// natural page order: `1..=last`. If `last_page` is unknown (source
|
||||
/// surfaces no pagination) only page 1 is visited.
|
||||
fn build_page_order(last_page: Option<i32>) -> VecDeque<i32> {
|
||||
match last_page {
|
||||
None => VecDeque::from([1]),
|
||||
Some(last) => (1..=last).collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Walker returned by [`TargetSource::discover`]. Pops one source-index
|
||||
/// page per `next_batch` call. Page 1's HTML is cached at construction
|
||||
/// time (the discover call needed it to read `last_page` anyway) so the
|
||||
/// batch covering page 1 doesn't re-fetch.
|
||||
/// Walker returned by [`TargetSource::discover`]. Walks pages `1..` in
|
||||
/// order, terminating as soon as a page renders cleanly with zero entries
|
||||
/// — that's the "we ran off the end of the index" signal. Page 1's HTML
|
||||
/// is cached at construction time (discover already had to fetch it for
|
||||
/// the transient probe) so the first batch doesn't re-fetch.
|
||||
///
|
||||
/// A genuinely empty `Ok(vec![])` from `parse_manga_list_from` is what
|
||||
/// stops us: the parser's `#logo` sentinel converts unrendered pages
|
||||
/// into transient errors before they reach this loop, so an empty
|
||||
/// parse result reliably means "no more entries."
|
||||
struct TargetSourceWalker {
|
||||
base_url: String,
|
||||
pages_remaining: VecDeque<i32>,
|
||||
next_page: i32,
|
||||
first_page_html: Option<String>,
|
||||
}
|
||||
|
||||
@@ -174,20 +159,18 @@ impl DiscoverWalk for TargetSourceWalker {
|
||||
&mut self,
|
||||
ctx: &FetchContext<'_>,
|
||||
) -> anyhow::Result<Option<Vec<SourceMangaRef>>> {
|
||||
let Some(page_num) = self.pages_remaining.pop_front() else {
|
||||
return Ok(None);
|
||||
};
|
||||
let page_num = self.next_page;
|
||||
let page_refs = if page_num == 1 {
|
||||
// Reuse the cached page-1 HTML from the initial probe. Take
|
||||
// it (rather than clone) so a malformed page-order queue
|
||||
// that re-visits page 1 still falls back to a real fetch.
|
||||
// it (rather than clone) so a future re-entry that somehow
|
||||
// revisits page 1 still falls back to a real fetch.
|
||||
match self.first_page_html.take() {
|
||||
Some(html) => {
|
||||
let doc = scraper::Html::parse_document(&html);
|
||||
parse_manga_list_from(&doc)?
|
||||
}
|
||||
None => {
|
||||
retry_on_transient(
|
||||
retry_on_transient_with_hook(
|
||||
|| async {
|
||||
let html = navigate(
|
||||
ctx,
|
||||
@@ -200,12 +183,13 @@ impl DiscoverWalk for TargetSourceWalker {
|
||||
},
|
||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||
PAGE_TRANSIENT_RETRY_DELAY,
|
||||
|| async { recircuit_if_configured(ctx.tor).await },
|
||||
)
|
||||
.await?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
retry_on_transient(
|
||||
retry_on_transient_with_hook(
|
||||
|| async {
|
||||
let url = page_url(&self.base_url, page_num);
|
||||
let html = navigate(ctx, &url, LIST_PAGE_MARKER).await?;
|
||||
@@ -214,10 +198,15 @@ impl DiscoverWalk for TargetSourceWalker {
|
||||
},
|
||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||
PAGE_TRANSIENT_RETRY_DELAY,
|
||||
|| async { recircuit_if_configured(ctx.tor).await },
|
||||
)
|
||||
.await?
|
||||
};
|
||||
tracing::info!(page_num, count = page_refs.len(), "page walked");
|
||||
if page_refs.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
self.next_page += 1;
|
||||
Ok(Some(page_refs))
|
||||
}
|
||||
}
|
||||
@@ -288,20 +277,20 @@ fn classify_navigate_html(html: String) -> Result<String, PageError> {
|
||||
Ok(html)
|
||||
}
|
||||
|
||||
fn parse_last_page(doc: &scraper::Html) -> Option<i32> {
|
||||
// Pagination links carry their page number as text. Take the
|
||||
// numeric maximum so we don't depend on a specific layout (Prev,
|
||||
// Next, ellipses, etc. all get filtered out by .parse).
|
||||
let sel = scraper::Selector::parse("#left_side .pagination a").unwrap();
|
||||
doc.select(&sel)
|
||||
.filter_map(|a| {
|
||||
collapse_whitespace(&a.text().collect::<String>())
|
||||
.parse::<i32>()
|
||||
.ok()
|
||||
})
|
||||
.max()
|
||||
/// Hook for [`retry_on_transient_with_hook`]: when TOR is configured,
|
||||
/// signal `NEWNYM` so the next navigation draws a fresh exit. Errors
|
||||
/// from the controller are logged and swallowed — failing to recircuit
|
||||
/// shouldn't take down the crawl, the next attempt just runs on the
|
||||
/// same circuit as before.
|
||||
async fn recircuit_if_configured(tor: Option<&crate::crawler::tor::TorController>) {
|
||||
if let Some(t) = tor {
|
||||
if let Err(e) = t.new_identity().await {
|
||||
tracing::warn!(error = %e, "TOR NEWNYM failed; retrying on same circuit");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Substitutes the first `/N/` path segment with the target page
|
||||
/// number. Source impls that paginate via a different URL shape can
|
||||
/// override this — for the modeled site the segment is always present.
|
||||
@@ -853,29 +842,6 @@ mod tests {
|
||||
assert_eq!(parse_chapter_number("Special"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_last_page_picks_highest_pagination_link() {
|
||||
let html = r#"
|
||||
<div id="left_side"><div class="pagination">
|
||||
<a href="/list/1/">Prev</a>
|
||||
<ol>
|
||||
<li><a href="/list/1/">1</a></li>
|
||||
<li><a href="/list/2/">2</a></li>
|
||||
<li><a href="/list/47/">47</a></li>
|
||||
<li><a href="/list/2/">Next</a></li>
|
||||
</ol>
|
||||
</div></div>
|
||||
"#;
|
||||
let doc = scraper::Html::parse_document(html);
|
||||
assert_eq!(parse_last_page(&doc), Some(47));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_last_page_none_when_no_pagination() {
|
||||
let doc = scraper::Html::parse_document("<html></html>");
|
||||
assert!(parse_last_page(&doc).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn page_url_substitutes_numeric_path_segment() {
|
||||
assert_eq!(
|
||||
@@ -1024,28 +990,6 @@ mod tests {
|
||||
assert!(err.is_transient(), "got non-transient: {err}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_page_order_is_natural_one_to_last() {
|
||||
// Newest-first is just the source's natural pagination order:
|
||||
// (update_date DESC) lives at page 1, oldest at the last page.
|
||||
let order = build_page_order(Some(3));
|
||||
assert_eq!(Vec::from(order), vec![1, 2, 3]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_page_order_falls_back_to_page_one_only_without_pagination() {
|
||||
// Source surfaced no pagination control — visit page 1 alone
|
||||
// and let the walk end after one batch.
|
||||
let order = build_page_order(None);
|
||||
assert_eq!(Vec::from(order), vec![1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_page_order_single_page_index_yields_one_entry() {
|
||||
let order = build_page_order(Some(1));
|
||||
assert_eq!(Vec::from(order), vec![1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_chapter_list_returns_transient_when_table_missing() {
|
||||
// Partial render (post-load JS hadn't injected the table, layout
|
||||
|
||||
446
backend/src/crawler/tor.rs
Normal file
446
backend/src/crawler/tor.rs
Normal file
@@ -0,0 +1,446 @@
|
||||
//! TOR control-port client for `SIGNAL NEWNYM` ("recircuit").
|
||||
//!
|
||||
//! The crawler can be proxied through TOR (`CRAWLER_PROXY=socks5h://tor:9050`)
|
||||
//! to randomize the exit IP seen by the target site. When the target
|
||||
//! returns a "bad page" (its broken-template body, missing layout
|
||||
//! sentinel, or unauthenticated probe despite a valid PHPSESSID), it
|
||||
//! is often the current exit being rate-limited or fingerprinted rather
|
||||
//! than a real failure. Asking the local TOR daemon for a new identity
|
||||
//! over its control port (port 9051 by default) makes subsequent
|
||||
//! connections draw a fresh circuit; combined with `IsolateDestAddr`
|
||||
//! in torrc this is usually enough to clear the failure.
|
||||
//!
|
||||
//! Scope is deliberately tiny — `AUTHENTICATE` + `SIGNAL NEWNYM` over
|
||||
//! a one-shot TCP connection. No `torut` dep, no hidden-service
|
||||
//! plumbing, no event streaming.
|
||||
//!
|
||||
//! **Caveat for in-flight connections:** Chromium reuses sockets, so a
|
||||
//! `NEWNYM` only affects *new* connections (in TOR terms, new circuits).
|
||||
//! That's fine for our retry path — the next navigation opens a fresh
|
||||
//! connection. We do not try to forcibly close existing streams.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::timeout;
|
||||
|
||||
/// Default control-port (`tor --defaults-torrc` ships 9051).
|
||||
const DEFAULT_CONTROL_PORT: u16 = 9051;
|
||||
/// Connect timeout — generous enough for a slow compose start, short
|
||||
/// enough that a misconfigured controller doesn't stall a crawl.
|
||||
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
/// Per-command read timeout. `SIGNAL NEWNYM` returns instantly on the
|
||||
/// happy path; bound it so a half-broken control port can't hang us.
|
||||
const READ_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
/// How the controller authenticates to the control port.
|
||||
///
|
||||
/// `Cookie` is preferred for compose deploys where the auth cookie file
|
||||
/// is shared between the `tor` and `backend` containers via a named
|
||||
/// volume. `Password` is the fallback when the cookie file isn't
|
||||
/// reachable (different gid, no shared volume, etc.). `None` matches a
|
||||
/// torrc with no `CookieAuthentication 1` and no `HashedControlPassword`
|
||||
/// — useful for local experimentation, not for production.
|
||||
///
|
||||
/// `Debug` is implemented manually to redact the password (and the
|
||||
/// cookie path, which is non-sensitive but uninteresting in logs).
|
||||
/// Don't add `#[derive(Debug)]` — the controller is `?`-logged at
|
||||
/// startup and a derive would expand the password into the trace.
|
||||
#[derive(Clone)]
|
||||
pub enum TorAuth {
|
||||
None,
|
||||
Password(String),
|
||||
Cookie(PathBuf),
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for TorAuth {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TorAuth::None => f.write_str("None"),
|
||||
TorAuth::Password(_) => f.write_str("Password(<redacted>)"),
|
||||
TorAuth::Cookie(_) => f.write_str("Cookie(<path>)"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TorController {
|
||||
/// `host:port` string. Kept as a string (not a `SocketAddr`) so
|
||||
/// docker-compose hostnames like `tor:9051` resolve at connect time.
|
||||
addr: String,
|
||||
auth: TorAuth,
|
||||
}
|
||||
|
||||
impl TorController {
|
||||
pub fn new(addr: impl Into<String>, auth: TorAuth) -> Self {
|
||||
Self { addr: addr.into(), auth }
|
||||
}
|
||||
|
||||
/// Build a controller from the env-config shape:
|
||||
/// `url` (e.g. `tcp://tor:9051`, `127.0.0.1:9051`, or `tor`),
|
||||
/// optional password, optional cookie path. Returns `Ok(None)` when
|
||||
/// `url` is absent — that's the "TOR feature disabled" signal.
|
||||
/// Cookie wins over password when both are set (rotates with TOR;
|
||||
/// no secret to manage).
|
||||
pub fn from_parts(
|
||||
url: Option<&str>,
|
||||
password: Option<&str>,
|
||||
cookie_path: Option<&Path>,
|
||||
) -> anyhow::Result<Option<Self>> {
|
||||
let Some(url) = url else { return Ok(None) };
|
||||
let addr = parse_control_url(url)?;
|
||||
let auth = match (cookie_path, password) {
|
||||
(Some(p), _) => TorAuth::Cookie(p.to_path_buf()),
|
||||
(None, Some(p)) => TorAuth::Password(p.to_string()),
|
||||
(None, None) => TorAuth::None,
|
||||
};
|
||||
Ok(Some(Self { addr, auth }))
|
||||
}
|
||||
|
||||
/// Open the control port, `AUTHENTICATE`, `SIGNAL NEWNYM`, `QUIT`.
|
||||
/// Each invocation is a fresh connection; the controller is cheap
|
||||
/// to clone and stateless across calls.
|
||||
pub async fn new_identity(&self) -> anyhow::Result<()> {
|
||||
let stream = timeout(CONNECT_TIMEOUT, TcpStream::connect(&self.addr))
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("timed out connecting to TOR control port {}", self.addr)
|
||||
})?
|
||||
.with_context(|| format!("connect to TOR control port {}", self.addr))?;
|
||||
let (read, mut write) = stream.into_split();
|
||||
let mut read = BufReader::new(read);
|
||||
|
||||
let auth_line = self.build_auth_line().await?;
|
||||
write_line(&mut write, &auth_line).await?;
|
||||
timeout(READ_TIMEOUT, expect_250(&mut read))
|
||||
.await
|
||||
.map_err(|_| anyhow!("TOR control AUTHENTICATE timed out"))?
|
||||
.context("AUTHENTICATE")?;
|
||||
|
||||
write_line(&mut write, "SIGNAL NEWNYM").await?;
|
||||
timeout(READ_TIMEOUT, expect_250(&mut read))
|
||||
.await
|
||||
.map_err(|_| anyhow!("TOR control SIGNAL NEWNYM timed out"))?
|
||||
.context("SIGNAL NEWNYM")?;
|
||||
|
||||
// QUIT is courtesy; ignore errors — the daemon may close the
|
||||
// socket before our QUIT lands and that's perfectly fine.
|
||||
let _ = write_line(&mut write, "QUIT").await;
|
||||
// Debug-level: a busy crawl can rotate circuits many times per
|
||||
// minute, INFO is too chatty. Failures still log at WARN.
|
||||
tracing::debug!(addr = %self.addr, "TOR NEWNYM signaled");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_auth_line(&self) -> anyhow::Result<String> {
|
||||
match &self.auth {
|
||||
TorAuth::None => Ok("AUTHENTICATE".to_string()),
|
||||
TorAuth::Password(p) => Ok(format!("AUTHENTICATE \"{}\"", escape_quoted(p))),
|
||||
TorAuth::Cookie(path) => {
|
||||
let bytes = tokio::fs::read(path)
|
||||
.await
|
||||
.with_context(|| format!("read TOR cookie file {}", path.display()))?;
|
||||
Ok(format!("AUTHENTICATE {}", hex_encode(&bytes)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse `tcp://host:port`, `host:port`, or bare `host` into a
|
||||
/// connect-time string. Default port is [`DEFAULT_CONTROL_PORT`].
|
||||
fn parse_control_url(url: &str) -> anyhow::Result<String> {
|
||||
let stripped = url.strip_prefix("tcp://").unwrap_or(url);
|
||||
if stripped.is_empty() {
|
||||
bail!("TOR control url is empty");
|
||||
}
|
||||
if stripped.contains(':') {
|
||||
Ok(stripped.to_string())
|
||||
} else {
|
||||
Ok(format!("{stripped}:{DEFAULT_CONTROL_PORT}"))
|
||||
}
|
||||
}
|
||||
|
||||
fn escape_quoted(s: &str) -> String {
|
||||
s.replace('\\', r"\\").replace('"', r#"\""#)
|
||||
}
|
||||
|
||||
fn hex_encode(bytes: &[u8]) -> String {
|
||||
let mut s = String::with_capacity(bytes.len() * 2);
|
||||
for b in bytes {
|
||||
s.push_str(&format!("{b:02x}"));
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
async fn write_line<W: tokio::io::AsyncWrite + Unpin>(
|
||||
w: &mut W,
|
||||
line: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
w.write_all(line.as_bytes()).await?;
|
||||
w.write_all(b"\r\n").await?;
|
||||
w.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Drain a TOR control reply, accepting only status `250`. Handles
|
||||
/// the protocol's three line forms: `XYZ ...` (single/end), `XYZ-...`
|
||||
/// (continuation), `XYZ+...` (data block ended by a lone `.`). Our
|
||||
/// commands only ever produce single-line `250 OK`, but we honor the
|
||||
/// continuation forms so a future torrc that adds events / banners
|
||||
/// doesn't confuse the parser.
|
||||
async fn expect_250<R: AsyncBufReadExt + Unpin>(r: &mut R) -> anyhow::Result<()> {
|
||||
loop {
|
||||
let mut line = String::new();
|
||||
let n = r.read_line(&mut line).await?;
|
||||
if n == 0 {
|
||||
bail!("TOR control port closed connection mid-reply");
|
||||
}
|
||||
let trimmed = line.trim_end_matches(['\r', '\n']);
|
||||
if trimmed.len() < 4 {
|
||||
bail!("malformed TOR control reply: {trimmed:?}");
|
||||
}
|
||||
let (code, rest) = trimmed.split_at(3);
|
||||
if code != "250" {
|
||||
bail!("TOR control replied {trimmed:?}");
|
||||
}
|
||||
let sep = rest.as_bytes()[0];
|
||||
match sep {
|
||||
b' ' => return Ok(()),
|
||||
b'-' => continue,
|
||||
b'+' => {
|
||||
// Data block — read until a line consisting of only ".".
|
||||
loop {
|
||||
let mut data = String::new();
|
||||
let n = r.read_line(&mut data).await?;
|
||||
if n == 0 {
|
||||
bail!("TOR control port closed mid-data-block");
|
||||
}
|
||||
if data.trim_end_matches(['\r', '\n']) == "." {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => bail!("malformed TOR control reply separator: {trimmed:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
/// Spawn a mock control port that responds to each \r\n-terminated
|
||||
/// inbound line with the next entry from `replies`. Each reply has
|
||||
/// its own `\r\n` appended. Records received lines into `recorder`.
|
||||
/// After `replies.len()` exchanges the task drops the socket — this
|
||||
/// matches the real TOR behavior for QUIT (close after acking).
|
||||
async fn spawn_mock(
|
||||
replies: Vec<&'static str>,
|
||||
recorder: Arc<Mutex<Vec<String>>>,
|
||||
) -> String {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap().to_string();
|
||||
tokio::spawn(async move {
|
||||
let (sock, _) = listener.accept().await.unwrap();
|
||||
let (r, mut w) = sock.into_split();
|
||||
let mut r = BufReader::new(r);
|
||||
for reply in replies {
|
||||
let mut line = String::new();
|
||||
let n = r.read_line(&mut line).await.unwrap_or(0);
|
||||
if n == 0 {
|
||||
return;
|
||||
}
|
||||
recorder
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(line.trim_end_matches(['\r', '\n']).to_string());
|
||||
w.write_all(reply.as_bytes()).await.unwrap();
|
||||
w.write_all(b"\r\n").await.unwrap();
|
||||
w.flush().await.unwrap();
|
||||
}
|
||||
});
|
||||
addr
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn password_auth_then_newnym_writes_expected_sequence() {
|
||||
let recorder = Arc::new(Mutex::new(Vec::new()));
|
||||
// Two replies: AUTHENTICATE then SIGNAL NEWNYM. QUIT is
|
||||
// fire-and-forget; the mock dropping the socket is the
|
||||
// expected real-world behavior.
|
||||
let addr =
|
||||
spawn_mock(vec!["250 OK", "250 OK"], Arc::clone(&recorder)).await;
|
||||
let controller = TorController::new(addr, TorAuth::Password("secret".into()));
|
||||
controller.new_identity().await.expect("new_identity ok");
|
||||
let recorded = recorder.lock().unwrap().clone();
|
||||
assert_eq!(recorded.first().map(String::as_str), Some("AUTHENTICATE \"secret\""));
|
||||
assert_eq!(recorded.get(1).map(String::as_str), Some("SIGNAL NEWNYM"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn cookie_auth_hex_encodes_file_bytes() {
|
||||
let tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
let cookie: Vec<u8> = (0u8..32).collect();
|
||||
std::fs::write(tmp.path(), &cookie).unwrap();
|
||||
let recorder = Arc::new(Mutex::new(Vec::new()));
|
||||
let addr =
|
||||
spawn_mock(vec!["250 OK", "250 OK"], Arc::clone(&recorder)).await;
|
||||
let controller =
|
||||
TorController::new(addr, TorAuth::Cookie(tmp.path().to_path_buf()));
|
||||
controller.new_identity().await.expect("new_identity ok");
|
||||
let recorded = recorder.lock().unwrap().clone();
|
||||
let expected_hex: String = cookie.iter().map(|b| format!("{b:02x}")).collect();
|
||||
assert_eq!(
|
||||
recorded.first().map(String::as_str),
|
||||
Some(format!("AUTHENTICATE {expected_hex}").as_str())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn no_auth_sends_bare_authenticate() {
|
||||
let recorder = Arc::new(Mutex::new(Vec::new()));
|
||||
let addr =
|
||||
spawn_mock(vec!["250 OK", "250 OK"], Arc::clone(&recorder)).await;
|
||||
let controller = TorController::new(addr, TorAuth::None);
|
||||
controller.new_identity().await.expect("new_identity ok");
|
||||
let recorded = recorder.lock().unwrap().clone();
|
||||
assert_eq!(recorded.first().map(String::as_str), Some("AUTHENTICATE"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn non_250_reply_returns_err_with_reply_text() {
|
||||
let recorder = Arc::new(Mutex::new(Vec::new()));
|
||||
let addr = spawn_mock(
|
||||
vec!["515 Bad authentication"],
|
||||
Arc::clone(&recorder),
|
||||
)
|
||||
.await;
|
||||
let controller =
|
||||
TorController::new(addr, TorAuth::Password("wrong".into()));
|
||||
let err = controller.new_identity().await.expect_err("should fail");
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("515"), "expected 515 in error, got: {msg}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn closed_connection_mid_reply_is_an_error() {
|
||||
// Listener accepts the AUTH line then drops without replying —
|
||||
// this exercises the EOF-mid-reply path in expect_250 (rather
|
||||
// than tor's own error replies which are covered elsewhere).
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap().to_string();
|
||||
tokio::spawn(async move {
|
||||
if let Ok((sock, _)) = listener.accept().await {
|
||||
let (r, _w) = sock.into_split();
|
||||
let mut r = BufReader::new(r);
|
||||
let mut line = String::new();
|
||||
let _ = r.read_line(&mut line).await; // read AUTH, ignore
|
||||
// Drop _w (and the read half via scope exit) so the
|
||||
// peer sees an immediate EOF on the next read.
|
||||
}
|
||||
});
|
||||
let controller = TorController::new(addr, TorAuth::None);
|
||||
let err = controller.new_identity().await.expect_err("should fail");
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("closed connection"),
|
||||
"expected EOF-mid-reply error, got: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn multi_line_250_continuation_is_accepted() {
|
||||
let recorder = Arc::new(Mutex::new(Vec::new()));
|
||||
// AUTHENTICATE reply uses the `250-...\r\n250 OK\r\n` form.
|
||||
// Single reply string contains the whole multi-line response.
|
||||
let addr = spawn_mock(
|
||||
vec!["250-banner=foo\r\n250 OK", "250 OK"],
|
||||
Arc::clone(&recorder),
|
||||
)
|
||||
.await;
|
||||
let controller = TorController::new(addr, TorAuth::None);
|
||||
controller.new_identity().await.expect("new_identity ok");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_parts_returns_none_when_url_unset() {
|
||||
let c = TorController::from_parts(None, None, None).unwrap();
|
||||
assert!(c.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_parts_prefers_cookie_over_password() {
|
||||
let c = TorController::from_parts(
|
||||
Some("tor:9051"),
|
||||
Some("pw"),
|
||||
Some(Path::new("/var/lib/tor/control_auth_cookie")),
|
||||
)
|
||||
.unwrap()
|
||||
.expect("controller built");
|
||||
assert!(matches!(c.auth, TorAuth::Cookie(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_parts_falls_back_to_password_without_cookie() {
|
||||
let c = TorController::from_parts(Some("tor:9051"), Some("pw"), None)
|
||||
.unwrap()
|
||||
.expect("controller built");
|
||||
assert!(matches!(c.auth, TorAuth::Password(p) if p == "pw"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_control_url_accepts_tcp_scheme() {
|
||||
assert_eq!(parse_control_url("tcp://127.0.0.1:9051").unwrap(), "127.0.0.1:9051");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_control_url_defaults_port_when_omitted() {
|
||||
assert_eq!(parse_control_url("tor").unwrap(), "tor:9051");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_control_url_passes_through_host_port() {
|
||||
assert_eq!(parse_control_url("tor:9999").unwrap(), "tor:9999");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_control_url_rejects_empty() {
|
||||
assert!(parse_control_url("").is_err());
|
||||
assert!(parse_control_url("tcp://").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escape_quoted_handles_quotes_and_backslashes() {
|
||||
assert_eq!(escape_quoted(r#"a"b\c"#), r#"a\"b\\c"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn debug_format_redacts_password_and_cookie_path() {
|
||||
// Regression: app.rs / bin/crawler.rs log the controller at
|
||||
// startup via `tracing::info!(?t, ...)`. A derived Debug on
|
||||
// TorAuth would expand TorAuth::Password(p) and leak the
|
||||
// plaintext into logs.
|
||||
let c = TorController::new("tor:9051", TorAuth::Password("super-secret".into()));
|
||||
let dbg = format!("{c:?}");
|
||||
assert!(!dbg.contains("super-secret"), "password leaked: {dbg}");
|
||||
assert!(dbg.contains("<redacted>"), "expected <redacted>, got: {dbg}");
|
||||
|
||||
let c = TorController::new(
|
||||
"tor:9051",
|
||||
TorAuth::Cookie("/var/lib/tor/control_auth_cookie".into()),
|
||||
);
|
||||
let dbg = format!("{c:?}");
|
||||
assert!(!dbg.contains("control_auth_cookie"), "cookie path leaked: {dbg}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hex_encode_zero_pads_low_bytes() {
|
||||
assert_eq!(hex_encode(&[0x00, 0x0f, 0xff]), "000fff");
|
||||
}
|
||||
}
|
||||
@@ -91,6 +91,26 @@ pub fn registrable_domain(url: &str) -> Option<String> {
|
||||
Some(format!(".{}", registrable.join(".")))
|
||||
}
|
||||
|
||||
/// Normalise a SOCKS proxy URL for Chromium's `--proxy-server=` flag.
|
||||
///
|
||||
/// reqwest accepts both `socks5://` (resolve locally) and
|
||||
/// `socks5h://` (resolve via the SOCKS server — important when the
|
||||
/// proxy is TOR and we don't want the host's resolver to see the
|
||||
/// target hostname). Chromium does **not** know the `socks5h` scheme
|
||||
/// and refuses navigations with `ERR_NO_SUPPORTED_PROXIES`. It
|
||||
/// already sends destination hostnames over SOCKS5 by default
|
||||
/// regardless, so stripping the `h` is a pure scheme rename — the
|
||||
/// remote-DNS behaviour is preserved.
|
||||
///
|
||||
/// Non-SOCKS schemes pass through unchanged.
|
||||
pub fn chromium_proxy_arg(proxy: &str) -> String {
|
||||
if let Some(rest) = proxy.strip_prefix("socks5h://") {
|
||||
format!("socks5://{rest}")
|
||||
} else {
|
||||
proxy.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -191,4 +211,34 @@ mod tests {
|
||||
Some("[2001:db8::1]")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chromium_proxy_arg_strips_socks5h_to_socks5() {
|
||||
// Regression: passing socks5h:// to Chromium yields
|
||||
// ERR_NO_SUPPORTED_PROXIES at navigation time.
|
||||
assert_eq!(
|
||||
chromium_proxy_arg("socks5h://127.0.0.1:9050"),
|
||||
"socks5://127.0.0.1:9050"
|
||||
);
|
||||
assert_eq!(
|
||||
chromium_proxy_arg("socks5h://tor:9050"),
|
||||
"socks5://tor:9050"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chromium_proxy_arg_passes_socks5_unchanged() {
|
||||
assert_eq!(
|
||||
chromium_proxy_arg("socks5://127.0.0.1:9050"),
|
||||
"socks5://127.0.0.1:9050"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chromium_proxy_arg_passes_non_socks_unchanged() {
|
||||
assert_eq!(
|
||||
chromium_proxy_arg("http://proxy.example:8080"),
|
||||
"http://proxy.example:8080"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,11 @@ pub enum AppError {
|
||||
PayloadTooLarge(String),
|
||||
#[error("unsupported media type: {0}")]
|
||||
UnsupportedMediaType(String),
|
||||
/// 503 — a feature is currently unavailable, distinct from a 5xx
|
||||
/// internal error. Used when admin actions require the crawler
|
||||
/// daemon but it's been disabled (`CRAWLER_DAEMON=false`).
|
||||
#[error("service unavailable: {0}")]
|
||||
ServiceUnavailable(String),
|
||||
/// 429 with an optional `Retry-After` header value (in seconds).
|
||||
#[error("too many requests")]
|
||||
TooManyRequests {
|
||||
@@ -56,6 +61,7 @@ impl AppError {
|
||||
AppError::Conflict(_) => "conflict",
|
||||
AppError::PayloadTooLarge(_) => "payload_too_large",
|
||||
AppError::UnsupportedMediaType(_) => "unsupported_media_type",
|
||||
AppError::ServiceUnavailable(_) => "service_unavailable",
|
||||
AppError::TooManyRequests { .. } => "too_many_requests",
|
||||
AppError::ValidationFailed { .. } => "validation_failed",
|
||||
AppError::Database(sqlx::Error::RowNotFound) => "not_found",
|
||||
@@ -85,6 +91,9 @@ impl IntoResponse for AppError {
|
||||
AppError::UnsupportedMediaType(msg) => {
|
||||
(StatusCode::UNSUPPORTED_MEDIA_TYPE, msg.clone(), None)
|
||||
}
|
||||
AppError::ServiceUnavailable(msg) => {
|
||||
(StatusCode::SERVICE_UNAVAILABLE, msg.clone(), None)
|
||||
}
|
||||
AppError::TooManyRequests { retry_after_secs } => {
|
||||
// Emit `Retry-After: N` (RFC 6585 §4) so a well-behaved
|
||||
// client can back off correctly. Done by building the
|
||||
|
||||
@@ -542,6 +542,51 @@ pub async fn mark_run_completed(pool: &PgPool, source_id: &str) -> sqlx::Result<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// List mangas whose `cover_image_path IS NULL` but a live
|
||||
/// `manga_sources` row still attaches them to a source. The bounded
|
||||
/// result feeds the cover-backfill pass in [`crate::crawler::pipeline`]:
|
||||
/// each entry is one (manga, freshest source row) pair where a cover
|
||||
/// re-download is in order.
|
||||
///
|
||||
/// Per-manga deduplication uses `DISTINCT ON (m.id)` keyed on the row
|
||||
/// with the newest `last_seen_at`, so a manga that's surfaced by
|
||||
/// multiple sources only produces one row (the freshest). Sort is
|
||||
/// stable for tests.
|
||||
pub async fn list_missing_covers(
|
||||
pool: &PgPool,
|
||||
max: i64,
|
||||
) -> sqlx::Result<Vec<MissingCoverEntry>> {
|
||||
let rows: Vec<(Uuid, String, String)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT DISTINCT ON (m.id) m.id, ms.source_manga_key, ms.source_url
|
||||
FROM mangas m
|
||||
JOIN manga_sources ms ON ms.manga_id = m.id
|
||||
WHERE m.cover_image_path IS NULL
|
||||
AND ms.dropped_at IS NULL
|
||||
ORDER BY m.id, ms.last_seen_at DESC
|
||||
LIMIT $1
|
||||
"#,
|
||||
)
|
||||
.bind(max)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|(manga_id, source_manga_key, source_url)| MissingCoverEntry {
|
||||
manga_id,
|
||||
source_manga_key,
|
||||
source_url,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct MissingCoverEntry {
|
||||
pub manga_id: Uuid,
|
||||
pub source_manga_key: String,
|
||||
pub source_url: String,
|
||||
}
|
||||
|
||||
/// Read the recovery flag for `source_id`. A missing row OR an
|
||||
/// unparseable value reads as `true` ("clean") — the former covers the
|
||||
/// first-ever run on a virgin DB (no recovery needed), the latter
|
||||
|
||||
350
backend/tests/api_admin_resync.rs
Normal file
350
backend/tests/api_admin_resync.rs
Normal file
@@ -0,0 +1,350 @@
|
||||
//! Integration tests for the admin force-resync endpoints.
|
||||
//!
|
||||
//! Real resync work requires Chromium, so these tests swap in a stub
|
||||
//! [`ResyncService`] to assert the handler-level contract: routing,
|
||||
//! admin gate, 503 when the daemon is disabled, 404 / 422 mapping for
|
||||
//! missing-resource / no-source cases, and the audit-log side effect.
|
||||
|
||||
mod common;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::http::StatusCode;
|
||||
use serde_json::json;
|
||||
use sqlx::PgPool;
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
use mangalord::crawler::resync::{
|
||||
ChapterResyncOutcome, MangaResyncOutcome, ResyncError, ResyncService,
|
||||
};
|
||||
use mangalord::repo;
|
||||
use mangalord::repo::crawler::UpsertStatus;
|
||||
|
||||
/// Stub that records call counts and returns a canned outcome.
|
||||
struct StubResync {
|
||||
manga_calls: AtomicUsize,
|
||||
chapter_calls: AtomicUsize,
|
||||
/// When true, returns NoMangaSource / NoChapterSource.
|
||||
no_source: bool,
|
||||
}
|
||||
|
||||
impl StubResync {
|
||||
fn new() -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
manga_calls: AtomicUsize::new(0),
|
||||
chapter_calls: AtomicUsize::new(0),
|
||||
no_source: false,
|
||||
})
|
||||
}
|
||||
fn no_source() -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
manga_calls: AtomicUsize::new(0),
|
||||
chapter_calls: AtomicUsize::new(0),
|
||||
no_source: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ResyncService for StubResync {
|
||||
async fn resync_manga(&self, manga_id: Uuid) -> anyhow::Result<MangaResyncOutcome> {
|
||||
self.manga_calls.fetch_add(1, Ordering::SeqCst);
|
||||
if self.no_source {
|
||||
return Err(ResyncError::NoMangaSource.into());
|
||||
}
|
||||
Ok(MangaResyncOutcome {
|
||||
manga_id,
|
||||
metadata_status: UpsertStatus::Updated,
|
||||
cover_fetched: true,
|
||||
})
|
||||
}
|
||||
async fn resync_chapter(&self, chapter_id: Uuid) -> anyhow::Result<ChapterResyncOutcome> {
|
||||
self.chapter_calls.fetch_add(1, Ordering::SeqCst);
|
||||
if self.no_source {
|
||||
return Err(ResyncError::NoChapterSource.into());
|
||||
}
|
||||
Ok(ChapterResyncOutcome::Fetched {
|
||||
chapter_id,
|
||||
pages: 7,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn promote_admin(pool: &PgPool, username: &str) {
|
||||
let u = repo::user::find_by_username(pool, username)
|
||||
.await
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
repo::user::set_is_admin_unchecked(pool, u.id, true)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
async fn insert_manga(pool: &PgPool, title: &str) -> Uuid {
|
||||
let (id,): (Uuid,) = sqlx::query_as(
|
||||
"INSERT INTO mangas (title, status, alt_titles) VALUES ($1, 'ongoing', ARRAY[]::text[]) RETURNING id",
|
||||
)
|
||||
.bind(title)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
id
|
||||
}
|
||||
|
||||
async fn insert_chapter(pool: &PgPool, manga_id: Uuid, number: i32, pages: i32) -> Uuid {
|
||||
let (id,): (Uuid,) = sqlx::query_as(
|
||||
"INSERT INTO chapters (manga_id, number, title, page_count) VALUES ($1, $2, NULL, $3) RETURNING id",
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(number)
|
||||
.bind(pages)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
id
|
||||
}
|
||||
|
||||
// ----- manga resync ---------------------------------------------------------
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn manga_resync_calls_service_and_returns_refreshed_detail(pool: PgPool) {
|
||||
let stub = StubResync::new();
|
||||
let h = common::harness_with_resync(pool.clone(), stub.clone());
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
let manga_id = insert_manga(&pool, "Hello").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/mangas/{manga_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
// Stub returned Updated + cover_fetched=true.
|
||||
assert_eq!(body["metadata_status"], "updated");
|
||||
assert_eq!(body["cover_fetched"], true);
|
||||
// Response includes the refreshed manga detail.
|
||||
assert_eq!(body["manga"]["id"], manga_id.to_string());
|
||||
assert_eq!(body["manga"]["title"], "Hello");
|
||||
|
||||
assert_eq!(stub.manga_calls.load(Ordering::SeqCst), 1);
|
||||
|
||||
// Audit row written.
|
||||
let (audit_count,): (i64,) =
|
||||
sqlx::query_as("SELECT count(*) FROM admin_audit WHERE action = 'manga_resync' AND target_id = $1")
|
||||
.bind(manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(audit_count, 1);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn manga_resync_returns_404_for_unknown_id(pool: PgPool) {
|
||||
let stub = StubResync::new();
|
||||
let h = common::harness_with_resync(pool.clone(), stub.clone());
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/mangas/{}/resync", Uuid::new_v4()),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
// Service must not have been called when the manga doesn't exist.
|
||||
assert_eq!(stub.manga_calls.load(Ordering::SeqCst), 0);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn manga_resync_maps_no_source_to_422(pool: PgPool) {
|
||||
let stub = StubResync::no_source();
|
||||
let h = common::harness_with_resync(pool.clone(), stub);
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
let manga_id = insert_manga(&pool, "Manual upload, no crawler source").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/mangas/{manga_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["details"]["manga"], "no_source");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn manga_resync_returns_503_when_daemon_disabled(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
let manga_id = insert_manga(&pool, "Z").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/mangas/{manga_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "service_unavailable");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn manga_resync_requires_admin(pool: PgPool) {
|
||||
let stub = StubResync::new();
|
||||
let h = common::harness_with_resync(pool.clone(), stub);
|
||||
// Non-admin user.
|
||||
let (_u, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = insert_manga(&pool, "M").await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/mangas/{manga_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||
}
|
||||
|
||||
// ----- chapter resync -------------------------------------------------------
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn chapter_resync_calls_service_and_returns_refreshed_chapter(pool: PgPool) {
|
||||
let stub = StubResync::new();
|
||||
let h = common::harness_with_resync(pool.clone(), stub.clone());
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
let manga_id = insert_manga(&pool, "M").await;
|
||||
let chapter_id = insert_chapter(&pool, manga_id, 1, 0).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/chapters/{chapter_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["outcome"], "fetched");
|
||||
assert_eq!(body["pages"], 7);
|
||||
assert_eq!(body["chapter"]["id"], chapter_id.to_string());
|
||||
assert_eq!(stub.chapter_calls.load(Ordering::SeqCst), 1);
|
||||
|
||||
let (audit_count,): (i64,) = sqlx::query_as(
|
||||
"SELECT count(*) FROM admin_audit WHERE action = 'chapter_resync' AND target_id = $1",
|
||||
)
|
||||
.bind(chapter_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(audit_count, 1);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn chapter_resync_returns_404_for_unknown_id(pool: PgPool) {
|
||||
let stub = StubResync::new();
|
||||
let h = common::harness_with_resync(pool.clone(), stub.clone());
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/chapters/{}/resync", Uuid::new_v4()),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
assert_eq!(stub.chapter_calls.load(Ordering::SeqCst), 0);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn chapter_resync_maps_no_source_to_422(pool: PgPool) {
|
||||
let stub = StubResync::no_source();
|
||||
let h = common::harness_with_resync(pool.clone(), stub);
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
let manga_id = insert_manga(&pool, "M").await;
|
||||
let chapter_id = insert_chapter(&pool, manga_id, 1, 0).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/chapters/{chapter_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["details"]["chapter"], "no_source");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn chapter_resync_returns_503_when_daemon_disabled(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (username, cookie) = common::register_user(&h.app).await;
|
||||
promote_admin(&pool, &username).await;
|
||||
let manga_id = insert_manga(&pool, "M").await;
|
||||
let chapter_id = insert_chapter(&pool, manga_id, 1, 0).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/chapters/{chapter_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn chapter_resync_requires_admin(pool: PgPool) {
|
||||
let stub = StubResync::new();
|
||||
let h = common::harness_with_resync(pool.clone(), stub);
|
||||
let (_u, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = insert_manga(&pool, "M").await;
|
||||
let chapter_id = insert_chapter(&pool, manga_id, 1, 0).await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/admin/chapters/{chapter_id}/resync"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||
}
|
||||
@@ -49,6 +49,7 @@ fn admin_test_router(pool: PgPool) -> (Router, TempDir) {
|
||||
auth,
|
||||
upload: UploadConfig::default(),
|
||||
auth_limiter,
|
||||
resync: None,
|
||||
};
|
||||
let app = Router::new()
|
||||
.nest("/api/v1", api::routes())
|
||||
|
||||
189
backend/tests/api_private_mode.rs
Normal file
189
backend/tests/api_private_mode.rs
Normal file
@@ -0,0 +1,189 @@
|
||||
//! Site-wide auth gate (`PRIVATE_MODE=true`).
|
||||
//!
|
||||
//! With private mode on, every API path except a small allowlist
|
||||
//! (`/health`, `/auth/config`, `/auth/login`, `/auth/logout`) requires
|
||||
//! a valid session cookie or bearer token, and `/auth/register` is
|
||||
//! force-blocked regardless of `ALLOW_SELF_REGISTER`. With private mode
|
||||
//! off (the default), nothing changes — the `public_mode_*` test
|
||||
//! pins that regression guard.
|
||||
|
||||
mod common;
|
||||
|
||||
use serde_json::json;
|
||||
use sqlx::PgPool;
|
||||
use tower::ServiceExt;
|
||||
|
||||
use axum::http::StatusCode;
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_blocks_anonymous_manga_list(pool: PgPool) {
|
||||
let h = common::harness_with_private_mode(pool);
|
||||
let resp = h.app.oneshot(common::get("/api/v1/mangas")).await.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_blocks_anonymous_files(pool: PgPool) {
|
||||
let h = common::harness_with_private_mode(pool);
|
||||
// The path doesn't have to exist — the guard runs before routing,
|
||||
// so the response is 401 (not 404). That's the property the test
|
||||
// is pinning: nothing leaks via crafted URLs.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get("/api/v1/files/anything.png"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_allows_session_cookie_read(pool: PgPool) {
|
||||
// Register through a non-private harness sharing the same DB pool
|
||||
// so the session row exists. Then exercise the gate using a fresh
|
||||
// private-mode harness against the same DB.
|
||||
let public = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&public.app).await;
|
||||
|
||||
let private = common::harness_with_private_mode(pool);
|
||||
let resp = private
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/mangas", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_allows_bearer_token_read(pool: PgPool) {
|
||||
let public = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&public.app).await;
|
||||
|
||||
let resp = public
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/auth/tokens",
|
||||
json!({ "name": "private-mode-bot" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||
let body = common::body_json(resp).await;
|
||||
let bearer = body["bearer"].as_str().unwrap().to_string();
|
||||
|
||||
let private = common::harness_with_private_mode(pool);
|
||||
let resp = private
|
||||
.app
|
||||
.oneshot(common::get_with_bearer("/api/v1/mangas", &bearer))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_allows_login_endpoint_anonymous(pool: PgPool) {
|
||||
// Seed a user via the public harness so login has credentials to
|
||||
// verify against.
|
||||
let public = common::harness(pool.clone());
|
||||
let _ = public
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json(
|
||||
"/api/v1/auth/register",
|
||||
json!({ "username": "alice", "password": "hunter2hunter2" }),
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let private = common::harness_with_private_mode(pool);
|
||||
let resp = private
|
||||
.app
|
||||
.oneshot(common::post_json(
|
||||
"/api/v1/auth/login",
|
||||
json!({ "username": "alice", "password": "hunter2hunter2" }),
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// Reaches the login handler and succeeds — *not* 401 from the
|
||||
// gate. That's the property we're pinning.
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_allows_health_and_config_anonymous(pool: PgPool) {
|
||||
let h = common::harness_with_private_mode(pool);
|
||||
|
||||
let r = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::get("/api/v1/health"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(r.status(), StatusCode::OK);
|
||||
|
||||
let r = h
|
||||
.app
|
||||
.oneshot(common::get("/api/v1/auth/config"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(r.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn private_mode_blocks_register_even_when_self_register_enabled(pool: PgPool) {
|
||||
// harness_with_private_mode keeps `allow_self_register=true` (the
|
||||
// default) — private mode is supposed to force-block register
|
||||
// regardless. That's what this test pins.
|
||||
let h = common::harness_with_private_mode(pool);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json(
|
||||
"/api/v1/auth/register",
|
||||
json!({ "username": "alice", "password": "hunter2hunter2" }),
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "forbidden");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn auth_config_reports_private_mode_and_effective_self_register(pool: PgPool) {
|
||||
let h = common::harness_with_private_mode(pool);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get("/api/v1/auth/config"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["private_mode"], true);
|
||||
// Effective value: `allow_self_register && !private_mode` is false
|
||||
// here even though the raw `allow_self_register` is true.
|
||||
assert_eq!(body["self_register_enabled"], false);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn public_mode_does_not_gate_anonymous_reads(pool: PgPool) {
|
||||
// Regression guard: with private_mode off (the default), the gate
|
||||
// must be a no-op so existing public deployments stay public.
|
||||
let h = common::harness(pool);
|
||||
let resp = h.app.oneshot(common::get("/api/v1/mangas")).await.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn public_mode_reports_private_mode_false(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get("/api/v1/auth/config"))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["private_mode"], false);
|
||||
assert_eq!(body["self_register_enabled"], true);
|
||||
}
|
||||
@@ -74,6 +74,10 @@ fn harness_with_auth_config(
|
||||
max_file_bytes: 256 * 1024,
|
||||
},
|
||||
auth_limiter,
|
||||
// Default harness has no crawler daemon wired up; admin resync
|
||||
// handlers return 503 in this config. Tests that need a stub
|
||||
// resync service swap it in via `harness_with_resync`.
|
||||
resync: None,
|
||||
};
|
||||
Harness { app: router(state), _storage_dir: storage_dir }
|
||||
}
|
||||
@@ -92,6 +96,21 @@ pub fn harness_with_self_register_disabled(pool: PgPool) -> Harness {
|
||||
harness_with_auth_config(pool, storage, storage_dir, auth)
|
||||
}
|
||||
|
||||
/// Like [`harness`] but flips `PRIVATE_MODE` on so the site-wide auth
|
||||
/// gate is exercised. `allow_self_register` stays at its default `true`
|
||||
/// to verify that private mode force-disables self-registration on top
|
||||
/// of whatever `ALLOW_SELF_REGISTER` says.
|
||||
pub fn harness_with_private_mode(pool: PgPool) -> Harness {
|
||||
let storage_dir = tempfile::tempdir().expect("tempdir");
|
||||
let storage = Arc::new(LocalStorage::new(storage_dir.path()));
|
||||
let auth = AuthConfig {
|
||||
cookie_secure: false,
|
||||
private_mode: true,
|
||||
..AuthConfig::default()
|
||||
};
|
||||
harness_with_auth_config(pool, storage, storage_dir, auth)
|
||||
}
|
||||
|
||||
/// Like [`harness`] but configures a tight auth rate limit. Used by
|
||||
/// the brute-force-rate-limiting test.
|
||||
pub fn harness_with_auth_rate_limit(
|
||||
@@ -109,6 +128,37 @@ pub fn harness_with_auth_rate_limit(
|
||||
harness_with_auth_config(pool, storage, storage_dir, auth)
|
||||
}
|
||||
|
||||
/// Like [`harness`] but slots a caller-supplied [`ResyncService`] stub
|
||||
/// into `AppState.resync`. Used by the admin resync tests so the
|
||||
/// endpoint path is exercised without standing up a real Chromium.
|
||||
pub fn harness_with_resync(
|
||||
pool: PgPool,
|
||||
resync: Arc<dyn mangalord::crawler::resync::ResyncService>,
|
||||
) -> Harness {
|
||||
let storage_dir = tempfile::tempdir().expect("tempdir");
|
||||
let storage = Arc::new(LocalStorage::new(storage_dir.path()));
|
||||
let auth = AuthConfig {
|
||||
cookie_secure: false,
|
||||
..AuthConfig::default()
|
||||
};
|
||||
let auth_limiter = Arc::new(AuthRateLimiter::new(auth.rate_limit));
|
||||
let state = AppState {
|
||||
db: pool,
|
||||
storage,
|
||||
auth,
|
||||
upload: UploadConfig {
|
||||
max_request_bytes: 4 * 1024 * 1024,
|
||||
max_file_bytes: 256 * 1024,
|
||||
},
|
||||
auth_limiter,
|
||||
resync: Some(resync),
|
||||
};
|
||||
Harness {
|
||||
app: router(state),
|
||||
_storage_dir: storage_dir,
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps a real `Storage` and fails on the N-th `put` call so tests can
|
||||
/// assert that handlers roll their DB writes back when storage errors
|
||||
/// mid-upload. Reads and other operations delegate to `inner`.
|
||||
|
||||
@@ -829,6 +829,107 @@ async fn sync_tags_garbage_collects_orphan_user_attachments(pool: PgPool) {
|
||||
assert_eq!(orphan_rows, 0, "orphan user-attached tag should be reaped");
|
||||
}
|
||||
|
||||
// ---- list_missing_covers ---------------------------------------------------
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_missing_covers_only_returns_rows_without_cover(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let with_cover = sample_manga("with", "With Cover", "h1");
|
||||
let without_cover = sample_manga("without", "No Cover", "h2");
|
||||
let _w = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/with", &with_cover)
|
||||
.await
|
||||
.unwrap();
|
||||
let nc = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/without", &without_cover)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Manually set a cover for `with` only.
|
||||
sqlx::query("UPDATE mangas SET cover_image_path = 'mangas/x/cover.jpg' WHERE id = $1")
|
||||
.bind(_w.manga_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let entries = crawler::list_missing_covers(&pool, 50).await.unwrap();
|
||||
assert_eq!(entries.len(), 1, "exactly the manga without a cover");
|
||||
assert_eq!(entries[0].manga_id, nc.manga_id);
|
||||
assert_eq!(entries[0].source_manga_key, "without");
|
||||
assert_eq!(entries[0].source_url, "https://x.example/without");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_missing_covers_skips_dropped_source_rows(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo", "h1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("UPDATE manga_sources SET dropped_at = NOW() WHERE manga_id = $1")
|
||||
.bind(up.manga_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let entries = crawler::list_missing_covers(&pool, 50).await.unwrap();
|
||||
assert!(
|
||||
entries.is_empty(),
|
||||
"dropped-source mangas must not be backfilled — no live source to fetch from"
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_missing_covers_respects_limit(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
for i in 0..5 {
|
||||
let key = format!("m{i}");
|
||||
let url = format!("https://x.example/{key}");
|
||||
let m = sample_manga(&key, &format!("M{i}"), &format!("h{i}"));
|
||||
let _ = crawler::upsert_manga_from_source(&pool, "target", &url, &m)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
let entries = crawler::list_missing_covers(&pool, 3).await.unwrap();
|
||||
assert_eq!(entries.len(), 3, "limit caps the result set");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_missing_covers_deduplicates_per_manga(pool: PgPool) {
|
||||
// A manga surfaced by two sources should produce ONE backfill
|
||||
// entry, not two — otherwise the per-tick cap could be eaten by
|
||||
// duplicates and starve other mangas.
|
||||
crawler::ensure_source(&pool, "src-a", "A", "https://a.example")
|
||||
.await
|
||||
.unwrap();
|
||||
crawler::ensure_source(&pool, "src-b", "B", "https://b.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo", "h1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "src-a", "https://a.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
// Second source attaches to the SAME manga row.
|
||||
sqlx::query(
|
||||
"INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url) \
|
||||
VALUES ($1, $2, $3, $4)",
|
||||
)
|
||||
.bind("src-b")
|
||||
.bind("foo-on-b")
|
||||
.bind(up.manga_id)
|
||||
.bind("https://b.example/foo")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let entries = crawler::list_missing_covers(&pool, 50).await.unwrap();
|
||||
assert_eq!(entries.len(), 1, "DISTINCT ON (m.id) collapses duplicate source rows");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
|
||||
@@ -17,5 +17,28 @@ services:
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
# Optional: TOR daemon for crawler dev. Ports bind to 127.0.0.1 only
|
||||
# — never the LAN — so a native `cargo run` on the host can reach
|
||||
# 127.0.0.1:9050 / 9051. Mirrors the prod tor service (see
|
||||
# docker-compose.yml), just with host-loopback ports and a default
|
||||
# password baked in for friction-free dev.
|
||||
tor:
|
||||
image: dockurr/tor:latest
|
||||
entrypoint: ["/bin/sh", "/usr/local/bin/mangalord-entrypoint.sh"]
|
||||
environment:
|
||||
PASSWORD: ${TOR_CONTROL_PASSWORD:-dev-tor-password}
|
||||
volumes:
|
||||
- ./tor/torrc:/etc/tor/torrc:ro
|
||||
- ./tor/entrypoint.sh:/usr/local/bin/mangalord-entrypoint.sh:ro
|
||||
ports:
|
||||
- "127.0.0.1:9050:9050"
|
||||
- "127.0.0.1:9051:9051"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "nc -z 127.0.0.1 9050 && nc -z 127.0.0.1 9051"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
start_period: 30s
|
||||
|
||||
volumes:
|
||||
mangalord-postgres-dev:
|
||||
|
||||
@@ -19,11 +19,48 @@ services:
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
tor:
|
||||
# SOCKS5 proxy for the crawler, plus a control port so the backend
|
||||
# can signal NEWNYM on bad pages. See tor/torrc for the daemon
|
||||
# config; both ports are only `expose`d (compose-internal), never
|
||||
# bound on the host.
|
||||
#
|
||||
# We bypass dockurr/tor's stock entrypoint because it binds the
|
||||
# control port to localhost (unreachable from the backend
|
||||
# container) and skips its own HashedControlPassword injection
|
||||
# when the user's torrc declares a ControlPort. Our wrapper
|
||||
# (tor/entrypoint.sh) generates the hash from $PASSWORD and execs
|
||||
# tor with our torrc. Backend authenticates with the same plain
|
||||
# string via CRAWLER_TOR_CONTROL_PASSWORD.
|
||||
image: dockurr/tor:latest
|
||||
entrypoint: ["/bin/sh", "/usr/local/bin/mangalord-entrypoint.sh"]
|
||||
environment:
|
||||
PASSWORD: ${TOR_CONTROL_PASSWORD:?TOR_CONTROL_PASSWORD must be set in .env}
|
||||
volumes:
|
||||
- ./tor/torrc:/etc/tor/torrc:ro
|
||||
- ./tor/entrypoint.sh:/usr/local/bin/mangalord-entrypoint.sh:ro
|
||||
expose:
|
||||
- "9050"
|
||||
- "9051"
|
||||
# Wait for both control + SOCKS ports to listen before downstream
|
||||
# services start. dockurr/tor's main process spawns before tor
|
||||
# itself is bound, so `service_started` alone races the first
|
||||
# NEWNYM call.
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "nc -z 127.0.0.1 9050 && nc -z 127.0.0.1 9051"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 20
|
||||
start_period: 30s
|
||||
restart: unless-stopped
|
||||
|
||||
backend:
|
||||
build: ./backend
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
tor:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: postgres://${POSTGRES_USER:-mangalord}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}@postgres:5432/${POSTGRES_DB:-mangalord}
|
||||
BIND_ADDRESS: 0.0.0.0:8080
|
||||
@@ -44,6 +81,16 @@ services:
|
||||
# arm64 deployments. Pair with `--build-arg INSTALL_CHROMIUM=true`
|
||||
# so the image actually contains the binary.
|
||||
CRAWLER_CHROMIUM_BINARY: ${CRAWLER_CHROMIUM_BINARY:-}
|
||||
# TOR proxy + NEWNYM recircuit (see .env.example for details).
|
||||
# Defaults assume the bundled `tor` service above; override
|
||||
# CRAWLER_PROXY= and CRAWLER_TOR_CONTROL_URL= (both empty) in
|
||||
# .env to disable. CRAWLER_TOR_CONTROL_PASSWORD MUST match the
|
||||
# tor service's PASSWORD (both wired to the same TOR_CONTROL_PASSWORD
|
||||
# .env var below).
|
||||
CRAWLER_PROXY: ${CRAWLER_PROXY-socks5h://tor:9050}
|
||||
CRAWLER_TOR_CONTROL_URL: ${CRAWLER_TOR_CONTROL_URL-tcp://tor:9051}
|
||||
CRAWLER_TOR_CONTROL_PASSWORD: ${TOR_CONTROL_PASSWORD:?TOR_CONTROL_PASSWORD must be set in .env}
|
||||
CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS: ${CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS:-3}
|
||||
volumes:
|
||||
- storage-data:/var/lib/mangalord/storage
|
||||
# No host port mapping in the default setup — the frontend proxies
|
||||
|
||||
@@ -10,6 +10,15 @@ import { test, expect, type Page } from '@playwright/test';
|
||||
const emptyPage = { items: [], page: { limit: 50, offset: 0, total: null } };
|
||||
|
||||
async function mockAnonymous(page: Page) {
|
||||
// Force public mode so the root +layout.ts doesn't bounce us to /login
|
||||
// (a dev backend with PRIVATE_MODE=true must not leak into E2E runs).
|
||||
await page.route('**/api/v1/auth/config', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ self_register_enabled: true, private_mode: false })
|
||||
});
|
||||
});
|
||||
await page.route('**/api/v1/auth/me', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 401,
|
||||
@@ -69,3 +78,53 @@ test('search updates the manga list', async ({ page }) => {
|
||||
await expect(page.getByTestId('manga-list')).toContainText('Berserk');
|
||||
expect(lastSearch).toBe('berserk');
|
||||
});
|
||||
|
||||
test('clicking Next paginates to page 2 and updates the URL', async ({ page }) => {
|
||||
await mockAnonymous(page);
|
||||
|
||||
// Fake a catalogue of 75 mangas; page 1 is ids 1..50, page 2 is ids 51..75.
|
||||
const TOTAL = 75;
|
||||
function mangaAt(i: number) {
|
||||
return {
|
||||
id: `m${i}`,
|
||||
title: `Manga ${i}`,
|
||||
author: 'Test',
|
||||
description: null,
|
||||
cover_image_path: null,
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
updated_at: '2026-01-01T00:00:00Z',
|
||||
authors: [],
|
||||
genres: []
|
||||
};
|
||||
}
|
||||
|
||||
await page.route('**/api/v1/mangas*', async (route) => {
|
||||
const url = new URL(route.request().url());
|
||||
const limit = Number(url.searchParams.get('limit') ?? '50');
|
||||
const offset = Number(url.searchParams.get('offset') ?? '0');
|
||||
const items: ReturnType<typeof mangaAt>[] = [];
|
||||
for (let i = offset + 1; i <= Math.min(offset + limit, TOTAL); i++) {
|
||||
items.push(mangaAt(i));
|
||||
}
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items,
|
||||
page: { limit, offset, total: TOTAL }
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
await page.goto('/');
|
||||
await expect(page.getByTestId('manga-total')).toContainText('Showing 1–50 of 75');
|
||||
await expect(page.getByTestId('manga-list')).toContainText('Manga 1');
|
||||
await expect(page.getByTestId('manga-list')).not.toContainText('Manga 75');
|
||||
|
||||
await page.getByTestId('manga-pager').getByRole('button', { name: /next/i }).click();
|
||||
|
||||
await expect(page).toHaveURL(/[?&]page=2(&|$)/);
|
||||
await expect(page.getByTestId('manga-total')).toContainText('Showing 51–75 of 75');
|
||||
await expect(page.getByTestId('manga-list')).toContainText('Manga 75');
|
||||
await expect(page.getByTestId('manga-list')).not.toContainText('Manga 1');
|
||||
});
|
||||
|
||||
67
frontend/e2e/page-title.spec.ts
Normal file
67
frontend/e2e/page-title.spec.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import { test, expect, type Page } from '@playwright/test';
|
||||
|
||||
// Guards the title-on-nav behavior: without this, a stale title from
|
||||
// the last manga / author page lingers when the user navigates to a
|
||||
// generic page like /upload.
|
||||
|
||||
async function mockAnonymous(page: Page) {
|
||||
await page.route('**/api/v1/auth/config', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ self_register_enabled: true, private_mode: false })
|
||||
});
|
||||
});
|
||||
await page.route('**/api/v1/auth/me', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 401,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ error: { code: 'unauthenticated', message: 'unauthenticated' } })
|
||||
});
|
||||
});
|
||||
await page.route('**/api/v1/mangas*', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ items: [], page: { limit: 50, offset: 0, total: 0 } })
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
test('static route titles use the brand-first layout map', async ({ page }) => {
|
||||
await mockAnonymous(page);
|
||||
|
||||
await page.goto('/');
|
||||
await expect(page).toHaveTitle('Mangalord');
|
||||
|
||||
await page.goto('/upload');
|
||||
await expect(page).toHaveTitle('Mangalord | Upload');
|
||||
|
||||
await page.goto('/login');
|
||||
await expect(page).toHaveTitle('Mangalord | Login');
|
||||
|
||||
await page.goto('/bookmarks');
|
||||
await expect(page).toHaveTitle('Mangalord | Bookmarks');
|
||||
|
||||
await page.goto('/collections');
|
||||
await expect(page).toHaveTitle('Mangalord | Collections');
|
||||
});
|
||||
|
||||
test('title updates when navigating away from a content page', async ({ page }) => {
|
||||
await mockAnonymous(page);
|
||||
|
||||
// Pretend we just left a manga detail page — the document title
|
||||
// would have been overridden to "Mangalord | Berserk". Use evaluate
|
||||
// to set it synthetically so we can assert the regression cleanly
|
||||
// even though the dynamic page itself isn't mocked here.
|
||||
await page.goto('/');
|
||||
await page.evaluate(() => {
|
||||
document.title = 'Mangalord | Berserk';
|
||||
});
|
||||
expect(await page.title()).toBe('Mangalord | Berserk');
|
||||
|
||||
// Client-side nav to /upload — the root layout must reassert its
|
||||
// mapped title or the stale "Berserk" lingers.
|
||||
await page.goto('/upload');
|
||||
await expect(page).toHaveTitle('Mangalord | Upload');
|
||||
});
|
||||
101
frontend/e2e/private-mode.spec.ts
Normal file
101
frontend/e2e/private-mode.spec.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
import { test, expect, type Page } from '@playwright/test';
|
||||
|
||||
// Network-level mocks for the private-mode UX. The backend integration
|
||||
// tests (api_private_mode.rs) cover the actual gate; here we only
|
||||
// verify that the SvelteKit universal load redirects anonymous
|
||||
// visitors to /login and then back to where they were going.
|
||||
|
||||
const userFixture = {
|
||||
id: 'user-1',
|
||||
username: 'alice',
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
is_admin: false
|
||||
};
|
||||
const emptyPage = { items: [], page: { limit: 50, offset: 0, total: null } };
|
||||
|
||||
async function stubPrivateInstance(page: Page) {
|
||||
let loggedIn = false;
|
||||
|
||||
// The flag that flips the gate on. Frontend reads it in
|
||||
// `+layout.ts` to decide whether to redirect.
|
||||
await page.route('**/api/v1/auth/config', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
self_register_enabled: false,
|
||||
private_mode: true
|
||||
})
|
||||
});
|
||||
});
|
||||
|
||||
await page.route('**/api/v1/auth/me', async (route) => {
|
||||
if (loggedIn) {
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ user: userFixture })
|
||||
});
|
||||
} else {
|
||||
await route.fulfill({
|
||||
status: 401,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
error: { code: 'unauthenticated', message: 'unauthenticated' }
|
||||
})
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
await page.route('**/api/v1/auth/login', async (route) => {
|
||||
loggedIn = true;
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ user: userFixture })
|
||||
});
|
||||
});
|
||||
|
||||
// The real backend would 401 these too in private mode; we stub
|
||||
// success so the post-login navigation can render the home page
|
||||
// without an additional redirect cycle.
|
||||
await page.route('**/api/v1/mangas*', async (route) => {
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(emptyPage)
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
test('private mode: anonymous visit to / redirects to /login?next=%2F', async ({ page }) => {
|
||||
await stubPrivateInstance(page);
|
||||
await page.goto('/');
|
||||
await expect(page).toHaveURL(/\/login\?next=%2F$/);
|
||||
await expect(page.getByTestId('login-username')).toBeVisible();
|
||||
});
|
||||
|
||||
test('private mode: register link is hidden', async ({ page }) => {
|
||||
await stubPrivateInstance(page);
|
||||
await page.goto('/login');
|
||||
await expect(page.getByTestId('nav-login')).toBeVisible();
|
||||
// self_register_enabled is the effective value (false in private
|
||||
// mode regardless of ALLOW_SELF_REGISTER), so the navbar must
|
||||
// never render the register affordance here.
|
||||
await expect(page.getByTestId('nav-register')).toHaveCount(0);
|
||||
});
|
||||
|
||||
test('private mode: after login the user lands back on the requested page', async ({ page }) => {
|
||||
await stubPrivateInstance(page);
|
||||
|
||||
// Visit a deep link → bounced to /login with next= preserving it.
|
||||
await page.goto('/');
|
||||
await expect(page).toHaveURL(/\/login\?next=%2F$/);
|
||||
|
||||
await page.getByTestId('login-username').fill('alice');
|
||||
await page.getByTestId('login-password').fill('hunter2hunter2');
|
||||
await page.getByTestId('login-submit').click();
|
||||
|
||||
// Authenticated → can now reach the home page without bouncing.
|
||||
await expect(page.getByTestId('session-user')).toContainText('alice');
|
||||
});
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "mangalord-frontend",
|
||||
"version": "0.45.0",
|
||||
"version": "0.50.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
|
||||
@@ -14,7 +14,9 @@ import {
|
||||
createAdminUser,
|
||||
listAdminMangas,
|
||||
listAdminChapters,
|
||||
getSystemStats
|
||||
getSystemStats,
|
||||
resyncManga,
|
||||
resyncChapter
|
||||
} from './admin';
|
||||
|
||||
function ok(body: unknown, status = 200): Response {
|
||||
@@ -242,4 +244,88 @@ describe('admin api client', () => {
|
||||
const s = await getSystemStats();
|
||||
expect(s.disk).toBeNull();
|
||||
});
|
||||
|
||||
// ---- force resync ----
|
||||
|
||||
it('resyncManga POSTs to /v1/admin/mangas/{id}/resync and returns the envelope', async () => {
|
||||
const resp = {
|
||||
manga: {
|
||||
id: 'm-1',
|
||||
title: 'T',
|
||||
status: 'ongoing',
|
||||
alt_titles: [],
|
||||
description: null,
|
||||
cover_image_path: 'mangas/m-1/cover.jpg',
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
updated_at: '2026-01-02T00:00:00Z',
|
||||
authors: [],
|
||||
genres: [],
|
||||
tags: []
|
||||
},
|
||||
metadata_status: 'updated',
|
||||
cover_fetched: true
|
||||
};
|
||||
fetchSpy.mockResolvedValueOnce(ok(resp));
|
||||
const got = await resyncManga('m-1');
|
||||
expect(got.metadata_status).toBe('updated');
|
||||
expect(got.cover_fetched).toBe(true);
|
||||
expect(got.manga.id).toBe('m-1');
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/admin\/mangas\/m-1\/resync$/);
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('POST');
|
||||
});
|
||||
|
||||
it('resyncManga surfaces 503 service_unavailable when the daemon is off', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
envelope(503, 'service_unavailable', 'crawler daemon is disabled')
|
||||
);
|
||||
await expect(resyncManga('m-1')).rejects.toMatchObject({
|
||||
status: 503,
|
||||
code: 'service_unavailable'
|
||||
});
|
||||
});
|
||||
|
||||
it('resyncChapter POSTs to /v1/admin/chapters/{id}/resync and returns the envelope', async () => {
|
||||
const resp = {
|
||||
chapter: {
|
||||
id: 'c-1',
|
||||
manga_id: 'm-1',
|
||||
number: 1,
|
||||
title: 'Foo',
|
||||
page_count: 7,
|
||||
created_at: '2026-01-01T00:00:00Z'
|
||||
},
|
||||
outcome: 'fetched',
|
||||
pages: 7
|
||||
};
|
||||
fetchSpy.mockResolvedValueOnce(ok(resp));
|
||||
const got = await resyncChapter('c-1');
|
||||
expect(got.outcome).toBe('fetched');
|
||||
expect(got.pages).toBe(7);
|
||||
expect(got.chapter.page_count).toBe(7);
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/admin\/chapters\/c-1\/resync$/);
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('POST');
|
||||
});
|
||||
|
||||
it('resyncChapter handles the "skipped" outcome envelope', async () => {
|
||||
const resp = {
|
||||
chapter: {
|
||||
id: 'c-1',
|
||||
manga_id: 'm-1',
|
||||
number: 1,
|
||||
title: null,
|
||||
page_count: 7,
|
||||
created_at: '2026-01-01T00:00:00Z'
|
||||
},
|
||||
outcome: 'skipped',
|
||||
pages: null
|
||||
};
|
||||
fetchSpy.mockResolvedValueOnce(ok(resp));
|
||||
const got = await resyncChapter('c-1');
|
||||
expect(got.outcome).toBe('skipped');
|
||||
expect(got.pages).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
|
||||
import { request, type Page } from './client';
|
||||
import type { User } from './auth';
|
||||
import type { MangaDetail } from './mangas';
|
||||
import type { Chapter } from './chapters';
|
||||
|
||||
// ---- users -----------------------------------------------------------------
|
||||
|
||||
@@ -176,3 +178,39 @@ export type SystemStats = {
|
||||
export async function getSystemStats(): Promise<SystemStats> {
|
||||
return request<SystemStats>('/v1/admin/system');
|
||||
}
|
||||
|
||||
// ---- force resync ----------------------------------------------------------
|
||||
|
||||
export type MangaResyncResponse = {
|
||||
manga: MangaDetail;
|
||||
metadata_status: 'new' | 'updated' | 'unchanged';
|
||||
cover_fetched: boolean;
|
||||
};
|
||||
|
||||
export type ChapterResyncResponse = {
|
||||
chapter: Chapter;
|
||||
outcome: 'fetched' | 'skipped';
|
||||
/** Page count when `outcome === 'fetched'`; null when skipped. */
|
||||
pages: number | null;
|
||||
};
|
||||
|
||||
/** POST /v1/admin/mangas/:id/resync — refetches metadata + cover from
|
||||
* the manga's live crawler source. Long-running (one HTTP request per
|
||||
* Chromium nav + image download), so the UI should disable the trigger
|
||||
* and surface progress. */
|
||||
export async function resyncManga(id: string): Promise<MangaResyncResponse> {
|
||||
return request<MangaResyncResponse>(
|
||||
`/v1/admin/mangas/${encodeURIComponent(id)}/resync`,
|
||||
{ method: 'POST' }
|
||||
);
|
||||
}
|
||||
|
||||
/** POST /v1/admin/chapters/:id/resync — force-refetches a chapter's
|
||||
* pages even if `page_count > 0`. Same long-running caveat as
|
||||
* `resyncManga`. */
|
||||
export async function resyncChapter(id: string): Promise<ChapterResyncResponse> {
|
||||
return request<ChapterResyncResponse>(
|
||||
`/v1/admin/chapters/${encodeURIComponent(id)}/resync`,
|
||||
{ method: 'POST' }
|
||||
);
|
||||
}
|
||||
|
||||
@@ -102,10 +102,14 @@ export async function deleteToken(id: string): Promise<void> {
|
||||
}
|
||||
|
||||
export type AuthConfig = {
|
||||
/** When false, /v1/auth/register returns 403 and the UI should
|
||||
/** Effective value (`allow_self_register && !private_mode`).
|
||||
* When false, /v1/auth/register returns 403 and the UI should
|
||||
* hide its register affordance. Admins can still mint accounts
|
||||
* via POST /v1/admin/users. */
|
||||
self_register_enabled: boolean;
|
||||
/** When true, every read endpoint requires auth and anonymous
|
||||
* visitors are redirected to `/login` (see `+layout.ts`). */
|
||||
private_mode: boolean;
|
||||
};
|
||||
|
||||
/** Public — no auth, no cookie required. */
|
||||
|
||||
@@ -16,6 +16,7 @@ import { getAuthConfig } from './api/auth';
|
||||
|
||||
class AuthConfigStore {
|
||||
self_register_enabled = $state(true);
|
||||
private_mode = $state(false);
|
||||
loaded = $state(false);
|
||||
private loading = false;
|
||||
|
||||
@@ -25,6 +26,7 @@ class AuthConfigStore {
|
||||
try {
|
||||
const cfg = await getAuthConfig();
|
||||
this.self_register_enabled = cfg.self_register_enabled;
|
||||
this.private_mode = cfg.private_mode;
|
||||
this.loaded = true;
|
||||
} catch {
|
||||
// Keep optimistic default; next page mount will retry.
|
||||
@@ -32,6 +34,16 @@ class AuthConfigStore {
|
||||
this.loading = false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Seed from server-rendered layout data so the very first paint
|
||||
* doesn't flash the loading state. Used by `+layout.ts` /
|
||||
* `+layout.svelte` on the universal-load path. Safe to call from
|
||||
* SSR (no `browser` guard) since it touches only reactive state. */
|
||||
seed(cfg: { self_register_enabled: boolean; private_mode: boolean }): void {
|
||||
this.self_register_enabled = cfg.self_register_enabled;
|
||||
this.private_mode = cfg.private_mode;
|
||||
this.loaded = true;
|
||||
}
|
||||
}
|
||||
|
||||
export const authConfig = new AuthConfigStore();
|
||||
|
||||
128
frontend/src/lib/components/Pager.svelte
Normal file
128
frontend/src/lib/components/Pager.svelte
Normal file
@@ -0,0 +1,128 @@
|
||||
<script lang="ts">
|
||||
type Props = {
|
||||
page: number;
|
||||
totalPages: number;
|
||||
onChange: (page: number) => void;
|
||||
testid?: string;
|
||||
};
|
||||
|
||||
let { page, totalPages, onChange, testid }: Props = $props();
|
||||
|
||||
type Slot = number | 'ellipsis';
|
||||
|
||||
// Compact layout: always show first + last, surround the current page with
|
||||
// its direct neighbours, and use "…" to elide the rest. Keeps the bar to
|
||||
// at most 7 buttons regardless of totalPages.
|
||||
function buildSlots(p: number, total: number): Slot[] {
|
||||
if (total <= 7) {
|
||||
return Array.from({ length: total }, (_, i) => i + 1);
|
||||
}
|
||||
const out: Slot[] = [1];
|
||||
if (p <= 4) {
|
||||
for (let i = 2; i <= 5; i++) out.push(i);
|
||||
out.push('ellipsis');
|
||||
out.push(total);
|
||||
} else if (p >= total - 3) {
|
||||
out.push('ellipsis');
|
||||
for (let i = total - 4; i <= total; i++) out.push(i);
|
||||
} else {
|
||||
out.push('ellipsis');
|
||||
out.push(p - 1);
|
||||
out.push(p);
|
||||
out.push(p + 1);
|
||||
out.push('ellipsis');
|
||||
out.push(total);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
const slots = $derived(buildSlots(page, totalPages));
|
||||
</script>
|
||||
|
||||
{#if totalPages > 1}
|
||||
<nav class="pager" aria-label="Pagination" data-testid={testid}>
|
||||
<button
|
||||
type="button"
|
||||
class="step"
|
||||
disabled={page <= 1}
|
||||
onclick={() => onChange(page - 1)}
|
||||
aria-label="Previous page"
|
||||
>
|
||||
‹ Prev
|
||||
</button>
|
||||
|
||||
{#each slots as slot, i (i)}
|
||||
{#if slot === 'ellipsis'}
|
||||
<span class="ellipsis" aria-hidden="true">…</span>
|
||||
{:else}
|
||||
<button
|
||||
type="button"
|
||||
class="num"
|
||||
class:active={slot === page}
|
||||
aria-current={slot === page ? 'page' : undefined}
|
||||
aria-label={`Go to page ${slot}`}
|
||||
onclick={() => onChange(slot)}
|
||||
>
|
||||
{slot}
|
||||
</button>
|
||||
{/if}
|
||||
{/each}
|
||||
|
||||
<button
|
||||
type="button"
|
||||
class="step"
|
||||
disabled={page >= totalPages}
|
||||
onclick={() => onChange(page + 1)}
|
||||
aria-label="Next page"
|
||||
>
|
||||
Next ›
|
||||
</button>
|
||||
</nav>
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
.pager {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
gap: var(--space-1);
|
||||
margin: var(--space-4) 0;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.step,
|
||||
.num {
|
||||
min-width: 36px;
|
||||
height: 36px;
|
||||
padding: 0 var(--space-2);
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-md);
|
||||
color: var(--text);
|
||||
cursor: pointer;
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.step:hover:not(:disabled),
|
||||
.num:hover:not(.active) {
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.step:disabled {
|
||||
opacity: 0.4;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.num.active {
|
||||
background: var(--primary);
|
||||
color: var(--primary-contrast);
|
||||
border-color: var(--primary);
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
.ellipsis {
|
||||
padding: 0 var(--space-1);
|
||||
color: var(--text-muted);
|
||||
user-select: none;
|
||||
}
|
||||
</style>
|
||||
77
frontend/src/lib/components/Pager.svelte.test.ts
Normal file
77
frontend/src/lib/components/Pager.svelte.test.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { describe, it, expect, vi, afterEach } from 'vitest';
|
||||
import { render, screen, cleanup } from '@testing-library/svelte';
|
||||
import Pager from './Pager.svelte';
|
||||
|
||||
afterEach(() => cleanup());
|
||||
|
||||
describe('Pager', () => {
|
||||
it('renders nothing when totalPages <= 1', () => {
|
||||
const { container } = render(Pager, { props: { page: 1, totalPages: 1, onChange: () => {} } });
|
||||
expect(container.querySelector('nav')).toBeNull();
|
||||
});
|
||||
|
||||
it('disables Prev on the first page and Next on the last', () => {
|
||||
const { rerender } = render(Pager, {
|
||||
props: { page: 1, totalPages: 5, onChange: () => {} }
|
||||
});
|
||||
expect((screen.getByRole('button', { name: /prev/i }) as HTMLButtonElement).disabled).toBe(true);
|
||||
expect((screen.getByRole('button', { name: /next/i }) as HTMLButtonElement).disabled).toBe(false);
|
||||
|
||||
rerender({ page: 5, totalPages: 5, onChange: () => {} });
|
||||
expect((screen.getByRole('button', { name: /prev/i }) as HTMLButtonElement).disabled).toBe(false);
|
||||
expect((screen.getByRole('button', { name: /next/i }) as HTMLButtonElement).disabled).toBe(true);
|
||||
});
|
||||
|
||||
it('marks the current page button as aria-current', () => {
|
||||
render(Pager, { props: { page: 3, totalPages: 5, onChange: () => {} } });
|
||||
const current = screen.getByRole('button', { name: /go to page 3/i });
|
||||
expect(current.getAttribute('aria-current')).toBe('page');
|
||||
});
|
||||
|
||||
it('fires onChange with the clicked page number', async () => {
|
||||
const onChange = vi.fn();
|
||||
render(Pager, { props: { page: 1, totalPages: 5, onChange } });
|
||||
screen.getByRole('button', { name: /go to page 3/i }).click();
|
||||
expect(onChange).toHaveBeenCalledWith(3);
|
||||
});
|
||||
|
||||
it('Prev decrements and Next increments via onChange', () => {
|
||||
const onChange = vi.fn();
|
||||
render(Pager, { props: { page: 3, totalPages: 5, onChange } });
|
||||
screen.getByRole('button', { name: /prev/i }).click();
|
||||
screen.getByRole('button', { name: /next/i }).click();
|
||||
expect(onChange).toHaveBeenNthCalledWith(1, 2);
|
||||
expect(onChange).toHaveBeenNthCalledWith(2, 4);
|
||||
});
|
||||
|
||||
it('shows every page button when totalPages <= 7', () => {
|
||||
render(Pager, { props: { page: 4, totalPages: 7, onChange: () => {} } });
|
||||
for (let n = 1; n <= 7; n++) {
|
||||
expect(screen.getByRole('button', { name: new RegExp(`go to page ${n}$`, 'i') })).toBeTruthy();
|
||||
}
|
||||
});
|
||||
|
||||
it('collapses middle pages with ellipsis when totalPages > 7 and current is in the middle', () => {
|
||||
render(Pager, { props: { page: 10, totalPages: 24, onChange: () => {} } });
|
||||
// First and last are always shown
|
||||
expect(screen.getByRole('button', { name: /go to page 1$/i })).toBeTruthy();
|
||||
expect(screen.getByRole('button', { name: /go to page 24$/i })).toBeTruthy();
|
||||
// Current and direct neighbours are shown
|
||||
expect(screen.getByRole('button', { name: /go to page 9$/i })).toBeTruthy();
|
||||
expect(screen.getByRole('button', { name: /go to page 10$/i })).toBeTruthy();
|
||||
expect(screen.getByRole('button', { name: /go to page 11$/i })).toBeTruthy();
|
||||
// Distant pages are NOT rendered as buttons
|
||||
expect(screen.queryByRole('button', { name: /go to page 2$/i })).toBeNull();
|
||||
expect(screen.queryByRole('button', { name: /go to page 23$/i })).toBeNull();
|
||||
// Ellipsis appears on both sides
|
||||
const ellipses = screen.getAllByText('…');
|
||||
expect(ellipses.length).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
it('does not duplicate boundary buttons when current is near the edge', () => {
|
||||
render(Pager, { props: { page: 2, totalPages: 20, onChange: () => {} } });
|
||||
// Each page button rendered should be unique — no duplicate "go to page 1"
|
||||
const first = screen.getAllByRole('button', { name: /go to page 1$/i });
|
||||
expect(first.length).toBe(1);
|
||||
});
|
||||
});
|
||||
@@ -1,6 +1,7 @@
|
||||
<script lang="ts">
|
||||
import { onMount, onDestroy } from 'svelte';
|
||||
import { goto } from '$app/navigation';
|
||||
import { page } from '$app/stores';
|
||||
import { logout } from '$lib/api/auth';
|
||||
import { authConfig } from '$lib/auth-config.svelte';
|
||||
import { preferences } from '$lib/preferences.svelte';
|
||||
@@ -14,15 +15,49 @@
|
||||
import Shield from '@lucide/svelte/icons/shield';
|
||||
import '$lib/styles/tokens.css';
|
||||
|
||||
let { children } = $props();
|
||||
let { children, data } = $props();
|
||||
let loggingOut = $state(false);
|
||||
let headerEl: HTMLElement | undefined = $state();
|
||||
|
||||
// Static-route title map. Dynamic pages (manga / author / collection /
|
||||
// chapter) override this via their own <svelte:head><title>, since the
|
||||
// title depends on data the layout doesn't have. Routes omitted here
|
||||
// (notably the dynamic ones) fall through to the bare brand and rely
|
||||
// on the page to set the descriptive form.
|
||||
const STATIC_TITLES: Record<string, string> = {
|
||||
'/': 'Mangalord',
|
||||
'/login': 'Mangalord | Login',
|
||||
'/register': 'Mangalord | Register',
|
||||
'/upload': 'Mangalord | Upload',
|
||||
'/bookmarks': 'Mangalord | Bookmarks',
|
||||
'/collections': 'Mangalord | Collections',
|
||||
'/profile': 'Mangalord | Profile',
|
||||
'/profile/account': 'Mangalord | Account',
|
||||
'/profile/bookmarks': 'Mangalord | Bookmarks',
|
||||
'/profile/collections': 'Mangalord | Collections',
|
||||
'/profile/history': 'Mangalord | Reading history',
|
||||
'/profile/preferences': 'Mangalord | Preferences',
|
||||
'/admin': 'Mangalord | Admin',
|
||||
'/admin/mangas': 'Mangalord | Admin · Mangas',
|
||||
'/admin/users': 'Mangalord | Admin · Users',
|
||||
'/admin/system': 'Mangalord | Admin · System'
|
||||
};
|
||||
|
||||
const layoutTitle = $derived(STATIC_TITLES[$page.route?.id ?? ''] ?? 'Mangalord');
|
||||
|
||||
// Seed authConfig from the universal layout load. $effect keeps
|
||||
// the store in sync if `data` is replaced by a subsequent layout
|
||||
// load (client-side nav). The first run also covers initial
|
||||
// hydration so the navbar's register link reflects the real
|
||||
// server flag without a separate fetch.
|
||||
$effect(() => {
|
||||
authConfig.seed(data.authConfig);
|
||||
});
|
||||
|
||||
onMount(() => {
|
||||
theme.init();
|
||||
preferences.init();
|
||||
if (!session.loaded) session.refresh();
|
||||
if (!authConfig.loaded) authConfig.load();
|
||||
|
||||
// Publish the header's measured height as a CSS custom
|
||||
// property so sticky descendants (e.g. the reader nav) can
|
||||
@@ -70,6 +105,10 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>{layoutTitle}</title>
|
||||
</svelte:head>
|
||||
|
||||
<header bind:this={headerEl}>
|
||||
<nav aria-label="primary">
|
||||
<a class="brand" href="/">Mangalord</a>
|
||||
|
||||
41
frontend/src/routes/+layout.ts
Normal file
41
frontend/src/routes/+layout.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
// Universal root load. Surfaces /auth/config to every page so the
|
||||
// navbar + layout can render without an extra round-trip, and — when
|
||||
// the backend reports PRIVATE_MODE=true — bounces anonymous visitors
|
||||
// to /login before any page-specific load fires. The backend
|
||||
// middleware is still the source of truth for the gate; this just
|
||||
// matches the UX so users don't see a page full of failed fetches.
|
||||
import type { LayoutLoad } from './$types';
|
||||
import { redirect } from '@sveltejs/kit';
|
||||
import { getAuthConfig, me, type AuthConfig } from '$lib/api/auth';
|
||||
|
||||
// Paths reachable anonymously even when private_mode is on. /login is
|
||||
// the entry point of the auth flow; everything else (including
|
||||
// /register, which is force-blocked in private mode) bounces.
|
||||
const PRIVATE_MODE_BYPASS = new Set(['/login']);
|
||||
|
||||
const PUBLIC_DEFAULTS: AuthConfig = {
|
||||
self_register_enabled: true,
|
||||
private_mode: false
|
||||
};
|
||||
|
||||
export const load: LayoutLoad = async ({ url }) => {
|
||||
let authConfig: AuthConfig = PUBLIC_DEFAULTS;
|
||||
try {
|
||||
authConfig = await getAuthConfig();
|
||||
} catch {
|
||||
// Fail-soft: keep the optimistic public-mode defaults so a
|
||||
// backend hiccup doesn't lock anyone out of the login page.
|
||||
// No private data can leak through here — the backend
|
||||
// middleware is still authoritative for the gate.
|
||||
}
|
||||
|
||||
if (authConfig.private_mode && !PRIVATE_MODE_BYPASS.has(url.pathname)) {
|
||||
const user = await me().catch(() => null);
|
||||
if (!user) {
|
||||
const next = url.pathname + url.search;
|
||||
redirect(302, `/login?next=${encodeURIComponent(next)}`);
|
||||
}
|
||||
}
|
||||
|
||||
return { authConfig };
|
||||
};
|
||||
@@ -13,10 +13,13 @@
|
||||
import { listTags, type Tag } from '$lib/api/tags';
|
||||
import Chip from '$lib/components/Chip.svelte';
|
||||
import MangaCard from '$lib/components/MangaCard.svelte';
|
||||
import Pager from '$lib/components/Pager.svelte';
|
||||
import Search from '@lucide/svelte/icons/search';
|
||||
import SlidersHorizontal from '@lucide/svelte/icons/sliders-horizontal';
|
||||
import Plus from '@lucide/svelte/icons/plus';
|
||||
|
||||
const PAGE_SIZE = 50;
|
||||
|
||||
let mangas: MangaCardData[] = $state([]);
|
||||
let search = $state('');
|
||||
let sort: MangaSort = $state('recent');
|
||||
@@ -36,11 +39,21 @@
|
||||
let total: number | null = $state(null);
|
||||
let loading = $state(true);
|
||||
let error: string | null = $state(null);
|
||||
let currentPage = $state(1);
|
||||
|
||||
const activeFilterCount = $derived(
|
||||
(statusFilter ? 1 : 0) + selectedGenres.length + selectedTags.length
|
||||
);
|
||||
|
||||
const totalPages = $derived(
|
||||
total != null && total > 0 ? Math.ceil(total / PAGE_SIZE) : 1
|
||||
);
|
||||
|
||||
// 1-indexed range like "51–100 of 237", clamped to the actual loaded set
|
||||
// in case the last page is short.
|
||||
const rangeStart = $derived(mangas.length === 0 ? 0 : (currentPage - 1) * PAGE_SIZE + 1);
|
||||
const rangeEnd = $derived((currentPage - 1) * PAGE_SIZE + mangas.length);
|
||||
|
||||
async function load() {
|
||||
loading = true;
|
||||
error = null;
|
||||
@@ -50,7 +63,9 @@
|
||||
status: statusFilter || undefined,
|
||||
genreIds: selectedGenres.map((g) => g.id),
|
||||
tagIds: selectedTags.map((t) => t.id),
|
||||
sort
|
||||
sort,
|
||||
limit: PAGE_SIZE,
|
||||
offset: (currentPage - 1) * PAGE_SIZE
|
||||
});
|
||||
mangas = result.items;
|
||||
total = result.page.total;
|
||||
@@ -71,11 +86,29 @@
|
||||
params.set('genres', selectedGenres.map((g) => g.id).join(','));
|
||||
if (selectedTags.length)
|
||||
params.set('tags', selectedTags.map((t) => t.id).join(','));
|
||||
if (currentPage > 1) params.set('page', String(currentPage));
|
||||
const qs = params.toString();
|
||||
const url = qs ? `/?${qs}` : '/';
|
||||
goto(url, { replaceState: true, keepFocus: true, noScroll: true });
|
||||
}
|
||||
|
||||
// Filter / search / sort changes invalidate the current page — drop back
|
||||
// to page 1 so the user isn't stranded on an out-of-range page when the
|
||||
// result set shrinks. Direct page navigation calls `goToPage()` instead.
|
||||
function resetAndReload() {
|
||||
currentPage = 1;
|
||||
syncUrl();
|
||||
load();
|
||||
}
|
||||
|
||||
function goToPage(p: number) {
|
||||
if (p === currentPage) return;
|
||||
currentPage = p;
|
||||
syncUrl();
|
||||
load();
|
||||
if (browser) window.scrollTo({ top: 0, behavior: 'smooth' });
|
||||
}
|
||||
|
||||
async function hydrateFromUrl() {
|
||||
// Parse the query and resolve the supplied ids back to full Tag /
|
||||
// Genre objects so the chip rows render real labels.
|
||||
@@ -100,6 +133,8 @@
|
||||
const tags = await listTags({ limit: 50 });
|
||||
selectedTags = tags.filter((t) => tagIds.includes(t.id));
|
||||
}
|
||||
const pageParam = Number(url.searchParams.get('page') ?? '1');
|
||||
currentPage = Number.isFinite(pageParam) && pageParam >= 1 ? Math.floor(pageParam) : 1;
|
||||
// Open the filters panel if anything is active so the user can see why.
|
||||
if (statusFilter || selectedGenres.length || selectedTags.length) {
|
||||
filtersOpen = true;
|
||||
@@ -108,32 +143,27 @@
|
||||
|
||||
async function onSubmit(e: SubmitEvent) {
|
||||
e.preventDefault();
|
||||
syncUrl();
|
||||
await load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
function onSortChange() {
|
||||
syncUrl();
|
||||
load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
function onStatusChange() {
|
||||
syncUrl();
|
||||
load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
function toggleGenre(g: Genre) {
|
||||
selectedGenres = selectedGenres.some((x) => x.id === g.id)
|
||||
? selectedGenres.filter((x) => x.id !== g.id)
|
||||
: [...selectedGenres, g];
|
||||
syncUrl();
|
||||
load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
function removeTag(t: Tag) {
|
||||
selectedTags = selectedTags.filter((x) => x.id !== t.id);
|
||||
syncUrl();
|
||||
load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
function pickTag(t: Tag) {
|
||||
@@ -143,8 +173,7 @@
|
||||
tagDraft = '';
|
||||
tagSuggestions = [];
|
||||
tagSuggestHighlight = -1;
|
||||
syncUrl();
|
||||
load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
function onTagDraftInput() {
|
||||
@@ -192,8 +221,7 @@
|
||||
statusFilter = '';
|
||||
selectedGenres = [];
|
||||
selectedTags = [];
|
||||
syncUrl();
|
||||
load();
|
||||
resetAndReload();
|
||||
}
|
||||
|
||||
onMount(async () => {
|
||||
@@ -383,7 +411,7 @@
|
||||
{:else}
|
||||
{#if total !== null}
|
||||
<p class="count" data-testid="manga-total">
|
||||
Showing {mangas.length} of {total}
|
||||
Showing {rangeStart}–{rangeEnd} of {total}
|
||||
</p>
|
||||
{/if}
|
||||
<ul class="manga-grid" data-testid="manga-list">
|
||||
@@ -391,6 +419,12 @@
|
||||
<MangaCard manga={m} authors={m.authors} genres={m.genres} />
|
||||
{/each}
|
||||
</ul>
|
||||
<Pager
|
||||
page={currentPage}
|
||||
{totalPages}
|
||||
onChange={goToPage}
|
||||
testid="manga-pager"
|
||||
/>
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
|
||||
@@ -71,16 +71,19 @@
|
||||
>
|
||||
<input
|
||||
type="search"
|
||||
placeholder="search by title"
|
||||
placeholder="Search by title"
|
||||
bind:value={search}
|
||||
data-testid="admin-mangas-search"
|
||||
/>
|
||||
<select bind:value={syncFilter} aria-label="sync state">
|
||||
<option value="">all states</option>
|
||||
<option value="in_progress">in progress</option>
|
||||
<option value="dropped">dropped</option>
|
||||
<option value="synced">synced</option>
|
||||
</select>
|
||||
<label class="sync-label">
|
||||
<span>Sync state</span>
|
||||
<select bind:value={syncFilter} aria-label="sync state">
|
||||
<option value="">All</option>
|
||||
<option value="in_progress">In progress</option>
|
||||
<option value="dropped">Dropped</option>
|
||||
<option value="synced">Synced</option>
|
||||
</select>
|
||||
</label>
|
||||
<button type="submit">Search</button>
|
||||
</form>
|
||||
|
||||
@@ -173,17 +176,28 @@
|
||||
}
|
||||
form {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
gap: var(--space-2);
|
||||
margin-bottom: var(--space-3);
|
||||
}
|
||||
input[type='search'] {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
max-width: 24rem;
|
||||
padding: var(--space-2) var(--space-3);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-md);
|
||||
background: var(--surface);
|
||||
color: var(--text);
|
||||
}
|
||||
.sync-label {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: var(--space-2);
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
select {
|
||||
padding: var(--space-2) var(--space-3);
|
||||
border-radius: var(--radius-md);
|
||||
|
||||
@@ -1,15 +1,33 @@
|
||||
<script lang="ts">
|
||||
import MangaCard from '$lib/components/MangaCard.svelte';
|
||||
import Pager from '$lib/components/Pager.svelte';
|
||||
import ArrowLeft from '@lucide/svelte/icons/arrow-left';
|
||||
import { goto } from '$app/navigation';
|
||||
import { page } from '$app/stores';
|
||||
|
||||
let { data } = $props();
|
||||
const author = $derived(data.author);
|
||||
const mangas = $derived(data.mangas);
|
||||
const total = $derived(data.total);
|
||||
const currentPage = $derived(data.currentPage);
|
||||
const pageSize = $derived(data.pageSize);
|
||||
const totalPages = $derived(
|
||||
total != null && total > 0 ? Math.ceil(total / pageSize) : 1
|
||||
);
|
||||
const rangeStart = $derived(mangas.length === 0 ? 0 : (currentPage - 1) * pageSize + 1);
|
||||
const rangeEnd = $derived((currentPage - 1) * pageSize + mangas.length);
|
||||
|
||||
function goToPage(p: number) {
|
||||
if (p === currentPage) return;
|
||||
const url = new URL($page.url);
|
||||
if (p === 1) url.searchParams.delete('page');
|
||||
else url.searchParams.set('page', String(p));
|
||||
goto(url.pathname + url.search, { noScroll: false });
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>{author.name} — Mangalord</title>
|
||||
<title>Mangalord | {author.name}</title>
|
||||
</svelte:head>
|
||||
|
||||
<nav class="back">
|
||||
@@ -34,7 +52,7 @@
|
||||
{:else}
|
||||
{#if total != null}
|
||||
<p class="meta" data-testid="author-shown-of-total">
|
||||
Showing {mangas.length} of {total}
|
||||
Showing {rangeStart}–{rangeEnd} of {total}
|
||||
</p>
|
||||
{/if}
|
||||
<ul class="manga-grid" data-testid="author-manga-list">
|
||||
@@ -42,6 +60,12 @@
|
||||
<MangaCard manga={m} testid={`author-manga-${m.id}`} />
|
||||
{/each}
|
||||
</ul>
|
||||
<Pager
|
||||
page={currentPage}
|
||||
{totalPages}
|
||||
onChange={goToPage}
|
||||
testid="author-pager"
|
||||
/>
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
|
||||
@@ -5,13 +5,27 @@ import type { PageLoad } from './$types';
|
||||
|
||||
export const ssr = false;
|
||||
|
||||
export const load: PageLoad = async ({ params }) => {
|
||||
const PAGE_SIZE = 50;
|
||||
|
||||
export const load: PageLoad = async ({ params, url }) => {
|
||||
const pageParam = Number(url.searchParams.get('page') ?? '1');
|
||||
const currentPage =
|
||||
Number.isFinite(pageParam) && pageParam >= 1 ? Math.floor(pageParam) : 1;
|
||||
try {
|
||||
const [author, mangas] = await Promise.all([
|
||||
getAuthor(params.id),
|
||||
listAuthorMangas(params.id, { limit: 50 })
|
||||
listAuthorMangas(params.id, {
|
||||
limit: PAGE_SIZE,
|
||||
offset: (currentPage - 1) * PAGE_SIZE
|
||||
})
|
||||
]);
|
||||
return { author, mangas: mangas.items, total: mangas.page.total };
|
||||
return {
|
||||
author,
|
||||
mangas: mangas.items,
|
||||
total: mangas.page.total,
|
||||
currentPage,
|
||||
pageSize: PAGE_SIZE
|
||||
};
|
||||
} catch (e) {
|
||||
// 404 surfaces as a real SvelteKit error so the framework shell
|
||||
// renders the standard not-found page instead of the route's
|
||||
|
||||
@@ -7,10 +7,6 @@
|
||||
const error = $derived(data.error);
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Bookmarks — Mangalord</title>
|
||||
</svelte:head>
|
||||
|
||||
<h1>Bookmarks</h1>
|
||||
|
||||
{#if error}
|
||||
|
||||
@@ -5,10 +5,6 @@
|
||||
const collections = $derived(data.collections);
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Collections — Mangalord</title>
|
||||
</svelte:head>
|
||||
|
||||
<h1>Collections</h1>
|
||||
|
||||
{#if !data.authenticated}
|
||||
|
||||
@@ -75,7 +75,7 @@
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>{collection.name} — Mangalord</title>
|
||||
<title>Mangalord | {collection.name}</title>
|
||||
</svelte:head>
|
||||
|
||||
<nav class="back">
|
||||
|
||||
113
frontend/src/routes/layout.test.ts
Normal file
113
frontend/src/routes/layout.test.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// Mock the API client *before* importing the load function so the
|
||||
// module under test picks up the mock when it resolves its imports.
|
||||
vi.mock('$lib/api/auth', () => ({
|
||||
getAuthConfig: vi.fn(),
|
||||
me: vi.fn()
|
||||
}));
|
||||
|
||||
import { load } from './+layout';
|
||||
import { getAuthConfig, me, type AuthConfig } from '$lib/api/auth';
|
||||
|
||||
type MinimalLoadEvent = { url: { pathname: string; search: string } };
|
||||
|
||||
function event(pathname: string, search = ''): MinimalLoadEvent {
|
||||
return { url: { pathname, search } };
|
||||
}
|
||||
|
||||
// `LayoutLoad`'s declared return type is `void | …`. Our `load`
|
||||
// always returns `{ authConfig }`, but TypeScript can't narrow on
|
||||
// that at the call site. Wrap to remove the `void` arm so the
|
||||
// assertions stay terse.
|
||||
async function callLoad(ev: MinimalLoadEvent): Promise<{ authConfig: AuthConfig }> {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const result = await load(ev as any);
|
||||
return result as { authConfig: AuthConfig };
|
||||
}
|
||||
|
||||
const PUBLIC_CFG = { self_register_enabled: true, private_mode: false };
|
||||
const PRIVATE_CFG = { self_register_enabled: false, private_mode: true };
|
||||
|
||||
const aliceUser = {
|
||||
id: 'u1',
|
||||
username: 'alice',
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
is_admin: false
|
||||
};
|
||||
|
||||
describe('root +layout load', () => {
|
||||
beforeEach(() => {
|
||||
vi.mocked(getAuthConfig).mockReset();
|
||||
vi.mocked(me).mockReset();
|
||||
});
|
||||
|
||||
it('public mode: returns authConfig data, never calls me()', async () => {
|
||||
vi.mocked(getAuthConfig).mockResolvedValue(PUBLIC_CFG);
|
||||
const data = await callLoad(event('/'));
|
||||
expect(data.authConfig).toEqual(PUBLIC_CFG);
|
||||
expect(me).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('private mode + anonymous on `/`: throws redirect(302) to /login with next=', async () => {
|
||||
vi.mocked(getAuthConfig).mockResolvedValue(PRIVATE_CFG);
|
||||
vi.mocked(me).mockResolvedValue(null);
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
await expect(load(event('/') as any)).rejects.toMatchObject({
|
||||
status: 302,
|
||||
location: '/login?next=%2F'
|
||||
});
|
||||
});
|
||||
|
||||
it('private mode + anonymous on `/login`: passes through without redirect', async () => {
|
||||
vi.mocked(getAuthConfig).mockResolvedValue(PRIVATE_CFG);
|
||||
const data = await callLoad(event('/login'));
|
||||
expect(data.authConfig.private_mode).toBe(true);
|
||||
// me() must not run on the login page itself, otherwise anonymous
|
||||
// visits make an extra round-trip every page load.
|
||||
expect(me).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('private mode + logged-in user: no redirect, returns authConfig', async () => {
|
||||
vi.mocked(getAuthConfig).mockResolvedValue(PRIVATE_CFG);
|
||||
vi.mocked(me).mockResolvedValue(aliceUser);
|
||||
const data = await callLoad(event('/'));
|
||||
expect(data.authConfig).toEqual(PRIVATE_CFG);
|
||||
});
|
||||
|
||||
it('private mode + anonymous: preserves pathname AND search in next=', async () => {
|
||||
vi.mocked(getAuthConfig).mockResolvedValue(PRIVATE_CFG);
|
||||
vi.mocked(me).mockResolvedValue(null);
|
||||
await expect(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
load(event('/manga/abc', '?page=3') as any)
|
||||
).rejects.toMatchObject({
|
||||
status: 302,
|
||||
location: '/login?next=%2Fmanga%2Fabc%3Fpage%3D3'
|
||||
});
|
||||
});
|
||||
|
||||
it('private mode + anonymous on /register: redirects to /login (register is never reachable in private mode)', async () => {
|
||||
vi.mocked(getAuthConfig).mockResolvedValue(PRIVATE_CFG);
|
||||
vi.mocked(me).mockResolvedValue(null);
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
await expect(load(event('/register') as any)).rejects.toMatchObject({
|
||||
status: 302,
|
||||
location: '/login?next=%2Fregister'
|
||||
});
|
||||
});
|
||||
|
||||
it('getAuthConfig failure: falls back to public-mode defaults, no redirect', async () => {
|
||||
// The backend middleware is the source of truth for the gate;
|
||||
// if the config probe blips, fail soft so a brief outage doesn't
|
||||
// lock everyone out of even the login page. No private data
|
||||
// can leak because the backend still 401s every request.
|
||||
vi.mocked(getAuthConfig).mockRejectedValue(new Error('network'));
|
||||
const data = await callLoad(event('/'));
|
||||
expect(data.authConfig).toEqual({
|
||||
self_register_enabled: true,
|
||||
private_mode: false
|
||||
});
|
||||
expect(me).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -1,13 +1,15 @@
|
||||
<script lang="ts">
|
||||
import { fileUrl } from '$lib/api/client';
|
||||
import { fileUrl, ApiError } from '$lib/api/client';
|
||||
import { createBookmark, deleteBookmark, type Bookmark } from '$lib/api/bookmarks';
|
||||
import {
|
||||
attachTag,
|
||||
detachTag,
|
||||
type AuthorRef,
|
||||
type GenreRef,
|
||||
type MangaDetail,
|
||||
type TagRef
|
||||
} from '$lib/api/mangas';
|
||||
import { resyncManga } from '$lib/api/admin';
|
||||
import { listTags, type Tag } from '$lib/api/tags';
|
||||
import { session } from '$lib/session.svelte';
|
||||
import Chip from '$lib/components/Chip.svelte';
|
||||
@@ -16,9 +18,15 @@
|
||||
import FolderPlus from '@lucide/svelte/icons/folder-plus';
|
||||
import Pencil from '@lucide/svelte/icons/pencil';
|
||||
import UploadCloud from '@lucide/svelte/icons/upload-cloud';
|
||||
import RefreshCw from '@lucide/svelte/icons/refresh-cw';
|
||||
|
||||
let { data } = $props();
|
||||
const manga = $derived(data.manga);
|
||||
// `manga` is locally overridable so a successful force resync can
|
||||
// swap in the refreshed detail (new cover URL, refreshed status,
|
||||
// etc.) without a router reload. Falls back to the server-loaded
|
||||
// data otherwise.
|
||||
let mangaOverride = $state<MangaDetail | null>(null);
|
||||
const manga = $derived<MangaDetail>(mangaOverride ?? data.manga);
|
||||
const chapters = $derived(data.chapters);
|
||||
const readProgress = $derived(data.readProgress);
|
||||
/** Chapter row from the local chapters list when present (so we
|
||||
@@ -171,10 +179,35 @@
|
||||
const statusLabel = $derived(manga.status === 'completed' ? 'Completed' : 'Ongoing');
|
||||
|
||||
let collectionModalOpen = $state(false);
|
||||
|
||||
// ---- Admin force resync ----
|
||||
let resyncBusy = $state(false);
|
||||
let resyncMessage = $state<{ kind: 'ok' | 'err'; text: string } | null>(null);
|
||||
async function forceResync() {
|
||||
if (!session.user?.is_admin || resyncBusy) return;
|
||||
resyncBusy = true;
|
||||
resyncMessage = null;
|
||||
try {
|
||||
const r = await resyncManga(manga.id);
|
||||
mangaOverride = r.manga;
|
||||
const coverNote = r.cover_fetched
|
||||
? ' Cover re-downloaded.'
|
||||
: ' Cover unchanged.';
|
||||
resyncMessage = {
|
||||
kind: 'ok',
|
||||
text: `Metadata ${r.metadata_status}.${coverNote}`
|
||||
};
|
||||
} catch (e) {
|
||||
const msg = e instanceof ApiError ? e.message : (e as Error).message;
|
||||
resyncMessage = { kind: 'err', text: msg };
|
||||
} finally {
|
||||
resyncBusy = false;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>{manga.title} — Mangalord</title>
|
||||
<title>Mangalord | {manga.title}</title>
|
||||
</svelte:head>
|
||||
|
||||
<article>
|
||||
@@ -344,7 +377,34 @@
|
||||
<UploadCloud size={16} aria-hidden="true" />
|
||||
<span>Upload chapter</span>
|
||||
</a>
|
||||
{#if session.user.is_admin}
|
||||
<button
|
||||
type="button"
|
||||
class="action"
|
||||
onclick={forceResync}
|
||||
disabled={resyncBusy}
|
||||
title="Refetch metadata + cover from the crawler source"
|
||||
data-testid="force-resync-manga"
|
||||
>
|
||||
<RefreshCw
|
||||
size={16}
|
||||
aria-hidden="true"
|
||||
class={resyncBusy ? 'spin' : ''}
|
||||
/>
|
||||
<span>{resyncBusy ? 'Resyncing…' : 'Force resync'}</span>
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
{#if resyncMessage}
|
||||
<p
|
||||
class="resync-msg"
|
||||
class:err={resyncMessage.kind === 'err'}
|
||||
role="status"
|
||||
data-testid="force-resync-message"
|
||||
>
|
||||
{resyncMessage.text}
|
||||
</p>
|
||||
{/if}
|
||||
{:else}
|
||||
<a class="action" href="/login" data-testid="bookmark-signin">
|
||||
Sign in to bookmark or collect
|
||||
@@ -586,6 +646,29 @@
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.resync-msg {
|
||||
margin-top: var(--space-2);
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.resync-msg.err {
|
||||
color: var(--danger);
|
||||
}
|
||||
|
||||
:global(.spin) {
|
||||
animation: spin 0.9s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
from {
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
to {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
.continue {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
<script lang="ts">
|
||||
import { onMount, onDestroy } from 'svelte';
|
||||
import { goto } from '$app/navigation';
|
||||
import { fileUrl } from '$lib/api/client';
|
||||
import { goto, invalidateAll } from '$app/navigation';
|
||||
import { fileUrl, ApiError } from '$lib/api/client';
|
||||
import { GAP_PX, type ReaderPageGap } from '$lib/api/preferences';
|
||||
import { preferences } from '$lib/preferences.svelte';
|
||||
import { updateReadProgress } from '$lib/api/read_progress';
|
||||
import { resyncChapter } from '$lib/api/admin';
|
||||
import { readerFullscreen } from '$lib/reader-fullscreen.svelte';
|
||||
import { session } from '$lib/session.svelte';
|
||||
import ChevronLeft from '@lucide/svelte/icons/chevron-left';
|
||||
@@ -15,6 +16,7 @@
|
||||
import ScrollText from '@lucide/svelte/icons/scroll-text';
|
||||
import Maximize2 from '@lucide/svelte/icons/maximize-2';
|
||||
import Minimize2 from '@lucide/svelte/icons/minimize-2';
|
||||
import RefreshCw from '@lucide/svelte/icons/refresh-cw';
|
||||
|
||||
let { data } = $props();
|
||||
const manga = $derived(data.manga);
|
||||
@@ -27,8 +29,8 @@
|
||||
|
||||
const pageTitle = $derived(
|
||||
chapter.title
|
||||
? `${manga.title} — Ch. ${chapter.number}: ${chapter.title}`
|
||||
: `${manga.title} — Ch. ${chapter.number}`
|
||||
? `Mangalord | ${manga.title} · Ch. ${chapter.number}: ${chapter.title}`
|
||||
: `Mangalord | ${manga.title} · Ch. ${chapter.number}`
|
||||
);
|
||||
|
||||
// Prev/next chapter computed from the chapter list. listChapters
|
||||
@@ -256,6 +258,36 @@
|
||||
if (typeof window !== 'undefined') window.removeEventListener('keydown', onKeydown);
|
||||
});
|
||||
|
||||
// ---- Admin force resync (current chapter) ----
|
||||
let resyncBusy = $state(false);
|
||||
let resyncMessage = $state<{ kind: 'ok' | 'err'; text: string } | null>(null);
|
||||
async function forceResync() {
|
||||
if (!session.user?.is_admin || resyncBusy) return;
|
||||
resyncBusy = true;
|
||||
resyncMessage = null;
|
||||
try {
|
||||
const r = await resyncChapter(chapter.id);
|
||||
if (r.outcome === 'fetched') {
|
||||
resyncMessage = {
|
||||
kind: 'ok',
|
||||
text: `Refetched ${r.pages} page${r.pages === 1 ? '' : 's'}. Reloading…`
|
||||
};
|
||||
// Re-run all loaders for this route so the reader picks
|
||||
// up the freshly-downloaded pages. The page.ts loader
|
||||
// doesn't `depends()` on anything explicitly, so
|
||||
// invalidateAll is the right brush here.
|
||||
await invalidateAll();
|
||||
} else {
|
||||
resyncMessage = { kind: 'ok', text: 'No new pages — source had nothing fresh.' };
|
||||
}
|
||||
} catch (e) {
|
||||
const msg = e instanceof ApiError ? e.message : (e as Error).message;
|
||||
resyncMessage = { kind: 'err', text: msg };
|
||||
} finally {
|
||||
resyncBusy = false;
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Reading progress tracking ----
|
||||
//
|
||||
// High-water mark seeded from the server: progress only ever moves
|
||||
@@ -481,6 +513,23 @@
|
||||
{/if}
|
||||
</span>
|
||||
|
||||
{#if session.user?.is_admin}
|
||||
<button
|
||||
type="button"
|
||||
class="reader-resync"
|
||||
onclick={forceResync}
|
||||
disabled={resyncBusy}
|
||||
title={resyncMessage?.kind === 'err'
|
||||
? resyncMessage.text
|
||||
: 'Force refetch this chapter from the crawler source'}
|
||||
aria-label="Force resync chapter"
|
||||
data-testid="force-resync-chapter"
|
||||
>
|
||||
<RefreshCw size={16} aria-hidden="true" class={resyncBusy ? 'spin' : ''} />
|
||||
<span>{resyncBusy ? 'Resyncing…' : 'Force resync'}</span>
|
||||
</button>
|
||||
{/if}
|
||||
|
||||
<button
|
||||
type="button"
|
||||
class="fullscreen-toggle"
|
||||
@@ -494,6 +543,17 @@
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
{#if resyncMessage}
|
||||
<p
|
||||
class="resync-toast"
|
||||
class:err={resyncMessage.kind === 'err'}
|
||||
role="status"
|
||||
data-testid="force-resync-message"
|
||||
>
|
||||
{resyncMessage.text}
|
||||
</p>
|
||||
{/if}
|
||||
|
||||
<!--
|
||||
Floating exit affordance — only rendered while focus mode is on.
|
||||
Lives in the top-right corner with a low resting opacity so it
|
||||
@@ -911,7 +971,8 @@
|
||||
}
|
||||
|
||||
/* ===== Focus-mode controls ===== */
|
||||
.fullscreen-toggle {
|
||||
.fullscreen-toggle,
|
||||
.reader-resync {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: var(--space-1);
|
||||
@@ -925,12 +986,52 @@
|
||||
font-size: var(--font-xs);
|
||||
}
|
||||
|
||||
.fullscreen-toggle:hover {
|
||||
.fullscreen-toggle:hover,
|
||||
.reader-resync:hover:not(:disabled) {
|
||||
background: var(--surface-elevated);
|
||||
color: var(--text);
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.reader-resync:disabled {
|
||||
opacity: 0.7;
|
||||
cursor: progress;
|
||||
}
|
||||
|
||||
.resync-toast {
|
||||
position: fixed;
|
||||
top: calc(var(--app-header-h) + var(--reader-nav-h, 48px) + var(--space-2));
|
||||
right: var(--space-3);
|
||||
z-index: 11;
|
||||
margin: 0;
|
||||
padding: var(--space-2) var(--space-3);
|
||||
max-width: min(420px, calc(100vw - 2 * var(--space-3)));
|
||||
background: var(--surface);
|
||||
color: var(--text);
|
||||
border: 1px solid var(--primary);
|
||||
border-radius: var(--radius-md);
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.12);
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.resync-toast.err {
|
||||
border-color: var(--danger);
|
||||
color: var(--danger);
|
||||
}
|
||||
|
||||
:global(.spin) {
|
||||
animation: spin 0.9s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
from {
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
to {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
/* Small floating exit affordance — corner-pinned, low resting
|
||||
opacity so it doesn't sit on the chapter image too aggressively
|
||||
but is still findable without hover. */
|
||||
|
||||
@@ -135,7 +135,7 @@
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Edit {manga.title} — Mangalord</title>
|
||||
<title>Mangalord | Edit · {manga.title}</title>
|
||||
</svelte:head>
|
||||
|
||||
<h1>Edit manga</h1>
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Upload chapter — {manga.title} — Mangalord</title>
|
||||
<title>Mangalord | Upload chapter · {manga.title}</title>
|
||||
</svelte:head>
|
||||
|
||||
<nav class="back">
|
||||
|
||||
@@ -35,10 +35,6 @@
|
||||
);
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Profile — Mangalord</title>
|
||||
</svelte:head>
|
||||
|
||||
<header class="profile-header">
|
||||
<h1>Profile</h1>
|
||||
{#if !session.loaded}
|
||||
|
||||
@@ -184,10 +184,6 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Upload — Mangalord</title>
|
||||
</svelte:head>
|
||||
|
||||
<h1>Create manga</h1>
|
||||
|
||||
{#if !session.loaded}
|
||||
|
||||
@@ -21,6 +21,12 @@ export default defineConfig(({ mode }) => {
|
||||
environment: 'jsdom',
|
||||
include: ['src/**/*.test.ts'],
|
||||
globals: false
|
||||
},
|
||||
resolve: {
|
||||
// Use Svelte's browser entry under vitest so component tests can
|
||||
// mount with @testing-library/svelte. The default (server entry)
|
||||
// throws lifecycle_function_unavailable on mount().
|
||||
conditions: mode === 'test' ? ['browser'] : []
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
40
tor/entrypoint.sh
Executable file
40
tor/entrypoint.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/sh
|
||||
# Mangalord wrapper around dockurr/tor's tor binary.
|
||||
#
|
||||
# We bypass the image's stock entrypoint for two reasons:
|
||||
# 1. It generates a `ControlPort 9051` line that binds to localhost
|
||||
# only (tor's default), but our backend lives in a separate
|
||||
# container and needs to reach 0.0.0.0:9051.
|
||||
# 2. It then *skips* writing HashedControlPassword whenever the
|
||||
# user's torrc declares a ControlPort, so we can't both bind to
|
||||
# 0.0.0.0 and benefit from its auto-hashing — it's one or the
|
||||
# other. Doing the hashing ourselves is simpler than threading
|
||||
# around its logic.
|
||||
#
|
||||
# This wrapper hashes $PASSWORD with `tor --hash-password`, appends a
|
||||
# `HashedControlPassword` line to a writable copy of /etc/tor/torrc,
|
||||
# then execs tor. Container runs as root (image default); tor binds
|
||||
# 9050/9051 which don't require root and is fine inside a single-
|
||||
# purpose container.
|
||||
|
||||
set -eu
|
||||
|
||||
if [ -z "${PASSWORD:-}" ]; then
|
||||
echo "ERROR: PASSWORD env must be set (the plain string the backend will" >&2
|
||||
echo " send as CRAWLER_TOR_CONTROL_PASSWORD)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# `tor --hash-password` prints the hash on the last line of stdout
|
||||
# (preceded by initialization noise).
|
||||
HASH=$(tor --hash-password "$PASSWORD" 2>/dev/null | tail -n1)
|
||||
if [ -z "$HASH" ]; then
|
||||
echo "ERROR: 'tor --hash-password' produced no output" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# /etc/tor/torrc is bind-mounted read-only, so copy + append.
|
||||
cp /etc/tor/torrc /tmp/torrc
|
||||
printf '\n# Injected by mangalord-entrypoint.sh from $PASSWORD env.\nHashedControlPassword %s\n' "$HASH" >> /tmp/torrc
|
||||
|
||||
exec tor -f /tmp/torrc
|
||||
38
tor/torrc
Normal file
38
tor/torrc
Normal file
@@ -0,0 +1,38 @@
|
||||
# torrc for the Mangalord crawler.
|
||||
#
|
||||
# Mounted into the dockurr/tor container at /etc/tor/torrc. The
|
||||
# crawler talks to this daemon over the internal compose network only:
|
||||
# `expose:` on the tor service surfaces 9050/9051 to sibling
|
||||
# containers, never to the host.
|
||||
|
||||
# SOCKS5 proxy that reqwest and Chromium use. IsolateDestAddr +
|
||||
# IsolateDestPort means each new (destination IP, port) draws a fresh
|
||||
# circuit — so a SIGNAL NEWNYM picks up promptly on the next
|
||||
# navigation instead of having to wait for an existing dirty circuit
|
||||
# to age out.
|
||||
SOCKSPort 0.0.0.0:9050 IsolateDestAddr IsolateDestPort
|
||||
|
||||
# Control port for SIGNAL NEWNYM. We rely on the dockurr/tor
|
||||
# entrypoint to inject `HashedControlPassword <hash>` from its
|
||||
# PASSWORD env var (see docker-compose.yml `tor.environment.PASSWORD`)
|
||||
# via a higher-priority --defaults-torrc. We just need to declare the
|
||||
# port itself here.
|
||||
ControlPort 0.0.0.0:9051
|
||||
|
||||
# Keep circuits dirty for a while so a single chapter (which serial-
|
||||
# fetches all its images through the same SOCKS endpoint) finishes on
|
||||
# one circuit rather than mid-circuit-rotating in a way that looks like
|
||||
# anti-bot evasion to the target. NEWNYM still forces a fresh circuit
|
||||
# immediately when we want one — this is just the idle-rotation knob.
|
||||
MaxCircuitDirtiness 600
|
||||
|
||||
# Drop privileges to the image's `tor` user after binding ports.
|
||||
# Required because /var/lib/tor (the image's DataDirectory volume)
|
||||
# is owned by tor:tor and tor refuses to use a data dir it doesn't
|
||||
# own. Our entrypoint runs as root only so it can call
|
||||
# `tor --hash-password` and write /tmp/torrc.
|
||||
User tor
|
||||
|
||||
# Data + logs.
|
||||
DataDirectory /var/lib/tor
|
||||
Log notice stdout
|
||||
Reference in New Issue
Block a user