Compare commits
40 Commits
feat/incre
...
feat/crawl
| Author | SHA1 | Date | |
|---|---|---|---|
| 2f9037e210 | |||
| e851355f28 | |||
| 2a0cc24c07 | |||
| a615b0aee7 | |||
|
|
0b5f5d1692 | ||
|
|
a2826d6467 | ||
|
|
1eebb90e25 | ||
|
|
030b27754b | ||
|
|
2f47faa11c | ||
|
|
6dd21451a8 | ||
|
|
f6728dc71a | ||
|
|
aa2159ca06 | ||
|
|
b434c9b68d | ||
|
|
cc4ec76d17 | ||
|
|
bf7c9b5c2a | ||
|
|
0b2018ceca | ||
|
|
ab8b7acc34 | ||
|
|
9925f54695 | ||
|
|
eaa5afda50 | ||
|
|
5c04b0532b | ||
|
|
655ea42731 | ||
|
|
70e8a7895c | ||
|
|
8e0b638e3f | ||
|
|
e2bd1462ba | ||
|
|
9f56f283d4 | ||
|
|
33f7e19077 | ||
|
|
c6bb9160e3 | ||
|
|
50763addcf | ||
|
|
766c6eebac | ||
|
|
c686d6eb51 | ||
|
|
dea9b1aaa8 | ||
|
|
f57ca8e45c | ||
|
|
8d34132883 | ||
|
|
c5c1179e9d | ||
|
|
c320eda7cd | ||
|
|
bd9a6bd257 | ||
|
|
ebc1966103 | ||
|
|
e4333631e1 | ||
|
|
e7662d18d6 | ||
|
|
45ce0d8f12 |
48
.env.example
48
.env.example
@@ -1,20 +1,30 @@
|
|||||||
# Copy to .env for `docker compose up --build`. Local-dev runs (cargo run
|
# Copy to .env for `docker compose up --build`. Local-dev runs (cargo run
|
||||||
# / npm run dev) read backend/.env if present, or pick up the variables
|
# / npm run dev) read backend/.env if present, or pick up the variables
|
||||||
# from your shell.
|
# from your shell.
|
||||||
|
#
|
||||||
|
# Production note: COOKIE_SECURE=true (the default below) makes browsers
|
||||||
|
# refuse to send the session cookie over plain HTTP. Run with a TLS-
|
||||||
|
# terminating reverse proxy (Caddy, Traefik, nginx) in front — the
|
||||||
|
# compose file here doesn't ship one. Local/dev runs without HTTPS
|
||||||
|
# should set COOKIE_SECURE=false.
|
||||||
|
|
||||||
# ----- Postgres -----
|
# ----- Postgres -----
|
||||||
# These are read by the Postgres container *and* by DATABASE_URL below;
|
# These are read by the Postgres container *and* by DATABASE_URL below;
|
||||||
# changing them after the first boot won't migrate existing data, so set
|
# changing them after the first boot won't migrate existing data, so set
|
||||||
# them up front for any new deployment.
|
# them up front for any new deployment.
|
||||||
|
#
|
||||||
|
# POSTGRES_PASSWORD is REQUIRED — docker-compose.yml fails fast if it
|
||||||
|
# isn't set in this file, to prevent a deploy without an .env booting
|
||||||
|
# Postgres with a publicly-known credential.
|
||||||
POSTGRES_USER=mangalord
|
POSTGRES_USER=mangalord
|
||||||
POSTGRES_PASSWORD=mangalord
|
POSTGRES_PASSWORD=change-me-to-a-strong-random-string
|
||||||
POSTGRES_DB=mangalord
|
POSTGRES_DB=mangalord
|
||||||
|
|
||||||
# ----- Backend -----
|
# ----- Backend -----
|
||||||
DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord
|
DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord
|
||||||
BIND_ADDRESS=0.0.0.0:8080
|
BIND_ADDRESS=0.0.0.0:8080
|
||||||
STORAGE_DIR=/var/lib/mangalord/storage
|
STORAGE_DIR=/var/lib/mangalord/storage
|
||||||
RUST_LOG=info,mangalord=debug
|
RUST_LOG=info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off
|
||||||
|
|
||||||
# ----- Auth / cookies -----
|
# ----- Auth / cookies -----
|
||||||
# COOKIE_SECURE controls whether the `Secure` flag is set on the session
|
# COOKIE_SECURE controls whether the `Secure` flag is set on the session
|
||||||
@@ -29,6 +39,13 @@ COOKIE_DOMAIN=
|
|||||||
# get reaped lazily.
|
# get reaped lazily.
|
||||||
SESSION_TTL_DAYS=30
|
SESSION_TTL_DAYS=30
|
||||||
|
|
||||||
|
# ----- Auth brute-force rate limits -----
|
||||||
|
# Token-bucket budget shared across /auth/login, /auth/register, and
|
||||||
|
# /auth/me/password. Set AUTH_RATE_PER_SEC=0 to disable (e.g. behind a
|
||||||
|
# rate-limiting reverse proxy that already enforces a budget).
|
||||||
|
AUTH_RATE_PER_SEC=5
|
||||||
|
AUTH_RATE_BURST=10
|
||||||
|
|
||||||
# ----- CORS -----
|
# ----- CORS -----
|
||||||
# Comma-separated origins allowed to call the API with credentials.
|
# Comma-separated origins allowed to call the API with credentials.
|
||||||
# Default is empty: same-origin only. Set when frontend and backend live
|
# Default is empty: same-origin only. Set when frontend and backend live
|
||||||
@@ -44,6 +61,28 @@ MAX_REQUEST_BYTES=209715200
|
|||||||
# Default 20 MiB.
|
# Default 20 MiB.
|
||||||
MAX_FILE_BYTES=20971520
|
MAX_FILE_BYTES=20971520
|
||||||
|
|
||||||
|
# ----- Crawler download safety -----
|
||||||
|
# Hosts the crawler is allowed to fetch images/covers from, in addition
|
||||||
|
# to CRAWLER_START_URL's host and CRAWLER_CDN_HOST. Comma-separated.
|
||||||
|
# Defends against SSRF via scraped <img src="http://10.0.0.1/...">.
|
||||||
|
CRAWLER_DOWNLOAD_ALLOWLIST=
|
||||||
|
# Bypass the host allowlist entirely. Intended for sources that shard
|
||||||
|
# images across numbered CDN subdomains (cdn1/cdn2/…) where enumerating
|
||||||
|
# every host upfront is impractical. The private-IP / localhost / non-
|
||||||
|
# http(s) scheme defenses STAY ON — a scraped <img src="http://10.0.0.1/">
|
||||||
|
# is still refused with this flag set.
|
||||||
|
CRAWLER_ALLOW_ANY_HOST=false
|
||||||
|
# Hard cap on a single image body. Default 32 MiB.
|
||||||
|
CRAWLER_MAX_IMAGE_BYTES=33554432
|
||||||
|
# Path to a system Chromium binary. When set, the crawler skips the
|
||||||
|
# bundled-fetcher download. Required on platforms without a usable
|
||||||
|
# upstream Chromium build (notably Linux_arm64 / Raspberry Pi). On
|
||||||
|
# Debian: /usr/bin/chromium-headless-shell or /usr/bin/chromium. On
|
||||||
|
# Ubuntu the package is chromium-browser (different path). Pair with
|
||||||
|
# `docker compose build --build-arg INSTALL_CHROMIUM=true backend` so
|
||||||
|
# the image actually contains the binary.
|
||||||
|
CRAWLER_CHROMIUM_BINARY=
|
||||||
|
|
||||||
# ----- Frontend -----
|
# ----- Frontend -----
|
||||||
# The frontend container runs SvelteKit's Node adapter on :3000 and
|
# The frontend container runs SvelteKit's Node adapter on :3000 and
|
||||||
# proxies /api/* to BACKEND_URL via src/hooks.server.ts. In compose the
|
# proxies /api/* to BACKEND_URL via src/hooks.server.ts. In compose the
|
||||||
@@ -51,3 +90,8 @@ MAX_FILE_BYTES=20971520
|
|||||||
# internal docker network. Override only if you're running the
|
# internal docker network. Override only if you're running the
|
||||||
# frontend container against a backend somewhere else.
|
# frontend container against a backend somewhere else.
|
||||||
BACKEND_URL=http://backend:8080
|
BACKEND_URL=http://backend:8080
|
||||||
|
# Per-request wall-clock cap for the /api/* reverse proxy (milliseconds).
|
||||||
|
# Default 300000 (5 min) covers a typical 200 MiB chapter upload over
|
||||||
|
# 25 Mbps; raise for users on slower upstream links or lower if a
|
||||||
|
# tighter front proxy already bounds the request lifetime.
|
||||||
|
BACKEND_PROXY_TIMEOUT_MS=300000
|
||||||
|
|||||||
71
.gitea/README.md
Normal file
71
.gitea/README.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# Gitea Actions
|
||||||
|
|
||||||
|
The [`deploy`](workflows/deploy.yml) workflow runs on every push to `main`
|
||||||
|
(and via manual `workflow_dispatch`); pull requests against `main` run only the test jobs. A full run tests, builds, pushes the images
|
||||||
|
to a private registry, and rolls the stack over by SSH on the target host.
|
||||||
|
|
||||||
|
## Required secrets
|
||||||
|
|
||||||
|
Set under *Repo Settings → Actions → Secrets*:
|
||||||
|
|
||||||
|
| Name | Example | Purpose |
|
||||||
|
| -------------------- | ------------------------ | ---------------------------------------------------------------- |
|
||||||
|
| `REGISTRY_URL` | `registry.example.com` | Registry host. No scheme, no trailing slash. |
|
||||||
|
| `REGISTRY_USERNAME` | `mangalord-ci` | `docker login` user. |
|
||||||
|
| `REGISTRY_PASSWORD` | `<token>` | `docker login` token/password. |
|
||||||
|
| `SSH_HOST` | `mangalord.example.com` | Deploy target hostname/IP. |
|
||||||
|
| `SSH_USER` | `deploy` | SSH user on the target (must be in the `docker` group). |
|
||||||
|
| `SSH_PRIVATE_KEY` | `-----BEGIN OPENSSH...` | Private key authorised in the target user's `authorized_keys`. |
|
||||||
|
| `SSH_PORT` | `22` | Optional. Defaults to `22` if unset. |
|
||||||
|
|
||||||
|
## Required variables
|
||||||
|
|
||||||
|
Set under *Repo Settings → Actions → Variables* (not secrets — they appear
|
||||||
|
in logs):
|
||||||
|
|
||||||
|
| Name | Example | Purpose |
|
||||||
|
| ------------- | ------------------------ | ---------------------------------------------------------------------- |
|
||||||
|
| `DEPLOY_PATH` | `/srv/mangalord` | Directory on target holding `docker-compose.yml`, `.env`, and the prod overlay. |
|
||||||
|
|
||||||
|
## One-time host setup
|
||||||
|
|
||||||
|
The workflow assumes the deploy target already has:
|
||||||
|
|
||||||
|
1. Docker + Docker Compose v2 installed and the `SSH_USER` in the `docker` group.
|
||||||
|
2. `$DEPLOY_PATH/docker-compose.yml` (copy of the repo's [docker-compose.yml](../docker-compose.yml)).
|
||||||
|
3. `$DEPLOY_PATH/docker-compose.prod.yml` (copy of the repo's [docker-compose.prod.yml](../docker-compose.prod.yml)).
|
||||||
|
4. `$DEPLOY_PATH/.env` populated from [.env.example](../.env.example) with production values (real `POSTGRES_PASSWORD`, `COOKIE_SECURE=true`, etc.).
|
||||||
|
|
||||||
|
Bootstrap once:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh deploy@mangalord.example.com
|
||||||
|
sudo mkdir -p /srv/mangalord && sudo chown deploy:deploy /srv/mangalord
|
||||||
|
cd /srv/mangalord
|
||||||
|
# place docker-compose.yml, docker-compose.prod.yml, and .env here
|
||||||
|
```
|
||||||
|
|
||||||
|
The first workflow run will pull the images, bring the stack up, and run
|
||||||
|
the embedded migrations on startup.
|
||||||
|
|
||||||
|
## Image tags
|
||||||
|
|
||||||
|
Every push produces three tags per image:
|
||||||
|
|
||||||
|
- `mangalord-{backend,frontend}:latest`
|
||||||
|
- `mangalord-{backend,frontend}:<git-sha>` — used by the deploy job; lets
|
||||||
|
you pin a deploy to a specific commit
|
||||||
|
- `mangalord-{backend,frontend}:<version>` — the version from
|
||||||
|
[backend/Cargo.toml](../backend/Cargo.toml) (verified in lockstep with
|
||||||
|
[frontend/package.json](../frontend/package.json))
|
||||||
|
|
||||||
|
## Rollback
|
||||||
|
|
||||||
|
SSH to the target, set `IMAGE_TAG` to a previous commit SHA, and re-up:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /srv/mangalord
|
||||||
|
export REGISTRY_URL=registry.example.com
|
||||||
|
export IMAGE_TAG=<previous-sha>
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||||
|
```
|
||||||
154
.gitea/workflows/deploy.yml
Normal file
154
.gitea/workflows/deploy.yml
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
name: deploy
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test-backend:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
container:
|
||||||
|
image: rust:1-slim
|
||||||
|
services:
|
||||||
|
postgres:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
env:
|
||||||
|
POSTGRES_USER: mangalord
|
||||||
|
POSTGRES_PASSWORD: mangalord
|
||||||
|
POSTGRES_DB: mangalord
|
||||||
|
options: >-
|
||||||
|
--health-cmd "pg_isready -U mangalord"
|
||||||
|
--health-interval 5s
|
||||||
|
--health-timeout 5s
|
||||||
|
--health-retries 10
|
||||||
|
env:
|
||||||
|
DATABASE_URL: postgres://mangalord:mangalord@postgres:5432/mangalord
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install build deps
|
||||||
|
run: |
|
||||||
|
apt-get update
|
||||||
|
apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates
|
||||||
|
- name: Cache cargo registry and target
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/.cargo/registry
|
||||||
|
~/.cargo/git
|
||||||
|
backend/target
|
||||||
|
key: cargo-${{ runner.os }}-${{ hashFiles('backend/Cargo.lock') }}
|
||||||
|
restore-keys: |
|
||||||
|
cargo-${{ runner.os }}-
|
||||||
|
- name: cargo test
|
||||||
|
working-directory: backend
|
||||||
|
run: cargo test --locked
|
||||||
|
|
||||||
|
test-frontend:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '22'
|
||||||
|
cache: npm
|
||||||
|
cache-dependency-path: frontend/package-lock.json
|
||||||
|
- name: npm ci
|
||||||
|
working-directory: frontend
|
||||||
|
run: npm ci
|
||||||
|
- name: vitest
|
||||||
|
working-directory: frontend
|
||||||
|
run: npm test
|
||||||
|
|
||||||
|
build-and-push:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: [test-backend, test-frontend]
|
||||||
|
# PRs only run the test jobs; build + deploy are reserved for
|
||||||
|
# post-merge pushes to main. Without this gate every PR would push
|
||||||
|
# a tagged image to the registry and SSH-deploy to prod.
|
||||||
|
if: github.event_name != 'pull_request'
|
||||||
|
outputs:
|
||||||
|
image_tag: ${{ steps.meta.outputs.image_tag }}
|
||||||
|
version: ${{ steps.meta.outputs.version }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Resolve image tags
|
||||||
|
id: meta
|
||||||
|
run: |
|
||||||
|
version="$(grep -m1 '^version' backend/Cargo.toml | cut -d'"' -f2)"
|
||||||
|
frontend_version="$(grep -m1 '"version"' frontend/package.json | cut -d'"' -f4)"
|
||||||
|
if [ "$version" != "$frontend_version" ]; then
|
||||||
|
echo "Version mismatch: backend=$version frontend=$frontend_version" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "version=${version}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: docker login
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ secrets.REGISTRY_URL }}
|
||||||
|
username: ${{ secrets.REGISTRY_USERNAME }}
|
||||||
|
password: ${{ secrets.REGISTRY_PASSWORD }}
|
||||||
|
|
||||||
|
- name: Build & push backend
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: ./backend
|
||||||
|
push: true
|
||||||
|
tags: |
|
||||||
|
${{ secrets.REGISTRY_URL }}/mangalord-backend:latest
|
||||||
|
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.image_tag }}
|
||||||
|
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.version }}
|
||||||
|
cache-from: type=gha,scope=backend
|
||||||
|
cache-to: type=gha,mode=max,scope=backend
|
||||||
|
|
||||||
|
- name: Build & push frontend
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: ./frontend
|
||||||
|
push: true
|
||||||
|
tags: |
|
||||||
|
${{ secrets.REGISTRY_URL }}/mangalord-frontend:latest
|
||||||
|
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.image_tag }}
|
||||||
|
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.version }}
|
||||||
|
cache-from: type=gha,scope=frontend
|
||||||
|
cache-to: type=gha,mode=max,scope=frontend
|
||||||
|
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build-and-push
|
||||||
|
if: github.event_name != 'pull_request'
|
||||||
|
# Single-host deploy: the runner lives on the same box as the stack, so we
|
||||||
|
# drive the host docker daemon directly (act_runner shares its socket via
|
||||||
|
# `docker_host: "-"`) instead of SSHing out. The compose dir is bind-mounted
|
||||||
|
# at its REAL host path so compose's relative bind-mounts (./mangalord/...,
|
||||||
|
# ./Caddyfile) resolve; this requires `/mnt/ssd/docker-data` in the runner's
|
||||||
|
# container.valid_volumes. The central compose references the images as
|
||||||
|
# registry.mc02.dev/mangalord-*:${MANGALORD_TAG:-latest}, so we only pull
|
||||||
|
# and recreate the two mangalord services at the freshly built SHA.
|
||||||
|
container:
|
||||||
|
image: docker:cli
|
||||||
|
volumes:
|
||||||
|
- /mnt/ssd/docker-data:/mnt/ssd/docker-data
|
||||||
|
steps:
|
||||||
|
- name: Deploy to the local stack
|
||||||
|
working-directory: /mnt/ssd/docker-data
|
||||||
|
env:
|
||||||
|
REGISTRY_URL: ${{ secrets.REGISTRY_URL }}
|
||||||
|
REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
|
||||||
|
REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
|
||||||
|
IMAGE_TAG: ${{ needs.build-and-push.outputs.image_tag }}
|
||||||
|
run: |
|
||||||
|
set -eu
|
||||||
|
echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" -u "$REGISTRY_USERNAME" --password-stdin
|
||||||
|
export MANGALORD_TAG="$IMAGE_TAG"
|
||||||
|
docker compose pull mangalord-backend mangalord-frontend
|
||||||
|
docker compose up -d mangalord-backend mangalord-frontend
|
||||||
|
docker image prune -f
|
||||||
|
docker logout "$REGISTRY_URL"
|
||||||
139
backend/Cargo.lock
generated
139
backend/Cargo.lock
generated
@@ -1202,7 +1202,7 @@ dependencies = [
|
|||||||
"js-sys",
|
"js-sys",
|
||||||
"log",
|
"log",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
"windows-core",
|
"windows-core 0.62.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.33.0"
|
version = "0.45.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"argon2",
|
"argon2",
|
||||||
@@ -1488,6 +1488,7 @@ dependencies = [
|
|||||||
"http-body-util",
|
"http-body-util",
|
||||||
"infer",
|
"infer",
|
||||||
"mime",
|
"mime",
|
||||||
|
"nix 0.29.0",
|
||||||
"rand 0.8.6",
|
"rand 0.8.6",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"scraper",
|
"scraper",
|
||||||
@@ -1496,6 +1497,7 @@ dependencies = [
|
|||||||
"sha2",
|
"sha2",
|
||||||
"sqlx",
|
"sqlx",
|
||||||
"subtle",
|
"subtle",
|
||||||
|
"sysinfo",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror 1.0.69",
|
"thiserror 1.0.69",
|
||||||
"time",
|
"time",
|
||||||
@@ -1603,6 +1605,18 @@ version = "1.0.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nix"
|
||||||
|
version = "0.29.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cfg-if",
|
||||||
|
"cfg_aliases",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nix"
|
name = "nix"
|
||||||
version = "0.31.3"
|
version = "0.31.3"
|
||||||
@@ -1615,6 +1629,15 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ntapi"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nu-ansi-term"
|
name = "nu-ansi-term"
|
||||||
version = "0.50.3"
|
version = "0.50.3"
|
||||||
@@ -1855,7 +1878,7 @@ checksum = "9cf20a545b305cf1da722b236b5155c9bb35f1d5ceb28c048bd96ca842f41b5b"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"android_system_properties",
|
"android_system_properties",
|
||||||
"log",
|
"log",
|
||||||
"nix",
|
"nix 0.31.3",
|
||||||
"objc2",
|
"objc2",
|
||||||
"objc2-foundation",
|
"objc2-foundation",
|
||||||
"objc2-ui-kit",
|
"objc2-ui-kit",
|
||||||
@@ -2324,6 +2347,7 @@ dependencies = [
|
|||||||
"cookie",
|
"cookie",
|
||||||
"cookie_store",
|
"cookie_store",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
"futures-util",
|
||||||
"http",
|
"http",
|
||||||
"http-body",
|
"http-body",
|
||||||
"http-body-util",
|
"http-body-util",
|
||||||
@@ -2343,12 +2367,14 @@ dependencies = [
|
|||||||
"sync_wrapper",
|
"sync_wrapper",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
|
"tokio-util",
|
||||||
"tower",
|
"tower",
|
||||||
"tower-http",
|
"tower-http",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
"url",
|
"url",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
"wasm-bindgen-futures",
|
"wasm-bindgen-futures",
|
||||||
|
"wasm-streams",
|
||||||
"web-sys",
|
"web-sys",
|
||||||
"webpki-roots",
|
"webpki-roots",
|
||||||
]
|
]
|
||||||
@@ -2982,6 +3008,19 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sysinfo"
|
||||||
|
version = "0.32.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4c33cd241af0f2e9e3b5c32163b873b29956890b5342e6745b917ce9d490f4af"
|
||||||
|
dependencies = [
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
"memchr",
|
||||||
|
"ntapi",
|
||||||
|
"windows",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tempfile"
|
name = "tempfile"
|
||||||
version = "3.27.0"
|
version = "3.27.0"
|
||||||
@@ -3527,6 +3566,19 @@ dependencies = [
|
|||||||
"wasmparser",
|
"wasmparser",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-streams"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
|
||||||
|
dependencies = [
|
||||||
|
"futures-util",
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasmparser"
|
name = "wasmparser"
|
||||||
version = "0.244.0"
|
version = "0.244.0"
|
||||||
@@ -3590,19 +3642,74 @@ dependencies = [
|
|||||||
"wasite",
|
"wasite",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows"
|
||||||
|
version = "0.57.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core 0.57.0",
|
||||||
|
"windows-targets 0.52.6",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-core"
|
||||||
|
version = "0.57.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d"
|
||||||
|
dependencies = [
|
||||||
|
"windows-implement 0.57.0",
|
||||||
|
"windows-interface 0.57.0",
|
||||||
|
"windows-result 0.1.2",
|
||||||
|
"windows-targets 0.52.6",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-core"
|
name = "windows-core"
|
||||||
version = "0.62.2"
|
version = "0.62.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
|
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-implement",
|
"windows-implement 0.60.2",
|
||||||
"windows-interface",
|
"windows-interface 0.59.3",
|
||||||
"windows-link",
|
"windows-link",
|
||||||
"windows-result",
|
"windows-result 0.4.1",
|
||||||
"windows-strings",
|
"windows-strings",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-implement"
|
||||||
|
version = "0.57.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-implement"
|
name = "windows-implement"
|
||||||
version = "0.60.2"
|
version = "0.60.2"
|
||||||
@@ -3614,6 +3721,17 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-interface"
|
||||||
|
version = "0.57.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-interface"
|
name = "windows-interface"
|
||||||
version = "0.59.3"
|
version = "0.59.3"
|
||||||
@@ -3631,6 +3749,15 @@ version = "0.2.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-result"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets 0.52.6",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-result"
|
name = "windows-result"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.33.0"
|
version = "0.45.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
default-run = "mangalord"
|
default-run = "mangalord"
|
||||||
|
|
||||||
@@ -45,8 +45,10 @@ futures-core = "0.3"
|
|||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
bytes = "1"
|
bytes = "1"
|
||||||
chromiumoxide = { version = "0.7", features = ["tokio-runtime", "_fetcher-rusttls-tokio"], default-features = false }
|
chromiumoxide = { version = "0.7", features = ["tokio-runtime", "_fetcher-rusttls-tokio"], default-features = false }
|
||||||
|
sysinfo = { version = "0.32", default-features = false, features = ["system"] }
|
||||||
|
nix = { version = "0.29", features = ["fs"] }
|
||||||
scraper = "0.20"
|
scraper = "0.20"
|
||||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "socks", "cookies"] }
|
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "socks", "cookies", "stream"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ RUN apt-get update \
|
|||||||
# exact crate versions CI tested. Without Cargo.lock + the flag, cargo
|
# exact crate versions CI tested. Without Cargo.lock + the flag, cargo
|
||||||
# would silently resolve fresh on every image build.
|
# would silently resolve fresh on every image build.
|
||||||
COPY Cargo.toml Cargo.lock ./
|
COPY Cargo.toml Cargo.lock ./
|
||||||
RUN mkdir src && echo "fn main() {}" > src/main.rs && echo "" > src/lib.rs \
|
RUN mkdir -p src/bin && echo "fn main() {}" > src/main.rs && echo "" > src/lib.rs \
|
||||||
|
&& echo "fn main() {}" > src/bin/crawler.rs \
|
||||||
&& cargo build --locked --release \
|
&& cargo build --locked --release \
|
||||||
&& rm -rf src
|
&& rm -rf src
|
||||||
|
|
||||||
@@ -18,13 +19,68 @@ COPY src ./src
|
|||||||
COPY migrations ./migrations
|
COPY migrations ./migrations
|
||||||
RUN touch src/main.rs src/lib.rs && cargo build --locked --release
|
RUN touch src/main.rs src/lib.rs && cargo build --locked --release
|
||||||
|
|
||||||
FROM debian:bookworm-slim
|
FROM debian:trixie-slim
|
||||||
|
# Runtime base must match the builder's Debian release: `rust:1-slim` tracks
|
||||||
|
# trixie (glibc 2.41), so a bookworm runtime (glibc 2.36) can't run the
|
||||||
|
# binary ("GLIBC_2.39 not found"). Keep these two in lockstep on bumps.
|
||||||
|
# `curl` is for the container HEALTHCHECK; `ca-certificates` is for
|
||||||
|
# outbound HTTPS (crawler covers/pages).
|
||||||
|
#
|
||||||
|
# INSTALL_CHROMIUM is an opt-in for deployments that can't use the
|
||||||
|
# chromiumoxide fetcher path (notably Linux_arm64 / Raspberry Pi, where
|
||||||
|
# the upstream snapshot bucket has no usable build). When `true`, adds
|
||||||
|
# Debian's apt-packaged headless chromium plus a baseline font set —
|
||||||
|
# pair with `CRAWLER_CHROMIUM_BINARY=/usr/bin/chromium-headless-shell`
|
||||||
|
# at runtime so the launcher uses it. Default `false` keeps cloud/x86
|
||||||
|
# images slim.
|
||||||
|
#
|
||||||
|
# Build the Pi image with:
|
||||||
|
# docker compose build --build-arg INSTALL_CHROMIUM=true backend
|
||||||
|
ARG INSTALL_CHROMIUM=false
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends ca-certificates \
|
&& apt-get install -y --no-install-recommends ca-certificates curl \
|
||||||
|
&& if [ "$INSTALL_CHROMIUM" = "true" ]; then \
|
||||||
|
apt-get install -y --no-install-recommends chromium-headless-shell fonts-liberation; \
|
||||||
|
fi \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Non-root runtime user. The API binary doesn't need any root
|
||||||
|
# privilege; the crawler daemon's Chromium launcher uses --no-sandbox
|
||||||
|
# precisely because user-namespace sandboxing is fragile, so dropping
|
||||||
|
# privileges costs nothing operationally and shrinks the blast radius
|
||||||
|
# of any RCE.
|
||||||
|
ARG APP_UID=10001
|
||||||
|
ARG APP_GID=10001
|
||||||
|
RUN groupadd --system --gid ${APP_GID} app \
|
||||||
|
&& useradd --system --uid ${APP_UID} --gid app --home-dir /home/app --create-home --shell /usr/sbin/nologin app
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY --from=builder /app/target/release/mangalord /usr/local/bin/mangalord
|
COPY --from=builder /app/target/release/mangalord /usr/local/bin/mangalord
|
||||||
COPY --from=builder /app/migrations /app/migrations
|
COPY --from=builder /app/migrations /app/migrations
|
||||||
|
|
||||||
ENV STORAGE_DIR=/var/lib/mangalord/storage
|
ENV STORAGE_DIR=/var/lib/mangalord/storage
|
||||||
|
# Pre-create the storage dir so the entrypoint doesn't need to
|
||||||
|
# mkdir-as-root and so the named volume mount inherits the right
|
||||||
|
# ownership.
|
||||||
|
#
|
||||||
|
# UPGRADE NOTE for operators: if you're moving from an older image
|
||||||
|
# that ran as root, the existing `storage-data` volume has files owned
|
||||||
|
# by UID 0 and the new UID-10001 user can't write them. Run once
|
||||||
|
# before the upgrade:
|
||||||
|
# docker compose run --rm --user 0 backend \
|
||||||
|
# chown -R 10001:10001 /var/lib/mangalord/storage
|
||||||
|
# (Postgres is unaffected — that image's `postgres` user UID hasn't
|
||||||
|
# changed.)
|
||||||
|
RUN mkdir -p ${STORAGE_DIR} \
|
||||||
|
&& chown -R app:app ${STORAGE_DIR} /app /home/app
|
||||||
|
|
||||||
|
USER app
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|
||||||
|
# `--start-period` is generous because first boot runs sqlx::migrate
|
||||||
|
# against postgres which can take a few seconds; subsequent restarts
|
||||||
|
# are sub-second.
|
||||||
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
|
||||||
|
CMD curl -fsS http://localhost:8080/api/v1/health > /dev/null || exit 1
|
||||||
|
|
||||||
CMD ["mangalord"]
|
CMD ["mangalord"]
|
||||||
|
|||||||
15
backend/migrations/0016_crawler_jobs_drop_failed_state.sql
Normal file
15
backend/migrations/0016_crawler_jobs_drop_failed_state.sql
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
-- The original 0012 partial index covers `state IN ('pending','failed')`,
|
||||||
|
-- but `ack_failed` in src/crawler/jobs.rs only writes `dead` or
|
||||||
|
-- `pending` — `failed` is never set. The index branch on `failed`
|
||||||
|
-- never matches any row, so it's dead weight on every write.
|
||||||
|
--
|
||||||
|
-- Drop and recreate the index without the dead branch. The CHECK
|
||||||
|
-- constraint on `state` still allows `'failed'` so a future migration
|
||||||
|
-- can adopt that terminal-but-retryable state without a second
|
||||||
|
-- schema change.
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS crawler_jobs_ready_idx;
|
||||||
|
|
||||||
|
CREATE INDEX crawler_jobs_ready_idx
|
||||||
|
ON crawler_jobs (scheduled_at)
|
||||||
|
WHERE state = 'pending';
|
||||||
20
backend/migrations/0017_chapter_sources_per_manga.sql
Normal file
20
backend/migrations/0017_chapter_sources_per_manga.sql
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
-- chapter_sources: drop the global (source_id, source_chapter_key) PK
|
||||||
|
-- and rekey on (source_id, chapter_id).
|
||||||
|
--
|
||||||
|
-- The old PK assumed chapter slugs are unique per source. Sources whose
|
||||||
|
-- chapter naming is per-manga (chapter-1, chapter-2, ...) instead of per-
|
||||||
|
-- catalog (br_chapter-379272 with a global counter) would collide on the
|
||||||
|
-- second manga: the INSERT would conflict on (source_id, "chapter-1") and
|
||||||
|
-- the lookup would attribute the row to the first manga's chapter_id.
|
||||||
|
--
|
||||||
|
-- The new key is the natural identity of a source attachment: "this source
|
||||||
|
-- has this chapter". A (source_id, source_chapter_key) index preserves
|
||||||
|
-- the lookup path (find existing source row by source's identifier) but
|
||||||
|
-- no longer enforces uniqueness — the application combines it with the
|
||||||
|
-- chapters table's manga_id to scope the lookup per-manga.
|
||||||
|
|
||||||
|
ALTER TABLE chapter_sources DROP CONSTRAINT chapter_sources_pkey;
|
||||||
|
ALTER TABLE chapter_sources ADD PRIMARY KEY (source_id, chapter_id);
|
||||||
|
|
||||||
|
CREATE INDEX chapter_sources_source_key_idx
|
||||||
|
ON chapter_sources (source_id, source_chapter_key);
|
||||||
5
backend/migrations/0018_admin_role.sql
Normal file
5
backend/migrations/0018_admin_role.sql
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
-- Admin role flag on users. Booted from ADMIN_USERNAME / ADMIN_PASSWORD env at
|
||||||
|
-- startup (see app::build). Demotion is instant: the RequireAdmin extractor
|
||||||
|
-- re-reads the user row every request, so flipping this column takes effect on
|
||||||
|
-- the next call without a session purge.
|
||||||
|
ALTER TABLE users ADD COLUMN is_admin BOOLEAN NOT NULL DEFAULT false;
|
||||||
20
backend/migrations/0019_admin_audit.sql
Normal file
20
backend/migrations/0019_admin_audit.sql
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
-- Admin audit log. Written from inside the same transaction as the action
|
||||||
|
-- it records, so a failed COMMIT also rolls back the audit row — the log
|
||||||
|
-- never claims an action happened that didn't.
|
||||||
|
--
|
||||||
|
-- `actor_user_id` is ON DELETE SET NULL so audit rows outlive a deleted
|
||||||
|
-- admin (the answer to "who promoted Bob to admin?" survives even after
|
||||||
|
-- Alice's account is removed). `target_id` is intentionally not a FK
|
||||||
|
-- because future audit kinds may target non-user rows (manga, source,
|
||||||
|
-- etc.) and a single typed FK can't express that.
|
||||||
|
CREATE TABLE admin_audit (
|
||||||
|
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
actor_user_id uuid REFERENCES users(id) ON DELETE SET NULL,
|
||||||
|
action text NOT NULL,
|
||||||
|
target_kind text NOT NULL,
|
||||||
|
target_id uuid,
|
||||||
|
payload jsonb NOT NULL DEFAULT '{}'::jsonb,
|
||||||
|
at timestamptz NOT NULL DEFAULT now()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX admin_audit_at_idx ON admin_audit (at DESC);
|
||||||
14
backend/migrations/0020_admin_jobs_payload_index.sql
Normal file
14
backend/migrations/0020_admin_jobs_payload_index.sql
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
-- Per-manga sync-state derivation joins crawler_jobs to manga_sources via
|
||||||
|
-- (payload->>'source_id', payload->>'source_manga_key') for the
|
||||||
|
-- `sync_manga` job kind (whose payload doesn't carry a manga_id directly).
|
||||||
|
-- Without this index the join falls back to a seqscan of crawler_jobs on
|
||||||
|
-- every admin manga listing — a noticeable cost as the job table grows
|
||||||
|
-- with the daily metadata pass.
|
||||||
|
--
|
||||||
|
-- Partial on `state IN ('pending','running')` so it covers only in-flight
|
||||||
|
-- jobs (the bulk of the table is done/dead and irrelevant to "is this
|
||||||
|
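-- manga being synced right now"). The predicate also pins
-- payload->>'kind' = 'sync_manga', and the indexed expression is
-- source_manga_key alone, so source_id is not part of the index key.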
|
||||||
|
CREATE INDEX crawler_jobs_sync_manga_key_idx
|
||||||
|
ON crawler_jobs ((payload->>'source_manga_key'))
|
||||||
|
WHERE state IN ('pending', 'running')
|
||||||
|
AND payload->>'kind' = 'sync_manga';
|
||||||
110
backend/src/api/admin/mangas.rs
Normal file
110
backend/src/api/admin/mangas.rs
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
//! Admin manga/chapter overview with derived sync state.
|
||||||
|
//!
|
||||||
|
//! Sync state comes from `repo::admin_view`, which joins the manga /
|
||||||
|
//! chapter tables with the crawler signals at query time — there is no
|
||||||
|
//! persisted sync_state column. See [`repo::admin_view`] for the
|
||||||
|
//! derivation priority order.
|
||||||
|
|
||||||
|
use axum::extract::{Path, Query, State};
|
||||||
|
use axum::routing::get;
|
||||||
|
use axum::{Json, Router};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::api::pagination::PagedResponse;
|
||||||
|
use crate::app::AppState;
|
||||||
|
use crate::auth::extractor::RequireAdmin;
|
||||||
|
use crate::domain::MangaSyncState;
|
||||||
|
use crate::error::{AppError, AppResult};
|
||||||
|
use crate::repo;
|
||||||
|
use crate::repo::admin_view::{AdminChapterRow, AdminMangaRow};
|
||||||
|
|
||||||
|
pub fn routes() -> Router<AppState> {
|
||||||
|
Router::new()
|
||||||
|
.route("/admin/mangas", get(list_mangas))
|
||||||
|
.route("/admin/mangas/:id/chapters", get(list_chapters))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Default)]
|
||||||
|
pub struct ListChaptersParams {
|
||||||
|
#[serde(default = "default_chapter_limit")]
|
||||||
|
pub limit: i64,
|
||||||
|
#[serde(default)]
|
||||||
|
pub offset: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_chapter_limit() -> i64 {
|
||||||
|
200
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Default)]
|
||||||
|
pub struct ListMangasParams {
|
||||||
|
#[serde(default)]
|
||||||
|
pub search: Option<String>,
|
||||||
|
/// `in_progress` | `dropped` | `synced`. Unrecognised values are a 400.
|
||||||
|
#[serde(default)]
|
||||||
|
pub sync_state: Option<String>,
|
||||||
|
#[serde(default = "default_limit")]
|
||||||
|
pub limit: i64,
|
||||||
|
#[serde(default)]
|
||||||
|
pub offset: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_limit() -> i64 {
|
||||||
|
50
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_mangas(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
_admin: RequireAdmin,
|
||||||
|
Query(params): Query<ListMangasParams>,
|
||||||
|
) -> AppResult<Json<PagedResponse<AdminMangaRow>>> {
|
||||||
|
let limit = params.limit.clamp(1, 200);
|
||||||
|
let offset = params.offset.max(0);
|
||||||
|
|
||||||
|
let sync_state = match params.sync_state.as_deref() {
|
||||||
|
None | Some("") => None,
|
||||||
|
Some("in_progress") => Some(MangaSyncState::InProgress),
|
||||||
|
Some("dropped") => Some(MangaSyncState::Dropped),
|
||||||
|
Some("synced") => Some(MangaSyncState::Synced),
|
||||||
|
Some(other) => {
|
||||||
|
return Err(AppError::InvalidInput(format!(
|
||||||
|
"sync_state must be one of in_progress|dropped|synced (got {other:?})"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let q = repo::admin_view::ListAdminMangasQuery {
|
||||||
|
search: params.search.filter(|s| !s.trim().is_empty()),
|
||||||
|
sync_state,
|
||||||
|
limit,
|
||||||
|
offset,
|
||||||
|
};
|
||||||
|
let (items, total) = repo::admin_view::list_mangas_with_sync_state(&state.db, &q).await?;
|
||||||
|
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_chapters(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
_admin: RequireAdmin,
|
||||||
|
Path(manga_id): Path<Uuid>,
|
||||||
|
Query(params): Query<ListChaptersParams>,
|
||||||
|
) -> AppResult<Json<PagedResponse<AdminChapterRow>>> {
|
||||||
|
// Explicit existence check so a typo / deleted manga returns 404
|
||||||
|
// rather than a misleading "no chapters" 200.
|
||||||
|
if !repo::manga::exists(&state.db, manga_id).await? {
|
||||||
|
return Err(AppError::NotFound);
|
||||||
|
}
|
||||||
|
// Cap at 500 to bound the per-row scalar-subquery cost on
|
||||||
|
// long-runners with thousands of chapters; default 200 covers
|
||||||
|
// typical browsing without paging round-trips.
|
||||||
|
let limit = params.limit.clamp(1, 500);
|
||||||
|
let offset = params.offset.max(0);
|
||||||
|
let q = repo::admin_view::ListAdminChaptersQuery {
|
||||||
|
manga_id,
|
||||||
|
limit,
|
||||||
|
offset,
|
||||||
|
};
|
||||||
|
let (items, total) = repo::admin_view::list_chapters_with_sync_state(&state.db, &q).await?;
|
||||||
|
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||||
|
}
|
||||||
20
backend/src/api/admin/mod.rs
Normal file
20
backend/src/api/admin/mod.rs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
//! Admin-only endpoints. Mounted under `/api/v1/admin/*` by
|
||||||
|
//! `crate::api::routes`. Every handler in this subtree is guarded by
|
||||||
|
//! `RequireAdmin`, which only accepts session-cookie authentication —
|
||||||
|
//! bot/API tokens cannot reach admin routes (see
|
||||||
|
//! `crate::auth::extractor::RequireAdmin`).
|
||||||
|
|
||||||
|
pub mod mangas;
|
||||||
|
pub mod system;
|
||||||
|
pub mod users;
|
||||||
|
|
||||||
|
use axum::Router;
|
||||||
|
|
||||||
|
use crate::app::AppState;
|
||||||
|
|
||||||
|
pub fn routes() -> Router<AppState> {
|
||||||
|
Router::new()
|
||||||
|
.merge(users::routes())
|
||||||
|
.merge(mangas::routes())
|
||||||
|
.merge(system::routes())
|
||||||
|
}
|
||||||
163
backend/src/api/admin/system.rs
Normal file
163
backend/src/api/admin/system.rs
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
//! System metrics for the admin dashboard.
|
||||||
|
//!
|
||||||
|
//! Disk is `statvfs(storage_dir)` so the number reflects the volume the
|
||||||
|
//! app actually writes to (not the root filesystem of the host). When the
|
||||||
|
//! storage backend doesn't expose a local path (e.g. a future S3 impl)
|
||||||
|
//! the disk fields are `null` rather than fabricated.
|
||||||
|
//!
|
||||||
|
//! Memory and CPU come from `sysinfo`. CPU requires two refreshes with
|
||||||
|
//! at least 200ms between them to compute a meaningful delta; the
|
||||||
|
//! handler eats the 250ms wall-clock cost on each request. Admin
|
||||||
|
//! traffic is low-volume so a background cache isn't worth the moving
|
||||||
|
//! parts yet — revisit if polling becomes frequent.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use axum::extract::State;
|
||||||
|
use axum::routing::get;
|
||||||
|
use axum::{Json, Router};
|
||||||
|
use serde::Serialize;
|
||||||
|
use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind, System};
|
||||||
|
|
||||||
|
use crate::app::AppState;
|
||||||
|
use crate::auth::extractor::RequireAdmin;
|
||||||
|
use crate::error::AppResult;
|
||||||
|
|
||||||
|
const ALERT_THRESHOLD_PERCENT: f64 = 90.0;
|
||||||
|
|
||||||
|
pub fn routes() -> Router<AppState> {
|
||||||
|
Router::new().route("/admin/system", get(system))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON body returned by the `/admin/system` handler.
#[derive(Debug, Serialize)]
|
||||||
|
pub struct SystemStats {
|
||||||
|
/// Usage of the storage volume; `None` when the storage backend exposes
/// no local root or `statvfs` on that root fails.
pub disk: Option<DiskStats>,
|
||||||
|
/// Memory usage as sampled via `sysinfo`.
pub memory: MemoryStats,
|
||||||
|
/// Global CPU usage as sampled via `sysinfo`.
pub cpu: CpuStats,
|
||||||
|
/// Warnings raised when disk or memory usage reaches
/// `ALERT_THRESHOLD_PERCENT`; empty otherwise.
pub alerts: Vec<Alert>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Disk usage of the storage volume, as computed by `disk_stats_for`
/// from a `statvfs` call.
#[derive(Debug, Serialize)]
|
||||||
|
pub struct DiskStats {
|
||||||
|
/// Fragment size multiplied by the total block count.
pub total_bytes: u64,
|
||||||
|
/// `total_bytes` minus `free_bytes` (saturating at zero).
pub used_bytes: u64,
|
||||||
|
/// Fragment size multiplied by the blocks available to non-root callers.
pub free_bytes: u64,
|
||||||
|
/// `used_bytes` as a percentage of `total_bytes`; 0.0 when the total is 0.
pub percent_used: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Memory usage as filled in by `memory_and_cpu` from `sysinfo`.
#[derive(Debug, Serialize)]
|
||||||
|
pub struct MemoryStats {
|
||||||
|
/// Value of `System::total_memory()`.
pub total_bytes: u64,
|
||||||
|
/// Value of `System::used_memory()`.
pub used_bytes: u64,
|
||||||
|
/// `used_bytes` as a percentage of `total_bytes`; 0.0 when the total is 0.
pub percent_used: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// CPU usage as filled in by `memory_and_cpu` from `sysinfo`.
#[derive(Debug, Serialize)]
|
||||||
|
pub struct CpuStats {
|
||||||
|
/// Value of `System::global_cpu_usage()`, measured across the two
/// refreshes the handler performs 250ms apart.
pub percent_used: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One dashboard alert emitted alongside the system metrics.
#[derive(Debug, Serialize)]
|
||||||
|
pub struct Alert {
|
||||||
|
/// Severity of the alert.
pub level: AlertLevel,
|
||||||
|
/// Human-readable description, e.g. "disk near full (93% used)".
pub message: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Severity attached to an [`Alert`]. Variant names are serialised in
/// snake_case (so `Warning` becomes `"warning"`).
#[derive(Debug, Serialize, Clone, Copy)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum AlertLevel {
|
||||||
|
/// The only level currently emitted: a resource has reached the alert
/// threshold.
Warning,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn system(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
_admin: RequireAdmin,
|
||||||
|
) -> AppResult<Json<SystemStats>> {
|
||||||
|
let disk = state.storage.local_root().and_then(disk_stats_for);
|
||||||
|
let (memory, cpu) = memory_and_cpu().await;
|
||||||
|
let mut alerts = Vec::new();
|
||||||
|
if let Some(d) = &disk {
|
||||||
|
if d.percent_used >= ALERT_THRESHOLD_PERCENT {
|
||||||
|
alerts.push(Alert {
|
||||||
|
level: AlertLevel::Warning,
|
||||||
|
message: format!(
|
||||||
|
"disk near full ({:.0}% used)",
|
||||||
|
d.percent_used
|
||||||
|
),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if memory.percent_used >= ALERT_THRESHOLD_PERCENT {
|
||||||
|
alerts.push(Alert {
|
||||||
|
level: AlertLevel::Warning,
|
||||||
|
message: format!(
|
||||||
|
"memory near full ({:.0}% used)",
|
||||||
|
memory.percent_used
|
||||||
|
),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(Json(SystemStats {
|
||||||
|
disk,
|
||||||
|
memory,
|
||||||
|
cpu,
|
||||||
|
alerts,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn disk_stats_for(root: &Path) -> Option<DiskStats> {
|
||||||
|
let s = nix::sys::statvfs::statvfs(root).ok()?;
|
||||||
|
// statvfs reports `f_frsize * f_blocks` for total bytes. `f_bavail`
|
||||||
|
// is "free to non-root callers" which is what an operator actually
|
||||||
|
// cares about — `f_bfree` includes blocks reserved for root.
|
||||||
|
let block = s.fragment_size();
|
||||||
|
let total = block * s.blocks();
|
||||||
|
let avail = block * s.blocks_available();
|
||||||
|
let used = total.saturating_sub(avail);
|
||||||
|
let percent_used = if total > 0 {
|
||||||
|
(used as f64) * 100.0 / (total as f64)
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
Some(DiskStats {
|
||||||
|
total_bytes: total,
|
||||||
|
used_bytes: used,
|
||||||
|
free_bytes: avail,
|
||||||
|
percent_used,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn memory_and_cpu() -> (MemoryStats, CpuStats) {
|
||||||
|
// sysinfo's CPU sampling needs two refreshes with a delay between
|
||||||
|
// them — the first seeds the delta counters, the second measures.
|
||||||
|
// We do this once per request; admin traffic is low enough that the
|
||||||
|
// 250ms cost is invisible.
|
||||||
|
let mut sys = System::new_with_specifics(
|
||||||
|
RefreshKind::new()
|
||||||
|
.with_cpu(CpuRefreshKind::everything())
|
||||||
|
.with_memory(MemoryRefreshKind::everything()),
|
||||||
|
);
|
||||||
|
sys.refresh_cpu_all();
|
||||||
|
// Yield the runtime instead of blocking it for the gap.
|
||||||
|
tokio::time::sleep(Duration::from_millis(250)).await;
|
||||||
|
sys.refresh_cpu_all();
|
||||||
|
sys.refresh_memory();
|
||||||
|
|
||||||
|
let total = sys.total_memory();
|
||||||
|
let used = sys.used_memory();
|
||||||
|
let mem_pct = if total > 0 {
|
||||||
|
(used as f64) * 100.0 / (total as f64)
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
let memory = MemoryStats {
|
||||||
|
total_bytes: total,
|
||||||
|
used_bytes: used,
|
||||||
|
percent_used: mem_pct,
|
||||||
|
};
|
||||||
|
|
||||||
|
let cpu = CpuStats {
|
||||||
|
percent_used: sys.global_cpu_usage() as f64,
|
||||||
|
};
|
||||||
|
(memory, cpu)
|
||||||
|
}
|
||||||
128
backend/src/api/admin/users.rs
Normal file
128
backend/src/api/admin/users.rs
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
//! Admin user management: list, delete, promote/demote.
|
||||||
|
//!
|
||||||
|
//! All handlers are gated by `RequireAdmin` and rely on
|
||||||
|
//! `repo::user::admin_safe_*` for self-protection and the last-admin
|
||||||
|
//! invariant. Audit rows are written inside the same DB transaction as
|
||||||
|
//! the action they record.
|
||||||
|
|
||||||
|
use axum::extract::{Path, Query, State};
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::routing::{delete, get};
|
||||||
|
use axum::{Json, Router};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::api::auth::{validate_password, validate_username};
|
||||||
|
use crate::api::pagination::PagedResponse;
|
||||||
|
use crate::app::AppState;
|
||||||
|
use crate::auth::extractor::RequireAdmin;
|
||||||
|
use crate::auth::password::hash_password;
|
||||||
|
use crate::domain::User;
|
||||||
|
use crate::error::{AppError, AppResult};
|
||||||
|
use crate::repo;
|
||||||
|
|
||||||
|
pub fn routes() -> Router<AppState> {
|
||||||
|
Router::new()
|
||||||
|
.route("/admin/users", get(list_users).post(create_user))
|
||||||
|
.route(
|
||||||
|
"/admin/users/:id",
|
||||||
|
delete(delete_user).patch(update_user),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Default)]
|
||||||
|
pub struct ListUsersParams {
|
||||||
|
#[serde(default)]
|
||||||
|
pub search: Option<String>,
|
||||||
|
#[serde(default = "default_limit")]
|
||||||
|
pub limit: i64,
|
||||||
|
#[serde(default)]
|
||||||
|
pub offset: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_limit() -> i64 {
|
||||||
|
50
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_users(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
_admin: RequireAdmin,
|
||||||
|
Query(params): Query<ListUsersParams>,
|
||||||
|
) -> AppResult<Json<PagedResponse<User>>> {
|
||||||
|
let limit = params.limit.clamp(1, 200);
|
||||||
|
let offset = params.offset.max(0);
|
||||||
|
let (items, total) = repo::user::list_with_total(
|
||||||
|
&state.db,
|
||||||
|
&repo::user::ListUsersQuery {
|
||||||
|
search: params.search.filter(|s| !s.trim().is_empty()),
|
||||||
|
limit,
|
||||||
|
offset,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON body accepted by `PATCH /admin/users/:id`.
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct UpdateUserInput {
|
||||||
|
/// New value for the user's admin flag. When absent the handler rejects
/// the request with `InvalidInput` ("no updatable fields supplied").
pub is_admin: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_user(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
RequireAdmin(actor): RequireAdmin,
|
||||||
|
Path(id): Path<Uuid>,
|
||||||
|
Json(input): Json<UpdateUserInput>,
|
||||||
|
) -> AppResult<Json<User>> {
|
||||||
|
let Some(is_admin) = input.is_admin else {
|
||||||
|
return Err(AppError::InvalidInput(
|
||||||
|
"no updatable fields supplied".into(),
|
||||||
|
));
|
||||||
|
};
|
||||||
|
let updated =
|
||||||
|
repo::user::admin_safe_set_is_admin(&state.db, actor.id, id, is_admin).await?;
|
||||||
|
Ok(Json(updated))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_user(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
RequireAdmin(actor): RequireAdmin,
|
||||||
|
Path(id): Path<Uuid>,
|
||||||
|
) -> AppResult<StatusCode> {
|
||||||
|
repo::user::admin_safe_delete(&state.db, actor.id, id).await?;
|
||||||
|
Ok(StatusCode::NO_CONTENT)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON body accepted by `POST /admin/users`.
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct CreateUserInput {
|
||||||
|
/// Desired username; the handler trims it and runs `validate_username`.
pub username: String,
|
||||||
|
/// Plain-text password; the handler runs `validate_password` and hashes
/// it before it reaches the repository.
pub password: String,
|
||||||
|
/// Defaults to false; admins may mint other admins in a single
|
||||||
|
/// call. Doing it as one POST avoids a second audit row for the
|
||||||
|
/// common "invite a co-admin" flow.
|
||||||
|
#[serde(default)]
|
||||||
|
pub is_admin: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_user(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
RequireAdmin(actor): RequireAdmin,
|
||||||
|
Json(input): Json<CreateUserInput>,
|
||||||
|
) -> AppResult<(StatusCode, Json<User>)> {
|
||||||
|
let username = input.username.trim();
|
||||||
|
// Reuse the canonical self-register validators so the admin-create
|
||||||
|
// path can never produce a username that self-register would
|
||||||
|
// reject (and vice versa).
|
||||||
|
validate_username(username)?;
|
||||||
|
validate_password(&input.password)?;
|
||||||
|
let pwhash = hash_password(&input.password)?;
|
||||||
|
let user = repo::user::admin_create_user(
|
||||||
|
&state.db,
|
||||||
|
actor.id,
|
||||||
|
username,
|
||||||
|
&pwhash,
|
||||||
|
input.is_admin,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok((StatusCode::CREATED, Json(user)))
|
||||||
|
}
|
||||||
@@ -4,6 +4,8 @@
|
|||||||
//! expire naturally rather than being explicitly invalidated, so other
|
//! expire naturally rather than being explicitly invalidated, so other
|
||||||
//! devices keep their existing logins).
|
//! devices keep their existing logins).
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
use axum::extract::{Path, State};
|
use axum::extract::{Path, State};
|
||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::response::IntoResponse;
|
use axum::response::IntoResponse;
|
||||||
@@ -26,6 +28,7 @@ use crate::repo;
|
|||||||
|
|
||||||
pub fn routes() -> Router<AppState> {
|
pub fn routes() -> Router<AppState> {
|
||||||
Router::new()
|
Router::new()
|
||||||
|
.route("/auth/config", get(auth_config))
|
||||||
.route("/auth/register", post(register))
|
.route("/auth/register", post(register))
|
||||||
.route("/auth/login", post(login))
|
.route("/auth/login", post(login))
|
||||||
.route("/auth/logout", post(logout))
|
.route("/auth/logout", post(logout))
|
||||||
@@ -39,6 +42,21 @@ pub fn routes() -> Router<AppState> {
|
|||||||
.route("/auth/tokens/:id", delete(delete_token))
|
.route("/auth/tokens/:id", delete(delete_token))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Public, unauthenticated. Exposes anonymous-relevant auth policy
|
||||||
|
/// (currently just whether self-registration is open) so the frontend
|
||||||
|
/// can render its login / register affordances correctly without a
|
||||||
|
/// probe request that would conflate "disabled" with "rate-limited".
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct AuthConfigResponse {
|
||||||
|
/// Mirrors `state.auth.allow_self_register`; when false, the
/// `register` handler answers with `AppError::Forbidden`.
pub self_register_enabled: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn auth_config(State(state): State<AppState>) -> Json<AuthConfigResponse> {
|
||||||
|
Json(AuthConfigResponse {
|
||||||
|
self_register_enabled: state.auth.allow_self_register,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
pub struct Credentials {
|
pub struct Credentials {
|
||||||
pub username: String,
|
pub username: String,
|
||||||
@@ -80,6 +98,14 @@ async fn register(
|
|||||||
jar: CookieJar,
|
jar: CookieJar,
|
||||||
Json(input): Json<Credentials>,
|
Json(input): Json<Credentials>,
|
||||||
) -> AppResult<impl IntoResponse> {
|
) -> AppResult<impl IntoResponse> {
|
||||||
|
// Rate limit before the disabled check so an operator who flips
|
||||||
|
// the toggle can't be probed for the toggle state via timing —
|
||||||
|
// disabled and enabled paths both consume a token, and disabled
|
||||||
|
// returns 403 instead of running argon2.
|
||||||
|
check_auth_rate_limit(&state, "register")?;
|
||||||
|
if !state.auth.allow_self_register {
|
||||||
|
return Err(AppError::Forbidden);
|
||||||
|
}
|
||||||
let username = input.username.trim();
|
let username = input.username.trim();
|
||||||
validate_username(username)?;
|
validate_username(username)?;
|
||||||
validate_password(&input.password)?;
|
validate_password(&input.password)?;
|
||||||
@@ -95,6 +121,7 @@ async fn login(
|
|||||||
jar: CookieJar,
|
jar: CookieJar,
|
||||||
Json(input): Json<Credentials>,
|
Json(input): Json<Credentials>,
|
||||||
) -> AppResult<impl IntoResponse> {
|
) -> AppResult<impl IntoResponse> {
|
||||||
|
check_auth_rate_limit(&state, "login")?;
|
||||||
let username = input.username.trim();
|
let username = input.username.trim();
|
||||||
if username.is_empty() || input.password.is_empty() {
|
if username.is_empty() || input.password.is_empty() {
|
||||||
return Err(AppError::InvalidInput(
|
return Err(AppError::InvalidInput(
|
||||||
@@ -102,9 +129,15 @@ async fn login(
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let user = repo::user::find_by_username(&state.db, username)
|
let user = repo::user::find_by_username(&state.db, username).await?;
|
||||||
.await?
|
let Some(user) = user else {
|
||||||
.ok_or(AppError::Unauthenticated)?;
|
// No such user. Run argon2 against a stable dummy hash so the
|
||||||
|
// response time matches the wrong-password branch — otherwise
|
||||||
|
// an attacker can enumerate usernames by timing the no-user
|
||||||
|
// 401 against the wrong-password 401.
|
||||||
|
let _ = verify_password(&input.password, dummy_password_hash());
|
||||||
|
return Err(AppError::Unauthenticated);
|
||||||
|
};
|
||||||
if !verify_password(&input.password, &user.password_hash) {
|
if !verify_password(&input.password, &user.password_hash) {
|
||||||
return Err(AppError::Unauthenticated);
|
return Err(AppError::Unauthenticated);
|
||||||
}
|
}
|
||||||
@@ -113,6 +146,21 @@ async fn login(
|
|||||||
Ok((StatusCode::OK, jar, Json(AuthResponse { user })))
|
Ok((StatusCode::OK, jar, Json(AuthResponse { user })))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lazily-computed argon2 hash used to equalise login response time
|
||||||
|
/// across the "no such user" and "wrong password" branches. Computing
|
||||||
|
/// it once (on the first login of the process) is enough — the hash is
|
||||||
|
/// never compared against a real password, only used to force argon2
|
||||||
|
/// to do the same amount of work it would for a real verify.
|
||||||
|
fn dummy_password_hash() -> &'static str {
|
||||||
|
static DUMMY: OnceLock<String> = OnceLock::new();
|
||||||
|
DUMMY
|
||||||
|
.get_or_init(|| {
|
||||||
|
crate::auth::password::hash_password("login-timing-equaliser")
|
||||||
|
.expect("hash_password on a fixed input cannot fail")
|
||||||
|
})
|
||||||
|
.as_str()
|
||||||
|
}
|
||||||
|
|
||||||
async fn logout(
|
async fn logout(
|
||||||
State(state): State<AppState>,
|
State(state): State<AppState>,
|
||||||
jar: CookieJar,
|
jar: CookieJar,
|
||||||
@@ -149,6 +197,7 @@ async fn change_password(
|
|||||||
jar: CookieJar,
|
jar: CookieJar,
|
||||||
Json(input): Json<ChangePassword>,
|
Json(input): Json<ChangePassword>,
|
||||||
) -> AppResult<impl IntoResponse> {
|
) -> AppResult<impl IntoResponse> {
|
||||||
|
check_auth_rate_limit(&state, "change_password")?;
|
||||||
if !verify_password(&input.current_password, &user.password_hash) {
|
if !verify_password(&input.current_password, &user.password_hash) {
|
||||||
return Err(AppError::Unauthenticated);
|
return Err(AppError::Unauthenticated);
|
||||||
}
|
}
|
||||||
@@ -230,8 +279,24 @@ async fn create_token(
|
|||||||
Json(input): Json<CreateTokenInput>,
|
Json(input): Json<CreateTokenInput>,
|
||||||
) -> AppResult<impl IntoResponse> {
|
) -> AppResult<impl IntoResponse> {
|
||||||
let name = input.name.trim();
|
let name = input.name.trim();
|
||||||
|
// Both arms use `ValidationFailed` (422 with field details) to
|
||||||
|
// match the structured-error shape `attach_tag` returns for the
|
||||||
|
// same kind of free-form-identifier validation. The other
|
||||||
|
// /auth/* handlers in this file use `InvalidInput` (400); the
|
||||||
|
// divergence is pre-existing and would warrant a project-wide
|
||||||
|
// pass to flip them all if the client side wants uniform per-
|
||||||
|
// field error rendering.
|
||||||
if name.is_empty() {
|
if name.is_empty() {
|
||||||
return Err(AppError::InvalidInput("token name is required".into()));
|
return Err(AppError::ValidationFailed {
|
||||||
|
message: "token name is required".into(),
|
||||||
|
details: serde_json::json!({ "name": "required" }),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if name.chars().count() > 64 {
|
||||||
|
return Err(AppError::ValidationFailed {
|
||||||
|
message: "token name too long".into(),
|
||||||
|
details: serde_json::json!({ "name": "max 64 characters" }),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
let (raw, hash) = generate_token();
|
let (raw, hash) = generate_token();
|
||||||
let token = repo::api_token::create(&state.db, user.id, name, &hash).await?;
|
let token = repo::api_token::create(&state.db, user.id, name, &hash).await?;
|
||||||
@@ -267,6 +332,18 @@ async fn start_session(
|
|||||||
Ok(jar.add(build_session_cookie(raw, &state.auth)))
|
Ok(jar.add(build_session_cookie(raw, &state.auth)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CSRF posture: `SameSite=Lax` is the project's primary CSRF defense.
|
||||||
|
// Browsers refuse to attach this cookie to cross-site POST / PATCH /
|
||||||
|
// DELETE requests, which covers every state-changing endpoint (auth
|
||||||
|
// mutations, uploads, bookmarks, collections, admin user management,
|
||||||
|
// etc. — all JSON over POST/PATCH/DELETE). Lax DOES still attach the
|
||||||
|
// cookie on top-level cross-site GETs, so this defense breaks the
|
||||||
|
// instant anyone adds a state-changing GET. If you reach for one,
|
||||||
|
// switch to `SameSite=Strict` here AND add an explicit CSRF-token
|
||||||
|
// check on the new endpoint. The Bearer-token branch in the
|
||||||
|
// extractor is unaffected (bots authenticate with the token header,
|
||||||
|
// not the cookie) and admin routes reject Bearer entirely — see
|
||||||
|
// `auth::extractor::RequireAdmin`.
|
||||||
fn build_session_cookie(raw: String, cfg: &AuthConfig) -> Cookie<'static> {
|
fn build_session_cookie(raw: String, cfg: &AuthConfig) -> Cookie<'static> {
|
||||||
let mut builder = Cookie::build((SESSION_COOKIE_NAME, raw))
|
let mut builder = Cookie::build((SESSION_COOKIE_NAME, raw))
|
||||||
.http_only(true)
|
.http_only(true)
|
||||||
@@ -293,7 +370,38 @@ fn build_expired_cookie(cfg: &AuthConfig) -> Cookie<'static> {
|
|||||||
builder.build()
|
builder.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn validate_username(u: &str) -> AppResult<()> {
|
/// Consume one token from the shared auth rate limiter. Called at the
|
||||||
|
/// start of `register`, `login`, and `change_password` so credential
|
||||||
|
/// stuffing / spraying / username-probe loops are throttled by the
|
||||||
|
/// configured budget (default 5/sec with a 10-request burst).
|
||||||
|
///
|
||||||
|
/// All three endpoints share one bucket — they all expose the same
|
||||||
|
/// argon2-verify-or-create work and the same enumeration channels, so
|
||||||
|
/// any one of them in a tight loop should trip the limit. `endpoint`
|
||||||
|
/// is included in the rate-limit-hit log line so operators can tell
|
||||||
|
/// which endpoint is being probed.
|
||||||
|
fn check_auth_rate_limit(state: &AppState, endpoint: &'static str) -> AppResult<()> {
|
||||||
|
use crate::auth::rate_limit::AcquireResult;
|
||||||
|
match state.auth_limiter.try_acquire() {
|
||||||
|
AcquireResult::Allowed => Ok(()),
|
||||||
|
AcquireResult::Denied { retry_after_secs } => {
|
||||||
|
tracing::warn!(
|
||||||
|
endpoint,
|
||||||
|
retry_after_secs,
|
||||||
|
"auth rate limit hit; returning 429"
|
||||||
|
);
|
||||||
|
Err(AppError::TooManyRequests {
|
||||||
|
retry_after_secs: Some(retry_after_secs),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exposed pub(crate) so the admin user-create handler can apply the
|
||||||
|
// same rules as self-registration. Keeping the lone canonical
|
||||||
|
// implementation here avoids the two paths drifting on min length /
|
||||||
|
// allowed character set.
|
||||||
|
pub(crate) fn validate_username(u: &str) -> AppResult<()> {
|
||||||
if u.is_empty() {
|
if u.is_empty() {
|
||||||
return Err(AppError::InvalidInput("username is required".into()));
|
return Err(AppError::InvalidInput("username is required".into()));
|
||||||
}
|
}
|
||||||
@@ -310,7 +418,7 @@ fn validate_username(u: &str) -> AppResult<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn validate_password(p: &str) -> AppResult<()> {
|
pub(crate) fn validate_password(p: &str) -> AppResult<()> {
|
||||||
if p.len() < 8 {
|
if p.len() < 8 {
|
||||||
return Err(AppError::InvalidInput(
|
return Err(AppError::InvalidInput(
|
||||||
"password must be at least 8 characters".into(),
|
"password must be at least 8 characters".into(),
|
||||||
|
|||||||
@@ -67,14 +67,7 @@ async fn create(
|
|||||||
// the foreign-key violation collapse into a generic 500.
|
// the foreign-key violation collapse into a generic 500.
|
||||||
repo::manga::get(&state.db, input.manga_id).await?;
|
repo::manga::get(&state.db, input.manga_id).await?;
|
||||||
if let Some(chapter_id) = input.chapter_id {
|
if let Some(chapter_id) = input.chapter_id {
|
||||||
let exists: Option<(Uuid,)> = sqlx::query_as(
|
if !repo::chapter::belongs_to_manga(&state.db, chapter_id, input.manga_id).await? {
|
||||||
"SELECT id FROM chapters WHERE id = $1 AND manga_id = $2",
|
|
||||||
)
|
|
||||||
.bind(chapter_id)
|
|
||||||
.bind(input.manga_id)
|
|
||||||
.fetch_optional(&state.db)
|
|
||||||
.await?;
|
|
||||||
if exists.is_none() {
|
|
||||||
return Err(AppError::NotFound);
|
return Err(AppError::NotFound);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -196,16 +196,14 @@ async fn create(
|
|||||||
|
|
||||||
async fn update(
|
async fn update(
|
||||||
State(state): State<AppState>,
|
State(state): State<AppState>,
|
||||||
CurrentUser(_user): CurrentUser,
|
CurrentUser(user): CurrentUser,
|
||||||
Path(id): Path<Uuid>,
|
Path(id): Path<Uuid>,
|
||||||
Json(patch): Json<MangaPatch>,
|
Json(patch): Json<MangaPatch>,
|
||||||
) -> AppResult<Json<MangaDetail>> {
|
) -> AppResult<Json<MangaDetail>> {
|
||||||
// TODO(auth): until uploaders are tracked (Phase 5), any signed-in
|
|
||||||
// user can edit any manga. Restrict to uploader + admin once that
|
|
||||||
// column lands.
|
|
||||||
if !repo::manga::exists(&state.db, id).await? {
|
if !repo::manga::exists(&state.db, id).await? {
|
||||||
return Err(AppError::NotFound);
|
return Err(AppError::NotFound);
|
||||||
}
|
}
|
||||||
|
require_can_edit(&state, id, user.id).await?;
|
||||||
|
|
||||||
if let Some(ref status) = patch.status {
|
if let Some(ref status) = patch.status {
|
||||||
let trimmed = status.trim();
|
let trimmed = status.trim();
|
||||||
@@ -269,16 +267,14 @@ async fn update(
|
|||||||
/// `MangaDetail`.
|
/// `MangaDetail`.
|
||||||
async fn put_cover(
|
async fn put_cover(
|
||||||
State(state): State<AppState>,
|
State(state): State<AppState>,
|
||||||
CurrentUser(_user): CurrentUser,
|
CurrentUser(user): CurrentUser,
|
||||||
Path(id): Path<Uuid>,
|
Path(id): Path<Uuid>,
|
||||||
mut multipart: Multipart,
|
mut multipart: Multipart,
|
||||||
) -> AppResult<Json<MangaDetail>> {
|
) -> AppResult<Json<MangaDetail>> {
|
||||||
// TODO(auth): until uploaders are tracked (Phase 5), any signed-in
|
|
||||||
// user can edit any manga's cover. Restrict to uploader + admin
|
|
||||||
// once that column lands.
|
|
||||||
if !repo::manga::exists(&state.db, id).await? {
|
if !repo::manga::exists(&state.db, id).await? {
|
||||||
return Err(AppError::NotFound);
|
return Err(AppError::NotFound);
|
||||||
}
|
}
|
||||||
|
require_can_edit(&state, id, user.id).await?;
|
||||||
|
|
||||||
let mut cover: Option<UploadedImage> = None;
|
let mut cover: Option<UploadedImage> = None;
|
||||||
while let Some(field) = next_field(&mut multipart).await? {
|
while let Some(field) = next_field(&mut multipart).await? {
|
||||||
@@ -320,13 +316,13 @@ async fn put_cover(
|
|||||||
/// with the unchanged detail.
|
/// with the unchanged detail.
|
||||||
async fn delete_cover(
|
async fn delete_cover(
|
||||||
State(state): State<AppState>,
|
State(state): State<AppState>,
|
||||||
CurrentUser(_user): CurrentUser,
|
CurrentUser(user): CurrentUser,
|
||||||
Path(id): Path<Uuid>,
|
Path(id): Path<Uuid>,
|
||||||
) -> AppResult<Json<MangaDetail>> {
|
) -> AppResult<Json<MangaDetail>> {
|
||||||
// TODO(auth): same caveat as put_cover.
|
|
||||||
if !repo::manga::exists(&state.db, id).await? {
|
if !repo::manga::exists(&state.db, id).await? {
|
||||||
return Err(AppError::NotFound);
|
return Err(AppError::NotFound);
|
||||||
}
|
}
|
||||||
|
require_can_edit(&state, id, user.id).await?;
|
||||||
if let Some(key) = repo::manga::get(&state.db, id).await?.cover_image_path {
|
if let Some(key) = repo::manga::get(&state.db, id).await?.cover_image_path {
|
||||||
match state.storage.delete(&key).await {
|
match state.storage.delete(&key).await {
|
||||||
Ok(()) | Err(StorageError::NotFound) => {}
|
Ok(()) | Err(StorageError::NotFound) => {}
|
||||||
@@ -348,6 +344,7 @@ async fn attach_tag(
|
|||||||
Path(id): Path<Uuid>,
|
Path(id): Path<Uuid>,
|
||||||
Json(body): Json<AttachTagBody>,
|
Json(body): Json<AttachTagBody>,
|
||||||
) -> AppResult<(StatusCode, Json<TagRef>)> {
|
) -> AppResult<(StatusCode, Json<TagRef>)> {
|
||||||
|
validate_tag_name(&body.name)?;
|
||||||
if !repo::manga::exists(&state.db, id).await? {
|
if !repo::manga::exists(&state.db, id).await? {
|
||||||
return Err(AppError::NotFound);
|
return Err(AppError::NotFound);
|
||||||
}
|
}
|
||||||
@@ -394,6 +391,27 @@ async fn detach_tag(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Request-side validation for `POST /mangas/:id/tags` body. Mirrors
|
||||||
|
/// the repo-level cap in `repo::tag::upsert_by_name` (max 64 chars
|
||||||
|
/// after trim) but surfaces the failure at the handler boundary with
|
||||||
|
/// the same envelope shape other validations use.
|
||||||
|
fn validate_tag_name(name: &str) -> AppResult<()> {
|
||||||
|
let trimmed = name.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
return Err(AppError::ValidationFailed {
|
||||||
|
message: "tag name cannot be empty".into(),
|
||||||
|
details: json!({ "name": "required" }),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if trimmed.chars().count() > 64 {
|
||||||
|
return Err(AppError::ValidationFailed {
|
||||||
|
message: "tag name too long".into(),
|
||||||
|
details: json!({ "name": "max 64 characters" }),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn validate_new_manga(input: &NewManga) -> AppResult<()> {
|
fn validate_new_manga(input: &NewManga) -> AppResult<()> {
|
||||||
if input.title.trim().is_empty() {
|
if input.title.trim().is_empty() {
|
||||||
return Err(AppError::ValidationFailed {
|
return Err(AppError::ValidationFailed {
|
||||||
@@ -413,6 +431,30 @@ fn validate_new_manga(input: &NewManga) -> AppResult<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Authorisation gate for manga mutations. The manga is assumed to
|
||||||
|
/// exist (the caller runs [`repo::manga::exists`] first so a missing id
|
||||||
|
/// surfaces as `NotFound`, not `Forbidden`).
|
||||||
|
///
|
||||||
|
/// Rule: a non-NULL `uploaded_by` must match the current user. Legacy
|
||||||
|
/// rows with `uploaded_by IS NULL` (pre-migration-0011) are still
|
||||||
|
/// editable by any signed-in user — there's nobody to gate on yet, and
|
||||||
|
/// the historical-data note in 0011 acknowledges the gap. Once an
|
||||||
|
/// admin role lands the NULL case can flip to admin-only.
|
||||||
|
///
|
||||||
|
/// Returns `Forbidden` (not `NotFound`) on owner mismatch — mangas
|
||||||
|
/// are listable via `GET /mangas`, so existence isn't a secret and
|
||||||
|
/// the more accurate 403 is fine. This deliberately differs from
|
||||||
|
/// `repo::collection::require_owner`, which collapses both states to
|
||||||
|
/// `NotFound` because collections are private to a user and existence
|
||||||
|
/// itself is information worth hiding from non-owners.
|
||||||
|
async fn require_can_edit(state: &AppState, manga_id: Uuid, user_id: Uuid) -> AppResult<()> {
|
||||||
|
match repo::manga::uploaded_by(&state.db, manga_id).await? {
|
||||||
|
Some(owner) if owner != user_id => Err(AppError::Forbidden),
|
||||||
|
// Some(owner) == user_id (good) or None (legacy row, no owner).
|
||||||
|
_ => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn validate_genre_ids(state: &AppState, ids: &[Uuid]) -> AppResult<()> {
|
async fn validate_genre_ids(state: &AppState, ids: &[Uuid]) -> AppResult<()> {
|
||||||
if ids.is_empty() {
|
if ids.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
pub mod admin;
|
||||||
pub mod auth;
|
pub mod auth;
|
||||||
pub mod authors;
|
pub mod authors;
|
||||||
pub mod bookmarks;
|
pub mod bookmarks;
|
||||||
@@ -28,4 +29,5 @@ pub fn routes() -> Router<AppState> {
|
|||||||
.merge(authors::routes())
|
.merge(authors::routes())
|
||||||
.merge(collections::routes())
|
.merge(collections::routes())
|
||||||
.merge(history::routes())
|
.merge(history::routes())
|
||||||
|
.merge(admin::routes())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,15 +12,16 @@ use tokio_util::sync::CancellationToken;
|
|||||||
use tower_http::cors::{AllowOrigin, CorsLayer};
|
use tower_http::cors::{AllowOrigin, CorsLayer};
|
||||||
use tower_http::trace::TraceLayer;
|
use tower_http::trace::TraceLayer;
|
||||||
|
|
||||||
use crate::config::{AuthConfig, Config, CrawlerConfig, CrawlerModePref, UploadConfig};
|
use crate::auth::rate_limit::AuthRateLimiter;
|
||||||
|
use crate::config::{AuthConfig, Config, CrawlerConfig, UploadConfig};
|
||||||
use crate::crawler::browser_manager::{self, BrowserManager};
|
use crate::crawler::browser_manager::{self, BrowserManager};
|
||||||
use crate::crawler::content::{self, SyncOutcome};
|
use crate::crawler::content::{self, SyncOutcome};
|
||||||
use crate::crawler::daemon::{self, ChapterDispatcher, DaemonConfig, MetadataPass};
|
use crate::crawler::daemon::{self, ChapterDispatcher, DaemonConfig, MetadataPass};
|
||||||
use crate::crawler::jobs::JobPayload;
|
use crate::crawler::jobs::JobPayload;
|
||||||
use crate::crawler::pipeline::{self, MetadataStats};
|
use crate::crawler::pipeline::{self, MetadataStats};
|
||||||
use crate::crawler::rate_limit::HostRateLimiters;
|
use crate::crawler::rate_limit::HostRateLimiters;
|
||||||
|
use crate::crawler::safety::DownloadAllowlist;
|
||||||
use crate::crawler::session;
|
use crate::crawler::session;
|
||||||
use crate::crawler::source::{target as target_source, DiscoverMode};
|
|
||||||
use crate::repo;
|
use crate::repo;
|
||||||
use crate::storage::{LocalStorage, Storage};
|
use crate::storage::{LocalStorage, Storage};
|
||||||
|
|
||||||
@@ -30,6 +31,10 @@ pub struct AppState {
|
|||||||
pub storage: Arc<dyn Storage>,
|
pub storage: Arc<dyn Storage>,
|
||||||
pub auth: AuthConfig,
|
pub auth: AuthConfig,
|
||||||
pub upload: UploadConfig,
|
pub upload: UploadConfig,
|
||||||
|
/// Shared rate limiter guarding the `/auth/*` mutation endpoints.
|
||||||
|
/// One instance per AppState so tests stay isolated across the
|
||||||
|
/// same process.
|
||||||
|
pub auth_limiter: Arc<AuthRateLimiter>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Bundle returned by [`build`]. The router is what `axum::serve` consumes;
|
/// Bundle returned by [`build`]. The router is what `axum::serve` consumes;
|
||||||
@@ -55,6 +60,13 @@ pub async fn build(config: Config) -> anyhow::Result<AppHandle> {
|
|||||||
.await?;
|
.await?;
|
||||||
sqlx::migrate!("./migrations").run(&db).await?;
|
sqlx::migrate!("./migrations").run(&db).await?;
|
||||||
|
|
||||||
|
if let Some((username, password)) = config.admin_bootstrap.as_ref() {
|
||||||
|
repo::user::bootstrap_admin(&db, username, password)
|
||||||
|
.await
|
||||||
|
.context("bootstrap_admin from ADMIN_USERNAME/ADMIN_PASSWORD env")?;
|
||||||
|
tracing::info!(admin_username = %username, "admin bootstrap ensured");
|
||||||
|
}
|
||||||
|
|
||||||
let storage: Arc<dyn Storage> = Arc::new(LocalStorage::new(config.storage_dir.clone()));
|
let storage: Arc<dyn Storage> = Arc::new(LocalStorage::new(config.storage_dir.clone()));
|
||||||
|
|
||||||
let daemon = if config.crawler.daemon_enabled {
|
let daemon = if config.crawler.daemon_enabled {
|
||||||
@@ -64,11 +76,13 @@ pub async fn build(config: Config) -> anyhow::Result<AppHandle> {
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let auth_limiter = Arc::new(AuthRateLimiter::new(config.auth.rate_limit));
|
||||||
let state = AppState {
|
let state = AppState {
|
||||||
db,
|
db,
|
||||||
storage,
|
storage,
|
||||||
auth: config.auth.clone(),
|
auth: config.auth.clone(),
|
||||||
upload: config.upload.clone(),
|
upload: config.upload.clone(),
|
||||||
|
auth_limiter,
|
||||||
};
|
};
|
||||||
let router = router(state).layer(cors_layer(&config.cors_allowed_origins));
|
let router = router(state).layer(cors_layer(&config.cors_allowed_origins));
|
||||||
Ok(AppHandle { router, daemon })
|
Ok(AppHandle { router, daemon })
|
||||||
@@ -151,8 +165,8 @@ async fn spawn_crawler_daemon(
|
|||||||
http: http.clone(),
|
http: http.clone(),
|
||||||
rate: Arc::clone(&rate),
|
rate: Arc::clone(&rate),
|
||||||
start_url: url.clone(),
|
start_url: url.clone(),
|
||||||
mode_pref: cfg.mode,
|
download_allowlist: cfg.download_allowlist.clone(),
|
||||||
incremental_stop_after: cfg.incremental_stop_after,
|
max_image_bytes: cfg.max_image_bytes,
|
||||||
});
|
});
|
||||||
m
|
m
|
||||||
});
|
});
|
||||||
@@ -163,6 +177,8 @@ async fn spawn_crawler_daemon(
|
|||||||
storage: Arc::clone(&storage),
|
storage: Arc::clone(&storage),
|
||||||
http,
|
http,
|
||||||
rate: Arc::clone(&rate),
|
rate: Arc::clone(&rate),
|
||||||
|
download_allowlist: cfg.download_allowlist.clone(),
|
||||||
|
max_image_bytes: cfg.max_image_bytes,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Shared cancellation: daemon shutdown cancels the BrowserManager's
|
// Shared cancellation: daemon shutdown cancels the BrowserManager's
|
||||||
@@ -214,21 +230,14 @@ struct RealMetadataPass {
|
|||||||
http: reqwest::Client,
|
http: reqwest::Client,
|
||||||
rate: Arc<HostRateLimiters>,
|
rate: Arc<HostRateLimiters>,
|
||||||
start_url: String,
|
start_url: String,
|
||||||
mode_pref: CrawlerModePref,
|
download_allowlist: DownloadAllowlist,
|
||||||
incremental_stop_after: usize,
|
max_image_bytes: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl MetadataPass for RealMetadataPass {
|
impl MetadataPass for RealMetadataPass {
|
||||||
async fn run(&self) -> anyhow::Result<MetadataStats> {
|
async fn run(&self) -> anyhow::Result<MetadataStats> {
|
||||||
let mode = resolve_mode(
|
let result = pipeline::run_metadata_pass(
|
||||||
&self.db,
|
|
||||||
target_source::SOURCE_ID,
|
|
||||||
self.mode_pref,
|
|
||||||
self.incremental_stop_after,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
pipeline::run_metadata_pass(
|
|
||||||
&self.browser_manager,
|
&self.browser_manager,
|
||||||
&self.db,
|
&self.db,
|
||||||
self.storage.as_ref(),
|
self.storage.as_ref(),
|
||||||
@@ -237,53 +246,16 @@ impl MetadataPass for RealMetadataPass {
|
|||||||
&self.start_url,
|
&self.start_url,
|
||||||
0,
|
0,
|
||||||
false,
|
false,
|
||||||
mode,
|
&self.download_allowlist,
|
||||||
|
self.max_image_bytes,
|
||||||
)
|
)
|
||||||
.await
|
.await;
|
||||||
}
|
if let Err(e) = &result {
|
||||||
}
|
if crate::crawler::nav::anyhow_looks_browser_dead(e) {
|
||||||
|
self.browser_manager.invalidate().await;
|
||||||
/// Pick the active mode for this tick. `Explicit` short-circuits the
|
|
||||||
/// DB lookup. `Auto` reads `seed_completed_at`: missing → Backfill
|
|
||||||
/// (initial seed for this source), present → Incremental with the
|
|
||||||
/// configured threshold.
|
|
||||||
///
|
|
||||||
/// A DB error during the Auto lookup propagates as `Err` rather than
|
|
||||||
/// silently degrading to Backfill — the daemon's `run_tick` catches
|
|
||||||
/// the error, logs, and skips the tick. That's safer than running a
|
|
||||||
/// full re-backfill (including a drop pass against stale-looking rows)
|
|
||||||
/// when the DB is flaky.
|
|
||||||
async fn resolve_mode(
|
|
||||||
db: &PgPool,
|
|
||||||
source_id: &str,
|
|
||||||
pref: CrawlerModePref,
|
|
||||||
incremental_stop_after: usize,
|
|
||||||
) -> anyhow::Result<DiscoverMode> {
|
|
||||||
match pref {
|
|
||||||
CrawlerModePref::Explicit(m) => {
|
|
||||||
tracing::info!(?m, "crawler mode: explicit (CRAWLER_MODE override)");
|
|
||||||
Ok(m)
|
|
||||||
}
|
|
||||||
CrawlerModePref::Auto => {
|
|
||||||
let seeded = repo::crawler::seed_completed_at(db, source_id)
|
|
||||||
.await
|
|
||||||
.context("seed_completed_at lookup for mode auto-detection")?;
|
|
||||||
match seeded {
|
|
||||||
Some(at) => {
|
|
||||||
tracing::info!(
|
|
||||||
seed_completed_at = %at.to_rfc3339(),
|
|
||||||
"crawler mode: auto → incremental (seed previously completed)"
|
|
||||||
);
|
|
||||||
Ok(DiscoverMode::Incremental {
|
|
||||||
stop_after_unchanged: incremental_stop_after,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
tracing::info!("crawler mode: auto → backfill (no seed marker for source)");
|
|
||||||
Ok(DiscoverMode::Backfill)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -293,6 +265,8 @@ struct RealChapterDispatcher {
|
|||||||
storage: Arc<dyn Storage>,
|
storage: Arc<dyn Storage>,
|
||||||
http: reqwest::Client,
|
http: reqwest::Client,
|
||||||
rate: Arc<HostRateLimiters>,
|
rate: Arc<HostRateLimiters>,
|
||||||
|
download_allowlist: DownloadAllowlist,
|
||||||
|
max_image_bytes: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
@@ -304,16 +278,7 @@ impl ChapterDispatcher for RealChapterDispatcher {
|
|||||||
chapter_id,
|
chapter_id,
|
||||||
source_chapter_key: _,
|
source_chapter_key: _,
|
||||||
} => {
|
} => {
|
||||||
// Look up manga_id + source_url for this chapter.
|
let row = repo::chapter::dispatch_target(&self.db, chapter_id)
|
||||||
let row: Option<(uuid::Uuid, String)> = sqlx::query_as(
|
|
||||||
"SELECT c.manga_id, cs.source_url \
|
|
||||||
FROM chapters c \
|
|
||||||
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
|
||||||
WHERE c.id = $1 \
|
|
||||||
LIMIT 1",
|
|
||||||
)
|
|
||||||
.bind(chapter_id)
|
|
||||||
.fetch_optional(&self.db)
|
|
||||||
.await
|
.await
|
||||||
.context("look up chapter for dispatch")?;
|
.context("look up chapter for dispatch")?;
|
||||||
let Some((manga_id, source_url)) = row else {
|
let Some((manga_id, source_url)) = row else {
|
||||||
@@ -321,7 +286,7 @@ impl ChapterDispatcher for RealChapterDispatcher {
|
|||||||
return Ok(SyncOutcome::Skipped);
|
return Ok(SyncOutcome::Skipped);
|
||||||
};
|
};
|
||||||
let lease = self.browser_manager.acquire().await?;
|
let lease = self.browser_manager.acquire().await?;
|
||||||
let outcome = content::sync_chapter_content(
|
let result = content::sync_chapter_content(
|
||||||
&lease,
|
&lease,
|
||||||
&self.db,
|
&self.db,
|
||||||
self.storage.as_ref(),
|
self.storage.as_ref(),
|
||||||
@@ -331,14 +296,24 @@ impl ChapterDispatcher for RealChapterDispatcher {
|
|||||||
manga_id,
|
manga_id,
|
||||||
&source_url,
|
&source_url,
|
||||||
false,
|
false,
|
||||||
|
&self.download_allowlist,
|
||||||
|
self.max_image_bytes,
|
||||||
)
|
)
|
||||||
.await?;
|
.await;
|
||||||
drop(lease);
|
drop(lease);
|
||||||
Ok(outcome)
|
match result {
|
||||||
|
Ok(outcome) => Ok(outcome),
|
||||||
|
Err(e) => {
|
||||||
|
if crate::crawler::nav::anyhow_looks_browser_dead(&e) {
|
||||||
|
self.browser_manager.invalidate().await;
|
||||||
|
}
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Other payload kinds aren't dispatched by this daemon yet —
|
// Other payload kinds aren't dispatched by this daemon yet —
|
||||||
// metadata-driven jobs (Discover/SyncManga/SyncChapterList)
|
// SyncManga / SyncChapterList are handled inline by the cron's
|
||||||
// are handled inline by the cron's metadata pass.
|
// metadata pass.
|
||||||
_ => Ok(SyncOutcome::Skipped),
|
_ => Ok(SyncOutcome::Skipped),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,19 @@
|
|||||||
//! `CurrentUser` axum extractor.
|
//! Auth extractors.
|
||||||
//!
|
//!
|
||||||
//! Resolves a request to a logged-in user by trying, in order:
|
//! Three extractors are available, in increasing strictness:
|
||||||
//! 1. a `mangalord_session` cookie (session lookup by `sha256(value)`);
|
|
||||||
//! 2. an `Authorization: Bearer <token>` header (api_token lookup).
|
|
||||||
//!
|
//!
|
||||||
//! Both paths look up by hash, never by raw value. Failure to resolve
|
//! - [`CurrentUser`] — accepts either a session cookie or an
|
||||||
//! either way returns 401 via `AppError::Unauthenticated`.
|
//! `Authorization: Bearer <token>` header. Used by ordinary
|
||||||
|
//! authenticated endpoints where bot tokens are first-class clients.
|
||||||
|
//! - [`CurrentSessionUser`] — accepts only the session cookie. Used as
|
||||||
|
//! the substrate for admin extraction so bot tokens cannot authenticate
|
||||||
|
//! as the admin (see [`RequireAdmin`]).
|
||||||
|
//! - [`RequireAdmin`] — composes over [`CurrentSessionUser`] and
|
||||||
|
//! additionally requires `user.is_admin`. Returns 403 for
|
||||||
|
//! authenticated-but-not-admin, 401 otherwise.
|
||||||
|
//!
|
||||||
|
//! All lookups go by `sha256(raw_token)` — the raw value is never stored
|
||||||
|
//! in the database.
|
||||||
|
|
||||||
use axum::async_trait;
|
use axum::async_trait;
|
||||||
use axum::extract::FromRequestParts;
|
use axum::extract::FromRequestParts;
|
||||||
@@ -61,3 +69,54 @@ impl FromRequestParts<AppState> for CurrentUser {
|
|||||||
Err(AppError::Unauthenticated)
|
Err(AppError::Unauthenticated)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cookie-only authentication. Bot/API tokens are explicitly NOT accepted
|
||||||
|
/// here — this is the substrate for [`RequireAdmin`] and exists precisely
|
||||||
|
/// to keep admin authority out of bearer-token reach.
|
||||||
|
pub struct CurrentSessionUser(pub User);
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl FromRequestParts<AppState> for CurrentSessionUser {
|
||||||
|
type Rejection = AppError;
|
||||||
|
|
||||||
|
async fn from_request_parts(
|
||||||
|
parts: &mut Parts,
|
||||||
|
state: &AppState,
|
||||||
|
) -> Result<Self, Self::Rejection> {
|
||||||
|
let jar = CookieJar::from_headers(&parts.headers);
|
||||||
|
let cookie = jar
|
||||||
|
.get(SESSION_COOKIE_NAME)
|
||||||
|
.ok_or(AppError::Unauthenticated)?;
|
||||||
|
let hash = hash_token(cookie.value());
|
||||||
|
let session = repo::session::find_active(&state.db, &hash)
|
||||||
|
.await?
|
||||||
|
.ok_or(AppError::Unauthenticated)?;
|
||||||
|
let user = repo::user::find_by_id(&state.db, session.user_id)
|
||||||
|
.await?
|
||||||
|
.ok_or(AppError::Unauthenticated)?;
|
||||||
|
Ok(CurrentSessionUser(user))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Admin-only. Composes over [`CurrentSessionUser`] so bot tokens are
|
||||||
|
/// rejected at the auth step (401) rather than the role step (403).
|
||||||
|
/// The user row is re-read every request, so demotion takes effect on
|
||||||
|
/// the very next call without needing to purge sessions.
|
||||||
|
pub struct RequireAdmin(pub User);
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl FromRequestParts<AppState> for RequireAdmin {
|
||||||
|
type Rejection = AppError;
|
||||||
|
|
||||||
|
async fn from_request_parts(
|
||||||
|
parts: &mut Parts,
|
||||||
|
state: &AppState,
|
||||||
|
) -> Result<Self, Self::Rejection> {
|
||||||
|
let CurrentSessionUser(user) =
|
||||||
|
CurrentSessionUser::from_request_parts(parts, state).await?;
|
||||||
|
if !user.is_admin {
|
||||||
|
return Err(AppError::Forbidden);
|
||||||
|
}
|
||||||
|
Ok(RequireAdmin(user))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,4 +7,5 @@
|
|||||||
|
|
||||||
pub mod extractor;
|
pub mod extractor;
|
||||||
pub mod password;
|
pub mod password;
|
||||||
|
pub mod rate_limit;
|
||||||
pub mod token;
|
pub mod token;
|
||||||
|
|||||||
179
backend/src/auth/rate_limit.rs
Normal file
179
backend/src/auth/rate_limit.rs
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
//! Per-process token-bucket rate limiter for the auth endpoints.
|
||||||
|
//!
|
||||||
|
//! Protects `/auth/login`, `/auth/register`, and `/auth/me/password`
|
||||||
|
//! from credential stuffing / password spraying / username probing.
|
||||||
|
//!
|
||||||
|
//! The current deploy puts SvelteKit's hooks.server.ts proxy in front
|
||||||
|
//! of axum without forwarding the original client IP (no
|
||||||
|
//! `X-Forwarded-For`), so per-IP buckets would all collapse to the
|
||||||
|
//! proxy container's address. Until the proxy learns to forward the
|
||||||
|
//! peer address, a single global bucket gives equivalent protection
|
||||||
|
//! against mass-attack patterns and trades a small DoS surface
|
||||||
|
//! (legitimate users sharing the limit) for simplicity.
|
||||||
|
//!
|
||||||
|
//! Each `AppState` carries its own [`AuthRateLimiter`] instance, so
|
||||||
|
//! tests run in isolated buckets and won't bleed across `#[sqlx::test]`
|
||||||
|
//! cases that share a process.
|
||||||
|
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
/// Tunable limits. `per_sec == 0` disables the limiter — used by the
|
||||||
|
/// test harness and by anyone who wants to opt out via env config.
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub struct RateLimitConfig {
|
||||||
|
pub per_sec: u32,
|
||||||
|
pub burst: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for RateLimitConfig {
|
||||||
|
/// Disabled by default. The production `AuthConfig::from_env`
|
||||||
|
/// overrides to a real limit; the test harness keeps the default
|
||||||
|
/// so existing tests don't flake against shared buckets.
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
per_sec: 0,
|
||||||
|
burst: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Production defaults: 5 requests/sec sustained, 10-request burst.
|
||||||
|
/// Tight enough to make brute force impractical, loose enough that a
|
||||||
|
/// real user mistyping their password three times in a row doesn't
|
||||||
|
/// hit it.
|
||||||
|
pub const PRODUCTION_PER_SEC: u32 = 5;
|
||||||
|
pub const PRODUCTION_BURST: u32 = 10;
|
||||||
|
|
||||||
|
struct Bucket {
|
||||||
|
tokens: f64,
|
||||||
|
last_refill: Instant,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Outcome of [`AuthRateLimiter::try_acquire`]. When `Denied`, the
|
||||||
|
/// caller can use `retry_after_secs` for a `Retry-After: N` header
|
||||||
|
/// (RFC 6585 §4) so well-behaved clients back off correctly rather
|
||||||
|
/// than retrying in a tight loop.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum AcquireResult {
|
||||||
|
Allowed,
|
||||||
|
Denied { retry_after_secs: u64 },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Single-bucket token-bucket limiter. `try_acquire` is cheap (one
|
||||||
|
/// mutex acquire, no allocations) so the auth path doesn't pay a real
|
||||||
|
/// cost for the check.
|
||||||
|
pub struct AuthRateLimiter {
|
||||||
|
cfg: RateLimitConfig,
|
||||||
|
bucket: Mutex<Bucket>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AuthRateLimiter {
|
||||||
|
pub fn new(cfg: RateLimitConfig) -> Self {
|
||||||
|
Self {
|
||||||
|
cfg,
|
||||||
|
bucket: Mutex::new(Bucket {
|
||||||
|
tokens: cfg.burst as f64,
|
||||||
|
last_refill: Instant::now(),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consume one token if available. Returns `Denied` with a
|
||||||
|
/// rounded-up seconds-until-refill so the caller can emit a
|
||||||
|
/// `Retry-After` header.
|
||||||
|
pub fn try_acquire(&self) -> AcquireResult {
|
||||||
|
if self.cfg.per_sec == 0 {
|
||||||
|
return AcquireResult::Allowed;
|
||||||
|
}
|
||||||
|
let now = Instant::now();
|
||||||
|
let mut bucket = self.bucket.lock().expect("rate limiter mutex poisoned");
|
||||||
|
let elapsed = now.duration_since(bucket.last_refill).as_secs_f64();
|
||||||
|
bucket.tokens =
|
||||||
|
(bucket.tokens + elapsed * f64::from(self.cfg.per_sec)).min(f64::from(self.cfg.burst));
|
||||||
|
bucket.last_refill = now;
|
||||||
|
if bucket.tokens >= 1.0 {
|
||||||
|
bucket.tokens -= 1.0;
|
||||||
|
AcquireResult::Allowed
|
||||||
|
} else {
|
||||||
|
// ceil((1 - tokens) / per_sec), minimum 1 — a `Retry-After: 0`
|
||||||
|
// would tell clients to retry immediately, which is what we're
|
||||||
|
// actively trying to discourage.
|
||||||
|
let deficit = 1.0 - bucket.tokens;
|
||||||
|
let wait_secs = (deficit / f64::from(self.cfg.per_sec)).ceil() as u64;
|
||||||
|
AcquireResult::Denied {
|
||||||
|
retry_after_secs: wait_secs.max(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn disabled_limiter_always_allows() {
|
||||||
|
let rl = AuthRateLimiter::new(RateLimitConfig {
|
||||||
|
per_sec: 0,
|
||||||
|
burst: 0,
|
||||||
|
});
|
||||||
|
for _ in 0..1000 {
|
||||||
|
assert_eq!(rl.try_acquire(), AcquireResult::Allowed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn burst_lets_through_initial_window_then_blocks() {
|
||||||
|
// 1/sec refill, burst 3 → first three pass back-to-back, fourth blocks.
|
||||||
|
let rl = AuthRateLimiter::new(RateLimitConfig {
|
||||||
|
per_sec: 1,
|
||||||
|
burst: 3,
|
||||||
|
});
|
||||||
|
assert_eq!(rl.try_acquire(), AcquireResult::Allowed);
|
||||||
|
assert_eq!(rl.try_acquire(), AcquireResult::Allowed);
|
||||||
|
assert_eq!(rl.try_acquire(), AcquireResult::Allowed);
|
||||||
|
match rl.try_acquire() {
|
||||||
|
AcquireResult::Denied { retry_after_secs } => {
|
||||||
|
// Bucket is at ~0 tokens, refill rate 1/sec → ~1s wait.
|
||||||
|
assert!(
|
||||||
|
retry_after_secs >= 1,
|
||||||
|
"retry_after must be at least 1s, got {retry_after_secs}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
AcquireResult::Allowed => panic!("fourth request must be denied"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokens_refill_over_time() {
|
||||||
|
// 10/sec → one token per 100ms, so the 150ms sleep below must restore one.
|
||||||
|
let rl = AuthRateLimiter::new(RateLimitConfig {
|
||||||
|
per_sec: 10,
|
||||||
|
burst: 1,
|
||||||
|
});
|
||||||
|
assert_eq!(rl.try_acquire(), AcquireResult::Allowed);
|
||||||
|
assert!(matches!(rl.try_acquire(), AcquireResult::Denied { .. }));
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(150));
|
||||||
|
assert_eq!(
|
||||||
|
rl.try_acquire(),
|
||||||
|
AcquireResult::Allowed,
|
||||||
|
"token should have refilled"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn retry_after_scales_inversely_with_refill_rate() {
|
||||||
|
// 1/sec → wait ~1s after burst exhausted.
|
||||||
|
// 10/sec → wait <1s, but we clamp to a minimum of 1s.
|
||||||
|
let slow = AuthRateLimiter::new(RateLimitConfig {
|
||||||
|
per_sec: 1,
|
||||||
|
burst: 1,
|
||||||
|
});
|
||||||
|
slow.try_acquire();
|
||||||
|
match slow.try_acquire() {
|
||||||
|
AcquireResult::Denied { retry_after_secs } => assert_eq!(retry_after_secs, 1),
|
||||||
|
_ => panic!("expected Denied"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -31,7 +31,6 @@ use mangalord::crawler::content::{self, SyncOutcome};
|
|||||||
use mangalord::crawler::pipeline;
|
use mangalord::crawler::pipeline;
|
||||||
use mangalord::crawler::rate_limit::HostRateLimiters;
|
use mangalord::crawler::rate_limit::HostRateLimiters;
|
||||||
use mangalord::crawler::session;
|
use mangalord::crawler::session;
|
||||||
use mangalord::crawler::source::DiscoverMode;
|
|
||||||
use mangalord::storage::{LocalStorage, Storage};
|
use mangalord::storage::{LocalStorage, Storage};
|
||||||
use sqlx::postgres::PgPoolOptions;
|
use sqlx::postgres::PgPoolOptions;
|
||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
@@ -63,8 +62,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let cdn_rate_ms = env_u64("CRAWLER_CDN_RATE_MS", rate_ms);
|
let cdn_rate_ms = env_u64("CRAWLER_CDN_RATE_MS", rate_ms);
|
||||||
let limit = env_u64("CRAWLER_LIMIT", 0) as usize;
|
let limit = env_u64("CRAWLER_LIMIT", 0) as usize;
|
||||||
let skip_chapters = env_bool("CRAWLER_SKIP_CHAPTERS", false);
|
let skip_chapters = env_bool("CRAWLER_SKIP_CHAPTERS", false);
|
||||||
let incremental_stop_after = env_u64("CRAWLER_INCREMENTAL_STOP_AFTER", 20).max(1) as usize;
|
|
||||||
let mode = parse_crawler_mode(incremental_stop_after)?;
|
|
||||||
let skip_chapter_content = env_bool("CRAWLER_SKIP_CHAPTER_CONTENT", false);
|
let skip_chapter_content = env_bool("CRAWLER_SKIP_CHAPTER_CONTENT", false);
|
||||||
let chapter_workers = env_u64("CRAWLER_CHAPTER_WORKERS", 1).max(1) as usize;
|
let chapter_workers = env_u64("CRAWLER_CHAPTER_WORKERS", 1).max(1) as usize;
|
||||||
let force_refetch_chapters = env_bool("CRAWLER_FORCE_REFETCH_CHAPTERS", false);
|
let force_refetch_chapters = env_bool("CRAWLER_FORCE_REFETCH_CHAPTERS", false);
|
||||||
@@ -143,7 +140,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
user_agent = ?user_agent,
|
user_agent = ?user_agent,
|
||||||
proxy = ?proxy_url,
|
proxy = ?proxy_url,
|
||||||
keep_open,
|
keep_open,
|
||||||
?mode,
|
|
||||||
storage_dir = %storage_dir.display(),
|
storage_dir = %storage_dir.display(),
|
||||||
"starting crawler"
|
"starting crawler"
|
||||||
);
|
);
|
||||||
@@ -191,7 +187,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
skip_chapter_content || !session_ready,
|
skip_chapter_content || !session_ready,
|
||||||
chapter_workers,
|
chapter_workers,
|
||||||
force_refetch_chapters,
|
force_refetch_chapters,
|
||||||
mode,
|
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -221,7 +216,6 @@ async fn run(
|
|||||||
skip_chapter_content: bool,
|
skip_chapter_content: bool,
|
||||||
chapter_workers: usize,
|
chapter_workers: usize,
|
||||||
force_refetch_chapters: bool,
|
force_refetch_chapters: bool,
|
||||||
mode: DiscoverMode,
|
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let mut rate = HostRateLimiters::new(Duration::from_millis(rate_ms));
|
let mut rate = HostRateLimiters::new(Duration::from_millis(rate_ms));
|
||||||
if let Some(host) = cdn_host {
|
if let Some(host) = cdn_host {
|
||||||
@@ -229,6 +223,39 @@ async fn run(
|
|||||||
}
|
}
|
||||||
let rate = Arc::new(rate);
|
let rate = Arc::new(rate);
|
||||||
|
|
||||||
|
// SSRF defence: only download from the catalog host + CDN host
|
||||||
|
// (plus optional CRAWLER_DOWNLOAD_ALLOWLIST extras), and cap
|
||||||
|
// single-image downloads at CRAWLER_MAX_IMAGE_BYTES bytes.
|
||||||
|
// CRAWLER_ALLOW_ANY_HOST=true short-circuits the host check for
|
||||||
|
// sharded-CDN sources; private-IP and scheme guards still apply.
|
||||||
|
let allowlist = if env_bool("CRAWLER_ALLOW_ANY_HOST", false) {
|
||||||
|
mangalord::crawler::safety::DownloadAllowlist::allow_any()
|
||||||
|
} else {
|
||||||
|
let mut allow = mangalord::crawler::safety::DownloadAllowlist::new();
|
||||||
|
if let Ok(parsed) = reqwest::Url::parse(start_url) {
|
||||||
|
if let Some(h) = parsed.host_str() {
|
||||||
|
allow = allow.allow(h);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(host) = cdn_host {
|
||||||
|
allow = allow.allow(host);
|
||||||
|
}
|
||||||
|
if let Ok(extras) = std::env::var("CRAWLER_DOWNLOAD_ALLOWLIST") {
|
||||||
|
for piece in extras.split(',') {
|
||||||
|
let trimmed = piece.trim();
|
||||||
|
if !trimmed.is_empty() {
|
||||||
|
allow = allow.allow(trimmed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
allow
|
||||||
|
};
|
||||||
|
let max_image_bytes: usize = std::env::var("CRAWLER_MAX_IMAGE_BYTES")
|
||||||
|
.ok()
|
||||||
|
.and_then(|s| s.parse().ok())
|
||||||
|
.unwrap_or(mangalord::crawler::safety::DEFAULT_MAX_IMAGE_BYTES);
|
||||||
|
let allowlist = Arc::new(allowlist);
|
||||||
|
|
||||||
let stats = pipeline::run_metadata_pass(
|
let stats = pipeline::run_metadata_pass(
|
||||||
manager.as_ref(),
|
manager.as_ref(),
|
||||||
db,
|
db,
|
||||||
@@ -238,7 +265,8 @@ async fn run(
|
|||||||
start_url,
|
start_url,
|
||||||
limit,
|
limit,
|
||||||
skip_chapters,
|
skip_chapters,
|
||||||
mode,
|
allowlist.as_ref(),
|
||||||
|
max_image_bytes,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
tracing::info!(?stats, "metadata pass complete");
|
tracing::info!(?stats, "metadata pass complete");
|
||||||
@@ -253,6 +281,8 @@ async fn run(
|
|||||||
"target",
|
"target",
|
||||||
chapter_workers,
|
chapter_workers,
|
||||||
force_refetch_chapters,
|
force_refetch_chapters,
|
||||||
|
Arc::clone(&allowlist),
|
||||||
|
max_image_bytes,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
@@ -276,6 +306,8 @@ async fn sync_bookmarked_chapter_content(
|
|||||||
source_id: &str,
|
source_id: &str,
|
||||||
workers: usize,
|
workers: usize,
|
||||||
force_refetch: bool,
|
force_refetch: bool,
|
||||||
|
allowlist: Arc<mangalord::crawler::safety::DownloadAllowlist>,
|
||||||
|
max_image_bytes: usize,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let pending: Vec<(Uuid, Uuid, String)> = sqlx::query_as(
|
let pending: Vec<(Uuid, Uuid, String)> = sqlx::query_as(
|
||||||
r#"
|
r#"
|
||||||
@@ -312,6 +344,7 @@ async fn sync_bookmarked_chapter_content(
|
|||||||
let storage = Arc::clone(&storage);
|
let storage = Arc::clone(&storage);
|
||||||
let rate = Arc::clone(&rate);
|
let rate = Arc::clone(&rate);
|
||||||
let manager = Arc::clone(&manager);
|
let manager = Arc::clone(&manager);
|
||||||
|
let allowlist = Arc::clone(&allowlist);
|
||||||
let stats = &stats;
|
let stats = &stats;
|
||||||
async move {
|
async move {
|
||||||
if session_expired.load(std::sync::atomic::Ordering::Relaxed) {
|
if session_expired.load(std::sync::atomic::Ordering::Relaxed) {
|
||||||
@@ -336,6 +369,8 @@ async fn sync_bookmarked_chapter_content(
|
|||||||
manga_id,
|
manga_id,
|
||||||
&source_url,
|
&source_url,
|
||||||
force_refetch,
|
force_refetch,
|
||||||
|
allowlist.as_ref(),
|
||||||
|
max_image_bytes,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
drop(lease);
|
drop(lease);
|
||||||
@@ -397,38 +432,6 @@ fn resolve_start_url() -> anyhow::Result<String> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the CLI's `CRAWLER_MODE`. Defaults to `backfill` because the
|
|
||||||
/// binary is operator-driven (manual reseeds, force-refetches) — the
|
|
||||||
/// auto-detect logic lives in the daemon. `auto` is rejected because
|
|
||||||
/// the CLI has no DB state to consult before the run.
|
|
||||||
fn parse_crawler_mode(incremental_stop_after: usize) -> anyhow::Result<DiscoverMode> {
|
|
||||||
parse_crawler_mode_str(
|
|
||||||
std::env::var("CRAWLER_MODE").ok().as_deref(),
|
|
||||||
incremental_stop_after,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Pure variant of [`parse_crawler_mode`] — testable without env-var
|
|
||||||
/// mutation.
|
|
||||||
fn parse_crawler_mode_str(
|
|
||||||
raw: Option<&str>,
|
|
||||||
incremental_stop_after: usize,
|
|
||||||
) -> anyhow::Result<DiscoverMode> {
|
|
||||||
match raw.map(|s| s.trim().to_ascii_lowercase()).as_deref() {
|
|
||||||
None | Some("") | Some("backfill") => Ok(DiscoverMode::Backfill),
|
|
||||||
Some("incremental") => Ok(DiscoverMode::Incremental {
|
|
||||||
stop_after_unchanged: incremental_stop_after,
|
|
||||||
}),
|
|
||||||
Some("auto") => Err(anyhow!(
|
|
||||||
"CRAWLER_MODE=auto isn't supported by the CLI (use backfill or incremental); \
|
|
||||||
the daemon does auto-detection"
|
|
||||||
)),
|
|
||||||
Some(other) => Err(anyhow!(
|
|
||||||
"CRAWLER_MODE must be one of: backfill, incremental (got {other:?})"
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn env_u64(name: &str, default: u64) -> u64 {
|
fn env_u64(name: &str, default: u64) -> u64 {
|
||||||
std::env::var(name)
|
std::env::var(name)
|
||||||
.ok()
|
.ok()
|
||||||
@@ -444,55 +447,3 @@ fn env_bool(name: &str, default: bool) -> bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn cli_mode_defaults_to_backfill_when_unset_or_blank() {
|
|
||||||
let none = parse_crawler_mode_str(None, 20).unwrap();
|
|
||||||
assert!(matches!(none, DiscoverMode::Backfill));
|
|
||||||
let blank = parse_crawler_mode_str(Some(""), 20).unwrap();
|
|
||||||
assert!(matches!(blank, DiscoverMode::Backfill));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn cli_mode_recognizes_backfill_and_incremental() {
|
|
||||||
let backfill = parse_crawler_mode_str(Some("backfill"), 20).unwrap();
|
|
||||||
assert!(matches!(backfill, DiscoverMode::Backfill));
|
|
||||||
|
|
||||||
let incremental = parse_crawler_mode_str(Some("incremental"), 9).unwrap();
|
|
||||||
assert!(matches!(
|
|
||||||
incremental,
|
|
||||||
DiscoverMode::Incremental { stop_after_unchanged: 9 }
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn cli_mode_rejects_auto_explicitly() {
|
|
||||||
let err = parse_crawler_mode_str(Some("auto"), 20).unwrap_err();
|
|
||||||
let msg = format!("{err}");
|
|
||||||
assert!(
|
|
||||||
msg.contains("daemon"),
|
|
||||||
"rejection should point operator at the daemon: {msg}"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn cli_mode_rejects_unknown_value() {
|
|
||||||
let err = parse_crawler_mode_str(Some("garbage"), 20).unwrap_err();
|
|
||||||
let msg = format!("{err}");
|
|
||||||
assert!(msg.contains("backfill"));
|
|
||||||
assert!(msg.contains("incremental"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn cli_mode_is_case_insensitive_and_trims() {
|
|
||||||
let mixed = parse_crawler_mode_str(Some(" Incremental "), 4).unwrap();
|
|
||||||
assert!(matches!(
|
|
||||||
mixed,
|
|
||||||
DiscoverMode::Incremental { stop_after_unchanged: 4 }
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,22 +5,20 @@ use chrono::NaiveTime;
|
|||||||
use chrono_tz::Tz;
|
use chrono_tz::Tz;
|
||||||
|
|
||||||
use crate::crawler::browser::LaunchOptions;
|
use crate::crawler::browser::LaunchOptions;
|
||||||
use crate::crawler::source::DiscoverMode;
|
use crate::crawler::safety::{DownloadAllowlist, DEFAULT_MAX_IMAGE_BYTES};
|
||||||
|
|
||||||
/// What `CRAWLER_MODE` was set to. `Auto` is the daemon's default —
|
|
||||||
/// pick Backfill until `seed_completed_at` is written, then flip to
|
|
||||||
/// Incremental. `Explicit` forces a single mode regardless.
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
|
||||||
pub enum CrawlerModePref {
|
|
||||||
Auto,
|
|
||||||
Explicit(DiscoverMode),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct AuthConfig {
|
pub struct AuthConfig {
|
||||||
pub cookie_secure: bool,
|
pub cookie_secure: bool,
|
||||||
pub cookie_domain: Option<String>,
|
pub cookie_domain: Option<String>,
|
||||||
pub session_ttl_days: i64,
|
pub session_ttl_days: i64,
|
||||||
|
pub rate_limit: crate::auth::rate_limit::RateLimitConfig,
|
||||||
|
/// When `false`, `POST /auth/register` returns 403
|
||||||
|
/// `registration_disabled` and the frontend hides its register
|
||||||
|
/// affordance. Admins can still mint accounts via
|
||||||
|
/// `POST /admin/users`. Defaults to `true` (open registration)
|
||||||
|
/// for backward compatibility.
|
||||||
|
pub allow_self_register: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for AuthConfig {
|
impl Default for AuthConfig {
|
||||||
@@ -29,6 +27,12 @@ impl Default for AuthConfig {
|
|||||||
cookie_secure: true,
|
cookie_secure: true,
|
||||||
cookie_domain: None,
|
cookie_domain: None,
|
||||||
session_ttl_days: 30,
|
session_ttl_days: 30,
|
||||||
|
// Disabled by default so the test harness inherits a
|
||||||
|
// non-throttling limiter. Production `from_env` overrides
|
||||||
|
// to the [`PRODUCTION_PER_SEC`]/[`PRODUCTION_BURST`]
|
||||||
|
// defaults.
|
||||||
|
rate_limit: crate::auth::rate_limit::RateLimitConfig::default(),
|
||||||
|
allow_self_register: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -62,6 +66,13 @@ pub struct Config {
|
|||||||
pub upload: UploadConfig,
|
pub upload: UploadConfig,
|
||||||
pub cors_allowed_origins: Vec<String>,
|
pub cors_allowed_origins: Vec<String>,
|
||||||
pub crawler: CrawlerConfig,
|
pub crawler: CrawlerConfig,
|
||||||
|
/// `(username, password)` for the admin user provisioned at startup
|
||||||
|
/// when both `ADMIN_USERNAME` and `ADMIN_PASSWORD` are set. `None`
|
||||||
|
/// skips the bootstrap entirely. See `repo::user::bootstrap_admin`
|
||||||
|
/// for the create-vs-promote semantics — notably the password here
|
||||||
|
/// is used only when creating a new row, never to overwrite an
|
||||||
|
/// existing one.
|
||||||
|
pub admin_bootstrap: Option<(String, String)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// All crawler-daemon knobs read from env. Mirrors the env vars the
|
/// All crawler-daemon knobs read from env. Mirrors the env vars the
|
||||||
@@ -87,12 +98,13 @@ pub struct CrawlerConfig {
|
|||||||
pub user_agent: Option<String>,
|
pub user_agent: Option<String>,
|
||||||
pub proxy: Option<String>,
|
pub proxy: Option<String>,
|
||||||
pub browser: LaunchOptions,
|
pub browser: LaunchOptions,
|
||||||
/// Mode preference for the metadata pass. Daemon default is `Auto`
|
/// Hosts the crawler is allowed to download images / covers from.
|
||||||
/// (Backfill until `seed_completed_at` is written, then Incremental).
|
/// Always seeded with the host of `start_url` and (when set) the
|
||||||
pub mode: CrawlerModePref,
|
/// configured `cdn_host`. Additional hosts can be added via
|
||||||
/// `stop_after_unchanged` threshold supplied to Incremental in both
|
/// `CRAWLER_DOWNLOAD_ALLOWLIST` (comma-separated).
|
||||||
/// `Auto` (post-seed) and `Explicit(Incremental)` modes.
|
pub download_allowlist: DownloadAllowlist,
|
||||||
pub incremental_stop_after: usize,
|
/// Hard upper bound on a single image download. Defaults to 32 MiB.
|
||||||
|
pub max_image_bytes: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for CrawlerConfig {
|
impl Default for CrawlerConfig {
|
||||||
@@ -113,8 +125,8 @@ impl Default for CrawlerConfig {
|
|||||||
user_agent: None,
|
user_agent: None,
|
||||||
proxy: None,
|
proxy: None,
|
||||||
browser: LaunchOptions::headless(),
|
browser: LaunchOptions::headless(),
|
||||||
mode: CrawlerModePref::Auto,
|
download_allowlist: DownloadAllowlist::new(),
|
||||||
incremental_stop_after: 20,
|
max_image_bytes: DEFAULT_MAX_IMAGE_BYTES,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -135,6 +147,17 @@ impl Config {
|
|||||||
.ok()
|
.ok()
|
||||||
.filter(|s| !s.is_empty()),
|
.filter(|s| !s.is_empty()),
|
||||||
session_ttl_days: env_i64("SESSION_TTL_DAYS", 30),
|
session_ttl_days: env_i64("SESSION_TTL_DAYS", 30),
|
||||||
|
rate_limit: crate::auth::rate_limit::RateLimitConfig {
|
||||||
|
per_sec: env_u64(
|
||||||
|
"AUTH_RATE_PER_SEC",
|
||||||
|
crate::auth::rate_limit::PRODUCTION_PER_SEC.into(),
|
||||||
|
) as u32,
|
||||||
|
burst: env_u64(
|
||||||
|
"AUTH_RATE_BURST",
|
||||||
|
crate::auth::rate_limit::PRODUCTION_BURST.into(),
|
||||||
|
) as u32,
|
||||||
|
},
|
||||||
|
allow_self_register: env_bool("ALLOW_SELF_REGISTER", true),
|
||||||
},
|
},
|
||||||
upload: UploadConfig {
|
upload: UploadConfig {
|
||||||
max_request_bytes: env_usize("MAX_REQUEST_BYTES", 200 * 1024 * 1024),
|
max_request_bytes: env_usize("MAX_REQUEST_BYTES", 200 * 1024 * 1024),
|
||||||
@@ -150,10 +173,21 @@ impl Config {
|
|||||||
})
|
})
|
||||||
.unwrap_or_default(),
|
.unwrap_or_default(),
|
||||||
crawler: CrawlerConfig::from_env()?,
|
crawler: CrawlerConfig::from_env()?,
|
||||||
|
admin_bootstrap: admin_bootstrap_from_env(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `Some((username, password))` only when BOTH `ADMIN_USERNAME`
/// and `ADMIN_PASSWORD` are set and non-blank. Half-set configuration is
/// treated as "no bootstrap" rather than a hard error, so an operator
/// can comment out one env var without crashing the server.
///
/// A value that is empty or consists solely of whitespace counts as
/// unset — the same `trim().is_empty()` test this file applies to
/// `CRAWLER_START_URL` / `CRAWLER_CDN_HOST` — so a stray `ADMIN_USERNAME=" "`
/// cannot provision an admin with a blank name. Accepted values are
/// returned verbatim (not trimmed).
fn admin_bootstrap_from_env() -> Option<(String, String)> {
    // Reads one variable, mapping unset / non-UTF-8 / blank to `None`.
    let non_blank = |name: &str| -> Option<String> {
        std::env::var(name).ok().filter(|v| !v.trim().is_empty())
    };
    let username = non_blank("ADMIN_USERNAME")?;
    let password = non_blank("ADMIN_PASSWORD")?;
    Some((username, password))
}
|
||||||
|
|
||||||
impl CrawlerConfig {
|
impl CrawlerConfig {
|
||||||
pub fn from_env() -> anyhow::Result<Self> {
|
pub fn from_env() -> anyhow::Result<Self> {
|
||||||
// Parse CRAWLER_DAILY_AT (HH:MM, 24h). Invalid → fail fast.
|
// Parse CRAWLER_DAILY_AT (HH:MM, 24h). Invalid → fail fast.
|
||||||
@@ -169,9 +203,14 @@ impl CrawlerConfig {
|
|||||||
.parse()
|
.parse()
|
||||||
.map_err(|e| anyhow::anyhow!("CRAWLER_TZ must be a valid IANA TZ (got {raw:?}): {e}"))?,
|
.map_err(|e| anyhow::anyhow!("CRAWLER_TZ must be a valid IANA TZ (got {raw:?}): {e}"))?,
|
||||||
};
|
};
|
||||||
let incremental_stop_after =
|
let start_url = std::env::var("CRAWLER_START_URL")
|
||||||
env_u64("CRAWLER_INCREMENTAL_STOP_AFTER", 20).max(1) as usize;
|
.ok()
|
||||||
let mode = parse_mode_env(incremental_stop_after)?;
|
.filter(|s| !s.trim().is_empty());
|
||||||
|
let cdn_host = std::env::var("CRAWLER_CDN_HOST")
|
||||||
|
.ok()
|
||||||
|
.filter(|s| !s.trim().is_empty());
|
||||||
|
let download_allowlist =
|
||||||
|
build_download_allowlist(start_url.as_deref(), cdn_host.as_deref());
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
daemon_enabled: env_bool("CRAWLER_DAEMON", true),
|
daemon_enabled: env_bool("CRAWLER_DAEMON", true),
|
||||||
daily_at,
|
daily_at,
|
||||||
@@ -179,13 +218,9 @@ impl CrawlerConfig {
|
|||||||
idle_timeout: Duration::from_secs(env_u64("CRAWLER_IDLE_TIMEOUT_S", 600)),
|
idle_timeout: Duration::from_secs(env_u64("CRAWLER_IDLE_TIMEOUT_S", 600)),
|
||||||
chapter_workers: env_u64("CRAWLER_CHAPTER_WORKERS", 1).max(1) as usize,
|
chapter_workers: env_u64("CRAWLER_CHAPTER_WORKERS", 1).max(1) as usize,
|
||||||
retention_days: env_u64("CRAWLER_JOB_RETENTION_DAYS", 7) as u32,
|
retention_days: env_u64("CRAWLER_JOB_RETENTION_DAYS", 7) as u32,
|
||||||
start_url: std::env::var("CRAWLER_START_URL")
|
start_url,
|
||||||
.ok()
|
|
||||||
.filter(|s| !s.trim().is_empty()),
|
|
||||||
rate_ms: env_u64("CRAWLER_RATE_MS", 1000),
|
rate_ms: env_u64("CRAWLER_RATE_MS", 1000),
|
||||||
cdn_host: std::env::var("CRAWLER_CDN_HOST")
|
cdn_host,
|
||||||
.ok()
|
|
||||||
.filter(|s| !s.trim().is_empty()),
|
|
||||||
cdn_rate_ms: env_u64("CRAWLER_CDN_RATE_MS", env_u64("CRAWLER_RATE_MS", 1000)),
|
cdn_rate_ms: env_u64("CRAWLER_CDN_RATE_MS", env_u64("CRAWLER_RATE_MS", 1000)),
|
||||||
phpsessid: std::env::var("CRAWLER_PHPSESSID")
|
phpsessid: std::env::var("CRAWLER_PHPSESSID")
|
||||||
.ok()
|
.ok()
|
||||||
@@ -200,37 +235,51 @@ impl CrawlerConfig {
|
|||||||
.ok()
|
.ok()
|
||||||
.filter(|s| !s.trim().is_empty()),
|
.filter(|s| !s.trim().is_empty()),
|
||||||
browser: LaunchOptions::from_env(),
|
browser: LaunchOptions::from_env(),
|
||||||
mode,
|
download_allowlist,
|
||||||
incremental_stop_after,
|
max_image_bytes: env_usize("CRAWLER_MAX_IMAGE_BYTES", DEFAULT_MAX_IMAGE_BYTES),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse `CRAWLER_MODE`. Empty/unset → `Auto`. Recognized values are
|
/// Build the download allowlist from env. Always includes
|
||||||
/// `auto`, `backfill`, and `incremental` (case-insensitive). Anything
|
/// `CRAWLER_START_URL`'s host (so the crawler can fetch covers from
|
||||||
/// else is a hard error so a typo can't silently fall through to the
|
/// the catalog itself) and `CRAWLER_CDN_HOST` when set. Additional
|
||||||
/// default and mask itself.
|
/// hosts can be supplied via `CRAWLER_DOWNLOAD_ALLOWLIST` (comma-
|
||||||
fn parse_mode_env(incremental_stop_after: usize) -> anyhow::Result<CrawlerModePref> {
|
/// separated). Empty by default — meaning the crawler refuses to
|
||||||
parse_mode_str(std::env::var("CRAWLER_MODE").ok().as_deref(), incremental_stop_after)
|
/// download anything when no source is configured, which is the safe
|
||||||
|
/// fail-closed posture.
|
||||||
|
///
|
||||||
|
/// `CRAWLER_ALLOW_ANY_HOST=true` short-circuits the host enumeration
|
||||||
|
/// for operators whose sources shard across numbered CDN subdomains.
|
||||||
|
/// Scheme + private-IP defenses still apply.
|
||||||
|
fn build_download_allowlist(
|
||||||
|
start_url: Option<&str>,
|
||||||
|
cdn_host: Option<&str>,
|
||||||
|
) -> DownloadAllowlist {
|
||||||
|
if env_bool("CRAWLER_ALLOW_ANY_HOST", false) {
|
||||||
|
return DownloadAllowlist::allow_any();
|
||||||
}
|
}
|
||||||
|
let mut allow = DownloadAllowlist::new();
|
||||||
/// Pure variant of [`parse_mode_env`] — testable without env-var
|
if let Some(url) = start_url {
|
||||||
/// mutation. Takes the raw value (or `None` if unset).
|
if let Ok(parsed) = reqwest::Url::parse(url) {
|
||||||
pub(crate) fn parse_mode_str(
|
if let Some(h) = parsed.host_str() {
|
||||||
raw: Option<&str>,
|
allow = allow.allow(h);
|
||||||
incremental_stop_after: usize,
|
|
||||||
) -> anyhow::Result<CrawlerModePref> {
|
|
||||||
match raw.map(|s| s.trim().to_ascii_lowercase()).as_deref() {
|
|
||||||
None | Some("") | Some("auto") => Ok(CrawlerModePref::Auto),
|
|
||||||
Some("backfill") => Ok(CrawlerModePref::Explicit(DiscoverMode::Backfill)),
|
|
||||||
Some("incremental") => Ok(CrawlerModePref::Explicit(DiscoverMode::Incremental {
|
|
||||||
stop_after_unchanged: incremental_stop_after,
|
|
||||||
})),
|
|
||||||
Some(other) => Err(anyhow::anyhow!(
|
|
||||||
"CRAWLER_MODE must be one of: auto, backfill, incremental (got {other:?})"
|
|
||||||
)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if let Some(host) = cdn_host {
|
||||||
|
allow = allow.allow(host);
|
||||||
|
}
|
||||||
|
if let Ok(extras) = std::env::var("CRAWLER_DOWNLOAD_ALLOWLIST") {
|
||||||
|
for piece in extras.split(',') {
|
||||||
|
let trimmed = piece.trim();
|
||||||
|
if !trimmed.is_empty() {
|
||||||
|
allow = allow.allow(trimmed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
allow
|
||||||
|
}
|
||||||
|
|
||||||
fn env_u64(name: &str, default: u64) -> u64 {
|
fn env_u64(name: &str, default: u64) -> u64 {
|
||||||
std::env::var(name)
|
std::env::var(name)
|
||||||
@@ -261,62 +310,3 @@ fn env_usize(name: &str, default: usize) -> usize {
|
|||||||
.unwrap_or(default)
|
.unwrap_or(default)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_mode_str_defaults_to_auto_when_unset_or_blank() {
|
|
||||||
let none = parse_mode_str(None, 20).unwrap();
|
|
||||||
assert!(matches!(none, CrawlerModePref::Auto));
|
|
||||||
let blank = parse_mode_str(Some(""), 20).unwrap();
|
|
||||||
assert!(matches!(blank, CrawlerModePref::Auto));
|
|
||||||
let whitespace = parse_mode_str(Some(" "), 20).unwrap();
|
|
||||||
assert!(matches!(whitespace, CrawlerModePref::Auto));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_mode_str_recognizes_each_keyword() {
|
|
||||||
let auto = parse_mode_str(Some("auto"), 20).unwrap();
|
|
||||||
assert!(matches!(auto, CrawlerModePref::Auto));
|
|
||||||
|
|
||||||
let backfill = parse_mode_str(Some("backfill"), 20).unwrap();
|
|
||||||
assert!(matches!(
|
|
||||||
backfill,
|
|
||||||
CrawlerModePref::Explicit(DiscoverMode::Backfill)
|
|
||||||
));
|
|
||||||
|
|
||||||
let incremental = parse_mode_str(Some("incremental"), 7).unwrap();
|
|
||||||
assert!(matches!(
|
|
||||||
incremental,
|
|
||||||
CrawlerModePref::Explicit(DiscoverMode::Incremental {
|
|
||||||
stop_after_unchanged: 7
|
|
||||||
})
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_mode_str_is_case_insensitive_and_trims_whitespace() {
|
|
||||||
let mixed = parse_mode_str(Some(" Incremental "), 5).unwrap();
|
|
||||||
assert!(matches!(
|
|
||||||
mixed,
|
|
||||||
CrawlerModePref::Explicit(DiscoverMode::Incremental {
|
|
||||||
stop_after_unchanged: 5
|
|
||||||
})
|
|
||||||
));
|
|
||||||
let upper = parse_mode_str(Some("BACKFILL"), 5).unwrap();
|
|
||||||
assert!(matches!(
|
|
||||||
upper,
|
|
||||||
CrawlerModePref::Explicit(DiscoverMode::Backfill)
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_mode_str_hard_errors_on_unknown_value() {
|
|
||||||
let err = parse_mode_str(Some("backfil"), 20).unwrap_err();
|
|
||||||
let msg = format!("{err}");
|
|
||||||
assert!(msg.contains("backfill"), "error should list valid values: {msg}");
|
|
||||||
assert!(msg.contains("auto"));
|
|
||||||
assert!(msg.contains("incremental"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,10 +1,17 @@
|
|||||||
//! Chromium launcher and lifecycle.
|
//! Chromium launcher and lifecycle.
|
||||||
//!
|
//!
|
||||||
//! Uses `chromiumoxide`'s `fetcher` feature so we don't depend on a
|
//! By default uses `chromiumoxide`'s `fetcher` feature — first call
|
||||||
//! system Chrome install — first call downloads a known-good revision
|
//! downloads a known-good revision into a cache dir and reuses it
|
||||||
//! into a cache dir and reuses it forever after. `BrowserMode` toggles
|
//! forever after. Set `CRAWLER_CHROMIUM_BINARY` to skip the fetcher
|
||||||
//! headed vs headless; the headed path needs a display (real `$DISPLAY`
|
//! and use a system-installed Chromium instead; required on platforms
|
||||||
//! or `xvfb-run`).
|
//! where the upstream snapshot bucket has no usable build (notably
|
||||||
|
//! `Linux_arm64` / Raspberry Pi). Debian's package is at
|
||||||
|
//! `/usr/bin/chromium` or `/usr/bin/chromium-headless-shell`; Ubuntu
|
||||||
|
//! ships it as `chromium-browser` at a different path — don't paste
|
||||||
|
//! the wrong one.
|
||||||
|
//!
|
||||||
|
//! `BrowserMode` toggles headed vs headless; the headed path needs a
|
||||||
|
//! display (real `$DISPLAY` or `xvfb-run`).
|
||||||
//!
|
//!
|
||||||
//! Extra Chromium command-line flags can be supplied through
|
//! Extra Chromium command-line flags can be supplied through
|
||||||
//! [`LaunchOptions::extra_args`] in code, or via the
|
//! [`LaunchOptions::extra_args`] in code, or via the
|
||||||
@@ -121,34 +128,62 @@ impl Handle {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Closes the browser and awaits the driver task. If other Arcs to
|
/// Closes the browser and awaits the driver task. If other Arcs to
|
||||||
/// the browser are still alive we fall back to drop-kills-Chromium
|
/// the browser are still alive we can't issue a clean CDP `close`,
|
||||||
/// semantics and just join the driver — this is the rare case where
|
/// so we abort the driver task instead — otherwise `handler.next()`
|
||||||
/// shutdown raced an outstanding worker; the OS-level kill is the
|
/// keeps polling forever and `Handle::close` hangs (chromiumoxide's
|
||||||
/// safety net.
|
/// handler stream doesn't end on its own when the underlying WS
|
||||||
|
/// dies). Chromium itself is reaped by kill-on-drop once the last
|
||||||
|
/// `Arc<Browser>` is dropped.
|
||||||
pub async fn close(self) -> anyhow::Result<()> {
|
pub async fn close(self) -> anyhow::Result<()> {
|
||||||
match Arc::try_unwrap(self.browser) {
|
close_or_abort(self.browser, self.driver, |mut owned| async move {
|
||||||
Ok(mut owned) => {
|
|
||||||
let _ = owned.close().await;
|
let _ = owned.close().await;
|
||||||
let _ = owned.wait().await;
|
let _ = owned.wait().await;
|
||||||
}
|
})
|
||||||
Err(shared) => {
|
.await;
|
||||||
tracing::warn!(
|
|
||||||
strong_count = Arc::strong_count(&shared),
|
|
||||||
"Handle::close while Arc<Browser> still shared — relying on kill-on-drop"
|
|
||||||
);
|
|
||||||
drop(shared);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let _ = self.driver.await;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Launches Chromium. Downloads it on first run via the `fetcher`
|
/// Shutdown core for [`Handle::close`], extracted so it can be unit-
|
||||||
/// feature; subsequent runs hit the cache. The cache dir is
|
/// tested without launching real Chromium. When `arc` is uniquely owned,
|
||||||
|
/// `on_owned` runs against the owned value and the driver is awaited
|
||||||
|
/// normally. When other Arc holders exist, the driver is aborted before
|
||||||
|
/// awaiting it so shutdown returns promptly.
|
||||||
|
async fn close_or_abort<T, F, Fut>(arc: Arc<T>, driver: JoinHandle<()>, on_owned: F)
|
||||||
|
where
|
||||||
|
T: Send + 'static,
|
||||||
|
F: FnOnce(T) -> Fut + Send,
|
||||||
|
Fut: std::future::Future<Output = ()> + Send,
|
||||||
|
{
|
||||||
|
match Arc::try_unwrap(arc) {
|
||||||
|
Ok(owned) => {
|
||||||
|
on_owned(owned).await;
|
||||||
|
let _ = driver.await;
|
||||||
|
}
|
||||||
|
Err(shared) => {
|
||||||
|
tracing::warn!(
|
||||||
|
strong_count = Arc::strong_count(&shared),
|
||||||
|
"Handle::close while Arc still shared — aborting driver, relying on kill-on-drop"
|
||||||
|
);
|
||||||
|
drop(shared);
|
||||||
|
driver.abort();
|
||||||
|
let _ = driver.await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Launches Chromium. If `CRAWLER_CHROMIUM_BINARY` is set, uses that
|
||||||
|
/// path directly. Otherwise downloads via the `fetcher` feature on
|
||||||
|
/// first run and hits the cache after that. The fetcher cache dir is
|
||||||
/// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`,
|
/// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`,
|
||||||
/// else `./.chromium-cache` as a last-resort repo-local fallback.
|
/// else `./.chromium-cache` as a last-resort repo-local fallback.
|
||||||
pub async fn launch(options: LaunchOptions) -> anyhow::Result<Handle> {
|
pub async fn launch(options: LaunchOptions) -> anyhow::Result<Handle> {
|
||||||
|
let executable = match system_chromium_path_from_env() {
|
||||||
|
Some(path) => {
|
||||||
|
tracing::info!(path = %path.display(), "using system chromium (CRAWLER_CHROMIUM_BINARY)");
|
||||||
|
path
|
||||||
|
}
|
||||||
|
None => {
|
||||||
let cache = cache_dir()?;
|
let cache = cache_dir()?;
|
||||||
tokio::fs::create_dir_all(&cache)
|
tokio::fs::create_dir_all(&cache)
|
||||||
.await
|
.await
|
||||||
@@ -166,9 +201,12 @@ pub async fn launch(options: LaunchOptions) -> anyhow::Result<Handle> {
|
|||||||
.await
|
.await
|
||||||
.context("download chromium via fetcher")?;
|
.context("download chromium via fetcher")?;
|
||||||
tracing::info!(executable = %info.executable_path.display(), "chromium ready");
|
tracing::info!(executable = %info.executable_path.display(), "chromium ready");
|
||||||
|
info.executable_path
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let mut builder = BrowserConfig::builder()
|
let mut builder = BrowserConfig::builder()
|
||||||
.chrome_executable(info.executable_path)
|
.chrome_executable(executable)
|
||||||
// Linux containers / CI commonly lack the user namespaces
|
// Linux containers / CI commonly lack the user namespaces
|
||||||
// Chromium's sandbox wants. Disable it; the crawler runs in its
|
// Chromium's sandbox wants. Disable it; the crawler runs in its
|
||||||
// own container anyway.
|
// own container anyway.
|
||||||
@@ -225,6 +263,24 @@ fn cache_dir() -> anyhow::Result<PathBuf> {
|
|||||||
Ok(PathBuf::from("./.chromium-cache"))
|
Ok(PathBuf::from("./.chromium-cache"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads `CRAWLER_CHROMIUM_BINARY` and delegates to the pure helper.
|
||||||
|
/// Thin wrapper kept separate so the decision logic can be unit-tested
|
||||||
|
/// without mutating the process environment.
|
||||||
|
fn system_chromium_path_from_env() -> Option<PathBuf> {
|
||||||
|
system_chromium_path_from_value(std::env::var_os("CRAWLER_CHROMIUM_BINARY").as_deref())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts the raw value of the Chromium-binary override into a path.
///
/// Returns `Some(path)` when `raw` is present and non-empty, and `None`
/// when it is absent *or* empty. Treating an empty value as "unset" matters
/// because compose-style `${VAR:-}` substitution exports a blank variable
/// when the operator leaves it unfilled; passing that empty path on to the
/// browser launcher would fail in a confusing way.
pub(crate) fn system_chromium_path_from_value(
    raw: Option<&std::ffi::OsStr>,
) -> Option<PathBuf> {
    match raw {
        Some(value) if !value.is_empty() => Some(PathBuf::from(value)),
        _ => None,
    }
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -252,6 +308,33 @@ mod tests {
|
|||||||
assert!(parse_args(" \t\n").is_empty());
|
assert!(parse_args(" \t\n").is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn system_chromium_path_returns_some_when_value_set() {
    // A non-empty value must come back unchanged as a `PathBuf`.
    let configured = std::ffi::OsStr::new("/usr/bin/chromium-headless-shell");
    let expected = PathBuf::from("/usr/bin/chromium-headless-shell");
    let resolved = system_chromium_path_from_value(Some(configured));
    assert_eq!(resolved, Some(expected));
}
|
||||||
|
|
||||||
|
#[test]
fn system_chromium_path_returns_none_when_unset() {
    // No value at all means no override path.
    let resolved = system_chromium_path_from_value(None);
    assert!(resolved.is_none());
}
|
||||||
|
|
||||||
|
#[test]
fn system_chromium_path_treats_empty_as_unset() {
    // A compose `${VAR:-}` substitution leaves the variable exported but
    // blank when the operator did not fill it in. That must be handled
    // exactly like "unset" so the launcher takes the fetcher path rather
    // than handing chromiumoxide an empty executable path.
    let blank = std::ffi::OsStr::new("");
    let resolved = system_chromium_path_from_value(Some(blank));
    assert!(resolved.is_none());
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn default_launch_options_are_headless() {
|
fn default_launch_options_are_headless() {
|
||||||
// Headless is the production-safe default — no display required,
|
// Headless is the production-safe default — no display required,
|
||||||
@@ -261,4 +344,54 @@ mod tests {
|
|||||||
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
|
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
|
||||||
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
|
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression: if another Arc<Browser> outlives `Handle::close`, the
|
||||||
|
// old code awaited the driver task forever because the chromiumoxide
|
||||||
|
// handler stream doesn't return None on its own. Aborting the driver
|
||||||
|
// unblocks shutdown even when kill-on-drop can't fire yet.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn close_or_abort_returns_when_arc_is_shared() {
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
let arc = Arc::new(());
|
||||||
|
let _keepalive = Arc::clone(&arc); // forces try_unwrap to fail
|
||||||
|
let driver = tokio::spawn(std::future::pending::<()>());
|
||||||
|
let on_owned_ran = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
|
let flag = Arc::clone(&on_owned_ran);
|
||||||
|
let fut = close_or_abort(arc, driver, move |_| {
|
||||||
|
let flag = Arc::clone(&flag);
|
||||||
|
async move { flag.store(true, Ordering::Release) }
|
||||||
|
});
|
||||||
|
|
||||||
|
tokio::time::timeout(Duration::from_secs(2), fut)
|
||||||
|
.await
|
||||||
|
.expect("close_or_abort must not hang when driver is pending and Arc is shared");
|
||||||
|
assert!(
|
||||||
|
!on_owned_ran.load(Ordering::Acquire),
|
||||||
|
"on_owned must not run when the Arc is still shared"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn close_or_abort_runs_on_owned_when_arc_is_unique() {
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
|
let arc = Arc::new(());
|
||||||
|
let driver = tokio::spawn(async {}); // completes immediately
|
||||||
|
let on_owned_ran = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
|
let flag = Arc::clone(&on_owned_ran);
|
||||||
|
close_or_abort(arc, driver, move |_| {
|
||||||
|
let flag = Arc::clone(&flag);
|
||||||
|
async move { flag.store(true, Ordering::Release) }
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
on_owned_ran.load(Ordering::Acquire),
|
||||||
|
"on_owned must run when the Arc is unique"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -145,6 +145,28 @@ impl BrowserManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Mark the cached browser handle as unhealthy. The next `acquire`
|
||||||
|
/// will re-launch Chromium from scratch.
|
||||||
|
///
|
||||||
|
/// Same semantics as `shutdown` — the difference is intent:
|
||||||
|
/// `shutdown` runs once at daemon teardown, while `invalidate` is a
|
||||||
|
/// recovery hook callers fire after a CDP / connection / navigation
|
||||||
|
/// failure that suggests the underlying process has died. Calling
|
||||||
|
/// this while other workers still hold leases is safe — their
|
||||||
|
/// outstanding CDP operations will return channel-closed errors
|
||||||
|
/// and those workers will then re-acquire (re-launching Chromium).
|
||||||
|
///
|
||||||
|
/// Idempotent: calling on an already-invalidated manager is a
|
||||||
|
/// no-op.
|
||||||
|
pub async fn invalidate(&self) {
|
||||||
|
let mut guard = self.inner.lock().await;
|
||||||
|
guard.shared = None;
|
||||||
|
if let Some(handle) = guard.handle.take() {
|
||||||
|
let _ = handle.close().await;
|
||||||
|
tracing::warn!("BrowserManager: handle invalidated — next acquire will relaunch");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn idle_timeout(&self) -> Duration {
|
fn idle_timeout(&self) -> Duration {
|
||||||
self.idle_timeout
|
self.idle_timeout
|
||||||
}
|
}
|
||||||
@@ -231,6 +253,23 @@ mod tests {
|
|||||||
assert_send_sync(&h);
|
assert_send_sync(&h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Invalidate is the only `BrowserManager` method that's safe to
|
||||||
|
/// exercise in a unit test without launching Chromium — it's a
|
||||||
|
/// no-op when no handle has been cached, and that path is exactly
|
||||||
|
/// the one we want to verify is idempotent.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn invalidate_is_a_noop_when_no_handle_cached() {
|
||||||
|
let mgr = BrowserManager::new(
|
||||||
|
crate::crawler::browser::LaunchOptions::default(),
|
||||||
|
Duration::ZERO,
|
||||||
|
noop_on_launch(),
|
||||||
|
);
|
||||||
|
// Two back-to-back invalidates must both complete; the second
|
||||||
|
// would hang or panic if the first had left torn state.
|
||||||
|
mgr.invalidate().await;
|
||||||
|
mgr.invalidate().await;
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn active_tracker_signals_idle_only_on_zero_transition() {
|
async fn active_tracker_signals_idle_only_on_zero_transition() {
|
||||||
let tracker = ActiveTracker::new();
|
let tracker = ActiveTracker::new();
|
||||||
|
|||||||
@@ -18,7 +18,8 @@ use uuid::Uuid;
|
|||||||
|
|
||||||
use crate::crawler::detect::PageError;
|
use crate::crawler::detect::PageError;
|
||||||
use crate::crawler::rate_limit::HostRateLimiters;
|
use crate::crawler::rate_limit::HostRateLimiters;
|
||||||
use crate::crawler::session;
|
use crate::crawler::safety::{fetch_bytes_capped, looks_like_image, DownloadAllowlist};
|
||||||
|
use crate::crawler::session::{self, ChapterProbe};
|
||||||
use crate::storage::Storage;
|
use crate::storage::Storage;
|
||||||
|
|
||||||
/// Parse the chapter page DOM and return the page images in `pageN`
|
/// Parse the chapter page DOM and return the page images in `pageN`
|
||||||
@@ -88,6 +89,8 @@ pub async fn sync_chapter_content(
|
|||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
source_url: &str,
|
source_url: &str,
|
||||||
force_refetch: bool,
|
force_refetch: bool,
|
||||||
|
allowlist: &DownloadAllowlist,
|
||||||
|
max_image_bytes: usize,
|
||||||
) -> anyhow::Result<SyncOutcome> {
|
) -> anyhow::Result<SyncOutcome> {
|
||||||
// Skip if already fetched, unless caller explicitly forces.
|
// Skip if already fetched, unless caller explicitly forces.
|
||||||
if !force_refetch {
|
if !force_refetch {
|
||||||
@@ -108,18 +111,42 @@ pub async fn sync_chapter_content(
|
|||||||
.new_page(source_url)
|
.new_page(source_url)
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("open chapter page {source_url}"))?;
|
.with_context(|| format!("open chapter page {source_url}"))?;
|
||||||
page.wait_for_navigation().await.context("wait for chapter nav")?;
|
crate::crawler::nav::wait_for_nav(&page)
|
||||||
|
.await
|
||||||
// Session probe: avatar present == still logged in. Missing means
|
.context("wait for chapter nav")?;
|
||||||
// PHPSESSID expired; bail the entire crawler run.
|
// Best-effort wait for the reader marker — same partial-render
|
||||||
if page.find_element("#avatar_menu").await.is_err() {
|
// race that bit the chapter-list parser can hit here. Timeout is
|
||||||
page.close().await.ok();
|
// not an error; the chapter probe + parser sentinels still catch
|
||||||
return Ok(SyncOutcome::SessionExpired);
|
// real failures.
|
||||||
}
|
let _ = crate::crawler::nav::wait_for_selector(
|
||||||
|
&page,
|
||||||
|
"a#pic_container",
|
||||||
|
crate::crawler::nav::SELECTOR_TIMEOUT,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
let html = page.content().await.context("read chapter html")?;
|
let html = page.content().await.context("read chapter html")?;
|
||||||
page.close().await.ok();
|
page.close().await.ok();
|
||||||
|
|
||||||
|
// Three-way session classification: distinguishes a transient
|
||||||
|
// hiccup (broken-page body or logged-in-but-no-reader) from a
|
||||||
|
// genuine PHPSESSID expiry (no reader and no avatar widget). The
|
||||||
|
// earlier binary `#avatar_menu` check conflated both and froze
|
||||||
|
// every worker on a layout shift.
|
||||||
|
match session::classify_chapter_probe(&html) {
|
||||||
|
ChapterProbe::Unauthenticated => return Ok(SyncOutcome::SessionExpired),
|
||||||
|
ChapterProbe::Transient => {
|
||||||
|
// Surface as a typed Err so the dispatcher path runs
|
||||||
|
// ack_failed with exponential backoff (rather than the
|
||||||
|
// session-expired sticky flag).
|
||||||
|
anyhow::bail!(
|
||||||
|
"chapter page at {source_url} returned a transient response \
|
||||||
|
(broken-page body or reader didn't render); will retry"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ChapterProbe::Ok => {}
|
||||||
|
}
|
||||||
|
|
||||||
let images = parse_chapter_pages(&html)
|
let images = parse_chapter_pages(&html)
|
||||||
.with_context(|| format!("parse chapter pages at {source_url}"))?;
|
.with_context(|| format!("parse chapter pages at {source_url}"))?;
|
||||||
if images.is_empty() {
|
if images.is_empty() {
|
||||||
@@ -138,18 +165,29 @@ pub async fn sync_chapter_content(
|
|||||||
format!("join image URL {} onto {source_url}", img.url)
|
format!("join image URL {} onto {source_url}", img.url)
|
||||||
})?;
|
})?;
|
||||||
rate.wait_for(url.as_str()).await?;
|
rate.wait_for(url.as_str()).await?;
|
||||||
let resp = http
|
let bytes = fetch_bytes_capped(
|
||||||
.get(url.clone())
|
http,
|
||||||
// Source CDNs commonly check Referer. Set it to the
|
url.as_str(),
|
||||||
// chapter page — matches what the browser would send.
|
Some(source_url),
|
||||||
.header(reqwest::header::REFERER, source_url)
|
allowlist,
|
||||||
.send()
|
max_image_bytes,
|
||||||
.await
|
)
|
||||||
.with_context(|| format!("GET {url}"))?
|
.await?
|
||||||
.error_for_status()
|
.to_vec();
|
||||||
.with_context(|| format!("non-2xx for {url}"))?;
|
// Reject any non-image response: the only valid output of an
|
||||||
let bytes = resp.bytes().await.context("read image body")?.to_vec();
|
// image URL is an image. `infer` returns None on truncated
|
||||||
let ext = infer::get(&bytes).map(|k| k.extension()).unwrap_or("bin");
|
// bytes too, which also wants to be a failure not a silent
|
||||||
|
// `.bin` extension.
|
||||||
|
if !looks_like_image(&bytes) {
|
||||||
|
anyhow::bail!(
|
||||||
|
"image URL {url} returned non-image bytes \
|
||||||
|
(first 16: {:?}); refusing to store as binary blob",
|
||||||
|
&bytes.get(..16.min(bytes.len()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let ext = infer::get(&bytes)
|
||||||
|
.map(|k| k.extension())
|
||||||
|
.expect("looks_like_image asserted infer succeeded");
|
||||||
fetched.push((img.page_number, bytes, ext));
|
fetched.push((img.page_number, bytes, ext));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -194,8 +232,9 @@ pub async fn sync_chapter_content(
|
|||||||
Ok(SyncOutcome::Fetched { pages: fetched.len() })
|
Ok(SyncOutcome::Fetched { pages: fetched.len() })
|
||||||
}
|
}
|
||||||
|
|
||||||
// Suppress unused-import warning for `session` until the bin/crawler
|
// Suppress unused-import warning for `session::registrable_domain`
|
||||||
// wiring lands in this branch and uses it through this module.
|
// until the bin/crawler wiring lands in this branch and uses it
|
||||||
|
// through this module.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
fn _keep_session_in_scope() {
|
fn _keep_session_in_scope() {
|
||||||
let _ = session::registrable_domain;
|
let _ = session::registrable_domain;
|
||||||
|
|||||||
@@ -233,24 +233,38 @@ impl CronContext {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
match self.metadata.run().await {
|
// Panic-isolate the tick body the same way `process_lease` does
|
||||||
|
// for worker dispatch. Without this, a panic in metadata.run
|
||||||
|
// (or any of the follow-on steps) would kill the cron task and
|
||||||
|
// no future tick would ever run — workers would keep going but
|
||||||
|
// no new metadata work would be scheduled until daemon restart.
|
||||||
|
// The advisory unlock below runs unconditionally so a panicked
|
||||||
|
// tick doesn't leave the lock held for another replica.
|
||||||
|
let metadata = &self.metadata;
|
||||||
|
let pool = &self.pool;
|
||||||
|
let retention_days = self.retention_days;
|
||||||
|
let body = async move {
|
||||||
|
match metadata.run().await {
|
||||||
Ok(stats) => tracing::info!(?stats, "cron: metadata pass done"),
|
Ok(stats) => tracing::info!(?stats, "cron: metadata pass done"),
|
||||||
Err(e) => tracing::error!(?e, "cron: metadata pass failed"),
|
Err(e) => tracing::error!(?e, "cron: metadata pass failed"),
|
||||||
}
|
}
|
||||||
|
match pipeline::enqueue_bookmarked_pending(pool).await {
|
||||||
match pipeline::enqueue_bookmarked_pending(&self.pool).await {
|
Ok(summary) => {
|
||||||
Ok(summary) => tracing::info!(?summary, "cron: enqueued bookmarked-pending"),
|
tracing::info!(?summary, "cron: enqueued bookmarked-pending");
|
||||||
|
}
|
||||||
Err(e) => tracing::error!(?e, "cron: enqueue_bookmarked_pending failed"),
|
Err(e) => tracing::error!(?e, "cron: enqueue_bookmarked_pending failed"),
|
||||||
}
|
}
|
||||||
|
match jobs::reap_done(pool, retention_days).await {
|
||||||
match jobs::reap_done(&self.pool, self.retention_days).await {
|
|
||||||
Ok(n) => tracing::info!(reaped = n, "cron: done-job reaper finished"),
|
Ok(n) => tracing::info!(reaped = n, "cron: done-job reaper finished"),
|
||||||
Err(e) => tracing::error!(?e, "cron: done-job reaper failed"),
|
Err(e) => tracing::error!(?e, "cron: done-job reaper failed"),
|
||||||
}
|
}
|
||||||
|
if let Err(e) = write_last_tick(pool, Utc::now()).await {
|
||||||
if let Err(e) = write_last_tick(&self.pool, Utc::now()).await {
|
|
||||||
tracing::warn!(?e, "cron: persist last_metadata_tick_at failed");
|
tracing::warn!(?e, "cron: persist last_metadata_tick_at failed");
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
if let Err(_panic) = AssertUnwindSafe(body).catch_unwind().await {
|
||||||
|
tracing::error!("cron: tick body panicked — continuing");
|
||||||
|
}
|
||||||
|
|
||||||
let _ = sqlx::query("SELECT pg_advisory_unlock($1)")
|
let _ = sqlx::query("SELECT pg_advisory_unlock($1)")
|
||||||
.bind(CRON_LOCK_KEY)
|
.bind(CRON_LOCK_KEY)
|
||||||
@@ -317,11 +331,7 @@ impl WorkerContext {
|
|||||||
// (because a force-refetch race or a job that was re-enqueued
|
// (because a force-refetch race or a job that was re-enqueued
|
||||||
// after a previous one finished), ack done without re-fetching.
|
// after a previous one finished), ack done without re-fetching.
|
||||||
if let JobPayload::SyncChapterContent { chapter_id, .. } = &lease.payload {
|
if let JobPayload::SyncChapterContent { chapter_id, .. } = &lease.payload {
|
||||||
let page_count: Option<i32> = sqlx::query_scalar(
|
let page_count = crate::repo::chapter::page_count(&self.pool, *chapter_id)
|
||||||
"SELECT page_count FROM chapters WHERE id = $1",
|
|
||||||
)
|
|
||||||
.bind(chapter_id)
|
|
||||||
.fetch_optional(&self.pool)
|
|
||||||
.await
|
.await
|
||||||
.ok()
|
.ok()
|
||||||
.flatten();
|
.flatten();
|
||||||
@@ -630,4 +640,19 @@ mod tests {
|
|||||||
let prev = previous_fire(now, at, Tz::UTC);
|
let prev = previous_fire(now, at, Tz::UTC);
|
||||||
assert_eq!(prev, dt_utc(2026, 5, 24, 23, 30));
|
assert_eq!(prev, dt_utc(2026, 5, 24, 23, 30));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Documents the panic-isolation pattern `run_tick` now relies on:
|
||||||
|
/// `AssertUnwindSafe(...).catch_unwind().await` must yield `Err(_)`
|
||||||
|
/// when the wrapped future panics, so the surrounding loop (or in
|
||||||
|
/// our case, the unconditional advisory-unlock that follows) keeps
|
||||||
|
/// running. The shape of this test mirrors the production callsite.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn assert_unwind_safe_catches_a_panicking_future() {
|
||||||
|
let result = AssertUnwindSafe(async {
|
||||||
|
panic!("boom");
|
||||||
|
})
|
||||||
|
.catch_unwind()
|
||||||
|
.await;
|
||||||
|
assert!(result.is_err(), "panicking future must yield Err");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
//! Persistent job queue and the four job kinds.
|
//! Persistent job queue and its job kinds.
|
||||||
//!
|
//!
|
||||||
//! Backed by Postgres (the `crawler_jobs` table). Workers lease rows
|
//! Backed by Postgres (the `crawler_jobs` table). Workers lease rows
|
||||||
//! with `SELECT ... FOR UPDATE SKIP LOCKED`, heartbeat via
|
//! with `SELECT ... FOR UPDATE SKIP LOCKED`, heartbeat via
|
||||||
@@ -12,16 +12,9 @@ use serde::{Deserialize, Serialize};
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use super::source::DiscoverMode;
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||||
pub enum JobPayload {
|
pub enum JobPayload {
|
||||||
/// Walk the source index and enqueue `SyncManga` jobs.
|
|
||||||
Discover {
|
|
||||||
source_id: String,
|
|
||||||
mode: DiscoverMode,
|
|
||||||
},
|
|
||||||
/// Fetch one manga's detail page, upsert metadata, enqueue
|
/// Fetch one manga's detail page, upsert metadata, enqueue
|
||||||
/// `SyncChapterList`.
|
/// `SyncChapterList`.
|
||||||
SyncManga {
|
SyncManga {
|
||||||
@@ -160,23 +153,36 @@ pub async fn lease(
|
|||||||
Ok(leases)
|
Ok(leases)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark a leased job as successfully completed.
|
/// Mark a leased job as successfully completed. The `state = 'running'`
|
||||||
|
/// predicate guards against a late ack from a worker whose lease expired
|
||||||
|
/// and was already re-leased by another worker: without it, the late ack
|
||||||
|
/// would clobber the new lease's `state` and `leased_until`. `rows_affected
|
||||||
|
/// == 0` means we lost the lease — surfaced as a warn rather than an
|
||||||
|
/// error because the new lease holder is doing real work; the late ack
|
||||||
|
/// just has to step aside.
|
||||||
pub async fn ack_done(pool: &PgPool, lease_id: Uuid) -> sqlx::Result<()> {
|
pub async fn ack_done(pool: &PgPool, lease_id: Uuid) -> sqlx::Result<()> {
|
||||||
sqlx::query(
|
let res = sqlx::query(
|
||||||
"UPDATE crawler_jobs \
|
"UPDATE crawler_jobs \
|
||||||
SET state = 'done', leased_until = NULL, updated_at = now() \
|
SET state = 'done', leased_until = NULL, updated_at = now() \
|
||||||
WHERE id = $1",
|
WHERE id = $1 AND state = 'running'",
|
||||||
)
|
)
|
||||||
.bind(lease_id)
|
.bind(lease_id)
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await?;
|
.await?;
|
||||||
|
if res.rows_affected() == 0 {
|
||||||
|
tracing::warn!(
|
||||||
|
%lease_id,
|
||||||
|
"ack_done: lease no longer running — likely re-leased by another worker; skipping update"
|
||||||
|
);
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark a leased job as failed. If the current attempt count has reached
|
/// Mark a leased job as failed. If the current attempt count has reached
|
||||||
/// `max_attempts` the job is terminally dead and stops retrying;
|
/// `max_attempts` the job is terminally dead and stops retrying;
|
||||||
/// otherwise it goes back to `pending` with `scheduled_at` pushed into
|
/// otherwise it goes back to `pending` with `scheduled_at` pushed into
|
||||||
/// the future by the exponential backoff.
|
/// the future by the exponential backoff. See [`ack_done`] for the
|
||||||
|
/// `state = 'running'` guard rationale.
|
||||||
pub async fn ack_failed(
|
pub async fn ack_failed(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
lease_id: Uuid,
|
lease_id: Uuid,
|
||||||
@@ -184,16 +190,16 @@ pub async fn ack_failed(
|
|||||||
attempts: i32,
|
attempts: i32,
|
||||||
max_attempts: i32,
|
max_attempts: i32,
|
||||||
) -> sqlx::Result<()> {
|
) -> sqlx::Result<()> {
|
||||||
if attempts >= max_attempts {
|
let res = if attempts >= max_attempts {
|
||||||
sqlx::query(
|
sqlx::query(
|
||||||
"UPDATE crawler_jobs \
|
"UPDATE crawler_jobs \
|
||||||
SET state = 'dead', last_error = $2, leased_until = NULL, updated_at = now() \
|
SET state = 'dead', last_error = $2, leased_until = NULL, updated_at = now() \
|
||||||
WHERE id = $1",
|
WHERE id = $1 AND state = 'running'",
|
||||||
)
|
)
|
||||||
.bind(lease_id)
|
.bind(lease_id)
|
||||||
.bind(error)
|
.bind(error)
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await?;
|
.await?
|
||||||
} else {
|
} else {
|
||||||
let backoff_ms: i64 = backoff_for(attempts).as_millis().min(i64::MAX as u128) as i64;
|
let backoff_ms: i64 = backoff_for(attempts).as_millis().min(i64::MAX as u128) as i64;
|
||||||
sqlx::query(
|
sqlx::query(
|
||||||
@@ -201,30 +207,45 @@ pub async fn ack_failed(
|
|||||||
SET state = 'pending', last_error = $2, leased_until = NULL, \
|
SET state = 'pending', last_error = $2, leased_until = NULL, \
|
||||||
scheduled_at = now() + ($3::bigint || ' milliseconds')::interval, \
|
scheduled_at = now() + ($3::bigint || ' milliseconds')::interval, \
|
||||||
updated_at = now() \
|
updated_at = now() \
|
||||||
WHERE id = $1",
|
WHERE id = $1 AND state = 'running'",
|
||||||
)
|
)
|
||||||
.bind(lease_id)
|
.bind(lease_id)
|
||||||
.bind(error)
|
.bind(error)
|
||||||
.bind(backoff_ms)
|
.bind(backoff_ms)
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await?;
|
.await?
|
||||||
|
};
|
||||||
|
if res.rows_affected() == 0 {
|
||||||
|
tracing::warn!(
|
||||||
|
%lease_id,
|
||||||
|
"ack_failed: lease no longer running — likely re-leased by another worker; skipping update"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a leased job to `pending` without burning a retry attempt.
|
/// Return a leased job to `pending` without burning a retry attempt.
|
||||||
/// Used on graceful shutdown and on session-expired aborts where the
|
/// Used on graceful shutdown and on session-expired aborts where the
|
||||||
/// failure isn't the job's fault.
|
/// failure isn't the job's fault. See [`ack_done`] for the
|
||||||
|
/// `state = 'running'` guard rationale — important here because
|
||||||
|
/// `attempts - 1` would otherwise spuriously decrement the new lease's
|
||||||
|
/// attempt count.
|
||||||
pub async fn release(pool: &PgPool, lease_id: Uuid) -> sqlx::Result<()> {
|
pub async fn release(pool: &PgPool, lease_id: Uuid) -> sqlx::Result<()> {
|
||||||
sqlx::query(
|
let res = sqlx::query(
|
||||||
"UPDATE crawler_jobs \
|
"UPDATE crawler_jobs \
|
||||||
SET state = 'pending', leased_until = NULL, \
|
SET state = 'pending', leased_until = NULL, \
|
||||||
attempts = GREATEST(0, attempts - 1), updated_at = now() \
|
attempts = GREATEST(0, attempts - 1), updated_at = now() \
|
||||||
WHERE id = $1",
|
WHERE id = $1 AND state = 'running'",
|
||||||
)
|
)
|
||||||
.bind(lease_id)
|
.bind(lease_id)
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await?;
|
.await?;
|
||||||
|
if res.rows_affected() == 0 {
|
||||||
|
tracing::warn!(
|
||||||
|
%lease_id,
|
||||||
|
"release: lease no longer running — likely re-leased by another worker; skipping update"
|
||||||
|
);
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,10 @@ pub mod daemon;
|
|||||||
pub mod detect;
|
pub mod detect;
|
||||||
pub mod diff;
|
pub mod diff;
|
||||||
pub mod jobs;
|
pub mod jobs;
|
||||||
|
pub mod nav;
|
||||||
pub mod pipeline;
|
pub mod pipeline;
|
||||||
pub mod rate_limit;
|
pub mod rate_limit;
|
||||||
|
pub mod safety;
|
||||||
pub mod session;
|
pub mod session;
|
||||||
pub mod source;
|
pub mod source;
|
||||||
|
pub mod url_utils;
|
||||||
|
|||||||
241
backend/src/crawler/nav.rs
Normal file
241
backend/src/crawler/nav.rs
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
//! Page navigation helpers — wrap `chromiumoxide` `wait_for_navigation`
|
||||||
|
//! with a timeout so a hung TLS handshake or a page that never fires
|
||||||
|
//! `load` cannot wedge a worker (or the cron metadata pass) forever.
|
||||||
|
//!
|
||||||
|
//! [`NAV_TIMEOUT`] is the global budget. Callers in the crawler use
|
||||||
|
//! [`wait_for_nav`] to get back a typed error so transient timeouts can
|
||||||
|
//! be reported separately from underlying CDP errors.
|
||||||
|
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use chromiumoxide::error::CdpError;
|
||||||
|
use chromiumoxide::Page;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
/// Maximum wall-clock time we'll wait for a single page navigation. A
|
||||||
|
/// healthy Chromium reaches `load` in well under a second on the target
|
||||||
|
/// site; a 30-second cap is generous enough for slow TLS handshakes on
|
||||||
|
/// the first request after a fresh process while still catching real
|
||||||
|
/// hangs before they wedge the daemon.
|
||||||
|
pub const NAV_TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
|
/// Outcome of a timed-out navigation. `Timeout` is the transient signal
|
||||||
|
/// callers translate into a retry-friendly error
|
||||||
|
/// ([`crate::crawler::detect::PageError::Transient`] in the source path,
|
||||||
|
/// a context'd anyhow elsewhere). `Cdp` carries the underlying
|
||||||
|
/// chromiumoxide error unchanged.
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum NavError {
|
||||||
|
#[error("navigation timed out after {0:?}")]
|
||||||
|
Timeout(Duration),
|
||||||
|
#[error(transparent)]
|
||||||
|
Cdp(#[from] CdpError),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wait for the page's next navigation to complete, capped at
|
||||||
|
/// [`NAV_TIMEOUT`]. Replaces bare `page.wait_for_navigation().await`
|
||||||
|
/// throughout the crawler.
|
||||||
|
pub async fn wait_for_nav(page: &Page) -> Result<(), NavError> {
|
||||||
|
match tokio::time::timeout(NAV_TIMEOUT, page.wait_for_navigation()).await {
|
||||||
|
Err(_elapsed) => Err(NavError::Timeout(NAV_TIMEOUT)),
|
||||||
|
Ok(Err(e)) => Err(NavError::Cdp(e)),
|
||||||
|
Ok(Ok(_)) => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Poll interval for [`wait_for_selector`]. 100ms is fast enough that a
|
||||||
|
/// page rendering in 200ms isn't held back noticeably, and slow enough
|
||||||
|
/// not to spam CDP with `find_element` calls on a page that's actually
|
||||||
|
/// taking its time.
|
||||||
|
const SELECTOR_POLL_INTERVAL: Duration = Duration::from_millis(100);
|
||||||
|
|
||||||
|
/// Wait until `selector` matches at least one element on `page`, or
|
||||||
|
/// `timeout` elapses. Used after a navigation to confirm a page-type-
|
||||||
|
/// specific marker is in the DOM before parsing — replaces the fixed
|
||||||
|
/// post-nav sleep that previously masked partial-render races.
|
||||||
|
///
|
||||||
|
/// chromiumoxide 0.7.0 has no built-in `wait_for_selector`, so we poll
|
||||||
|
/// `find_element` at [`SELECTOR_POLL_INTERVAL`] until success or budget
|
||||||
|
/// exhaustion. A failed `find_element` is *not* an error here — it just
|
||||||
|
/// means "not yet" — we only surface an error once the overall
|
||||||
|
/// `timeout` is up.
|
||||||
|
pub async fn wait_for_selector(
|
||||||
|
page: &Page,
|
||||||
|
selector: &str,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<(), NavError> {
|
||||||
|
let deadline = tokio::time::Instant::now() + timeout;
|
||||||
|
loop {
|
||||||
|
if page.find_element(selector).await.is_ok() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if tokio::time::Instant::now() >= deadline {
|
||||||
|
return Err(NavError::Timeout(timeout));
|
||||||
|
}
|
||||||
|
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
|
||||||
|
let sleep_for = SELECTOR_POLL_INTERVAL.min(remaining);
|
||||||
|
tokio::time::sleep(sleep_for).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-page-type budget for [`wait_for_selector`]. Shorter than
|
||||||
|
/// [`NAV_TIMEOUT`] because by the time we're waiting on a selector, the
|
||||||
|
/// page has already responded — we're only absorbing post-load JS
|
||||||
|
/// finishing its row injection, which on a healthy site takes well
|
||||||
|
/// under a second.
|
||||||
|
pub const SELECTOR_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
|
impl NavError {
|
||||||
|
/// Does this navigation error indicate the underlying Chromium
|
||||||
|
/// process has died or its CDP connection has dropped? Used by the
|
||||||
|
/// dispatcher to decide whether to invalidate the
|
||||||
|
/// [`crate::crawler::browser_manager::BrowserManager`] handle so
|
||||||
|
/// the next acquire re-launches.
|
||||||
|
///
|
||||||
|
/// Both variants count: a `Timeout` past [`NAV_TIMEOUT`] is in
|
||||||
|
/// practice always either a hung CDP transport or a wedged page
|
||||||
|
/// the browser can't recover from on its own, and a `Cdp` error
|
||||||
|
/// surfacing at the navigation layer means the chromium-facing
|
||||||
|
/// channel is the failing layer.
|
||||||
|
pub fn is_likely_browser_dead(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Self::Timeout(_) => true,
|
||||||
|
Self::Cdp(_) => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Walk an `anyhow::Error` chain looking for typed evidence that the
|
||||||
|
/// chromium-facing layer is the failing one. Two markers count:
|
||||||
|
///
|
||||||
|
/// 1. A wrapped [`NavError`] flagged by [`NavError::is_likely_browser_dead`].
|
||||||
|
/// 2. A wrapped [`CdpError`] (via `anyhow::Error::from(CdpError)` at a
|
||||||
|
/// `Browser::new_page` call site, or any other direct CDP boundary).
|
||||||
|
///
|
||||||
|
/// Earlier versions also substring-matched the chain for "connection",
|
||||||
|
/// "closed", "channel", etc. as a fallback. That was too broad —
|
||||||
|
/// reqwest TCP-reset errors during CDN image downloads, sqlx
|
||||||
|
/// connection-pool errors, and similar non-browser failures contain
|
||||||
|
/// those words and triggered spurious chromium relaunches. The typed
|
||||||
|
/// downcasts cover every place we hand a chromium error to anyhow,
|
||||||
|
/// so the fallback is unnecessary.
|
||||||
|
pub fn anyhow_looks_browser_dead(err: &anyhow::Error) -> bool {
|
||||||
|
for cause in err.chain() {
|
||||||
|
if let Some(nav) = cause.downcast_ref::<NavError>() {
|
||||||
|
if nav.is_likely_browser_dead() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if cause.downcast_ref::<CdpError>().is_some() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::future::pending;

    /// Sanity-check the timeout pattern used by [`wait_for_nav`]: a
    /// future that never resolves must yield `Elapsed` within the
    /// configured budget. We can't easily stand up a real `Page` in a
    /// unit test, so we assert the underlying primitive behaves the way
    /// the helper depends on.
    #[tokio::test(flavor = "current_thread", start_paused = true)]
    async fn timeout_elapses_on_a_future_that_never_resolves() {
        let result =
            tokio::time::timeout(Duration::from_millis(50), pending::<()>()).await;
        assert!(result.is_err(), "expected Elapsed on a hung future");
    }

    /// The `Display` output of a timeout names the budget that was spent.
    #[test]
    fn nav_error_timeout_message_includes_duration() {
        let e = NavError::Timeout(Duration::from_secs(30));
        assert_eq!(e.to_string(), "navigation timed out after 30s");
    }

    /// A navigation timeout counts as evidence of a dead browser.
    #[test]
    fn timeout_is_treated_as_likely_browser_dead() {
        let e = NavError::Timeout(NAV_TIMEOUT);
        assert!(e.is_likely_browser_dead());
    }

    /// A `NavError` buried under anyhow context is still found by the
    /// chain walk.
    #[test]
    fn anyhow_with_nav_timeout_in_chain_is_flagged() {
        let inner: Result<(), NavError> = Err(NavError::Timeout(NAV_TIMEOUT));
        let outer = inner.unwrap_err();
        let wrapped: anyhow::Error =
            anyhow::Error::new(outer).context("wait for chapter nav");
        assert!(anyhow_looks_browser_dead(&wrapped));
    }

    #[test]
    fn anyhow_with_cdp_error_in_chain_is_flagged() {
        // `Browser::new_page` errors get wrapped via
        // `anyhow::Error::from(CdpError)` at the navigate / dispatch
        // call sites. Walking the chain and downcasting to CdpError is
        // what catches that path. Any CdpError variant counts; the
        // Serde variant is the easiest to construct in a unit test.
        let serde_err: serde_json::Error =
            serde_json::from_str::<i32>("not a number").unwrap_err();
        let cdp = CdpError::Serde(serde_err);
        let wrapped: anyhow::Error =
            anyhow::Error::from(cdp).context("open chapter page");
        assert!(anyhow_looks_browser_dead(&wrapped));
    }

    /// An untyped, unrelated error must not be flagged.
    #[test]
    fn anyhow_with_innocuous_parse_error_is_not_flagged() {
        let e: anyhow::Error =
            anyhow::anyhow!("parse manga detail: chapter row regex did not match");
        assert!(!anyhow_looks_browser_dead(&e));
    }

    #[test]
    fn anyhow_with_reqwest_style_connection_message_is_not_flagged() {
        // Regression: the earlier substring fallback flagged any error
        // whose message contained "connection" or "closed" as browser-
        // dead. A TCP reset from a CDN during image download, or a
        // sqlx pool-connection error, would burn a chromium relaunch
        // even though the browser is fine. Typed downcasts only —
        // these untyped strings must pass through.
        for msg in [
            "error sending request: connection reset by peer",
            "PoolTimedOut: timed out waiting for a connection",
            "request to https://cdn/x.jpg: connection closed before message completed",
            "transport error during image fetch",
        ] {
            let e: anyhow::Error = anyhow::anyhow!("{msg}");
            assert!(
                !anyhow_looks_browser_dead(&e),
                "must not flag non-browser error: {msg}"
            );
        }
    }

    /// Same sanity check as [`timeout_elapses_on_a_future_that_never_resolves`],
    /// but for the [`wait_for_selector`] polling pattern: the loop must
    /// surrender at the deadline rather than spinning — or sleeping —
    /// past it.
    ///
    /// The simulated loop mirrors the production one, including the
    /// sleep clamped to the remaining budget. The budget is deliberately
    /// *not* a multiple of [`SELECTOR_POLL_INTERVAL`], so an unclamped
    /// sleep would overshoot by half an interval and fail the elapsed
    /// assertion below.
    #[tokio::test(flavor = "current_thread", start_paused = true)]
    async fn selector_polling_pattern_surrenders_at_deadline() {
        // Two and a half poll intervals.
        let timeout = SELECTOR_POLL_INTERVAL * 2 + SELECTOR_POLL_INTERVAL / 2;
        let start = tokio::time::Instant::now();
        let deadline = start + timeout;
        // Simulate find_element forever returning "not found".
        let mut polls = 0u32;
        let result: Result<(), NavError> = loop {
            polls += 1;
            if tokio::time::Instant::now() >= deadline {
                break Err(NavError::Timeout(timeout));
            }
            let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
            tokio::time::sleep(SELECTOR_POLL_INTERVAL.min(remaining)).await;
        };
        // The error must carry the caller's budget, not some derived value.
        assert!(matches!(result, Err(NavError::Timeout(d)) if d == timeout));
        // Polls happen at roughly 0, 1x and 2x the interval, plus the
        // final check at the deadline that breaks out.
        assert!(polls >= 3, "expected at least 3 poll iterations, got {polls}");
        let elapsed = start.elapsed();
        assert!(elapsed >= timeout, "gave up early after {elapsed:?}");
        // Slack covers timer-granularity rounding only; a full unclamped
        // sleep would exceed it.
        assert!(
            elapsed < timeout + SELECTOR_POLL_INTERVAL / 4,
            "overshot the deadline: waited {elapsed:?} for a {timeout:?} budget"
        );
    }
}
|
||||||
@@ -2,6 +2,8 @@
|
|||||||
//! that fan out chapter-content work. Shared between the daemon (cron tick)
|
//! that fan out chapter-content work. Shared between the daemon (cron tick)
|
||||||
//! and the CLI (`bin/crawler.rs`) so behavior stays in lockstep.
|
//! and the CLI (`bin/crawler.rs`) so behavior stays in lockstep.
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
@@ -9,9 +11,11 @@ use uuid::Uuid;
|
|||||||
use crate::crawler::browser_manager::BrowserManager;
|
use crate::crawler::browser_manager::BrowserManager;
|
||||||
use crate::crawler::jobs::{self, EnqueueResult, JobPayload};
|
use crate::crawler::jobs::{self, EnqueueResult, JobPayload};
|
||||||
use crate::crawler::rate_limit::HostRateLimiters;
|
use crate::crawler::rate_limit::HostRateLimiters;
|
||||||
|
use crate::crawler::safety::{fetch_bytes_capped, looks_like_image, DownloadAllowlist};
|
||||||
use crate::crawler::source::target::TargetSource;
|
use crate::crawler::source::target::TargetSource;
|
||||||
use crate::crawler::source::{DiscoverMode, FetchContext, Source};
|
use crate::crawler::source::{FetchContext, Source};
|
||||||
use crate::repo;
|
use crate::repo;
|
||||||
|
use crate::repo::crawler::UpsertStatus;
|
||||||
use crate::storage::Storage;
|
use crate::storage::Storage;
|
||||||
|
|
||||||
/// Coarse counters surfaced for logging at the end of a metadata pass.
|
/// Coarse counters surfaced for logging at the end of a metadata pass.
|
||||||
@@ -23,16 +27,42 @@ pub struct MetadataStats {
|
|||||||
pub mangas_failed: usize,
|
pub mangas_failed: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decide whether the per-ref loop should stop based on the Incremental
|
/// Decide whether the per-ref loop should stop on the manga just
|
||||||
/// streak counter. Pulled out as a pure function so the rule is unit-
|
/// processed. The walk halts only when (a) the previous run exited
|
||||||
/// testable without standing up the walker or DB.
|
/// cleanly — so the index tail is known to be caught up and we're not
|
||||||
pub(crate) fn should_stop(mode: DiscoverMode, consecutive_unchanged: usize) -> bool {
|
/// in a recovery sweep — AND (b) this manga's metadata hash matched
|
||||||
match mode {
|
/// storage (`Unchanged`) AND (c) the chapter sync confirmed zero new
|
||||||
DiscoverMode::Backfill => false,
|
/// chapters. A `None` chapter count (skip_chapters, or a chapter-sync
|
||||||
DiscoverMode::Incremental { stop_after_unchanged } => {
|
/// error we logged-and-swallowed) refuses the stop because we can't
|
||||||
consecutive_unchanged >= stop_after_unchanged
|
/// verify the tail is unchanged from a single piece of evidence.
|
||||||
}
|
///
|
||||||
|
/// Pure function so the rule is unit-testable without the walker, DB,
|
||||||
|
/// or browser.
|
||||||
|
pub(crate) fn should_stop(
|
||||||
|
was_clean: bool,
|
||||||
|
status: UpsertStatus,
|
||||||
|
chapters_new: Option<usize>,
|
||||||
|
) -> bool {
|
||||||
|
was_clean
|
||||||
|
&& matches!(status, UpsertStatus::Unchanged)
|
||||||
|
&& chapters_new == Some(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the just-finished walk should be recorded as a clean exit.
|
||||||
|
/// `true` writes the recovery flag back to `completed: true`; `false`
|
||||||
|
/// leaves it `false` so the next tick treats this run as crashed and
|
||||||
|
/// does a recovery sweep.
|
||||||
|
///
|
||||||
|
/// `hit_limit` (the caller-imposed `CRAWLER_LIMIT` cap) is *not* an
|
||||||
|
/// argument: a limit cap by definition does not reach the catalog tail,
|
||||||
|
/// so it can never count as a clean exit. Encoding that in the type
|
||||||
|
/// (rather than as an `&& !hit_limit` clause inline) prevents a future
|
||||||
|
/// edit from accidentally adding it back to the truth table.
|
||||||
|
pub(crate) fn should_mark_clean_exit(
|
||||||
|
walked_to_completion: bool,
|
||||||
|
hit_stop_condition: bool,
|
||||||
|
) -> bool {
|
||||||
|
walked_to_completion || hit_stop_condition
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Runs the discover → fetch → upsert → cover → chapter-list-diff pipeline
|
/// Runs the discover → fetch → upsert → cover → chapter-list-diff pipeline
|
||||||
@@ -42,15 +72,25 @@ pub(crate) fn should_stop(mode: DiscoverMode, consecutive_unchanged: usize) -> b
|
|||||||
/// `limit == 0` means no cap (full sweep up to the source's own bound).
|
/// `limit == 0` means no cap (full sweep up to the source's own bound).
|
||||||
/// `skip_chapters == true` is the "metadata-only" mode (parser doesn't
|
/// `skip_chapters == true` is the "metadata-only" mode (parser doesn't
|
||||||
/// extract chapters, and `sync_manga_chapters` is skipped — otherwise an
|
/// extract chapters, and `sync_manga_chapters` is skipped — otherwise an
|
||||||
/// empty chapter list would soft-drop existing rows).
|
/// empty chapter list would soft-drop existing rows). In this mode the
|
||||||
|
/// stop condition never fires because chapter freshness can't be
|
||||||
|
/// confirmed, so the walk always runs to end-of-source.
|
||||||
///
|
///
|
||||||
/// `mode` controls the walk:
|
/// The walk is always newest-first. Steady-state runs stop on the first
|
||||||
/// - `Backfill` — oldest-first, no early exit. The only mode that runs
|
/// manga where metadata is `Unchanged` AND chapter sync reports zero
|
||||||
/// the end-of-walk drop pass + writes `seed_completed_at`.
|
/// new chapters — the source orders by `update_date DESC`, so anything
|
||||||
/// - `Incremental { stop_after_unchanged }` — newest-first, breaks out
|
/// with a fresh chapter or fresh metadata is bumped to the top and will
|
||||||
/// after N consecutive Unchanged upserts. Drop pass is skipped (the
|
/// be processed before we hit a fully-caught-up manga.
|
||||||
/// tail of the index is never visited, so its `last_seen_at` is
|
///
|
||||||
/// stale and using it to soft-drop would be unsafe).
|
/// A per-source recovery flag stored in `crawler_state`
|
||||||
|
/// (`last_run_completed:<source_id>`) gates the early stop: it's set to
|
||||||
|
/// `false` right after `ensure_source` and back to `true` only when the
|
||||||
|
/// run exits via end-of-walk OR the intentional stop. A crash, panic,
|
||||||
|
/// or SIGKILL leaves the flag at `false`, so the next tick reads it,
|
||||||
|
/// recognizes the previous run did not exit cleanly, and walks the
|
||||||
|
/// full catalog (ignoring the stop condition) to re-cover anything the
|
||||||
|
/// crashed run missed past its crash point. Once that recovery sweep
|
||||||
|
/// reaches end-of-walk, steady-state resumes.
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub async fn run_metadata_pass(
|
pub async fn run_metadata_pass(
|
||||||
browser_manager: &BrowserManager,
|
browser_manager: &BrowserManager,
|
||||||
@@ -61,7 +101,8 @@ pub async fn run_metadata_pass(
|
|||||||
start_url: &str,
|
start_url: &str,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
skip_chapters: bool,
|
skip_chapters: bool,
|
||||||
mode: DiscoverMode,
|
allowlist: &DownloadAllowlist,
|
||||||
|
max_image_bytes: usize,
|
||||||
) -> anyhow::Result<MetadataStats> {
|
) -> anyhow::Result<MetadataStats> {
|
||||||
let lease = browser_manager
|
let lease = browser_manager
|
||||||
.acquire()
|
.acquire()
|
||||||
@@ -92,20 +133,36 @@ pub async fn run_metadata_pass(
|
|||||||
.await
|
.await
|
||||||
.context("ensure_source")?;
|
.context("ensure_source")?;
|
||||||
|
|
||||||
let run_started_at = chrono::Utc::now();
|
// Read BEFORE flipping to "in-flight" — a `false` here means the
|
||||||
|
// previous run didn't reach a clean exit, and this run must walk
|
||||||
|
// the full catalog (recovery sweep) instead of bailing on the
|
||||||
|
// first caught-up manga.
|
||||||
|
let was_clean = repo::crawler::last_run_completed_cleanly(db, source_id)
|
||||||
|
.await
|
||||||
|
.context("read last_run_completed_cleanly")?;
|
||||||
|
repo::crawler::mark_run_started(db, source_id)
|
||||||
|
.await
|
||||||
|
.context("mark_run_started")?;
|
||||||
|
|
||||||
let max_refs = (limit > 0).then_some(limit);
|
let max_refs = (limit > 0).then_some(limit);
|
||||||
|
|
||||||
tracing::info!(?mode, ?max_refs, "starting metadata pass");
|
tracing::info!(was_clean, ?max_refs, "starting metadata pass");
|
||||||
let mut walker = source
|
let mut walker = source
|
||||||
.discover(&ctx, mode)
|
.discover(&ctx)
|
||||||
.await
|
.await
|
||||||
.context("discover failed")?;
|
.context("discover failed")?;
|
||||||
|
|
||||||
let mut stats = MetadataStats::default();
|
let mut stats = MetadataStats::default();
|
||||||
let mut consecutive_unchanged: usize = 0;
|
// Run-scoped dedup of `source_manga_key`s already processed this pass.
|
||||||
|
// A shift in the source index causes the slot-last item of the page
|
||||||
|
// we just read to reappear at slot 0 of the next page; skipping it
|
||||||
|
// here prevents redundant fetch_manga + upsert and avoids spuriously
|
||||||
|
// tripping the stop condition with a re-confirm of an entry we
|
||||||
|
// already counted.
|
||||||
|
let mut seen: HashSet<String> = HashSet::new();
|
||||||
let mut walked_to_completion = false;
|
let mut walked_to_completion = false;
|
||||||
let mut hit_limit = false;
|
let mut hit_limit = false;
|
||||||
let mut hit_incremental_stop = false;
|
let mut hit_stop_condition = false;
|
||||||
|
|
||||||
'outer: loop {
|
'outer: loop {
|
||||||
let batch = match walker.next_batch(&ctx).await? {
|
let batch = match walker.next_batch(&ctx).await? {
|
||||||
@@ -121,6 +178,23 @@ pub async fn run_metadata_pass(
|
|||||||
tracing::info!(cap = ?max_refs, "max_results reached; halting walk");
|
tracing::info!(cap = ?max_refs, "max_results reached; halting walk");
|
||||||
break 'outer;
|
break 'outer;
|
||||||
}
|
}
|
||||||
|
// Skip refs we've already *successfully* processed this pass.
|
||||||
|
// Checking `contains` here (rather than `insert`) keeps the key
|
||||||
|
// out of `seen` on failure paths below, so a transient fetch or
|
||||||
|
// upsert error gets a second chance if the ref reappears in
|
||||||
|
// another batch. Done *before* counting toward
|
||||||
|
// `stats.discovered` (the skipped ref did no work) and *before*
|
||||||
|
// touching the stop check (a `continue` here doesn't let a
|
||||||
|
// re-confirm trip the stop condition). The matching
|
||||||
|
// `seen.insert(...)` lives just after the successful upsert
|
||||||
|
// below.
|
||||||
|
if seen.contains(&r.source_manga_key) {
|
||||||
|
tracing::debug!(
|
||||||
|
key = %r.source_manga_key,
|
||||||
|
"skip already-seen key in this run"
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
stats.discovered += 1;
|
stats.discovered += 1;
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
idx = stats.discovered,
|
idx = stats.discovered,
|
||||||
@@ -141,6 +215,48 @@ pub async fn run_metadata_pass(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Partial-render guard: an empty chapter list paired with a
|
||||||
|
// prior count > 0 is overwhelmingly a chromium snapshot
|
||||||
|
// taken between the #chapter_table wrapper render and its
|
||||||
|
// rows render. The wait_for_selector wait in `navigate`
|
||||||
|
// narrows this window but cannot close it for slow renders
|
||||||
|
// beyond the selector budget. Treat as a transient failure
|
||||||
|
// here — skip upsert, skip seen.insert — so the next batch
|
||||||
|
// (or the next tick) retries. Skipped in `skip_chapters`
|
||||||
|
// mode because the parser is configured to return an empty
|
||||||
|
// Vec by design there.
|
||||||
|
if !skip_chapters && manga.chapters.is_empty() {
|
||||||
|
match repo::crawler::live_chapter_count_for_source_manga(
|
||||||
|
db, source_id, &r.source_manga_key,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(prior) if prior > 0 => {
|
||||||
|
tracing::warn!(
|
||||||
|
key = %r.source_manga_key,
|
||||||
|
url = %r.url,
|
||||||
|
prior_chapter_count = prior,
|
||||||
|
"fetch_manga returned empty chapters but prior count > 0; treating as partial-render transient and skipping"
|
||||||
|
);
|
||||||
|
stats.mangas_failed += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Ok(_) => {}
|
||||||
|
Err(e) => {
|
||||||
|
// DB lookup failed — fail safe: skip rather
|
||||||
|
// than risk a soft-drop on a manga whose prior
|
||||||
|
// count we couldn't confirm.
|
||||||
|
tracing::warn!(
|
||||||
|
key = %r.source_manga_key,
|
||||||
|
error = ?e,
|
||||||
|
"live_chapter_count_for_source_manga failed; skipping cautiously"
|
||||||
|
);
|
||||||
|
stats.mangas_failed += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let upsert = match repo::crawler::upsert_manga_from_source(
|
let upsert = match repo::crawler::upsert_manga_from_source(
|
||||||
db, source_id, &r.url, &manga,
|
db, source_id, &r.url, &manga,
|
||||||
)
|
)
|
||||||
@@ -158,6 +274,10 @@ pub async fn run_metadata_pass(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
stats.upserted += 1;
|
stats.upserted += 1;
|
||||||
|
// Record success in the dedup set. Cover and chapter-sync
|
||||||
|
// failures below are non-fatal and don't roll this back —
|
||||||
|
// metadata is the durable source of truth for the dedup.
|
||||||
|
seen.insert(r.source_manga_key.clone());
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
key = %manga.source_manga_key,
|
key = %manga.source_manga_key,
|
||||||
manga_id = %upsert.manga_id,
|
manga_id = %upsert.manga_id,
|
||||||
@@ -181,6 +301,8 @@ pub async fn run_metadata_pass(
|
|||||||
&r.url,
|
&r.url,
|
||||||
upsert.manga_id,
|
upsert.manga_id,
|
||||||
cover_url,
|
cover_url,
|
||||||
|
allowlist,
|
||||||
|
max_image_bytes,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
@@ -194,7 +316,13 @@ pub async fn run_metadata_pass(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !skip_chapters {
|
// Chapter sync. `chapters_new` feeds the stop check below:
|
||||||
|
// `None` (skip_chapters mode, or a logged-and-swallowed sync
|
||||||
|
// error) refuses to stop on this manga because we can't
|
||||||
|
// confirm "no new chapters."
|
||||||
|
let chapters_new: Option<usize> = if skip_chapters {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
match repo::crawler::sync_manga_chapters(
|
match repo::crawler::sync_manga_chapters(
|
||||||
db,
|
db,
|
||||||
source_id,
|
source_id,
|
||||||
@@ -203,79 +331,64 @@ pub async fn run_metadata_pass(
|
|||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(diff) => tracing::info!(
|
Ok(diff) => {
|
||||||
|
tracing::info!(
|
||||||
manga_id = %upsert.manga_id,
|
manga_id = %upsert.manga_id,
|
||||||
new = diff.new,
|
new = diff.new,
|
||||||
refreshed = diff.refreshed,
|
refreshed = diff.refreshed,
|
||||||
dropped = diff.dropped,
|
dropped = diff.dropped,
|
||||||
"chapters synced"
|
"chapters synced"
|
||||||
),
|
);
|
||||||
Err(e) => tracing::warn!(
|
Some(diff.new)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
manga_id = %upsert.manga_id,
|
manga_id = %upsert.manga_id,
|
||||||
error = ?e,
|
error = ?e,
|
||||||
"chapter sync failed"
|
"chapter sync failed"
|
||||||
),
|
);
|
||||||
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Incremental stop: count consecutive Unchanged upserts and
|
if should_stop(was_clean, upsert.status, chapters_new) {
|
||||||
// bail once the threshold is reached. New/Updated resets the
|
hit_stop_condition = true;
|
||||||
// streak so a fresh entry mid-batch doesn't accidentally trip
|
|
||||||
// the cutoff.
|
|
||||||
match upsert.status {
|
|
||||||
repo::crawler::UpsertStatus::Unchanged => {
|
|
||||||
consecutive_unchanged += 1;
|
|
||||||
}
|
|
||||||
repo::crawler::UpsertStatus::New | repo::crawler::UpsertStatus::Updated => {
|
|
||||||
consecutive_unchanged = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if should_stop(mode, consecutive_unchanged) {
|
|
||||||
hit_incremental_stop = true;
|
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
consecutive_unchanged,
|
key = %manga.source_manga_key,
|
||||||
"incremental stop threshold reached; halting walk"
|
"stop condition met (Unchanged metadata + 0 new chapters); halting walk"
|
||||||
);
|
);
|
||||||
break 'outer;
|
break 'outer;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Drop pass: only when the walk truly covered everything the source
|
// Recovery-flag write. Only on a clean exit (end-of-walk OR the
|
||||||
// surfaces. `last_seen_at` on un-visited rows is stale, so running
|
// intentional stop). `hit_limit` is a caller-imposed early break
|
||||||
// the drop on a partial walk would soft-drop the tail of the index.
|
// and does NOT count — the catalog tail wasn't reached, so a future
|
||||||
let full_walk = walked_to_completion && !hit_limit && !hit_incremental_stop;
|
// tick still needs to walk past where we stopped. The truth table is
|
||||||
let backfill_complete = full_walk && matches!(mode, DiscoverMode::Backfill);
|
// pinned by `should_mark_clean_exit` so a future edit that adds
|
||||||
if full_walk {
|
// `hit_limit` back into the disjunction trips its unit test. Flag-
|
||||||
match repo::crawler::mark_dropped_mangas(db, source_id, run_started_at).await {
|
// write errors are warned and swallowed: the run already did its
|
||||||
Ok(n) => tracing::info!(dropped = n, "marked unseen manga as dropped"),
|
// work, and a stale `false` flag just buys a recovery sweep on the
|
||||||
Err(e) => tracing::warn!(error = ?e, "drop-pass failed"),
|
// next tick.
|
||||||
}
|
let exited_cleanly = should_mark_clean_exit(walked_to_completion, hit_stop_condition);
|
||||||
} else {
|
if exited_cleanly {
|
||||||
tracing::info!(
|
if let Err(e) = repo::crawler::mark_run_completed(db, source_id).await {
|
||||||
?mode,
|
tracing::warn!(error = ?e, "mark_run_completed failed");
|
||||||
hit_limit,
|
|
||||||
hit_incremental_stop,
|
|
||||||
"partial sync — skipping drop pass"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if backfill_complete {
|
|
||||||
if let Err(e) = repo::crawler::mark_seed_completed(db, source_id, run_started_at).await {
|
|
||||||
tracing::warn!(error = ?e, "mark_seed_completed failed");
|
|
||||||
} else {
|
|
||||||
tracing::info!(source_id, "seed marked complete");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
?mode,
|
was_clean,
|
||||||
discovered = stats.discovered,
|
discovered = stats.discovered,
|
||||||
upserted = stats.upserted,
|
upserted = stats.upserted,
|
||||||
covers_fetched = stats.covers_fetched,
|
covers_fetched = stats.covers_fetched,
|
||||||
mangas_failed = stats.mangas_failed,
|
mangas_failed = stats.mangas_failed,
|
||||||
walked_to_completion,
|
walked_to_completion,
|
||||||
hit_limit,
|
hit_limit,
|
||||||
hit_incremental_stop,
|
hit_stop_condition,
|
||||||
|
exited_cleanly,
|
||||||
"metadata pass complete"
|
"metadata pass complete"
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -283,8 +396,20 @@ pub async fn run_metadata_pass(
|
|||||||
Ok(stats)
|
Ok(stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Number of days a chapter stays quarantined after its latest
/// `SyncChapterContent` job ended up `dead`.
///
/// The partial dedup index `crawler_jobs_chapter_content_dedup_idx` only
/// blocks `(pending|running)` duplicates. Without this gate a
/// permanently-failing chapter would be re-enqueued on every cron tick,
/// burn its `max_attempts` retries, die again, and spin forever. With the
/// gate, a dead chapter gets a week of silence before the next attempt:
/// long enough for a transient site issue to clear, short enough that a
/// "permanent" failure is retried if conditions change.
const CHAPTER_DEAD_QUARANTINE_DAYS: i64 = 7;
|
||||||
|
|
||||||
/// Enqueue a `SyncChapterContent` job for every chapter of *any* bookmarked
|
/// Enqueue a `SyncChapterContent` job for every chapter of *any* bookmarked
|
||||||
/// manga that still has `page_count = 0` and a non-dropped source row.
|
/// manga that still has `page_count = 0` and a non-dropped source row.
|
||||||
|
/// Chapters whose latest job is `dead` within `CHAPTER_DEAD_QUARANTINE_DAYS`
|
||||||
|
/// are excluded to break the dead-letter spin.
|
||||||
/// Returns `(inserted, skipped)` counts. Dedup index handles repeats.
|
/// Returns `(inserted, skipped)` counts. Dedup index handles repeats.
|
||||||
pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<EnqueueSummary> {
|
pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<EnqueueSummary> {
|
||||||
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
|
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
|
||||||
@@ -295,10 +420,18 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
|
|||||||
JOIN chapter_sources cs ON cs.chapter_id = c.id
|
JOIN chapter_sources cs ON cs.chapter_id = c.id
|
||||||
WHERE c.page_count = 0
|
WHERE c.page_count = 0
|
||||||
AND cs.dropped_at IS NULL
|
AND cs.dropped_at IS NULL
|
||||||
|
AND NOT EXISTS (
|
||||||
|
SELECT 1 FROM crawler_jobs cj
|
||||||
|
WHERE cj.payload->>'kind' = 'sync_chapter_content'
|
||||||
|
AND cj.payload->>'chapter_id' = c.id::text
|
||||||
|
AND cj.state = 'dead'
|
||||||
|
AND cj.updated_at > now() - ($1::bigint || ' days')::interval
|
||||||
|
)
|
||||||
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at
|
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at
|
||||||
ORDER BY c.manga_id, c.created_at ASC
|
ORDER BY c.manga_id, c.created_at ASC
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
|
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
|
||||||
.fetch_all(pool)
|
.fetch_all(pool)
|
||||||
.await
|
.await
|
||||||
.context("query bookmarked-pending chapters")?;
|
.context("query bookmarked-pending chapters")?;
|
||||||
@@ -327,7 +460,9 @@ pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<Enqueue
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Enqueue chapter-content jobs for a *single* manga (the bookmark-create
|
/// Enqueue chapter-content jobs for a *single* manga (the bookmark-create
|
||||||
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`].
|
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`], including
|
||||||
|
/// the dead-letter quarantine — a freshly bookmarked manga should not
|
||||||
|
/// burn retries on chapters that just died on the cron tick.
|
||||||
pub async fn enqueue_pending_for_manga(
|
pub async fn enqueue_pending_for_manga(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
@@ -340,10 +475,18 @@ pub async fn enqueue_pending_for_manga(
|
|||||||
WHERE c.manga_id = $1
|
WHERE c.manga_id = $1
|
||||||
AND c.page_count = 0
|
AND c.page_count = 0
|
||||||
AND cs.dropped_at IS NULL
|
AND cs.dropped_at IS NULL
|
||||||
|
AND NOT EXISTS (
|
||||||
|
SELECT 1 FROM crawler_jobs cj
|
||||||
|
WHERE cj.payload->>'kind' = 'sync_chapter_content'
|
||||||
|
AND cj.payload->>'chapter_id' = c.id::text
|
||||||
|
AND cj.state = 'dead'
|
||||||
|
AND cj.updated_at > now() - ($2::bigint || ' days')::interval
|
||||||
|
)
|
||||||
ORDER BY cs.source_id, c.id
|
ORDER BY cs.source_id, c.id
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(manga_id)
|
.bind(manga_id)
|
||||||
|
.bind(CHAPTER_DEAD_QUARANTINE_DAYS)
|
||||||
.fetch_all(pool)
|
.fetch_all(pool)
|
||||||
.await
|
.await
|
||||||
.context("query pending chapters for manga")?;
|
.context("query pending chapters for manga")?;
|
||||||
@@ -382,6 +525,7 @@ pub struct EnqueueSummary {
|
|||||||
/// pipeline because the CLI still calls it from its inline chapter-content
|
/// pipeline because the CLI still calls it from its inline chapter-content
|
||||||
/// loop; once the worker pool fully replaces that path we can fold this
|
/// loop; once the worker pool fully replaces that path we can fold this
|
||||||
/// into `pipeline` proper.
|
/// into `pipeline` proper.
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
async fn download_and_store_cover(
|
async fn download_and_store_cover(
|
||||||
db: &PgPool,
|
db: &PgPool,
|
||||||
storage: &dyn Storage,
|
storage: &dyn Storage,
|
||||||
@@ -390,6 +534,8 @@ async fn download_and_store_cover(
|
|||||||
manga_url: &str,
|
manga_url: &str,
|
||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
cover_url: &str,
|
cover_url: &str,
|
||||||
|
allowlist: &DownloadAllowlist,
|
||||||
|
max_image_bytes: usize,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let absolute = reqwest::Url::parse(manga_url)
|
let absolute = reqwest::Url::parse(manga_url)
|
||||||
.context("parse manga URL")?
|
.context("parse manga URL")?
|
||||||
@@ -397,17 +543,22 @@ async fn download_and_store_cover(
|
|||||||
.context("join cover URL onto manga URL")?;
|
.context("join cover URL onto manga URL")?;
|
||||||
|
|
||||||
rate.wait_for(absolute.as_str()).await?;
|
rate.wait_for(absolute.as_str()).await?;
|
||||||
let resp = http
|
let bytes = fetch_bytes_capped(
|
||||||
.get(absolute.clone())
|
http,
|
||||||
.header(reqwest::header::REFERER, manga_url)
|
absolute.as_str(),
|
||||||
.send()
|
Some(manga_url),
|
||||||
.await
|
allowlist,
|
||||||
.with_context(|| format!("GET {absolute}"))?
|
max_image_bytes,
|
||||||
.error_for_status()
|
)
|
||||||
.with_context(|| format!("non-2xx for {absolute}"))?;
|
.await?;
|
||||||
let bytes = resp.bytes().await.context("read cover body")?;
|
if !looks_like_image(&bytes) {
|
||||||
let kind = infer::get(&bytes);
|
anyhow::bail!(
|
||||||
let ext = kind.map(|k| k.extension()).unwrap_or("bin");
|
"cover URL {absolute} returned non-image bytes; refusing to store as binary blob"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let ext = infer::get(&bytes)
|
||||||
|
.map(|k| k.extension())
|
||||||
|
.expect("looks_like_image asserted infer succeeded");
|
||||||
let key = format!("mangas/{manga_id}/cover.{ext}");
|
let key = format!("mangas/{manga_id}/cover.{ext}");
|
||||||
|
|
||||||
storage
|
storage
|
||||||
@@ -427,41 +578,112 @@ async fn download_and_store_cover(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn origin_of(url: &str) -> Option<String> {
|
use crate::crawler::url_utils::origin_of;
|
||||||
let (scheme, rest) = url.split_once("://")?;
|
|
||||||
let host = rest.split('/').next()?;
|
|
||||||
Some(format!("{scheme}://{host}"))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn backfill_never_stops_regardless_of_streak() {
|
fn stop_condition_fires_on_unchanged_metadata_and_zero_new_chapters() {
|
||||||
assert!(!should_stop(DiscoverMode::Backfill, 0));
|
// The whole point of the rule: in steady state, a manga whose
|
||||||
assert!(!should_stop(DiscoverMode::Backfill, 100));
|
// metadata hash matches AND whose chapter list gained no new
|
||||||
assert!(!should_stop(DiscoverMode::Backfill, usize::MAX));
|
// entries proves we've reached the caught-up tail of a
|
||||||
|
// newest-first index.
|
||||||
|
assert!(should_stop(true, UpsertStatus::Unchanged, Some(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn incremental_stops_when_streak_meets_threshold() {
|
fn stop_condition_refuses_when_chapters_added() {
|
||||||
let mode = DiscoverMode::Incremental {
|
// Unchanged metadata + N new chapters means the source bumped
|
||||||
stop_after_unchanged: 3,
|
// this manga because of the chapter add; the rest of the index
|
||||||
};
|
// is still ahead of us. Don't bail.
|
||||||
assert!(!should_stop(mode, 0));
|
assert!(!should_stop(true, UpsertStatus::Unchanged, Some(1)));
|
||||||
assert!(!should_stop(mode, 2));
|
assert!(!should_stop(true, UpsertStatus::Unchanged, Some(42)));
|
||||||
assert!(should_stop(mode, 3), "stops at exactly the threshold");
|
|
||||||
assert!(should_stop(mode, 100), "stops at anything past threshold");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn incremental_with_zero_threshold_stops_immediately() {
|
fn stop_condition_refuses_when_metadata_changed() {
|
||||||
// A nonsensical config (no Unchanged needed to stop) shouldn't
|
// Updated or New metadata always continues — even with zero new
|
||||||
// panic — it just means the very first ref triggers the bail.
|
// chapters — because the change-of-metadata bump itself is what
|
||||||
let mode = DiscoverMode::Incremental {
|
// the walk is following.
|
||||||
stop_after_unchanged: 0,
|
assert!(!should_stop(true, UpsertStatus::Updated, Some(0)));
|
||||||
};
|
assert!(!should_stop(true, UpsertStatus::New, Some(0)));
|
||||||
assert!(should_stop(mode, 0));
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_condition_refuses_when_chapter_count_unknown() {
|
||||||
|
// skip_chapters mode (CLI metadata-only sweep) or a
|
||||||
|
// logged-and-swallowed chapter sync error: we can't claim "no
|
||||||
|
// new chapters" from absence of evidence, so don't stop. The
|
||||||
|
// operator who runs metadata-only intentionally wants a full
|
||||||
|
// walk anyway.
|
||||||
|
assert!(!should_stop(true, UpsertStatus::Unchanged, None));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_condition_disabled_in_recovery_mode() {
|
||||||
|
// was_clean = false means the previous run did not exit cleanly;
|
||||||
|
// the catalog past its crash point is potentially un-synced. Walk
|
||||||
|
// to end-of-source no matter what individual mangas report.
|
||||||
|
assert!(!should_stop(false, UpsertStatus::Unchanged, Some(0)));
|
||||||
|
assert!(!should_stop(false, UpsertStatus::Unchanged, Some(1)));
|
||||||
|
assert!(!should_stop(false, UpsertStatus::Updated, Some(0)));
|
||||||
|
assert!(!should_stop(false, UpsertStatus::New, None));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn clean_exit_when_walked_to_completion() {
|
||||||
|
// End-of-walk reached the catalog tail — the recovery flag may
|
||||||
|
// safely flip back to `true`.
|
||||||
|
assert!(should_mark_clean_exit(true, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn clean_exit_when_stop_condition_fired() {
|
||||||
|
// First Unchanged + 0-new-chapter manga is a complete steady-
|
||||||
|
// state exit: every manga newer than this point was synced, and
|
||||||
|
// by source-side `update_date DESC` ordering everything past
|
||||||
|
// this point is at least as caught-up.
|
||||||
|
assert!(should_mark_clean_exit(false, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn dirty_exit_when_neither_completion_nor_stop_fired() {
|
||||||
|
// The walk ended for some other reason — including the
|
||||||
|
// caller-imposed `hit_limit` cap, which is the regression case
|
||||||
|
// this test exists for. `should_mark_clean_exit` does not take
|
||||||
|
// `hit_limit` as a parameter, so a future edit that adds
|
||||||
|
// `|| hit_limit` to the inline expression in `run_metadata_pass`
|
||||||
|
// would need to also touch this helper, and would fail this
|
||||||
|
// assertion when it did.
|
||||||
|
assert!(!should_mark_clean_exit(false, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn run_scoped_seen_set_skips_duplicate_source_manga_keys() {
|
||||||
|
// Pins the per-ref loop contract: `contains` gates whether work
|
||||||
|
// runs, and `insert` only fires on the success path (after upsert).
|
||||||
|
// A failed ref that reappears later in the same pass must get a
|
||||||
|
// second chance — that's why the loop uses contains-then-insert
|
||||||
|
// instead of insert-and-skip-on-collision.
|
||||||
|
let mut seen: HashSet<String> = HashSet::new();
|
||||||
|
|
||||||
|
// First sighting of a key: not yet seen → loop proceeds.
|
||||||
|
assert!(!seen.contains("manga-a"), "first sighting is unseen");
|
||||||
|
// Simulate a failed fetch_manga: do NOT insert. Next sighting must
|
||||||
|
// still be considered unseen so the loop retries it.
|
||||||
|
assert!(!seen.contains("manga-a"), "failed key is still retryable");
|
||||||
|
|
||||||
|
// Now simulate a successful upsert — insert is called.
|
||||||
|
seen.insert("manga-a".to_string());
|
||||||
|
// Subsequent sightings of the same key are skipped.
|
||||||
|
assert!(seen.contains("manga-a"), "successful key is now seen");
|
||||||
|
|
||||||
|
// Distinct keys never collide.
|
||||||
|
assert!(!seen.contains("manga-b"), "different key independent");
|
||||||
|
seen.insert("manga-b".to_string());
|
||||||
|
assert!(seen.contains("manga-b"));
|
||||||
|
assert!(seen.contains("manga-a"), "first key still recorded");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -98,15 +98,9 @@ impl HostRateLimiters {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract the host (no port) from a URL string. Returns `None` for
|
// `host_of` was duplicated across session/rate_limit/pipeline; the
|
||||||
/// inputs without a `scheme://host` shape — those would never have
|
// canonical version now lives in `crawler::url_utils`.
|
||||||
/// reached the network layer anyway.
|
use crate::crawler::url_utils::host_of;
|
||||||
fn host_of(url: &str) -> Option<String> {
|
|
||||||
let after_scheme = url.split_once("://")?.1;
|
|
||||||
let host_with_port = after_scheme.split('/').next()?;
|
|
||||||
let host = host_with_port.rsplit_once(':').map_or(host_with_port, |(h, _)| h);
|
|
||||||
(!host.is_empty()).then(|| host.to_ascii_lowercase())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|||||||
558
backend/src/crawler/safety.rs
Normal file
558
backend/src/crawler/safety.rs
Normal file
@@ -0,0 +1,558 @@
|
|||||||
|
//! Defensive helpers for the image-download paths.
|
||||||
|
//!
|
||||||
|
//! Two threats this module addresses:
|
||||||
|
//!
|
||||||
|
//! - **SSRF**: a scraped chapter or manga page can embed an absolute
|
||||||
|
//! `<img src="http://10.0.0.1/...">`. The crawler runs inside the
|
||||||
|
//! backend container with intra-compose access to `postgres:5432`
|
||||||
|
//! and possibly other internal services; without a host check the
|
||||||
|
//! crawler would happily probe them. [`is_safe_url`] rejects
|
||||||
|
//! anything whose host isn't on the operator-configured allowlist,
|
||||||
|
//! plus any IP literal in RFC1918 / loopback / link-local / unique-
|
||||||
|
//! local space (including IPv4-mapped IPv6 like `::ffff:127.0.0.1`)
|
||||||
|
//! as a second defence for the case where an allowlisted hostname's
|
||||||
|
//! DNS happens to resolve to a literal private address.
|
||||||
|
//!
|
||||||
|
//! **DNS rebinding is not covered.** A hostname like `cdn.allowed.com`
|
||||||
|
//! that *resolves* to `127.0.0.1` via hostile DNS bypasses the IP
|
||||||
|
//! check entirely — `is_safe_url` only inspects URL strings, not
|
||||||
|
//! resolved IPs. Mitigating that requires a custom reqwest resolver
|
||||||
|
//! that filters IPs after DNS, which would mean rebuilding reqwest's
|
||||||
|
//! connector. The allowlist + good operator DNS hygiene is the
|
||||||
|
//! realistic mitigation today.
|
||||||
|
//!
|
||||||
|
//! - **Unbounded download**: `Response::bytes().await` reads the full
|
||||||
|
//! body before returning. A malicious source serving a 10 GiB image
|
||||||
|
//! would fill memory and then disk. [`accumulate_capped`] streams
|
||||||
|
//! the body chunk-by-chunk into a [`bytes::BytesMut`] and bails as
|
||||||
|
//! soon as the running total exceeds the cap.
|
||||||
|
//!
|
||||||
|
//! Both helpers are pure-data: the SSRF check is keyed off a parsed
|
||||||
|
//! URL string, and the byte accumulator is keyed off a generic stream.
|
||||||
|
//! Easy to unit-test without a live network or browser.
|
||||||
|
|
||||||
|
use std::net::IpAddr;
|
||||||
|
|
||||||
|
use anyhow::{bail, Context};
|
||||||
|
use bytes::BytesMut;
|
||||||
|
use futures_util::StreamExt;
|
||||||
|
use reqwest::Url;
|
||||||
|
|
||||||
|
/// Default per-image download cap. A page image is generally <2 MiB;
|
||||||
|
/// 32 MiB leaves headroom for high-resolution covers while still
|
||||||
|
/// stopping a misbehaving CDN dead. Override via `CRAWLER_MAX_IMAGE_BYTES`.
|
||||||
|
pub const DEFAULT_MAX_IMAGE_BYTES: usize = 32 * 1024 * 1024;
|
||||||
|
|
||||||
|
/// Hosts that are always allowed in addition to the operator's
|
||||||
|
/// configured allowlist. None by default — keeping the surface area
|
||||||
|
/// minimal so the only way a URL gets through is if it matches an
|
||||||
|
/// explicit catalog/CDN entry.
|
||||||
|
///
|
||||||
|
/// `allow_any` flips the host check off entirely (private-IP and
|
||||||
|
/// scheme checks still apply). It exists for operators whose sources
|
||||||
|
/// shard images across numbered CDN subdomains (`cdn1`, `cdn2`, …)
|
||||||
|
/// where enumerating each host upfront is impractical. Off by default.
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct DownloadAllowlist {
|
||||||
|
hosts: Vec<String>,
|
||||||
|
allow_any: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DownloadAllowlist {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
hosts: Vec::new(),
|
||||||
|
allow_any: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bypass the host allowlist. Scheme, localhost, and private-IP
|
||||||
|
/// checks in [`is_safe_url`] continue to apply — this only opens
|
||||||
|
/// up public hosts that weren't pre-enumerated.
|
||||||
|
pub fn allow_any() -> Self {
|
||||||
|
Self {
|
||||||
|
hosts: Vec::new(),
|
||||||
|
allow_any: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a host (case-insensitive match). Sub-domains are *not*
|
||||||
|
/// implied: pass `cdn.example.com` and `example.com` separately
|
||||||
|
/// if both should be reachable.
|
||||||
|
pub fn allow(mut self, host: impl Into<String>) -> Self {
|
||||||
|
let h = host.into().to_ascii_lowercase();
|
||||||
|
if !h.is_empty() && !self.hosts.iter().any(|existing| existing == &h) {
|
||||||
|
self.hosts.push(h);
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.hosts.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains(&self, host: &str) -> bool {
|
||||||
|
if self.allow_any {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
let lower = host.to_ascii_lowercase();
|
||||||
|
self.hosts.iter().any(|h| h == &lower)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify a URL is safe for the crawler to fetch.
|
||||||
|
///
|
||||||
|
/// Rejects:
|
||||||
|
/// - non-http(s) schemes (file://, gopher://, …),
|
||||||
|
/// - any IP literal in private / loopback / link-local / unique-local
|
||||||
|
/// space (defense in depth — a DNS allowlist alone wouldn't cover an
|
||||||
|
/// attacker that places an entry like `cdn.evil` pointing at
|
||||||
|
/// `192.168.1.1`),
|
||||||
|
/// - the literal hostname `localhost`,
|
||||||
|
/// - hosts that aren't on the supplied allowlist.
|
||||||
|
///
|
||||||
|
/// An empty allowlist rejects everything (the conservative default —
|
||||||
|
/// callers must explicitly allow the catalog and CDN hosts).
|
||||||
|
pub fn is_safe_url(raw_url: &str, allow: &DownloadAllowlist) -> Result<(), UrlSafetyError> {
|
||||||
|
let url = Url::parse(raw_url).map_err(|_| UrlSafetyError::Unparseable)?;
|
||||||
|
let scheme = url.scheme();
|
||||||
|
if scheme != "http" && scheme != "https" {
|
||||||
|
return Err(UrlSafetyError::BadScheme(scheme.to_string()));
|
||||||
|
}
|
||||||
|
let host = url.host_str().ok_or(UrlSafetyError::NoHost)?;
|
||||||
|
let lower_host = host.to_ascii_lowercase();
|
||||||
|
if lower_host == "localhost" {
|
||||||
|
return Err(UrlSafetyError::Loopback);
|
||||||
|
}
|
||||||
|
// Reject IP literals in private/loopback ranges regardless of the
|
||||||
|
// allowlist — if someone puts an IP literal on the allowlist they
|
||||||
|
// almost certainly didn't mean a private range.
|
||||||
|
// reqwest::Url normalises IPv6 literals as `[::1]` (brackets
|
||||||
|
// included) in `host_str()`. Strip the brackets before parsing.
|
||||||
|
let ip_candidate = lower_host
|
||||||
|
.strip_prefix('[')
|
||||||
|
.and_then(|s| s.strip_suffix(']'))
|
||||||
|
.unwrap_or(&lower_host);
|
||||||
|
if let Ok(ip) = ip_candidate.parse::<IpAddr>() {
|
||||||
|
if is_private_ip(&ip) {
|
||||||
|
return Err(UrlSafetyError::PrivateIp(ip));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !allow.contains(&lower_host) {
|
||||||
|
return Err(UrlSafetyError::HostNotAllowed(lower_host));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_private_ip(ip: &IpAddr) -> bool {
|
||||||
|
match ip {
|
||||||
|
IpAddr::V4(v4) => {
|
||||||
|
v4.is_loopback()
|
||||||
|
|| v4.is_private()
|
||||||
|
|| v4.is_link_local()
|
||||||
|
|| v4.is_unspecified()
|
||||||
|
|| v4.is_broadcast()
|
||||||
|
// CGNAT 100.64.0.0/10
|
||||||
|
|| (v4.octets()[0] == 100 && (v4.octets()[1] & 0xC0) == 64)
|
||||||
|
// 169.254/16 link-local already covered, but 0.0.0.0/8 is special-use
|
||||||
|
|| v4.octets()[0] == 0
|
||||||
|
}
|
||||||
|
IpAddr::V6(v6) => {
|
||||||
|
// IPv4-mapped IPv6 (::ffff:0:0/96): unwrap to the embedded
|
||||||
|
// IPv4 and recurse so `::ffff:127.0.0.1` is caught by the
|
||||||
|
// IPv4 loopback check rather than passing through.
|
||||||
|
// `Ipv6Addr::is_loopback()` only matches `::1` exactly.
|
||||||
|
if let Some(v4) = v6.to_ipv4_mapped() {
|
||||||
|
return is_private_ip(&IpAddr::V4(v4));
|
||||||
|
}
|
||||||
|
v6.is_loopback()
|
||||||
|
|| v6.is_unspecified()
|
||||||
|
// fc00::/7 unique-local
|
||||||
|
|| (v6.segments()[0] & 0xfe00) == 0xfc00
|
||||||
|
// fe80::/10 link-local
|
||||||
|
|| (v6.segments()[0] & 0xffc0) == 0xfe80
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
|
||||||
|
pub enum UrlSafetyError {
|
||||||
|
#[error("URL is not parseable")]
|
||||||
|
Unparseable,
|
||||||
|
#[error("scheme {0:?} is not http or https")]
|
||||||
|
BadScheme(String),
|
||||||
|
#[error("URL is missing a host")]
|
||||||
|
NoHost,
|
||||||
|
#[error("host points at the loopback interface")]
|
||||||
|
Loopback,
|
||||||
|
#[error("host is a private/internal IP: {0}")]
|
||||||
|
PrivateIp(IpAddr),
|
||||||
|
#[error("host {0:?} is not on the crawler download allowlist")]
|
||||||
|
HostNotAllowed(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drain a byte stream into a single buffer, bailing out as soon as
|
||||||
|
/// the running total exceeds `max_bytes`. Generic over the stream so
|
||||||
|
/// it's testable without a live HTTP response.
|
||||||
|
pub async fn accumulate_capped<S, E>(stream: S, max_bytes: usize) -> anyhow::Result<bytes::Bytes>
|
||||||
|
where
|
||||||
|
S: futures_core::Stream<Item = Result<bytes::Bytes, E>>,
|
||||||
|
E: std::error::Error + Send + Sync + 'static,
|
||||||
|
{
|
||||||
|
let mut buf = BytesMut::new();
|
||||||
|
let mut stream = std::pin::pin!(stream);
|
||||||
|
while let Some(chunk) = stream.next().await {
|
||||||
|
let chunk = chunk.map_err(|e| anyhow::anyhow!("stream chunk: {e}"))?;
|
||||||
|
if buf.len().saturating_add(chunk.len()) > max_bytes {
|
||||||
|
bail!(
|
||||||
|
"response exceeds {max_bytes}-byte cap (received >{}+{})",
|
||||||
|
buf.len(),
|
||||||
|
chunk.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
buf.extend_from_slice(&chunk);
|
||||||
|
}
|
||||||
|
Ok(buf.freeze())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Send `req` and stream the response into a length-limited buffer.
|
||||||
|
/// Combines [`is_safe_url`] check + [`accumulate_capped`] so each
|
||||||
|
/// call-site is one line.
|
||||||
|
pub async fn fetch_bytes_capped(
|
||||||
|
http: &reqwest::Client,
|
||||||
|
url: &str,
|
||||||
|
referer: Option<&str>,
|
||||||
|
allow: &DownloadAllowlist,
|
||||||
|
max_bytes: usize,
|
||||||
|
) -> anyhow::Result<bytes::Bytes> {
|
||||||
|
is_safe_url(url, allow).with_context(|| format!("reject unsafe URL {url}"))?;
|
||||||
|
let mut req = http.get(url);
|
||||||
|
if let Some(r) = referer {
|
||||||
|
req = req.header(reqwest::header::REFERER, r);
|
||||||
|
}
|
||||||
|
let resp = req
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("GET {url}"))?
|
||||||
|
.error_for_status()
|
||||||
|
.with_context(|| format!("non-2xx for {url}"))?;
|
||||||
|
accumulate_capped(resp.bytes_stream(), max_bytes)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("download body for {url}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// True when `bytes` sniffs as one of the *renderable* image formats
|
||||||
|
/// the `/files/*key` endpoint can serve with a correct Content-Type:
|
||||||
|
/// JPEG, PNG, WebP, GIF, AVIF. Matches the upload pipeline's
|
||||||
|
/// whitelist in `upload::parse_image`.
|
||||||
|
///
|
||||||
|
/// `infer::MatcherType::Image` is intentionally NOT used — it also
|
||||||
|
/// matches BMP, TIFF, HEIF, ICO, PSD, and JP2. Those would sniff as
|
||||||
|
/// "image" here but [`api::files::content_type_for`] would fall back
|
||||||
|
/// to `application/octet-stream`, prompting browsers to download
|
||||||
|
/// instead of render. Keep the two layers aligned.
|
||||||
|
pub fn looks_like_image(bytes: &[u8]) -> bool {
|
||||||
|
matches!(
|
||||||
|
infer::get(bytes).map(|k| k.mime_type()),
|
||||||
|
Some("image/jpeg" | "image/png" | "image/webp" | "image/gif" | "image/avif")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use futures_util::stream;
|
||||||
|
|
||||||
|
fn allow_just(host: &str) -> DownloadAllowlist {
|
||||||
|
DownloadAllowlist::new().allow(host)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allow_any_admits_arbitrary_public_host() {
|
||||||
|
// Operators who can't pre-enumerate a numbered-CDN fleet
|
||||||
|
// (cdn1, cdn2, …) opt into allow_any. Any public host passes.
|
||||||
|
let allow = DownloadAllowlist::allow_any();
|
||||||
|
assert!(is_safe_url("https://cdn7.random.tld/x.jpg", &allow).is_ok());
|
||||||
|
assert!(is_safe_url("https://anything-goes.example/", &allow).is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allow_any_still_blocks_private_ips() {
|
||||||
|
// The point of the bypass is the host-allowlist check, not the
|
||||||
|
// SSRF defense. Private/loopback IPs stay refused.
|
||||||
|
let allow = DownloadAllowlist::allow_any();
|
||||||
|
for url in [
|
||||||
|
"http://10.0.0.1/",
|
||||||
|
"http://192.168.1.1/",
|
||||||
|
"http://169.254.169.254/",
|
||||||
|
"http://127.0.0.1/",
|
||||||
|
"http://[::1]/",
|
||||||
|
"http://[::ffff:127.0.0.1]/",
|
||||||
|
] {
|
||||||
|
assert!(
|
||||||
|
matches!(
|
||||||
|
is_safe_url(url, &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::PrivateIp(_)
|
||||||
|
),
|
||||||
|
"allow_any must still reject {url}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allow_any_still_blocks_localhost() {
|
||||||
|
let allow = DownloadAllowlist::allow_any();
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("http://localhost:8080/", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::Loopback
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allow_any_still_blocks_non_http_schemes() {
|
||||||
|
let allow = DownloadAllowlist::allow_any();
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("file:///etc/passwd", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::BadScheme(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_allows_listed_host() {
|
||||||
|
let allow = allow_just("cdn.example.com");
|
||||||
|
assert!(is_safe_url("https://cdn.example.com/img.jpg", &allow).is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_unlisted_host() {
|
||||||
|
let allow = allow_just("cdn.example.com");
|
||||||
|
let err = is_safe_url("https://evil.example.org/img.jpg", &allow).unwrap_err();
|
||||||
|
assert!(matches!(err, UrlSafetyError::HostNotAllowed(h) if h == "evil.example.org"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_localhost_even_if_allowlisted() {
|
||||||
|
let allow = allow_just("localhost");
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("http://localhost:8080/", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::Loopback
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_loopback_ipv4() {
|
||||||
|
let allow = allow_just("127.0.0.1");
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("http://127.0.0.1/", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::PrivateIp(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_rfc1918() {
|
||||||
|
let allow = allow_just("10.0.0.1");
|
||||||
|
for url in [
|
||||||
|
"http://10.0.0.1/",
|
||||||
|
"http://192.168.1.1/",
|
||||||
|
"http://172.16.0.5/",
|
||||||
|
"http://172.31.255.255/",
|
||||||
|
] {
|
||||||
|
assert!(
|
||||||
|
matches!(
|
||||||
|
is_safe_url(url, &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::PrivateIp(_)
|
||||||
|
),
|
||||||
|
"should reject {url}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_link_local() {
|
||||||
|
let allow = allow_just("169.254.169.254");
|
||||||
|
// 169.254.169.254 is the AWS/GCP metadata service — the most
|
||||||
|
// dangerous SSRF target on a default cloud VM.
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("http://169.254.169.254/", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::PrivateIp(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_ipv6_loopback_and_ula() {
|
||||||
|
// Debug what host_str returns first — reqwest::Url normalises
|
||||||
|
// IPv6 literals as `[::1]` with brackets, which doesn't parse
|
||||||
|
// as `IpAddr` directly. The implementation strips them.
|
||||||
|
let allow = allow_just("[::1]");
|
||||||
|
let err = is_safe_url("http://[::1]/", &allow).unwrap_err();
|
||||||
|
assert!(
|
||||||
|
matches!(err, UrlSafetyError::PrivateIp(_)),
|
||||||
|
"expected PrivateIp, got {err:?}"
|
||||||
|
);
|
||||||
|
let allow = allow_just("[fd00::1]");
|
||||||
|
let err = is_safe_url("http://[fd00::1]/", &allow).unwrap_err();
|
||||||
|
assert!(
|
||||||
|
matches!(err, UrlSafetyError::PrivateIp(_)),
|
||||||
|
"expected PrivateIp, got {err:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_ipv4_mapped_ipv6_loopback() {
|
||||||
|
// `Ipv6Addr::is_loopback()` only matches `::1` exactly, so
|
||||||
|
// `::ffff:127.0.0.1` would slip through without the
|
||||||
|
// to_ipv4_mapped() unwrap in is_private_ip.
|
||||||
|
let allow = allow_just("[::ffff:127.0.0.1]");
|
||||||
|
let err = is_safe_url("http://[::ffff:127.0.0.1]/", &allow).unwrap_err();
|
||||||
|
assert!(
|
||||||
|
matches!(err, UrlSafetyError::PrivateIp(_)),
|
||||||
|
"expected PrivateIp, got {err:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_ipv4_mapped_ipv6_rfc1918() {
|
||||||
|
let allow = allow_just("[::ffff:10.0.0.1]");
|
||||||
|
let err = is_safe_url("http://[::ffff:10.0.0.1]/", &allow).unwrap_err();
|
||||||
|
assert!(matches!(err, UrlSafetyError::PrivateIp(_)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_blocks_non_http_schemes() {
|
||||||
|
let allow = allow_just("anywhere");
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("file:///etc/passwd", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::BadScheme(_)
|
||||||
|
));
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("gopher://anywhere:70/", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::BadScheme(_)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_rejects_unparseable() {
|
||||||
|
let allow = allow_just("anywhere");
|
||||||
|
assert!(matches!(
|
||||||
|
is_safe_url("not a url", &allow).unwrap_err(),
|
||||||
|
UrlSafetyError::Unparseable
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn safe_url_empty_allowlist_rejects_everything() {
|
||||||
|
let allow = DownloadAllowlist::new();
|
||||||
|
let err = is_safe_url("https://cdn.example.com/img.jpg", &allow).unwrap_err();
|
||||||
|
assert!(matches!(err, UrlSafetyError::HostNotAllowed(_)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allowlist_matches_case_insensitively() {
|
||||||
|
let allow = DownloadAllowlist::new().allow("CDN.Example.COM");
|
||||||
|
assert!(is_safe_url("https://cdn.example.com/x.jpg", &allow).is_ok());
|
||||||
|
assert!(is_safe_url("https://CDN.EXAMPLE.com/x.jpg", &allow).is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn accumulate_capped_returns_full_body_under_cap() {
|
||||||
|
let chunks: Vec<Result<bytes::Bytes, std::io::Error>> = vec![
|
||||||
|
Ok(bytes::Bytes::from_static(b"hello ")),
|
||||||
|
Ok(bytes::Bytes::from_static(b"world")),
|
||||||
|
];
|
||||||
|
let s = stream::iter(chunks);
|
||||||
|
let out = accumulate_capped(s, 100).await.unwrap();
|
||||||
|
assert_eq!(out.as_ref(), b"hello world");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn accumulate_capped_bails_past_cap() {
|
||||||
|
let chunks: Vec<Result<bytes::Bytes, std::io::Error>> = vec![
|
||||||
|
Ok(bytes::Bytes::from(vec![0u8; 50])),
|
||||||
|
Ok(bytes::Bytes::from(vec![0u8; 60])),
|
||||||
|
];
|
||||||
|
let s = stream::iter(chunks);
|
||||||
|
let err = accumulate_capped(s, 100).await.unwrap_err();
|
||||||
|
assert!(err.to_string().contains("100-byte cap"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn accumulate_capped_surfaces_stream_errors() {
|
||||||
|
let chunks: Vec<Result<bytes::Bytes, std::io::Error>> = vec![
|
||||||
|
Ok(bytes::Bytes::from_static(b"ok")),
|
||||||
|
Err(std::io::Error::other("network blip")),
|
||||||
|
];
|
||||||
|
let s = stream::iter(chunks);
|
||||||
|
let err = accumulate_capped(s, 100).await.unwrap_err();
|
||||||
|
assert!(err.to_string().contains("network blip"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn looks_like_image_accepts_jpeg() {
|
||||||
|
// JPEG SOI + APP0 segment.
|
||||||
|
let jpeg = [0xff, 0xd8, 0xff, 0xe0, 0, 0x10, b'J', b'F', b'I', b'F'];
|
||||||
|
assert!(looks_like_image(&jpeg));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn looks_like_image_accepts_png() {
|
||||||
|
let png = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0, 0, 0, 0];
|
||||||
|
assert!(looks_like_image(&png));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn looks_like_image_rejects_html_disguised_as_image() {
|
||||||
|
let html = b"<html><body>not an image</body></html>";
|
||||||
|
assert!(!looks_like_image(html));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn looks_like_image_rejects_empty() {
|
||||||
|
assert!(!looks_like_image(&[]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn looks_like_image_rejects_renderable_but_unsupported_formats() {
|
||||||
|
// BMP, TIFF, ICO, PSD are `infer::MatcherType::Image` but the
|
||||||
|
// /files/*key handler doesn't have Content-Type mappings for
|
||||||
|
// them, so they'd be served as application/octet-stream and
|
||||||
|
// download instead of render. Reject at the crawler so we
|
||||||
|
// never land them in storage.
|
||||||
|
// BMP magic: "BM" + 4-byte size.
|
||||||
|
let bmp = [b'B', b'M', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||||
|
assert!(!looks_like_image(&bmp), "BMP must be rejected (not renderable by /files)");
|
||||||
|
|
||||||
|
// TIFF little-endian magic: "II" + 42.
|
||||||
|
let tiff = [0x49, 0x49, 0x2a, 0x00, 0, 0, 0, 0];
|
||||||
|
assert!(!looks_like_image(&tiff), "TIFF must be rejected");
|
||||||
|
|
||||||
|
// ICO magic: 0x00,0x00,0x01,0x00.
|
||||||
|
let ico = [0x00, 0x00, 0x01, 0x00, 1, 0, 16, 16, 0, 0, 1, 0, 0x18, 0, 0x40, 0, 0, 0, 0x16, 0, 0, 0];
|
||||||
|
assert!(!looks_like_image(&ico), "ICO must be rejected");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn looks_like_image_accepts_webp_gif_avif() {
|
||||||
|
// Cover the three remaining whitelisted formats so a future
|
||||||
|
// tightening that drops one would fail noisily.
|
||||||
|
let webp = [
|
||||||
|
b'R', b'I', b'F', b'F',
|
||||||
|
0, 0, 0, 0,
|
||||||
|
b'W', b'E', b'B', b'P',
|
||||||
|
b'V', b'P', b'8', b' ',
|
||||||
|
];
|
||||||
|
assert!(looks_like_image(&webp));
|
||||||
|
|
||||||
|
let gif = [b'G', b'I', b'F', b'8', b'7', b'a', 0, 0, 0, 0];
|
||||||
|
assert!(looks_like_image(&gif));
|
||||||
|
|
||||||
|
let avif = [
|
||||||
|
0x00, 0x00, 0x00, 0x18,
|
||||||
|
b'f', b't', b'y', b'p',
|
||||||
|
b'a', b'v', b'i', b'f',
|
||||||
|
0x00, 0x00, 0x00, 0x00,
|
||||||
|
b'm', b'i', b'f', b'1',
|
||||||
|
b'a', b'v', b'i', b'f',
|
||||||
|
];
|
||||||
|
assert!(looks_like_image(&avif));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -42,36 +42,9 @@ pub enum SessionProbe {
|
|||||||
Transient,
|
Transient,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute the cookie domain (e.g. `.example.com`) from a start URL.
|
/// Re-export so existing callers keep working after the helper moved
|
||||||
/// The leading dot makes the cookie cover every subdomain — the source
|
/// to `crawler::url_utils`. The body lives there.
|
||||||
/// often redirects between `www.` and other prefixes mid-crawl, and a
|
pub use crate::crawler::url_utils::registrable_domain;
|
||||||
/// host-only cookie would silently drop on the cross-subdomain hop.
|
|
||||||
///
|
|
||||||
/// Caveat: this takes the last two dot-labels, which is wrong for
|
|
||||||
/// multi-part TLDs (`.co.uk`, `.com.br` would resolve to `.co.uk` and
|
|
||||||
/// attach to every site on `.co.uk`). For those, the operator should
|
|
||||||
/// override via `CRAWLER_COOKIE_DOMAIN` rather than relying on this
|
|
||||||
/// function — pulling in the Public Suffix List for one knob isn't
|
|
||||||
/// worth it yet.
|
|
||||||
pub fn registrable_domain(url: &str) -> Option<String> {
|
|
||||||
let after_scheme = url.split_once("://")?.1;
|
|
||||||
let host_with_port = after_scheme.split('/').next()?;
|
|
||||||
let host = host_with_port
|
|
||||||
.rsplit_once(':')
|
|
||||||
.map_or(host_with_port, |(h, _)| h)
|
|
||||||
.to_ascii_lowercase();
|
|
||||||
if host.is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
let labels: Vec<&str> = host.split('.').filter(|l| !l.is_empty()).collect();
|
|
||||||
if labels.len() < 2 {
|
|
||||||
// Bare hostname (e.g. `localhost`) — return as-is, no leading
|
|
||||||
// dot. Setting `.localhost` as cookie domain is invalid.
|
|
||||||
return Some(host);
|
|
||||||
}
|
|
||||||
let registrable = &labels[labels.len() - 2..];
|
|
||||||
Some(format!(".{}", registrable.join(".")))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Inject the PHPSESSID cookie into the browser's cookie store for the
|
/// Inject the PHPSESSID cookie into the browser's cookie store for the
|
||||||
/// catalog domain. Must be called before any navigation that depends on
|
/// catalog domain. Must be called before any navigation that depends on
|
||||||
@@ -127,6 +100,54 @@ pub fn classify_probe(html: &str) -> SessionProbe {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Three-way classification of a chapter page response.
|
||||||
|
///
|
||||||
|
/// Reader pages don't render `#logo`, so [`classify_probe`] can't be
|
||||||
|
/// reused as-is. The chapter-specific marker is `a#pic_container`
|
||||||
|
/// (asserted by the reader-page parser at `parse_chapter_pages`).
|
||||||
|
///
|
||||||
|
/// Order matters: broken-page body wins over selector matches, so a
|
||||||
|
/// transient site-wide 5xx that happens to render the avatar widget
|
||||||
|
/// elsewhere doesn't falsely reach `Ok`.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum ChapterProbe {
|
||||||
|
/// `a#pic_container` present — reader rendered. Whether
|
||||||
|
/// `#avatar_menu` is also there is informational; if the reader
|
||||||
|
/// loaded the session is by definition still good.
|
||||||
|
Ok,
|
||||||
|
/// Site rendered a "logged out" or "please log in" page (no
|
||||||
|
/// reader, no broken-page body, and no avatar widget either).
|
||||||
|
/// Distinguishes the genuine expired-session case from a
|
||||||
|
/// transient site hiccup.
|
||||||
|
Unauthenticated,
|
||||||
|
/// Broken-page body, or reader didn't render but the user is
|
||||||
|
/// still logged in (avatar widget present). Caller should retry
|
||||||
|
/// rather than blame the session.
|
||||||
|
Transient,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn classify_chapter_probe(html: &str) -> ChapterProbe {
|
||||||
|
if is_broken_page_body(html) {
|
||||||
|
return ChapterProbe::Transient;
|
||||||
|
}
|
||||||
|
let doc = scraper::Html::parse_document(html);
|
||||||
|
let container = scraper::Selector::parse("a#pic_container").unwrap();
|
||||||
|
if doc.select(&container).next().is_some() {
|
||||||
|
return ChapterProbe::Ok;
|
||||||
|
}
|
||||||
|
let avatar = scraper::Selector::parse("#avatar_menu").unwrap();
|
||||||
|
if doc.select(&avatar).next().is_some() {
|
||||||
|
// Logged-in user, but the reader didn't render — most likely
|
||||||
|
// the layout shifted or the site is serving an interstitial.
|
||||||
|
ChapterProbe::Transient
|
||||||
|
} else {
|
||||||
|
// No reader, no avatar, no broken-body marker — site rendered
|
||||||
|
// the "please log in" page, which is the genuine session-
|
||||||
|
// expired signal on this route.
|
||||||
|
ChapterProbe::Unauthenticated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// In-startup retry budget for the session probe. Small but non-zero —
|
/// In-startup retry budget for the session probe. Small but non-zero —
|
||||||
/// startup hitting a 5-second site hiccup shouldn't fail the operator
|
/// startup hitting a 5-second site hiccup shouldn't fail the operator
|
||||||
/// with "PHPSESSID expired" when the session is actually fine.
|
/// with "PHPSESSID expired" when the session is actually fine.
|
||||||
@@ -182,7 +203,18 @@ async fn fetch_probe_html(browser: &Browser, probe_url: &str) -> anyhow::Result<
|
|||||||
.new_page(probe_url)
|
.new_page(probe_url)
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("open probe page {probe_url}"))?;
|
.with_context(|| format!("open probe page {probe_url}"))?;
|
||||||
page.wait_for_navigation().await.context("wait for nav on probe")?;
|
crate::crawler::nav::wait_for_nav(&page)
|
||||||
|
.await
|
||||||
|
.context("wait for nav on probe")?;
|
||||||
|
// Best-effort wait for the layout marker. Timeout is fine — the
|
||||||
|
// probe classifier handles a missing `#logo` as Transient anyway,
|
||||||
|
// and the verify loop retries on Transient.
|
||||||
|
let _ = crate::crawler::nav::wait_for_selector(
|
||||||
|
&page,
|
||||||
|
"#logo",
|
||||||
|
crate::crawler::nav::SELECTOR_TIMEOUT,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
let html = page.content().await.context("read probe html")?;
|
let html = page.content().await.context("read probe html")?;
|
||||||
page.close().await.ok();
|
page.close().await.ok();
|
||||||
Ok(html)
|
Ok(html)
|
||||||
@@ -192,44 +224,8 @@ async fn fetch_probe_html(browser: &Browser, probe_url: &str) -> anyhow::Result<
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
// registrable_domain tests live in crawler::url_utils now —
|
||||||
fn registrable_domain_strips_subdomain() {
|
// it's the canonical home for that helper.
|
||||||
assert_eq!(
|
|
||||||
registrable_domain("https://www.target-site.com/manga/foo/").as_deref(),
|
|
||||||
Some(".target-site.com")
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
registrable_domain("https://m.example.org").as_deref(),
|
|
||||||
Some(".example.org")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn registrable_domain_keeps_two_label_host() {
|
|
||||||
assert_eq!(
|
|
||||||
registrable_domain("https://example.com/").as_deref(),
|
|
||||||
Some(".example.com")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn registrable_domain_handles_port() {
|
|
||||||
assert_eq!(
|
|
||||||
registrable_domain("http://www.foo.bar:8080/x").as_deref(),
|
|
||||||
Some(".foo.bar")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn registrable_domain_bare_hostname_no_leading_dot() {
|
|
||||||
// .localhost would be invalid as a cookie Domain.
|
|
||||||
assert_eq!(registrable_domain("http://localhost:5173").as_deref(), Some("localhost"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn registrable_domain_returns_none_for_garbage() {
|
|
||||||
assert!(registrable_domain("not a url").is_none());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn classify_probe_ok_when_logo_and_avatar_present() {
|
fn classify_probe_ok_when_logo_and_avatar_present() {
|
||||||
@@ -273,6 +269,73 @@ mod tests {
|
|||||||
assert_eq!(classify_probe(""), SessionProbe::Transient);
|
assert_eq!(classify_probe(""), SessionProbe::Transient);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_chapter_probe_ok_when_reader_rendered() {
|
||||||
|
let html = r#"
|
||||||
|
<html><body>
|
||||||
|
<a id="pic_container">
|
||||||
|
<img id="page1" src="https://cdn/1.jpg">
|
||||||
|
</a>
|
||||||
|
</body></html>
|
||||||
|
"#;
|
||||||
|
assert_eq!(classify_chapter_probe(html), ChapterProbe::Ok);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_chapter_probe_unauthenticated_when_no_reader_and_no_avatar() {
|
||||||
|
// What a logged-out hit on a chapter URL renders: a normal
|
||||||
|
// site layout (header etc.) with a "please log in" body, but
|
||||||
|
// no reader and no avatar widget.
|
||||||
|
let html = r#"
|
||||||
|
<html><body>
|
||||||
|
<header><div id="logo">Catalog</div></header>
|
||||||
|
<main>Please log in to read this chapter.</main>
|
||||||
|
</body></html>
|
||||||
|
"#;
|
||||||
|
assert_eq!(
|
||||||
|
classify_chapter_probe(html),
|
||||||
|
ChapterProbe::Unauthenticated
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_chapter_probe_transient_when_logged_in_but_reader_missing() {
|
||||||
|
// Avatar shows the session is still valid; reader didn't
|
||||||
|
// render — site is serving an interstitial or the layout
|
||||||
|
// momentarily shifted. Retry, don't blame the session.
|
||||||
|
let html = r#"
|
||||||
|
<html><body>
|
||||||
|
<header><div id="logo">Catalog</div><div id="avatar_menu"></div></header>
|
||||||
|
<main>Site maintenance — back in 5 minutes.</main>
|
||||||
|
</body></html>
|
||||||
|
"#;
|
||||||
|
assert_eq!(classify_chapter_probe(html), ChapterProbe::Transient);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_chapter_probe_transient_on_broken_page_body() {
|
||||||
|
let html =
|
||||||
|
"<html><body><p>we're sorry, the request file are not found.</p></body></html>";
|
||||||
|
assert_eq!(classify_chapter_probe(html), ChapterProbe::Transient);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn classify_chapter_probe_does_not_misfire_on_avatar_alone_without_reader() {
|
||||||
|
// Regression for the original bug: the binary
|
||||||
|
// find_element("#avatar_menu") check treated "no avatar" as
|
||||||
|
// session-expired even when a transient hiccup was the real
|
||||||
|
// cause. classify_chapter_probe must NOT trip on that pattern
|
||||||
|
// when pic_container *is* present.
|
||||||
|
let html = r#"
|
||||||
|
<html><body>
|
||||||
|
<a id="pic_container">
|
||||||
|
<img id="page1" src="https://cdn/1.jpg">
|
||||||
|
</a>
|
||||||
|
</body></html>
|
||||||
|
"#;
|
||||||
|
assert_eq!(classify_chapter_probe(html), ChapterProbe::Ok);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn classify_probe_trusts_broken_body_over_stray_avatar_match() {
|
fn classify_probe_trusts_broken_body_over_stray_avatar_match() {
|
||||||
// Defensive: if a broken-page body somehow contains an
|
// Defensive: if a broken-page body somehow contains an
|
||||||
|
|||||||
@@ -8,19 +8,6 @@ pub mod target;
|
|||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use chromiumoxide::browser::Browser;
|
use chromiumoxide::browser::Browser;
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
/// How a `discover` job should walk the source's index.
|
|
||||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
|
||||||
pub enum DiscoverMode {
|
|
||||||
/// Walk every index page from last back to first. Used for the
|
|
||||||
/// initial seed of a source.
|
|
||||||
Backfill,
|
|
||||||
/// Walk index pages from page 1 forward, stopping after
|
|
||||||
/// `stop_after_unchanged` consecutive mangas whose `metadata_hash`
|
|
||||||
/// matches storage. Used for the recurring cron tick.
|
|
||||||
Incremental { stop_after_unchanged: usize },
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Pointer at a manga in the source's index, before we've fetched the
|
/// Pointer at a manga in the source's index, before we've fetched the
|
||||||
/// detail page. The `source_manga_key` is whatever stable id the source
|
/// detail page. The `source_manga_key` is whatever stable id the source
|
||||||
@@ -83,14 +70,14 @@ pub struct FetchContext<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Lazy iterator over discovered manga refs. The caller drives the
|
/// Lazy iterator over discovered manga refs. The caller drives the
|
||||||
/// walk one batch at a time, so it can break out as soon as a
|
/// walk one batch at a time, so it can break out as soon as the
|
||||||
/// downstream stop condition is met (e.g. N consecutive Unchanged
|
/// downstream stop condition is met (the first manga where metadata is
|
||||||
/// upserts in Incremental mode) without paying for pages it won't use.
|
/// `Unchanged` and chapter sync reports zero new chapters) without
|
||||||
|
/// paying for pages it won't use.
|
||||||
///
|
///
|
||||||
/// Batches are typically one source-index page each. Within a batch
|
/// Batches are typically one source-index page each. Within a batch
|
||||||
/// refs are already in the right per-page order for the active mode
|
/// refs are in the source's natural newest-first ordering — the same
|
||||||
/// (Backfill reverses each page to oldest-first; Incremental leaves
|
/// `update_date DESC` sort that makes the stop condition meaningful.
|
||||||
/// the source's natural newest-first ordering).
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait DiscoverWalk: Send {
|
pub trait DiscoverWalk: Send {
|
||||||
/// Return the next batch of refs, or `Ok(None)` when the source has
|
/// Return the next batch of refs, or `Ok(None)` when the source has
|
||||||
@@ -107,16 +94,14 @@ pub trait Source: Send + Sync {
|
|||||||
/// Stable identifier — also the row key in the `sources` table.
|
/// Stable identifier — also the row key in the `sources` table.
|
||||||
fn id(&self) -> &'static str;
|
fn id(&self) -> &'static str;
|
||||||
|
|
||||||
/// Begin discovery in `mode`. Returns a walker the caller drives
|
/// Begin discovery. Returns a walker the caller drives page-by-page
|
||||||
/// page-by-page via `next_batch`. The initial page-1 probe (used
|
/// via `next_batch`. The initial page-1 probe (used to determine
|
||||||
/// to determine `last_page` and warm the cache for sites that
|
/// `last_page` and warm the cache for sites that can't be paged
|
||||||
/// can't be paged without knowing the bound) happens inside this
|
/// without knowing the bound) happens inside this call, so a fresh
|
||||||
/// call, so a fresh walker is ready to yield its first batch
|
/// walker is ready to yield its first batch without further setup.
|
||||||
/// without further setup.
|
|
||||||
async fn discover(
|
async fn discover(
|
||||||
&self,
|
&self,
|
||||||
ctx: &FetchContext<'_>,
|
ctx: &FetchContext<'_>,
|
||||||
mode: DiscoverMode,
|
|
||||||
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>>;
|
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>>;
|
||||||
|
|
||||||
async fn fetch_manga(
|
async fn fetch_manga(
|
||||||
|
|||||||
@@ -15,22 +15,24 @@ use async_trait::async_trait;
|
|||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
DiscoverMode, DiscoverWalk, FetchContext, Source, SourceChapter, SourceChapterRef,
|
DiscoverWalk, FetchContext, Source, SourceChapter, SourceChapterRef, SourceManga,
|
||||||
SourceManga, SourceMangaRef,
|
SourceMangaRef,
|
||||||
};
|
};
|
||||||
use crate::crawler::detect::{
|
use crate::crawler::detect::{
|
||||||
has_logo_sentinel, is_broken_page_body, retry_on_transient, PageError,
|
has_logo_sentinel, is_broken_page_body, retry_on_transient, PageError,
|
||||||
};
|
};
|
||||||
|
use crate::crawler::nav::{wait_for_nav, wait_for_selector, NavError, SELECTOR_TIMEOUT};
|
||||||
|
|
||||||
/// `sources.id` value for this Source impl. Exposed as a const so the
|
/// `sources.id` value for this Source impl. Exposed as a const so the
|
||||||
/// daemon can look up per-source state (e.g. `seed_completed_at`)
|
/// daemon can look up per-source state (e.g. the recovery flag) before
|
||||||
/// before constructing the Source itself.
|
/// constructing the Source itself.
|
||||||
pub const SOURCE_ID: &str = "target";
|
pub const SOURCE_ID: &str = "target";
|
||||||
|
|
||||||
/// In-loop retry budget for transient pages encountered during a single
|
/// In-loop retry budget for transient pages encountered during a single
|
||||||
/// `discover` walk. Bounded small because the job system itself retries
|
/// `discover` walk. Bounded small because the next cron tick will pick up
|
||||||
/// the whole `Discover` job on failure — these inline retries only need
|
/// where this run left off via the recovery flag — these inline retries
|
||||||
/// to absorb a brief site hiccup mid-walk.
|
/// only need to absorb a brief site hiccup mid-walk, not a sustained
|
||||||
|
/// outage.
|
||||||
const PAGE_TRANSIENT_RETRY_ATTEMPTS: u32 = 3;
|
const PAGE_TRANSIENT_RETRY_ATTEMPTS: u32 = 3;
|
||||||
const PAGE_TRANSIENT_RETRY_DELAY: Duration = Duration::from_secs(2);
|
const PAGE_TRANSIENT_RETRY_DELAY: Duration = Duration::from_secs(2);
|
||||||
|
|
||||||
@@ -72,14 +74,15 @@ impl Source for TargetSource {
|
|||||||
async fn discover(
|
async fn discover(
|
||||||
&self,
|
&self,
|
||||||
ctx: &FetchContext<'_>,
|
ctx: &FetchContext<'_>,
|
||||||
mode: DiscoverMode,
|
|
||||||
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>> {
|
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>> {
|
||||||
// Always visit page 1 first because that's the only way to
|
// Always visit page 1 first because that's the only way to
|
||||||
// discover `last_page`. Retry it on transient — a broken first
|
// discover `last_page`. Retry it on transient — a broken first
|
||||||
// page would otherwise abort the whole walk before we've even
|
// page would otherwise abort the whole walk before we've even
|
||||||
// started.
|
// started.
|
||||||
let first_html = retry_on_transient(
|
let first_html = retry_on_transient(
|
||||||
|| async { navigate(ctx, self.base_url.as_str()).await },
|
|| async {
|
||||||
|
navigate(ctx, self.base_url.as_str(), LIST_PAGE_MARKER).await
|
||||||
|
},
|
||||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||||
PAGE_TRANSIENT_RETRY_DELAY,
|
PAGE_TRANSIENT_RETRY_DELAY,
|
||||||
)
|
)
|
||||||
@@ -89,10 +92,8 @@ impl Source for TargetSource {
|
|||||||
parse_last_page(&doc)
|
parse_last_page(&doc)
|
||||||
};
|
};
|
||||||
|
|
||||||
let backfill = matches!(mode, DiscoverMode::Backfill);
|
let order = build_page_order(last_page);
|
||||||
let order = build_page_order(last_page, backfill);
|
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
?mode,
|
|
||||||
last_page = ?last_page,
|
last_page = ?last_page,
|
||||||
page_count = order.len(),
|
page_count = order.len(),
|
||||||
"walking pagination"
|
"walking pagination"
|
||||||
@@ -100,7 +101,6 @@ impl Source for TargetSource {
|
|||||||
|
|
||||||
Ok(Box::new(TargetSourceWalker {
|
Ok(Box::new(TargetSourceWalker {
|
||||||
base_url: self.base_url.clone(),
|
base_url: self.base_url.clone(),
|
||||||
backfill,
|
|
||||||
pages_remaining: order,
|
pages_remaining: order,
|
||||||
first_page_html: Some(first_html),
|
first_page_html: Some(first_html),
|
||||||
}))
|
}))
|
||||||
@@ -111,7 +111,17 @@ impl Source for TargetSource {
|
|||||||
ctx: &FetchContext<'_>,
|
ctx: &FetchContext<'_>,
|
||||||
r: &SourceMangaRef,
|
r: &SourceMangaRef,
|
||||||
) -> anyhow::Result<SourceManga> {
|
) -> anyhow::Result<SourceManga> {
|
||||||
let html = navigate(ctx, r.url.as_str()).await?;
|
// When we'll parse the chapter table, wait for at least one
|
||||||
|
// chapter row to appear — that's the marker most sensitive to
|
||||||
|
// the post-load JS partial-render race. When we won't, fall
|
||||||
|
// back to the layout-level `#logo` so we still wait for the
|
||||||
|
// page to settle.
|
||||||
|
let marker = if self.parse_chapters {
|
||||||
|
DETAIL_PAGE_CHAPTERS_MARKER
|
||||||
|
} else {
|
||||||
|
DETAIL_PAGE_LAYOUT_MARKER
|
||||||
|
};
|
||||||
|
let html = navigate(ctx, r.url.as_str(), marker).await?;
|
||||||
// Convert PageError → anyhow::Error via `?`. PageError stays
|
// Convert PageError → anyhow::Error via `?`. PageError stays
|
||||||
// downcastable from the wrapped anyhow::Error so the pipeline
|
// downcastable from the wrapped anyhow::Error so the pipeline
|
||||||
// can still recognize Transient via `error.downcast_ref::<PageError>()`.
|
// can still recognize Transient via `error.downcast_ref::<PageError>()`.
|
||||||
@@ -138,16 +148,13 @@ impl Source for TargetSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Build the queue of page numbers `TargetSource::discover` will walk.
|
/// Build the queue of page numbers `TargetSource::discover` will walk.
|
||||||
/// Backfill is oldest-first: pages `last..=1` (within each page the
|
/// The site orders by `update_date DESC`, so newest-first is just the
|
||||||
/// walker reverses entries, since the source orders by update_date
|
/// natural page order: `1..=last`. If `last_page` is unknown (source
|
||||||
/// DESC). Incremental is newest-first: pages `1..=last` in natural
|
/// surfaces no pagination) only page 1 is visited.
|
||||||
/// order. If `last_page` is unknown (source surfaces no pagination)
|
fn build_page_order(last_page: Option<i32>) -> VecDeque<i32> {
|
||||||
/// only page 1 is visited.
|
match last_page {
|
||||||
fn build_page_order(last_page: Option<i32>, backfill: bool) -> VecDeque<i32> {
|
None => VecDeque::from([1]),
|
||||||
match (last_page, backfill) {
|
Some(last) => (1..=last).collect(),
|
||||||
(None, _) => VecDeque::from([1]),
|
|
||||||
(Some(last), true) => (1..=last).rev().collect(),
|
|
||||||
(Some(last), false) => (1..=last).collect(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -157,7 +164,6 @@ fn build_page_order(last_page: Option<i32>, backfill: bool) -> VecDeque<i32> {
|
|||||||
/// batch covering page 1 doesn't re-fetch.
|
/// batch covering page 1 doesn't re-fetch.
|
||||||
struct TargetSourceWalker {
|
struct TargetSourceWalker {
|
||||||
base_url: String,
|
base_url: String,
|
||||||
backfill: bool,
|
|
||||||
pages_remaining: VecDeque<i32>,
|
pages_remaining: VecDeque<i32>,
|
||||||
first_page_html: Option<String>,
|
first_page_html: Option<String>,
|
||||||
}
|
}
|
||||||
@@ -171,7 +177,7 @@ impl DiscoverWalk for TargetSourceWalker {
|
|||||||
let Some(page_num) = self.pages_remaining.pop_front() else {
|
let Some(page_num) = self.pages_remaining.pop_front() else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
let mut page_refs = if page_num == 1 {
|
let page_refs = if page_num == 1 {
|
||||||
// Reuse the cached page-1 HTML from the initial probe. Take
|
// Reuse the cached page-1 HTML from the initial probe. Take
|
||||||
// it (rather than clone) so a malformed page-order queue
|
// it (rather than clone) so a malformed page-order queue
|
||||||
// that re-visits page 1 still falls back to a real fetch.
|
// that re-visits page 1 still falls back to a real fetch.
|
||||||
@@ -183,7 +189,12 @@ impl DiscoverWalk for TargetSourceWalker {
|
|||||||
None => {
|
None => {
|
||||||
retry_on_transient(
|
retry_on_transient(
|
||||||
|| async {
|
|| async {
|
||||||
let html = navigate(ctx, self.base_url.as_str()).await?;
|
let html = navigate(
|
||||||
|
ctx,
|
||||||
|
self.base_url.as_str(),
|
||||||
|
LIST_PAGE_MARKER,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
let doc = scraper::Html::parse_document(&html);
|
let doc = scraper::Html::parse_document(&html);
|
||||||
parse_manga_list_from(&doc)
|
parse_manga_list_from(&doc)
|
||||||
},
|
},
|
||||||
@@ -197,7 +208,7 @@ impl DiscoverWalk for TargetSourceWalker {
|
|||||||
retry_on_transient(
|
retry_on_transient(
|
||||||
|| async {
|
|| async {
|
||||||
let url = page_url(&self.base_url, page_num);
|
let url = page_url(&self.base_url, page_num);
|
||||||
let html = navigate(ctx, &url).await?;
|
let html = navigate(ctx, &url, LIST_PAGE_MARKER).await?;
|
||||||
let doc = scraper::Html::parse_document(&html);
|
let doc = scraper::Html::parse_document(&html);
|
||||||
parse_manga_list_from(&doc)
|
parse_manga_list_from(&doc)
|
||||||
},
|
},
|
||||||
@@ -206,32 +217,57 @@ impl DiscoverWalk for TargetSourceWalker {
|
|||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
};
|
};
|
||||||
if self.backfill {
|
|
||||||
page_refs.reverse();
|
|
||||||
}
|
|
||||||
tracing::info!(page_num, count = page_refs.len(), "page walked");
|
tracing::info!(page_num, count = page_refs.len(), "page walked");
|
||||||
Ok(Some(page_refs))
|
Ok(Some(page_refs))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Per-page-type markers used by `navigate`'s post-navigation wait.
|
||||||
|
/// Each is the most specific element the parser will later look for —
|
||||||
|
/// waiting on it closes the partial-render race (e.g. `#chapter_table`
|
||||||
|
/// wrapper present but rows still being injected by post-load JS) that
|
||||||
|
/// the old fixed 1s sleep masked. See [`navigate`].
|
||||||
|
const LIST_PAGE_MARKER: &str = "#left_side .pic_list .updatesli";
|
||||||
|
const DETAIL_PAGE_CHAPTERS_MARKER: &str = "#chapter_table td h4 a.chico";
|
||||||
|
const DETAIL_PAGE_LAYOUT_MARKER: &str = "#logo";
|
||||||
|
|
||||||
/// Single point of rate-limited navigation. Every Source request goes
|
/// Single point of rate-limited navigation. Every Source request goes
|
||||||
/// through here, so the per-host limiter map is the only knob that
|
/// through here, so the per-host limiter map is the only knob that
|
||||||
/// controls per-origin RPS. Also the choke point for transient-page
|
/// controls per-origin RPS. Also the choke point for transient-page
|
||||||
/// detection — every fetched body is screened by
|
/// detection — every fetched body is screened by
|
||||||
/// [`classify_navigate_html`] before being handed to a selector.
|
/// [`classify_navigate_html`] before being handed to a selector.
|
||||||
async fn navigate(ctx: &FetchContext<'_>, url: &str) -> Result<String, PageError> {
|
///
|
||||||
|
/// `marker` is a CSS selector the caller expects to find on the loaded
|
||||||
|
/// page. The wait is best-effort: a timeout is **not** an error
|
||||||
|
/// (legitimately-empty pages may never render the marker), it just
|
||||||
|
/// caps how long we'll hold for post-load JS to finish injecting
|
||||||
|
/// content. The parser's own sentinels and the universal broken-page
|
||||||
|
/// body check still catch real failures.
|
||||||
|
async fn navigate(
|
||||||
|
ctx: &FetchContext<'_>,
|
||||||
|
url: &str,
|
||||||
|
marker: &str,
|
||||||
|
) -> Result<String, PageError> {
|
||||||
ctx.rate.wait_for(url).await?;
|
ctx.rate.wait_for(url).await?;
|
||||||
let page = ctx
|
let page = ctx
|
||||||
.browser
|
.browser
|
||||||
.new_page(url)
|
.new_page(url)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
|
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
|
||||||
page.wait_for_navigation()
|
match wait_for_nav(&page).await {
|
||||||
.await
|
Ok(()) => {}
|
||||||
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
|
Err(NavError::Timeout(_)) => {
|
||||||
// Stopgap until we wait on a specific selector per page type —
|
page.close().await.ok();
|
||||||
// gives any post-load JS a beat to finish injecting content.
|
return Err(PageError::transient("nav timeout"));
|
||||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
}
|
||||||
|
Err(NavError::Cdp(e)) => {
|
||||||
|
page.close().await.ok();
|
||||||
|
return Err(PageError::Other(anyhow::Error::from(e)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Best-effort wait for the page-type marker. We deliberately
|
||||||
|
// discard a timeout here — see fn-level doc.
|
||||||
|
let _ = wait_for_selector(&page, marker, SELECTOR_TIMEOUT).await;
|
||||||
let html = page
|
let html = page
|
||||||
.content()
|
.content()
|
||||||
.await
|
.await
|
||||||
@@ -369,7 +405,7 @@ fn parse_manga_detail(
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let chapters = if include_chapters {
|
let chapters = if include_chapters {
|
||||||
parse_chapter_list(&doc)
|
parse_chapter_list(&doc)?
|
||||||
} else {
|
} else {
|
||||||
Vec::new()
|
Vec::new()
|
||||||
};
|
};
|
||||||
@@ -427,9 +463,22 @@ fn strip_tag_count(s: &str) -> String {
|
|||||||
trimmed.to_string()
|
trimmed.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_chapter_list(doc: &scraper::Html) -> Vec<SourceChapterRef> {
|
/// Parse the chapter table on a manga detail page. Returns `Transient` if
|
||||||
|
/// `#chapter_table` isn't in the DOM at all — the table is required even
|
||||||
|
/// for mangas with no published chapters yet (the source renders an empty
|
||||||
|
/// `<table>`), so an absent table signals a partial render (post-load JS
|
||||||
|
/// not done, layout drift) rather than a legitimately empty list. Without
|
||||||
|
/// this sentinel, an empty `Vec` reaches `sync_manga_chapters` and the
|
||||||
|
/// soft-drop branch flips every existing chapter to `dropped_at`.
|
||||||
|
fn parse_chapter_list(doc: &scraper::Html) -> Result<Vec<SourceChapterRef>, PageError> {
|
||||||
|
if !has_chapter_table_sentinel(doc) {
|
||||||
|
return Err(PageError::transient(
|
||||||
|
"manga detail: #chapter_table sentinel missing",
|
||||||
|
));
|
||||||
|
}
|
||||||
let sel = scraper::Selector::parse("#chapter_table td h4 a.chico").unwrap();
|
let sel = scraper::Selector::parse("#chapter_table td h4 a.chico").unwrap();
|
||||||
doc.select(&sel)
|
Ok(doc
|
||||||
|
.select(&sel)
|
||||||
.filter_map(|a| {
|
.filter_map(|a| {
|
||||||
let url = a.value().attr("href")?.trim().to_string();
|
let url = a.value().attr("href")?.trim().to_string();
|
||||||
if url.is_empty() {
|
if url.is_empty() {
|
||||||
@@ -444,7 +493,16 @@ fn parse_chapter_list(doc: &scraper::Html) -> Vec<SourceChapterRef> {
|
|||||||
url,
|
url,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.collect()
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true when the chapter-table container is present in the DOM.
|
||||||
|
/// Source-specific: the target site uses `#chapter_table` as the wrapper
|
||||||
|
/// element. Distinguishes "table is present but empty" (legit edge case
|
||||||
|
/// for new mangas) from "table is missing entirely" (partial render).
|
||||||
|
fn has_chapter_table_sentinel(doc: &scraper::Html) -> bool {
|
||||||
|
let sel = scraper::Selector::parse("#chapter_table").expect("valid selector");
|
||||||
|
doc.select(&sel).next().is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_chapter_number(text: &str) -> Option<i32> {
|
fn parse_chapter_number(text: &str) -> Option<i32> {
|
||||||
@@ -744,7 +802,7 @@ mod tests {
|
|||||||
"../../../tests/fixtures/target/chapter_list_uu.html"
|
"../../../tests/fixtures/target/chapter_list_uu.html"
|
||||||
);
|
);
|
||||||
let doc = scraper::Html::parse_document(html);
|
let doc = scraper::Html::parse_document(html);
|
||||||
let chapters = parse_chapter_list(&doc);
|
let chapters = parse_chapter_list(&doc).expect("fixture has the table");
|
||||||
|
|
||||||
assert_eq!(chapters.len(), 15, "every row kept (notices/hiatus included)");
|
assert_eq!(chapters.len(), 15, "every row kept (notices/hiatus included)");
|
||||||
|
|
||||||
@@ -891,9 +949,17 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn missing_optional_fields_parse_to_none() {
|
fn missing_optional_fields_parse_to_none() {
|
||||||
|
// Minimal but well-formed detail page: title is required, every
|
||||||
|
// other field is optional, but the chapter table is structural —
|
||||||
|
// its absence is treated as Transient (a freshly added manga
|
||||||
|
// renders the table empty, not absent). See
|
||||||
|
// `parse_chapter_list_returns_transient_when_table_missing` for
|
||||||
|
// the negative case.
|
||||||
let html = r#"<html><body>\
|
let html = r#"<html><body>\
|
||||||
<header><div id="logo">Target</div></header>\
|
<header><div id="logo">Target</div></header>\
|
||||||
<div class="w-title"><h1>Minimal</h1></div></body></html>"#;
|
<div class="w-title"><h1>Minimal</h1></div>\
|
||||||
|
<table id="chapter_table"></table>\
|
||||||
|
</body></html>"#;
|
||||||
let m = parse_manga_detail(html, "min", true).unwrap();
|
let m = parse_manga_detail(html, "min", true).unwrap();
|
||||||
assert_eq!(m.title, "Minimal");
|
assert_eq!(m.title, "Minimal");
|
||||||
assert!(m.summary.is_none());
|
assert!(m.summary.is_none());
|
||||||
@@ -959,35 +1025,84 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn build_page_order_backfill_is_last_to_one() {
|
fn build_page_order_is_natural_one_to_last() {
|
||||||
// Backfill walks pages oldest-first: queue is [last, last-1, ..., 1]
|
// Newest-first is just the source's natural pagination order:
|
||||||
// so popping from the front yields the last page first.
|
// (update_date DESC) lives at page 1, oldest at the last page.
|
||||||
let order = build_page_order(Some(3), true);
|
let order = build_page_order(Some(3));
|
||||||
assert_eq!(Vec::from(order), vec![3, 2, 1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn build_page_order_incremental_is_one_to_last() {
|
|
||||||
// Incremental walks newest-first in natural source order.
|
|
||||||
let order = build_page_order(Some(3), false);
|
|
||||||
assert_eq!(Vec::from(order), vec![1, 2, 3]);
|
assert_eq!(Vec::from(order), vec![1, 2, 3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn build_page_order_falls_back_to_page_one_only_without_pagination() {
|
fn build_page_order_falls_back_to_page_one_only_without_pagination() {
|
||||||
let backfill = build_page_order(None, true);
|
// Source surfaced no pagination control — visit page 1 alone
|
||||||
assert_eq!(Vec::from(backfill), vec![1]);
|
// and let the walk end after one batch.
|
||||||
let incremental = build_page_order(None, false);
|
let order = build_page_order(None);
|
||||||
assert_eq!(Vec::from(incremental), vec![1]);
|
assert_eq!(Vec::from(order), vec![1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn build_page_order_single_page_index_yields_one_entry() {
|
fn build_page_order_single_page_index_yields_one_entry() {
|
||||||
// Sources with exactly one page should not yield duplicates
|
let order = build_page_order(Some(1));
|
||||||
// regardless of mode.
|
assert_eq!(Vec::from(order), vec![1]);
|
||||||
let backfill = build_page_order(Some(1), true);
|
}
|
||||||
assert_eq!(Vec::from(backfill), vec![1]);
|
|
||||||
let incremental = build_page_order(Some(1), false);
|
#[test]
|
||||||
assert_eq!(Vec::from(incremental), vec![1]);
|
fn parse_chapter_list_returns_transient_when_table_missing() {
|
||||||
|
// Partial render (post-load JS hadn't injected the table, layout
|
||||||
|
// drift, etc). Returning Vec::new() would silently soft-drop every
|
||||||
|
// existing chapter for the manga via sync_manga_chapters; Transient
|
||||||
|
// is the signal the job system retries on.
|
||||||
|
let html = r#"<html><body>
|
||||||
|
<header><div id="logo">Target</div></header>
|
||||||
|
<div class="w-title"><h1>Test</h1></div>
|
||||||
|
</body></html>"#;
|
||||||
|
let doc = scraper::Html::parse_document(html);
|
||||||
|
let err = parse_chapter_list(&doc).expect_err("expected Transient");
|
||||||
|
assert!(err.is_transient(), "got non-transient: {err}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn parse_chapter_list_ok_empty_when_table_present_but_no_rows() {
    // The `<table id="chapter_table">` wrapper is rendered but holds no
    // rows, which is what a just-added manga with zero chapters looks
    // like. This must parse to an empty list rather than being mistaken
    // for a page where the table is missing.
    let doc = scraper::Html::parse_document(
        r#"<html><body>
<header><div id="logo">Target</div></header>
<table id="chapter_table"></table>
</body></html>"#,
    );
    let outcome = parse_chapter_list(&doc);
    let chapters = outcome.expect("present table is not transient");
    assert!(chapters.is_empty());
}
|
||||||
|
|
||||||
|
#[test]
fn parse_manga_detail_propagates_chapter_table_transient() {
    // Whole-page check: the `#logo` sentinel is present, but the chapter
    // table has been stripped. With chapters requested, the parser must
    // surface a Transient error instead of handing back a manga whose
    // chapter list is silently empty.
    const PAGE: &str = r#"<html><body>
<header><div id="logo">Target</div></header>
<div class="w-title"><h1>Test Title</h1></div>
<div class="cover"><img src="/cover.jpg"></div>
<!-- intentionally no #chapter_table -->
</body></html>"#;
    let outcome = parse_manga_detail(PAGE, "key", true);
    let err = outcome.expect_err("expected Transient");
    assert!(err.is_transient(), "got non-transient: {err}");
}
|
||||||
|
|
||||||
|
#[test]
fn parse_manga_detail_skips_chapter_sentinel_when_include_chapters_false() {
    // With `include_chapters = false` (metadata-only mode) the parser
    // must not insist on the chapter table: a page without it parses
    // successfully and simply yields no chapters.
    const PAGE: &str = r#"<html><body>
<header><div id="logo">Target</div></header>
<div class="w-title"><h1>Test Title</h1></div>
<div class="cover"><img src="/cover.jpg"></div>
</body></html>"#;
    let outcome = parse_manga_detail(PAGE, "key", false);
    let manga = outcome.expect("metadata-only parse must not require chapter table");
    assert!(manga.chapters.is_empty());
}
|
||||||
}
|
}
|
||||||
|
|||||||
194
backend/src/crawler/url_utils.rs
Normal file
194
backend/src/crawler/url_utils.rs
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
//! Centralised URL helpers for the crawler subsystem.
|
||||||
|
//!
|
||||||
|
//! Three near-identical hand-rolled URL parsers used to live in
|
||||||
|
//! `crawler::session`, `crawler::rate_limit`, and `crawler::pipeline`
|
||||||
|
//! respectively, each with subtly different edge-case behaviour
|
||||||
|
//! around port handling and IPv6 literals. They're consolidated here
|
||||||
|
//! so the divergence can't drift again.
|
||||||
|
//!
|
||||||
|
//! The hand-rolled implementations are kept intentionally — they
|
||||||
|
//! preserve the exact semantics every existing test pins. A future
|
||||||
|
//! refactor can switch to `reqwest::Url` if it can be done without
|
||||||
|
//! changing those semantics.
|
||||||
|
|
||||||
|
/// Lowercased host with port and userinfo removed. Returns `None` for
/// inputs without a `scheme://host` shape (no `://`, or an empty host).
/// Used by the per-host rate limiter as its bucket key.
///
/// The authority component ends at the first `/`, `?` or `#`
/// (RFC 3986 §3.2), so `https://example.com?q=1` yields `example.com`
/// rather than `example.com?q=1`. A leading `user:pass@` is dropped
/// before the port is stripped so the colon inside the credentials is
/// never mistaken for the port separator.
///
/// IPv6 literals are kept in their `[::1]` bracketed form so the
/// `rsplit_once(':')` port-stripping logic doesn't split inside the
/// address (e.g. `https://[::1]/foo` used to return `"[:"` because
/// the rightmost `:` is inside the literal). Buckets keyed by
/// `[::1]` vs `::1` are still uniquely-per-host; the brackets are
/// cosmetic.
pub fn host_of(url: &str) -> Option<String> {
    let after_scheme = url.split_once("://")?.1;
    // Path, query and fragment all terminate the authority.
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;
    // Strip optional userinfo; the host is whatever follows the last `@`.
    let host_with_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, rest)| rest);
    let host = if host_with_port.starts_with('[') {
        // IPv6 literal: keep through the closing bracket. There may
        // be a trailing `:port` after `]`; strip only that.
        match host_with_port.rfind(']') {
            Some(end) => &host_with_port[..=end],
            None => host_with_port,
        }
    } else {
        // Hostnames and IPv4 literals: trailing `:port` (if any) is
        // after the last `:`.
        host_with_port
            .rsplit_once(':')
            .map_or(host_with_port, |(h, _)| h)
    };
    (!host.is_empty()).then(|| host.to_ascii_lowercase())
}
|
||||||
|
|
||||||
|
/// `scheme://authority` with the path, query and fragment removed. The
/// port (if any) is deliberately kept — no port stripping happens here.
/// Used by the metadata pass to seed `sources.base_url` from
/// `CRAWLER_START_URL`.
///
/// Returns `None` when the input has no `://` separator or when nothing
/// sits between `://` and the first `/`, `?` or `#` (e.g. `https:///x`),
/// since an origin without a host is not usable as a base URL.
pub fn origin_of(url: &str) -> Option<String> {
    let (scheme, rest) = url.split_once("://")?;
    // The authority ends at the first path, query or fragment delimiter
    // (RFC 3986 §3.2), not just at `/`.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;
    if authority.is_empty() {
        return None;
    }
    Some(format!("{scheme}://{authority}"))
}
|
||||||
|
|
||||||
|
/// Approximate registrable-domain calculation: take the last two
/// dot-labels of the host, prefix with `.`. Used to set a parent-
/// domain cookie so the catalog's `www.` / `m.` redirects don't drop
/// the cookie mid-crawl.
///
/// Caveat: wrong for multi-part TLDs (`.co.uk`, `.com.br`). The
/// operator can override via `CRAWLER_COOKIE_DOMAIN`; pulling in the
/// Public Suffix List for one knob isn't worth it yet.
///
/// Host extraction follows RFC 3986 §3.2: the authority ends at the
/// first `/`, `?` or `#`, a leading `user:pass@` is dropped, and a
/// trailing `:port` is stripped. Returns `None` when there is no
/// `://` separator or the host is empty.
///
/// Hosts that are not domain names are returned lowercased and
/// otherwise unchanged, with no leading dot:
/// - bare hostnames (e.g. `localhost`) — `.localhost` is not a valid
///   cookie domain;
/// - IPv4 literals (e.g. `192.168.1.10`) — taking the "last two labels"
///   of an address would produce a meaningless `.1.10`;
/// - bracketed IPv6 literals (e.g. `[::1]`, `[::ffff:192.0.2.1]`) — the
///   browser will reject them as a cookie `Domain` anyway, but the
///   representation stays sensible.
pub fn registrable_domain(url: &str) -> Option<String> {
    let after_scheme = url.split_once("://")?.1;
    // Path, query and fragment all terminate the authority.
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;
    // Strip optional userinfo; the host is whatever follows the last `@`.
    let host_with_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, rest)| rest);
    let is_bracketed = host_with_port.starts_with('[');
    let host_str = if is_bracketed {
        // IPv6 literal: keep through the closing bracket; an optional
        // `:port` follows `]`.
        match host_with_port.rfind(']') {
            Some(end) => &host_with_port[..=end],
            None => host_with_port,
        }
    } else {
        host_with_port
            .rsplit_once(':')
            .map_or(host_with_port, |(h, _)| h)
    };
    let host = host_str.to_ascii_lowercase();
    if host.is_empty() {
        return None;
    }
    // IP literals have no registrable domain; never split them on `.`
    // (an IPv6 literal may embed a dotted quad).
    if is_bracketed || host.parse::<std::net::Ipv4Addr>().is_ok() {
        return Some(host);
    }
    let labels: Vec<&str> = host.split('.').filter(|l| !l.is_empty()).collect();
    if labels.len() < 2 {
        return Some(host);
    }
    let registrable = &labels[labels.len() - 2..];
    Some(format!(".{}", registrable.join(".")))
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `f` over every `(input, expected)` pair and asserts the
    /// result matches, naming the offending input on failure.
    fn check_all(f: fn(&str) -> Option<String>, cases: &[(&str, Option<&str>)]) {
        for &(input, expected) in cases {
            assert_eq!(f(input).as_deref(), expected, "input: {input}");
        }
    }

    #[test]
    fn host_of_strips_port_and_lowercases() {
        check_all(
            host_of,
            &[
                ("https://CDN.Example.com:443/x", Some("cdn.example.com")),
                ("http://localhost/", Some("localhost")),
                ("not a url", None),
            ],
        );
    }

    #[test]
    fn host_of_keeps_bracketed_ipv6_literal_intact() {
        // Regression guard: splitting on the last `:` used to cut the
        // address itself and yield "[:" instead of "[::1]", giving an
        // IPv6 source the wrong rate-limit bucket key.
        check_all(
            host_of,
            &[
                ("https://[::1]/", Some("[::1]")),
                ("https://[::1]:8080/", Some("[::1]")),
                ("https://[2001:db8::1]/foo", Some("[2001:db8::1]")),
                ("https://[2001:db8::1]:443/foo", Some("[2001:db8::1]")),
            ],
        );
    }

    #[test]
    fn origin_of_returns_scheme_and_host() {
        check_all(
            origin_of,
            &[
                ("https://example.com/some/path?q=1", Some("https://example.com")),
                ("garbage", None),
            ],
        );
    }

    #[test]
    fn registrable_domain_strips_subdomain() {
        check_all(
            registrable_domain,
            &[
                ("https://www.target-site.com/manga/foo/", Some(".target-site.com")),
                ("https://m.example.org", Some(".example.org")),
            ],
        );
    }

    #[test]
    fn registrable_domain_keeps_two_label_host() {
        check_all(
            registrable_domain,
            &[("https://example.com/", Some(".example.com"))],
        );
    }

    #[test]
    fn registrable_domain_handles_port() {
        check_all(
            registrable_domain,
            &[("http://www.foo.bar:8080/x", Some(".foo.bar"))],
        );
    }

    #[test]
    fn registrable_domain_bare_hostname_no_leading_dot() {
        check_all(
            registrable_domain,
            &[("http://localhost:5173", Some("localhost"))],
        );
    }

    #[test]
    fn registrable_domain_returns_none_for_garbage() {
        let result = registrable_domain("not a url");
        assert!(result.is_none());
    }

    #[test]
    fn registrable_domain_keeps_bracketed_ipv6_literal_intact() {
        // Mirrors the host_of IPv6 regression test. An IP is never a
        // usable cookie `Domain`, but the returned value should still
        // be the whole literal, not the truncated `"[:"` of old.
        check_all(
            registrable_domain,
            &[
                ("https://[::1]/", Some("[::1]")),
                ("https://[::1]:8080/", Some("[::1]")),
                ("https://[2001:db8::1]/foo", Some("[2001:db8::1]")),
            ],
        );
    }
}
|
||||||
15
backend/src/domain/admin_audit.rs
Normal file
15
backend/src/domain/admin_audit.rs
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::Serialize;
|
||||||
|
use sqlx::FromRow;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// One admin-action audit record, loadable from a SQL row (`FromRow`) and
/// serializable for API output (`Serialize`).
// NOTE(review): field names match the columns written by
// `repo::admin_audit::insert` into `admin_audit` — confirm against the
// migration that this struct maps that table one-to-one.
#[derive(Debug, Clone, Serialize, FromRow)]
|
||||||
|
pub struct AdminAuditEntry {
|
||||||
|
/// Row identifier.
// NOTE(review): not supplied by the insert helper, so presumably
// generated by a column default — confirm.
pub id: Uuid,
|
||||||
|
/// User who performed the action, if still known.
// NOTE(review): optional here although the insert helper requires a
// value; presumably the column is nullable so the row survives user
// deletion — confirm.
pub actor_user_id: Option<Uuid>,
|
||||||
|
/// Name of the audited action.
pub action: String,
|
||||||
|
/// Kind of entity the action targeted.
pub target_kind: String,
|
||||||
|
/// Identifier of the targeted entity, when the action has a single target.
pub target_id: Option<Uuid>,
|
||||||
|
/// Free-form JSON detail recorded alongside the action.
pub payload: serde_json::Value,
|
||||||
|
/// Timestamp of the record (UTC).
// NOTE(review): not supplied by the insert helper, so presumably a
// column default such as `now()` — confirm.
pub at: DateTime<Utc>,
|
||||||
|
}
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
pub mod admin_audit;
|
||||||
pub mod api_token;
|
pub mod api_token;
|
||||||
pub mod author;
|
pub mod author;
|
||||||
pub mod bookmark;
|
pub mod bookmark;
|
||||||
@@ -9,11 +10,13 @@ pub mod page;
|
|||||||
pub mod patch;
|
pub mod patch;
|
||||||
pub mod read_progress;
|
pub mod read_progress;
|
||||||
pub mod session;
|
pub mod session;
|
||||||
|
pub mod sync_state;
|
||||||
pub mod tag;
|
pub mod tag;
|
||||||
pub mod upload_entry;
|
pub mod upload_entry;
|
||||||
pub mod user;
|
pub mod user;
|
||||||
pub mod user_preferences;
|
pub mod user_preferences;
|
||||||
|
|
||||||
|
pub use admin_audit::AdminAuditEntry;
|
||||||
pub use api_token::ApiToken;
|
pub use api_token::ApiToken;
|
||||||
pub use author::{Author, AuthorRef, AuthorWithCount};
|
pub use author::{Author, AuthorRef, AuthorWithCount};
|
||||||
pub use bookmark::{Bookmark, BookmarkSummary};
|
pub use bookmark::{Bookmark, BookmarkSummary};
|
||||||
@@ -25,6 +28,7 @@ pub use page::Page;
|
|||||||
pub use patch::Patch;
|
pub use patch::Patch;
|
||||||
pub use read_progress::{ReadProgress, ReadProgressForManga, ReadProgressSummary};
|
pub use read_progress::{ReadProgress, ReadProgressForManga, ReadProgressSummary};
|
||||||
pub use session::Session;
|
pub use session::Session;
|
||||||
|
pub use sync_state::{ChapterSyncState, MangaSyncState};
|
||||||
pub use tag::{Tag, TagRef};
|
pub use tag::{Tag, TagRef};
|
||||||
pub use upload_entry::UploadEntry;
|
pub use upload_entry::UploadEntry;
|
||||||
pub use user::User;
|
pub use user::User;
|
||||||
|
|||||||
48
backend/src/domain/sync_state.rs
Normal file
48
backend/src/domain/sync_state.rs
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
//! Sync-state enums derived per-manga / per-chapter from `manga_sources`,
|
||||||
|
//! `chapter_sources`, and `crawler_jobs` at query time. No state column
|
||||||
|
//! is persisted on `mangas` / `chapters` — see `repo::admin_view` for the
|
||||||
|
//! derivation rules and priority order.
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// Sync state of a manga as shown to admins. Encoded as snake_case text
/// both in SQL (`sqlx::Type`) and in JSON (`serde`). Variants are
/// declared in the same order as the priority list documented in
/// `repo::admin_view`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
|
||||||
|
#[sqlx(type_name = "text", rename_all = "snake_case")]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum MangaSyncState {
|
||||||
|
/// A `sync_manga` or `sync_chapter_list` job is currently
|
||||||
|
/// pending or running for this manga.
|
||||||
|
InProgress,
|
||||||
|
/// At least one `manga_sources` row exists for this manga and ALL of
|
||||||
|
/// them have `dropped_at IS NOT NULL` — every source we know about
|
||||||
|
/// has stopped surfacing it.
|
||||||
|
Dropped,
|
||||||
|
/// Default healthy state: at least one live source row OR the manga
|
||||||
|
/// was user-uploaded (no `manga_sources` rows at all).
|
||||||
|
Synced,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sync state of a single chapter as shown to admins. Encoded as
/// snake_case text both in SQL (`sqlx::Type`) and in JSON (`serde`).
/// Variants are declared in the same order as the priority list
/// documented in `repo::admin_view`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, sqlx::Type)]
|
||||||
|
#[sqlx(type_name = "text", rename_all = "snake_case")]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum ChapterSyncState {
|
||||||
|
/// A `sync_chapter_content` job is currently pending or running for
|
||||||
|
/// this chapter (the 0014 dedup index guarantees at most one).
|
||||||
|
Downloading,
|
||||||
|
/// At least one `chapter_sources` row exists AND all of them are
|
||||||
|
/// `dropped_at IS NOT NULL`.
|
||||||
|
Dropped,
|
||||||
|
/// `page_count = 0` AND a `dead` `sync_chapter_content` job exists
|
||||||
|
/// for this chapter. Checked BEFORE `NotDownloaded` so the more
|
||||||
|
/// informative "we tried and it died" state wins over "we never
|
||||||
|
/// got around to it". Does NOT fire when `page_count > 0`, because
|
||||||
|
/// pages on disk mean the chapter IS synced regardless of historical
|
||||||
|
/// job failures — see the priority comment in `repo::admin_view`.
|
||||||
|
Failed,
|
||||||
|
/// `page_count = 0` and no in-flight or failed job — the chapter
|
||||||
|
/// row exists but content has never been downloaded.
|
||||||
|
NotDownloaded,
|
||||||
|
/// `page_count > 0` — content has been downloaded at some point.
|
||||||
|
/// Reaped `done` jobs in `crawler_jobs` mean we can't read this from
|
||||||
|
/// the job table, so `page_count` is the durable truth.
|
||||||
|
Synced,
|
||||||
|
}
|
||||||
@@ -10,4 +10,5 @@ pub struct User {
|
|||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
pub password_hash: String,
|
pub password_hash: String,
|
||||||
pub created_at: DateTime<Utc>,
|
pub created_at: DateTime<Utc>,
|
||||||
|
pub is_admin: bool,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,6 +21,11 @@ pub enum AppError {
|
|||||||
PayloadTooLarge(String),
|
PayloadTooLarge(String),
|
||||||
#[error("unsupported media type: {0}")]
|
#[error("unsupported media type: {0}")]
|
||||||
UnsupportedMediaType(String),
|
UnsupportedMediaType(String),
|
||||||
|
/// 429 with an optional `Retry-After` header value (in seconds).
|
||||||
|
#[error("too many requests")]
|
||||||
|
TooManyRequests {
|
||||||
|
retry_after_secs: Option<u64>,
|
||||||
|
},
|
||||||
/// Semantic per-field validation failure. `details` is rendered into the
|
/// Semantic per-field validation failure. `details` is rendered into the
|
||||||
/// envelope so the client can highlight the bad field(s).
|
/// envelope so the client can highlight the bad field(s).
|
||||||
#[error("validation failed")]
|
#[error("validation failed")]
|
||||||
@@ -51,6 +56,7 @@ impl AppError {
|
|||||||
AppError::Conflict(_) => "conflict",
|
AppError::Conflict(_) => "conflict",
|
||||||
AppError::PayloadTooLarge(_) => "payload_too_large",
|
AppError::PayloadTooLarge(_) => "payload_too_large",
|
||||||
AppError::UnsupportedMediaType(_) => "unsupported_media_type",
|
AppError::UnsupportedMediaType(_) => "unsupported_media_type",
|
||||||
|
AppError::TooManyRequests { .. } => "too_many_requests",
|
||||||
AppError::ValidationFailed { .. } => "validation_failed",
|
AppError::ValidationFailed { .. } => "validation_failed",
|
||||||
AppError::Database(sqlx::Error::RowNotFound) => "not_found",
|
AppError::Database(sqlx::Error::RowNotFound) => "not_found",
|
||||||
AppError::Database(_) => "internal_error",
|
AppError::Database(_) => "internal_error",
|
||||||
@@ -79,6 +85,31 @@ impl IntoResponse for AppError {
|
|||||||
AppError::UnsupportedMediaType(msg) => {
|
AppError::UnsupportedMediaType(msg) => {
|
||||||
(StatusCode::UNSUPPORTED_MEDIA_TYPE, msg.clone(), None)
|
(StatusCode::UNSUPPORTED_MEDIA_TYPE, msg.clone(), None)
|
||||||
}
|
}
|
||||||
|
AppError::TooManyRequests { retry_after_secs } => {
|
||||||
|
// Emit `Retry-After: N` (RFC 6585 §4) so a well-behaved
|
||||||
|
// client can back off correctly. Done by building the
|
||||||
|
// response by hand below — the `(status, headers,
|
||||||
|
// body)` tuple shape doesn't fit the standard
|
||||||
|
// `(status, body)` IntoResponse path for the other
|
||||||
|
// variants.
|
||||||
|
let body = json!({
|
||||||
|
"error": {
|
||||||
|
"code": code,
|
||||||
|
"message": "too many requests; slow down",
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let mut resp = (StatusCode::TOO_MANY_REQUESTS, Json(body)).into_response();
|
||||||
|
if let Some(secs) = retry_after_secs {
|
||||||
|
// `HeaderValue: From<u64>` skips both the
|
||||||
|
// intermediate `String` allocation and the
|
||||||
|
// fallible-by-shape `from_str` path.
|
||||||
|
resp.headers_mut().insert(
|
||||||
|
axum::http::header::RETRY_AFTER,
|
||||||
|
axum::http::HeaderValue::from(*secs),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return resp;
|
||||||
|
}
|
||||||
AppError::ValidationFailed { message, details } => (
|
AppError::ValidationFailed { message, details } => (
|
||||||
StatusCode::UNPROCESSABLE_ENTITY,
|
StatusCode::UNPROCESSABLE_ENTITY,
|
||||||
message.clone(),
|
message.clone(),
|
||||||
|
|||||||
@@ -1,12 +1,21 @@
|
|||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
|
use std::time::Duration;
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
|
/// Upper bound on how long we're willing to wait for the crawler daemon
|
||||||
|
/// to drain before letting `main` return. Without it a wedged background
|
||||||
|
/// task (e.g. a chromiumoxide handler stuck on a dead WS) blocks the
|
||||||
|
/// process from exiting after Ctrl-C / SIGTERM.
|
||||||
|
const CRAWLER_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
dotenvy::dotenv().ok();
|
dotenvy::dotenv().ok();
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
.with_env_filter(
|
.with_env_filter(
|
||||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| "info,mangalord=debug".into()),
|
EnvFilter::try_from_default_env().unwrap_or_else(|_| {
|
||||||
|
"info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off".into()
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
@@ -17,16 +26,52 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
tracing::info!(%addr, "mangalord listening");
|
tracing::info!(%addr, "mangalord listening");
|
||||||
let listener = tokio::net::TcpListener::bind(addr).await?;
|
let listener = tokio::net::TcpListener::bind(addr).await?;
|
||||||
axum::serve(listener, router)
|
axum::serve(listener, router)
|
||||||
.with_graceful_shutdown(async {
|
.with_graceful_shutdown(shutdown_signal())
|
||||||
let _ = tokio::signal::ctrl_c().await;
|
|
||||||
tracing::info!("ctrl-c received; shutting down");
|
|
||||||
})
|
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
// Drain background tasks (crawler daemon) before exiting so Chromium
|
// Drain background tasks (crawler daemon) before exiting so Chromium
|
||||||
// gets a clean shutdown rather than relying on kill-on-drop.
|
// gets a clean shutdown rather than relying on kill-on-drop. Bounded
|
||||||
|
// by a timeout so a wedged shutdown path can't trap the process.
|
||||||
if let Some(d) = daemon {
|
if let Some(d) = daemon {
|
||||||
d.shutdown().await;
|
if tokio::time::timeout(CRAWLER_SHUTDOWN_TIMEOUT, d.shutdown())
|
||||||
|
.await
|
||||||
|
.is_err()
|
||||||
|
{
|
||||||
|
tracing::warn!(
|
||||||
|
timeout_s = CRAWLER_SHUTDOWN_TIMEOUT.as_secs(),
|
||||||
|
"crawler daemon shutdown exceeded timeout; abandoning"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wait for either Ctrl-C (interactive shell) or SIGTERM (Docker /
|
||||||
|
/// Kubernetes / Podman / systemd stop) and log which arrived. Without
|
||||||
|
/// the SIGTERM branch, `docker compose stop` runs out its grace period
|
||||||
|
/// and skips straight to SIGKILL — the daemon never gets the
|
||||||
|
/// `daemon.shutdown().await` path, leaking Chromium.
|
||||||
|
async fn shutdown_signal() {
|
||||||
|
use tokio::signal::unix::{signal, SignalKind};
|
||||||
|
let mut sigterm = match signal(SignalKind::terminate()) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(e) => {
|
||||||
|
// SignalKind::terminate() is supported on every Unix the
|
||||||
|
// tokio runtime runs on; if registration fails we still
|
||||||
|
// honour Ctrl-C so the process is at least
|
||||||
|
// interactive-shutdownable.
|
||||||
|
tracing::warn!(error = %e, "could not install SIGTERM handler; falling back to ctrl_c only");
|
||||||
|
let _ = tokio::signal::ctrl_c().await;
|
||||||
|
tracing::info!("ctrl-c received; shutting down");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
tokio::select! {
|
||||||
|
_ = tokio::signal::ctrl_c() => {
|
||||||
|
tracing::info!("ctrl-c received; shutting down");
|
||||||
|
}
|
||||||
|
_ = sigterm.recv() => {
|
||||||
|
tracing::info!("SIGTERM received; shutting down");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
32
backend/src/repo/admin_audit.rs
Normal file
32
backend/src/repo/admin_audit.rs
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
//! Admin-action audit log writes.
|
||||||
|
//!
|
||||||
|
//! Insert is always called from inside the same transaction as the
|
||||||
|
//! action it audits — the executor parameter is `PgExecutor` so the
|
||||||
|
//! caller passes `&mut *tx` directly.
|
||||||
|
|
||||||
|
use sqlx::PgExecutor;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::error::AppResult;
|
||||||
|
|
||||||
|
pub async fn insert<'e, E: PgExecutor<'e>>(
|
||||||
|
executor: E,
|
||||||
|
actor_user_id: Uuid,
|
||||||
|
action: &str,
|
||||||
|
target_kind: &str,
|
||||||
|
target_id: Option<Uuid>,
|
||||||
|
payload: serde_json::Value,
|
||||||
|
) -> AppResult<()> {
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO admin_audit (actor_user_id, action, target_kind, target_id, payload) \
|
||||||
|
VALUES ($1, $2, $3, $4, $5)",
|
||||||
|
)
|
||||||
|
.bind(actor_user_id)
|
||||||
|
.bind(action)
|
||||||
|
.bind(target_kind)
|
||||||
|
.bind(target_id)
|
||||||
|
.bind(payload)
|
||||||
|
.execute(executor)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
232
backend/src/repo/admin_view.rs
Normal file
232
backend/src/repo/admin_view.rs
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
//! Admin-facing read queries that join manga/chapter with the crawler
|
||||||
|
//! signals (`manga_sources`, `chapter_sources`, `crawler_jobs`) to
|
||||||
|
//! derive a sync state per row at query time.
|
||||||
|
//!
|
||||||
|
//! Priority order for `MangaSyncState`:
|
||||||
|
//! 1. `InProgress` — any pending/running `sync_manga` or
|
||||||
|
//! `sync_chapter_list` job matches this manga.
|
||||||
|
//! 2. `Dropped` — manga has source rows AND every one of them is
|
||||||
|
//! `dropped_at IS NOT NULL`.
|
||||||
|
//! 3. `Synced` — default (includes user-uploaded mangas with no
|
||||||
|
//! `manga_sources` rows at all).
|
||||||
|
//!
|
||||||
|
//! Priority order for `ChapterSyncState`:
|
||||||
|
//! 1. `Downloading` — pending/running `sync_chapter_content` for this id
|
||||||
|
//! 2. `Dropped` — chapter has source rows AND all are dropped
|
||||||
|
//! 3. `Failed` — `page_count = 0` AND a `dead` `sync_chapter_content`
|
||||||
|
//! row exists for this chapter. Constrained to `page_count = 0`
|
||||||
|
//! because once pages are on disk the chapter IS synced — a
|
||||||
|
//! historical dead job (likely from a re-download attempt that
|
||||||
|
//! crashed) is noise that gets reaped after retention. Surfacing
|
||||||
|
//! "Failed" when content is present would contradict
|
||||||
|
//! `ChapterSyncState::Synced`'s "downloaded at some point" contract.
|
||||||
|
//! 4. `NotDownloaded` — `page_count = 0`, no in-flight, no dead job
|
||||||
|
//! 5. `Synced` — `page_count > 0`
|
||||||
|
//!
|
||||||
|
//! Reminder: `done` jobs are reaped after `CRAWLER_JOB_RETENTION_DAYS`,
|
||||||
|
//! so `chapters.page_count > 0` is the durable "this is synced" signal,
|
||||||
|
//! not the job table.
|
||||||
|
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::Serialize;
|
||||||
|
use sqlx::{FromRow, PgPool};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::domain::{ChapterSyncState, MangaSyncState};
|
||||||
|
use crate::error::AppResult;
|
||||||
|
|
||||||
|
/// One row of the admin manga list: manga columns plus values derived at
/// query time. Loaded via `FromRow` and serialized as-is for the API.
#[derive(Debug, Serialize, FromRow)]
|
||||||
|
pub struct AdminMangaRow {
|
||||||
|
/// Manga identifier.
pub id: Uuid,
|
||||||
|
/// Manga title (the column the list's `search` filter matches on).
pub title: String,
|
||||||
|
/// Manga status as stored.
// NOTE(review): this is a plain string, distinct from `sync_state`
// below; presumably the publication status — confirm.
pub status: String,
|
||||||
|
/// Path of the cover image, if one is set.
pub cover_image_path: Option<String>,
|
||||||
|
/// Row creation timestamp (UTC).
pub created_at: DateTime<Utc>,
|
||||||
|
/// Last-update timestamp (UTC).
pub updated_at: DateTime<Utc>,
|
||||||
|
/// Sync state derived per row at query time; not a persisted column.
pub sync_state: MangaSyncState,
|
||||||
|
/// Chapter count for this manga.
// NOTE(review): the aggregate that produces this is outside this
// view; confirm whether dropped chapters are included.
pub chapter_count: i64,
|
||||||
|
/// Most recent "seen" timestamp, if any.
// NOTE(review): presumably taken from the manga's source rows —
// confirm against the query.
pub latest_seen_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Filter and pagination inputs for `list_mangas_with_sync_state`.
/// Note that the derived `Default` sets `limit` and `offset` to `0`, so
/// callers should set `limit` explicitly.
#[derive(Debug, Default)]
|
||||||
|
pub struct ListAdminMangasQuery {
|
||||||
|
/// Optional title substring filter. The value is trimmed and wrapped
/// in `%…%`; a blank string results in no filter.
pub search: Option<String>,
|
||||||
|
/// Optional filter on the derived sync state.
pub sync_state: Option<MangaSyncState>,
|
||||||
|
/// Page size.
pub limit: i64,
|
||||||
|
/// Number of rows to skip.
pub offset: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// SQL `CASE` expression that classifies one manga's sync state as text.
///
/// The expression refers to `m.id`, so it must be spliced into a query
/// where the `mangas` table is aliased as `m`. Branches are evaluated in
/// order:
///
/// 1. `'in_progress'` — a `pending`/`running` `crawler_jobs` row exists
///    that is either a `sync_chapter_list` job carrying this manga's id,
///    or a `sync_manga` job whose `source_id` + `source_manga_key` match
///    one of this manga's `manga_sources` rows.
/// 2. `'dropped'` — the manga has `manga_sources` rows and none of them
///    is live (`dropped_at IS NULL`).
/// 3. `'synced'` — everything else.
const MANGA_SYNC_STATE_CASE: &str = r#"
    CASE
        WHEN EXISTS (
            SELECT 1 FROM crawler_jobs cj
            WHERE cj.state IN ('pending','running')
              AND (
                    (cj.payload->>'kind' = 'sync_chapter_list'
                     AND (cj.payload->>'manga_id')::uuid = m.id)
                 OR (cj.payload->>'kind' = 'sync_manga'
                     AND EXISTS (
                         SELECT 1 FROM manga_sources ms
                         WHERE ms.manga_id = m.id
                           AND ms.source_id = cj.payload->>'source_id'
                           AND ms.source_manga_key = cj.payload->>'source_manga_key'
                     ))
              )
        ) THEN 'in_progress'
        WHEN EXISTS (SELECT 1 FROM manga_sources ms WHERE ms.manga_id = m.id)
             AND NOT EXISTS (
                 SELECT 1 FROM manga_sources ms
                 WHERE ms.manga_id = m.id AND ms.dropped_at IS NULL
             )
        THEN 'dropped'
        ELSE 'synced'
    END
"#;
|
||||||
|
|
||||||
|
/// Paginated admin manga list with derived sync state and total count.
|
||||||
|
/// Filters by `search` (substring on title, case-insensitive) and
|
||||||
|
/// `sync_state` (post-derivation). The CTE keeps the case expression
|
||||||
|
/// in one place — the same projection feeds both the page rows and the
|
||||||
|
/// totals count under the same filter.
|
||||||
|
pub async fn list_mangas_with_sync_state(
|
||||||
|
pool: &PgPool,
|
||||||
|
q: &ListAdminMangasQuery,
|
||||||
|
) -> AppResult<(Vec<AdminMangaRow>, i64)> {
|
||||||
|
let search_pat = q
|
||||||
|
.search
|
||||||
|
.as_ref()
|
||||||
|
.map(|s| format!("%{}%", s.trim()))
|
||||||
|
.filter(|p| p.len() > 2);
|
||||||
|
// sqlx::Type → text: bind the snake_case representation manually so
|
||||||
|
// the SQL can compare it as text without an explicit cast.
|
||||||
|
let sync_filter = q.sync_state.map(|s| match s {
|
||||||
|
MangaSyncState::InProgress => "in_progress",
|
||||||
|
MangaSyncState::Dropped => "dropped",
|
||||||
|
MangaSyncState::Synced => "synced",
|
||||||
|
});
|
||||||
|
|
||||||
|
let sql = format!(
|
||||||
|
r#"
|
||||||
|
WITH classified AS (
|
||||||
|
SELECT
|
||||||
|
m.id, m.title, m.status, m.cover_image_path,
|
||||||
|
m.created_at, m.updated_at,
|
||||||
|
{case} AS sync_state,
|
||||||
|
(SELECT COUNT(*) FROM chapters c WHERE c.manga_id = m.id) AS chapter_count,
|
||||||
|
(SELECT MAX(last_seen_at) FROM manga_sources ms
|
||||||
|
WHERE ms.manga_id = m.id AND ms.dropped_at IS NULL) AS latest_seen_at
|
||||||
|
FROM mangas m
|
||||||
|
WHERE ($1::text IS NULL OR m.title ILIKE $1)
|
||||||
|
)
|
||||||
|
SELECT * FROM classified
|
||||||
|
WHERE ($2::text IS NULL OR sync_state = $2)
|
||||||
|
ORDER BY updated_at DESC
|
||||||
|
LIMIT $3 OFFSET $4
|
||||||
|
"#,
|
||||||
|
case = MANGA_SYNC_STATE_CASE
|
||||||
|
);
|
||||||
|
let items: Vec<AdminMangaRow> = sqlx::query_as(&sql)
|
||||||
|
.bind(&search_pat)
|
||||||
|
.bind(sync_filter)
|
||||||
|
.bind(q.limit)
|
||||||
|
.bind(q.offset)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let total_sql = format!(
|
||||||
|
r#"
|
||||||
|
WITH classified AS (
|
||||||
|
SELECT {case} AS sync_state
|
||||||
|
FROM mangas m
|
||||||
|
WHERE ($1::text IS NULL OR m.title ILIKE $1)
|
||||||
|
)
|
||||||
|
SELECT COUNT(*) FROM classified
|
||||||
|
WHERE ($2::text IS NULL OR sync_state = $2)
|
||||||
|
"#,
|
||||||
|
case = MANGA_SYNC_STATE_CASE
|
||||||
|
);
|
||||||
|
let total: i64 = sqlx::query_scalar(&total_sql)
|
||||||
|
.bind(&search_pat)
|
||||||
|
.bind(sync_filter)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok((items, total))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, FromRow)]
|
||||||
|
pub struct AdminChapterRow {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub manga_id: Uuid,
|
||||||
|
pub number: i32,
|
||||||
|
pub title: Option<String>,
|
||||||
|
pub page_count: i32,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
pub sync_state: ChapterSyncState,
|
||||||
|
pub latest_seen_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct ListAdminChaptersQuery {
|
||||||
|
pub manga_id: Uuid,
|
||||||
|
pub limit: i64,
|
||||||
|
pub offset: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Paginated chapter list with derived sync state. Pagination is non-
|
||||||
|
/// optional — long-runners can have thousands of chapters and the
|
||||||
|
/// per-row scalar subqueries make the unbounded variant a real
|
||||||
|
/// stall risk even behind an admin guard. Returns the page slice plus
|
||||||
|
/// the unfiltered total so the UI can render "showing N of M".
|
||||||
|
pub async fn list_chapters_with_sync_state(
|
||||||
|
pool: &PgPool,
|
||||||
|
q: &ListAdminChaptersQuery,
|
||||||
|
) -> AppResult<(Vec<AdminChapterRow>, i64)> {
|
||||||
|
let items: Vec<AdminChapterRow> = sqlx::query_as(
|
||||||
|
r#"
|
||||||
|
SELECT
|
||||||
|
c.id, c.manga_id, c.number, c.title, c.page_count, c.created_at,
|
||||||
|
CASE
|
||||||
|
WHEN EXISTS (
|
||||||
|
SELECT 1 FROM crawler_jobs cj
|
||||||
|
WHERE cj.state IN ('pending','running')
|
||||||
|
AND cj.payload->>'kind' = 'sync_chapter_content'
|
||||||
|
AND (cj.payload->>'chapter_id')::uuid = c.id
|
||||||
|
) THEN 'downloading'
|
||||||
|
WHEN EXISTS (SELECT 1 FROM chapter_sources cs WHERE cs.chapter_id = c.id)
|
||||||
|
AND NOT EXISTS (
|
||||||
|
SELECT 1 FROM chapter_sources cs
|
||||||
|
WHERE cs.chapter_id = c.id AND cs.dropped_at IS NULL
|
||||||
|
)
|
||||||
|
THEN 'dropped'
|
||||||
|
WHEN c.page_count = 0
|
||||||
|
AND EXISTS (
|
||||||
|
SELECT 1 FROM crawler_jobs cj
|
||||||
|
WHERE cj.state = 'dead'
|
||||||
|
AND cj.payload->>'kind' = 'sync_chapter_content'
|
||||||
|
AND (cj.payload->>'chapter_id')::uuid = c.id
|
||||||
|
) THEN 'failed'
|
||||||
|
WHEN c.page_count = 0 THEN 'not_downloaded'
|
||||||
|
ELSE 'synced'
|
||||||
|
END AS sync_state,
|
||||||
|
(SELECT MAX(last_seen_at) FROM chapter_sources cs
|
||||||
|
WHERE cs.chapter_id = c.id AND cs.dropped_at IS NULL) AS latest_seen_at
|
||||||
|
FROM chapters c
|
||||||
|
WHERE c.manga_id = $1
|
||||||
|
ORDER BY c.number ASC
|
||||||
|
LIMIT $2 OFFSET $3
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(q.manga_id)
|
||||||
|
.bind(q.limit)
|
||||||
|
.bind(q.offset)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let total: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM chapters WHERE manga_id = $1")
|
||||||
|
.bind(q.manga_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok((items, total))
|
||||||
|
}
|
||||||
@@ -99,6 +99,11 @@ pub async fn list(
|
|||||||
/// Atomically replace the set of authors on a manga. Caller passes a
|
/// Atomically replace the set of authors on a manga. Caller passes a
|
||||||
/// `&mut PgConnection` (`&mut *tx` works) so the delete+upserts run in
|
/// `&mut PgConnection` (`&mut *tx` works) so the delete+upserts run in
|
||||||
/// one transaction with whatever called us.
|
/// one transaction with whatever called us.
|
||||||
|
///
|
||||||
|
/// Note: `crawler::repo::sync_authors` does a similar replace with the
|
||||||
|
/// same semantics on names. The duplication is intentional — handler
|
||||||
|
/// callers want the `Vec<AuthorRef>` for the API response; the
|
||||||
|
/// crawler doesn't need it and stays inside its own transaction.
|
||||||
pub async fn set_for_manga(
|
pub async fn set_for_manga(
|
||||||
conn: &mut PgConnection,
|
conn: &mut PgConnection,
|
||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
|
|||||||
@@ -29,9 +29,9 @@ pub async fn create(
|
|||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(b) => Ok(b),
|
Ok(b) => Ok(b),
|
||||||
Err(e) if is_unique_violation(&e) => Err(AppError::Conflict(
|
Err(sqlx::Error::Database(ref db_err)) if db_err.is_unique_violation() => Err(
|
||||||
"bookmark already exists for this manga/chapter".into(),
|
AppError::Conflict("bookmark already exists for this manga/chapter".into()),
|
||||||
)),
|
),
|
||||||
Err(e) => Err(AppError::Database(e)),
|
Err(e) => Err(AppError::Database(e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -97,10 +97,3 @@ pub async fn delete(pool: &PgPool, id: Uuid) -> AppResult<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_unique_violation(err: &sqlx::Error) -> bool {
|
|
||||||
if let sqlx::Error::Database(db_err) = err {
|
|
||||||
db_err.code().as_deref() == Some("23505")
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use sqlx::{PgExecutor, PgPool};
|
|||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::domain::Chapter;
|
use crate::domain::Chapter;
|
||||||
use crate::error::{AppError, AppResult};
|
use crate::error::AppResult;
|
||||||
|
|
||||||
pub async fn list_for_manga(
|
pub async fn list_for_manga(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
@@ -62,10 +62,9 @@ pub async fn find_by_id_in_manga(
|
|||||||
///
|
///
|
||||||
/// Chapter identity is the row UUID; the same (manga_id, number)
|
/// Chapter identity is the row UUID; the same (manga_id, number)
|
||||||
/// combination can repeat (multiple translations, re-uploads). The
|
/// combination can repeat (multiple translations, re-uploads). The
|
||||||
/// `is_unique_violation` branch below is a defensive holdover from
|
/// 0013 migration dropped the (manga_id, number) UNIQUE, so duplicate
|
||||||
/// 0001's (manga_id, number) UNIQUE — it can no longer fire under
|
/// inserts succeed by design. If a future migration re-adds any
|
||||||
/// normal operation, but we surface a clean 409 if a future migration
|
/// uniqueness, surface a 409 by adding a unique-violation arm here.
|
||||||
/// re-adds any chapter uniqueness.
|
|
||||||
pub async fn create<'e, E: PgExecutor<'e>>(
|
pub async fn create<'e, E: PgExecutor<'e>>(
|
||||||
executor: E,
|
executor: E,
|
||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
@@ -73,7 +72,7 @@ pub async fn create<'e, E: PgExecutor<'e>>(
|
|||||||
title: Option<&str>,
|
title: Option<&str>,
|
||||||
uploaded_by: Option<Uuid>,
|
uploaded_by: Option<Uuid>,
|
||||||
) -> AppResult<Chapter> {
|
) -> AppResult<Chapter> {
|
||||||
let result = sqlx::query_as::<_, Chapter>(
|
let row = sqlx::query_as::<_, Chapter>(
|
||||||
r#"
|
r#"
|
||||||
INSERT INTO chapters (manga_id, number, title, uploaded_by)
|
INSERT INTO chapters (manga_id, number, title, uploaded_by)
|
||||||
VALUES ($1, $2, $3, $4)
|
VALUES ($1, $2, $3, $4)
|
||||||
@@ -85,15 +84,71 @@ pub async fn create<'e, E: PgExecutor<'e>>(
|
|||||||
.bind(title)
|
.bind(title)
|
||||||
.bind(uploaded_by)
|
.bind(uploaded_by)
|
||||||
.fetch_one(executor)
|
.fetch_one(executor)
|
||||||
.await;
|
.await?;
|
||||||
|
Ok(row)
|
||||||
match result {
|
|
||||||
Ok(c) => Ok(c),
|
|
||||||
Err(e) if is_unique_violation(&e) => Err(AppError::Conflict(format!(
|
|
||||||
"chapter {number} conflicts with an existing chapter for this manga"
|
|
||||||
))),
|
|
||||||
Err(e) => Err(AppError::Database(e)),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cross-link guard for `POST /bookmarks`: the bookmarks FK accepts
|
||||||
|
/// any valid chapter id, but a chapter must belong to the bookmark's
|
||||||
|
/// manga or the bookmark would dangle on a foreign manga. Handlers
|
||||||
|
/// call this before the insert and surface `NotFound` when it
|
||||||
|
/// returns `false`.
|
||||||
|
pub async fn belongs_to_manga(
|
||||||
|
pool: &PgPool,
|
||||||
|
chapter_id: Uuid,
|
||||||
|
manga_id: Uuid,
|
||||||
|
) -> AppResult<bool> {
|
||||||
|
let (exists,): (bool,) = sqlx::query_as(
|
||||||
|
"SELECT EXISTS(SELECT 1 FROM chapters WHERE id = $1 AND manga_id = $2)",
|
||||||
|
)
|
||||||
|
.bind(chapter_id)
|
||||||
|
.bind(manga_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(exists)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read just the page_count for a chapter. Used by the crawler
|
||||||
|
/// daemon's consumer-side dedup safety net so it can ack-done a job
|
||||||
|
/// whose chapter has already been fetched by a racing worker.
|
||||||
|
pub async fn page_count(pool: &PgPool, id: Uuid) -> sqlx::Result<Option<i32>> {
|
||||||
|
sqlx::query_scalar("SELECT page_count FROM chapters WHERE id = $1")
|
||||||
|
.bind(id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look up the manga_id + most recent live source_url for a chapter.
|
||||||
|
/// Used by the daemon's chapter dispatcher to resolve the URL it needs
|
||||||
|
/// to hand to `content::sync_chapter_content`.
|
||||||
|
///
|
||||||
|
/// Skips soft-dropped sources (`cs.dropped_at IS NOT NULL`) and breaks
|
||||||
|
/// ties between multiple live sources by `last_seen_at DESC`, so the
|
||||||
|
/// freshest still-attached URL wins. Returns `None` when the chapter
|
||||||
|
/// is gone or all its source rows are dropped — callers in the
|
||||||
|
/// dispatcher treat `None` as "ack the job, skip the work."
|
||||||
|
///
|
||||||
|
/// The enqueue queries (`pipeline::enqueue_bookmarked_pending` and
|
||||||
|
/// `enqueue_pending_for_manga`) apply the same `dropped_at IS NULL`
|
||||||
|
/// filter — this resolver stays in lockstep so a chapter that was
|
||||||
|
/// dropped between enqueue and lease isn't dispatched against a stale
|
||||||
|
/// URL.
|
||||||
|
pub async fn dispatch_target(
|
||||||
|
pool: &PgPool,
|
||||||
|
chapter_id: Uuid,
|
||||||
|
) -> sqlx::Result<Option<(Uuid, String)>> {
|
||||||
|
sqlx::query_as(
|
||||||
|
"SELECT c.manga_id, cs.source_url \
|
||||||
|
FROM chapters c \
|
||||||
|
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
||||||
|
WHERE c.id = $1 \
|
||||||
|
AND cs.dropped_at IS NULL \
|
||||||
|
ORDER BY cs.last_seen_at DESC \
|
||||||
|
LIMIT 1",
|
||||||
|
)
|
||||||
|
.bind(chapter_id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn set_page_count<'e, E: PgExecutor<'e>>(
|
pub async fn set_page_count<'e, E: PgExecutor<'e>>(
|
||||||
@@ -109,10 +164,3 @@ pub async fn set_page_count<'e, E: PgExecutor<'e>>(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_unique_violation(err: &sqlx::Error) -> bool {
|
|
||||||
if let sqlx::Error::Database(db_err) = err {
|
|
||||||
db_err.code().as_deref() == Some("23505")
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -8,14 +8,16 @@
|
|||||||
//! updated (metadata_hash changed), or unchanged.
|
//! updated (metadata_hash changed), or unchanged.
|
||||||
//! - [`sync_manga_chapters`]: per-manga chapter reconciliation. Adds
|
//! - [`sync_manga_chapters`]: per-manga chapter reconciliation. Adds
|
||||||
//! new ones, refreshes URLs on existing ones, soft-drops vanished.
|
//! new ones, refreshes URLs on existing ones, soft-drops vanished.
|
||||||
//! - [`mark_dropped_mangas`]: end-of-run pass. Any manga from this
|
//! - [`mark_run_started`] / [`mark_run_completed`] /
|
||||||
//! source whose `last_seen_at` is older than the run start is
|
//! [`last_run_completed_cleanly`]: per-source recovery flag in
|
||||||
//! soft-dropped.
|
//! `crawler_state`. A `false` flag on tick start means the previous
|
||||||
|
//! run did not exit cleanly and the next walk should ignore the
|
||||||
|
//! early-stop condition.
|
||||||
//!
|
//!
|
||||||
//! Each public function is a transaction boundary so a partial failure
|
//! Each public function is a transaction boundary so a partial failure
|
||||||
//! mid-call leaves the DB in its pre-call state.
|
//! mid-call leaves the DB in its pre-call state.
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::Utc;
|
||||||
use sqlx::{PgPool, Postgres, Transaction};
|
use sqlx::{PgPool, Postgres, Transaction};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
@@ -274,7 +276,20 @@ async fn sync_tags(
|
|||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
tags: &[String],
|
tags: &[String],
|
||||||
) -> sqlx::Result<()> {
|
) -> sqlx::Result<()> {
|
||||||
sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1")
|
// Only clear crawler-owned attachments (added_by IS NULL). User-
|
||||||
|
// attached tags are owned by the attaching user and must survive
|
||||||
|
// the recurring metadata pass — see manga_tags.added_by in
|
||||||
|
// migration 0009.
|
||||||
|
//
|
||||||
|
// Note on orphans: `manga_tags.added_by` is `ON DELETE SET NULL`,
|
||||||
|
// so an attachment whose user was deleted becomes
|
||||||
|
// indistinguishable from a crawler-owned row and is cleaned up
|
||||||
|
// here. That mirrors how `api::mangas::detach_tag` already treats
|
||||||
|
// orphans ("nobody owns it, refuse to let anyone but admin clear
|
||||||
|
// them") — the crawler now becomes the eventual reaper. Tracked
|
||||||
|
// by `sync_tags_garbage_collects_orphan_user_attachments` in
|
||||||
|
// backend/tests/crawler_sync.rs.
|
||||||
|
sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1 AND added_by IS NULL")
|
||||||
.bind(manga_id)
|
.bind(manga_id)
|
||||||
.execute(&mut **tx)
|
.execute(&mut **tx)
|
||||||
.await?;
|
.await?;
|
||||||
@@ -315,6 +330,22 @@ pub async fn sync_manga_chapters(
|
|||||||
chapters: &[SourceChapterRef],
|
chapters: &[SourceChapterRef],
|
||||||
) -> sqlx::Result<ChapterDiff> {
|
) -> sqlx::Result<ChapterDiff> {
|
||||||
let mut tx = pool.begin().await?;
|
let mut tx = pool.begin().await?;
|
||||||
|
// Per-manga advisory lock. Two concurrent calls for the same manga
|
||||||
|
// would otherwise both read `seen_keys`, both run the drop UPDATE
|
||||||
|
// filtered on `NOT (key = ANY $3)`, and the later commit could soft-
|
||||||
|
// drop a chapter the earlier commit had just inserted (lost-update
|
||||||
|
// shape under MVCC). `pg_advisory_xact_lock` is scoped to this
|
||||||
|
// transaction: it auto-releases on COMMIT/ROLLBACK so a Rust-side
|
||||||
|
// panic mid-call doesn't strand the lock. The single-arg int8 form
|
||||||
|
// keyed by `hashtextextended(manga_id::text, 0)` shares Postgres'
|
||||||
|
// global advisory-lock namespace with `CRON_LOCK_KEY`, but collision
|
||||||
|
// is 2^-64 per pair (a UUID-derived hash hitting the fixed cron key
|
||||||
|
// is effectively impossible).
|
||||||
|
sqlx::query("SELECT pg_advisory_xact_lock(hashtextextended($1::text, 0))")
|
||||||
|
.bind(manga_id)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
let mut diff = ChapterDiff::default();
|
let mut diff = ChapterDiff::default();
|
||||||
let seen_keys: Vec<String> = chapters
|
let seen_keys: Vec<String> = chapters
|
||||||
.iter()
|
.iter()
|
||||||
@@ -322,11 +353,23 @@ pub async fn sync_manga_chapters(
|
|||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
for c in chapters {
|
for c in chapters {
|
||||||
|
// Lookup is constrained by manga_id (via the chapters join) so a
|
||||||
|
// source whose chapter slugs collide across mangas (e.g.
|
||||||
|
// "chapter-1" appearing under two different mangas) attributes
|
||||||
|
// each row to the correct manga. Migration 0017 dropped the
|
||||||
|
// (source_id, source_chapter_key) PK in favour of
|
||||||
|
// (source_id, chapter_id) for exactly this reason.
|
||||||
let existing: Option<(Uuid,)> = sqlx::query_as(
|
let existing: Option<(Uuid,)> = sqlx::query_as(
|
||||||
"SELECT chapter_id FROM chapter_sources WHERE source_id = $1 AND source_chapter_key = $2",
|
"SELECT cs.chapter_id \
|
||||||
|
FROM chapter_sources cs \
|
||||||
|
JOIN chapters ch ON ch.id = cs.chapter_id \
|
||||||
|
WHERE cs.source_id = $1 \
|
||||||
|
AND cs.source_chapter_key = $2 \
|
||||||
|
AND ch.manga_id = $3",
|
||||||
)
|
)
|
||||||
.bind(source_id)
|
.bind(source_id)
|
||||||
.bind(&c.source_chapter_key)
|
.bind(&c.source_chapter_key)
|
||||||
|
.bind(manga_id)
|
||||||
.fetch_optional(&mut *tx)
|
.fetch_optional(&mut *tx)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -370,16 +413,19 @@ pub async fn sync_manga_chapters(
|
|||||||
.bind(chapter_id)
|
.bind(chapter_id)
|
||||||
.execute(&mut *tx)
|
.execute(&mut *tx)
|
||||||
.await?;
|
.await?;
|
||||||
|
// chapter_id is now the natural per-(source, chapter)
|
||||||
|
// identifier — use it directly instead of re-keying on
|
||||||
|
// (source_id, source_chapter_key) which may not be unique.
|
||||||
sqlx::query(
|
sqlx::query(
|
||||||
r#"
|
r#"
|
||||||
UPDATE chapter_sources
|
UPDATE chapter_sources
|
||||||
SET source_url = $1, last_seen_at = NOW(), dropped_at = NULL
|
SET source_url = $1, last_seen_at = NOW(), dropped_at = NULL
|
||||||
WHERE source_id = $2 AND source_chapter_key = $3
|
WHERE source_id = $2 AND chapter_id = $3
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(&c.url)
|
.bind(&c.url)
|
||||||
.bind(source_id)
|
.bind(source_id)
|
||||||
.bind(&c.source_chapter_key)
|
.bind(chapter_id)
|
||||||
.execute(&mut *tx)
|
.execute(&mut *tx)
|
||||||
.await?;
|
.await?;
|
||||||
diff.refreshed += 1;
|
diff.refreshed += 1;
|
||||||
@@ -412,19 +458,52 @@ pub async fn sync_manga_chapters(
|
|||||||
Ok(diff)
|
Ok(diff)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Record that a complete Backfill walk has finished for `source_id`.
|
/// Count the chapters that the source `(source_id, source_manga_key)`
|
||||||
/// The presence of this row is what the daemon's mode auto-detection
|
/// is currently known to attach to — i.e. the number of `chapter_sources`
|
||||||
/// uses to flip from Backfill to Incremental on subsequent ticks.
|
/// rows for the manga identified by the (source_id, source_manga_key)
|
||||||
|
/// pair, restricted to live (`dropped_at IS NULL`) rows.
|
||||||
///
|
///
|
||||||
/// Keyed `seed_completed:<source_id>` in `crawler_state`. JSON payload
|
/// Used by the metadata pass's partial-render guard: if `fetch_manga`
|
||||||
/// stores the timestamp so we can surface "last fully reseeded at" in
|
/// returns an empty `chapters` Vec but the source previously surfaced
|
||||||
/// future ops tooling without another migration.
|
/// chapters here, that's most likely a chromium snapshot taken between
|
||||||
pub async fn mark_seed_completed(
|
/// the `#chapter_table` wrapper render and its rows render — the
|
||||||
|
/// safest move is to skip `sync_manga_chapters` so the soft-drop
|
||||||
|
/// branch doesn't flip every existing chapter to `dropped_at`.
|
||||||
|
///
|
||||||
|
/// Returns `Ok(0)` when the manga is brand-new (no `manga_sources`
|
||||||
|
/// row yet), which is the legitimate "this manga has no chapters yet"
|
||||||
|
/// case and must NOT be flagged.
|
||||||
|
pub async fn live_chapter_count_for_source_manga(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
source_id: &str,
|
source_id: &str,
|
||||||
at: DateTime<Utc>,
|
source_manga_key: &str,
|
||||||
) -> sqlx::Result<()> {
|
) -> sqlx::Result<i64> {
|
||||||
let key = format!("seed_completed:{source_id}");
|
let row: Option<(i64,)> = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) \
|
||||||
|
FROM chapter_sources cs \
|
||||||
|
JOIN chapters c ON c.id = cs.chapter_id \
|
||||||
|
JOIN manga_sources ms \
|
||||||
|
ON ms.manga_id = c.manga_id \
|
||||||
|
AND ms.source_id = cs.source_id \
|
||||||
|
WHERE ms.source_id = $1 \
|
||||||
|
AND ms.source_manga_key = $2 \
|
||||||
|
AND cs.dropped_at IS NULL",
|
||||||
|
)
|
||||||
|
.bind(source_id)
|
||||||
|
.bind(source_manga_key)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(row.map(|(n,)| n).unwrap_or(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mark a metadata pass as in-flight for `source_id`. Stamps
|
||||||
|
/// `last_run_completed:<source_id>` in `crawler_state` with
|
||||||
|
/// `{"completed": false, "at": now}`. A crash, panic, or SIGKILL after
|
||||||
|
/// this point leaves the flag at `false`, which the next tick reads as
|
||||||
|
/// "previous run did not exit cleanly — walk the full catalog this
|
||||||
|
/// time" (recovery sweep).
|
||||||
|
pub async fn mark_run_started(pool: &PgPool, source_id: &str) -> sqlx::Result<()> {
|
||||||
|
let key = format!("last_run_completed:{source_id}");
|
||||||
sqlx::query(
|
sqlx::query(
|
||||||
"INSERT INTO crawler_state (key, value, updated_at) \
|
"INSERT INTO crawler_state (key, value, updated_at) \
|
||||||
VALUES ($1, $2, now()) \
|
VALUES ($1, $2, now()) \
|
||||||
@@ -432,50 +511,54 @@ pub async fn mark_seed_completed(
|
|||||||
SET value = EXCLUDED.value, updated_at = now()",
|
SET value = EXCLUDED.value, updated_at = now()",
|
||||||
)
|
)
|
||||||
.bind(&key)
|
.bind(&key)
|
||||||
.bind(serde_json::json!({ "at": at.to_rfc3339() }))
|
.bind(serde_json::json!({
|
||||||
|
"completed": false,
|
||||||
|
"at": Utc::now().to_rfc3339(),
|
||||||
|
}))
|
||||||
.execute(pool)
|
.execute(pool)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read the timestamp written by [`mark_seed_completed`], if any.
|
/// Mark a metadata pass as completed cleanly for `source_id`. Called
|
||||||
/// `None` means no complete Backfill has ever finished for this
|
/// from the same place a run decides it reached end-of-walk or hit the
|
||||||
/// source — the daemon should run Backfill on the next tick.
|
/// intentional stop. The next tick reads `true` and applies the normal
|
||||||
pub async fn seed_completed_at(
|
/// stop condition.
|
||||||
|
pub async fn mark_run_completed(pool: &PgPool, source_id: &str) -> sqlx::Result<()> {
|
||||||
|
let key = format!("last_run_completed:{source_id}");
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO crawler_state (key, value, updated_at) \
|
||||||
|
VALUES ($1, $2, now()) \
|
||||||
|
ON CONFLICT (key) DO UPDATE \
|
||||||
|
SET value = EXCLUDED.value, updated_at = now()",
|
||||||
|
)
|
||||||
|
.bind(&key)
|
||||||
|
.bind(serde_json::json!({
|
||||||
|
"completed": true,
|
||||||
|
"at": Utc::now().to_rfc3339(),
|
||||||
|
}))
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the recovery flag for `source_id`. A missing row OR an
|
||||||
|
/// unparseable value reads as `true` ("clean") — the former covers the
|
||||||
|
/// first-ever run on a virgin DB (no recovery needed), the latter
|
||||||
|
/// covers forward-compat against future schema changes; both fail-safe
|
||||||
|
/// toward not making an operator pay for an unnecessary full sweep.
|
||||||
|
pub async fn last_run_completed_cleanly(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
source_id: &str,
|
source_id: &str,
|
||||||
) -> sqlx::Result<Option<DateTime<Utc>>> {
|
) -> sqlx::Result<bool> {
|
||||||
let key = format!("seed_completed:{source_id}");
|
let key = format!("last_run_completed:{source_id}");
|
||||||
let row: Option<serde_json::Value> =
|
let row: Option<serde_json::Value> =
|
||||||
sqlx::query_scalar("SELECT value FROM crawler_state WHERE key = $1")
|
sqlx::query_scalar("SELECT value FROM crawler_state WHERE key = $1")
|
||||||
.bind(&key)
|
.bind(&key)
|
||||||
.fetch_optional(pool)
|
.fetch_optional(pool)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(row.and_then(|v| {
|
Ok(row
|
||||||
v.get("at")
|
.and_then(|v| v.get("completed").and_then(|b| b.as_bool()))
|
||||||
.and_then(|s| s.as_str())
|
.unwrap_or(true))
|
||||||
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
|
|
||||||
.map(|dt| dt.with_timezone(&Utc))
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn mark_dropped_mangas(
|
|
||||||
pool: &PgPool,
|
|
||||||
source_id: &str,
|
|
||||||
run_started_at: DateTime<Utc>,
|
|
||||||
) -> sqlx::Result<u64> {
|
|
||||||
let res = sqlx::query(
|
|
||||||
r#"
|
|
||||||
UPDATE manga_sources
|
|
||||||
SET dropped_at = NOW()
|
|
||||||
WHERE source_id = $1
|
|
||||||
AND last_seen_at < $2
|
|
||||||
AND dropped_at IS NULL
|
|
||||||
"#,
|
|
||||||
)
|
|
||||||
.bind(source_id)
|
|
||||||
.bind(run_started_at)
|
|
||||||
.execute(pool)
|
|
||||||
.await?;
|
|
||||||
Ok(res.rows_affected())
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -61,6 +61,11 @@ pub async fn load_for_mangas(
|
|||||||
/// FK constraint would reject them, so we filter upstream rather than
|
/// FK constraint would reject them, so we filter upstream rather than
|
||||||
/// surface a 500 here. (The API layer validates the set against
|
/// surface a 500 here. (The API layer validates the set against
|
||||||
/// `list_all` first.)
|
/// `list_all` first.)
|
||||||
|
///
|
||||||
|
/// Note: `crawler::repo::sync_genres` does a similar replace, but by
|
||||||
|
/// *name* and with auto-create of unseen genres — the crawler can't
|
||||||
|
/// validate against the curated vocabulary on its own. Both paths are
|
||||||
|
/// intentional; don't merge them without preserving that semantic.
|
||||||
pub async fn set_for_manga(
|
pub async fn set_for_manga(
|
||||||
conn: &mut PgConnection,
|
conn: &mut PgConnection,
|
||||||
manga_id: Uuid,
|
manga_id: Uuid,
|
||||||
|
|||||||
@@ -281,3 +281,17 @@ pub async fn exists(pool: &PgPool, id: Uuid) -> AppResult<bool> {
|
|||||||
.await?;
|
.await?;
|
||||||
Ok(exists)
|
Ok(exists)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the uploader's user id for a manga. `None` either when the
|
||||||
|
/// manga doesn't exist or when the row predates the `uploaded_by`
|
||||||
|
/// column (historical NULL — see migration 0011). Callers must
|
||||||
|
/// distinguish "manga missing" via [`exists`] before relying on this
|
||||||
|
/// to make an authz decision.
|
||||||
|
pub async fn uploaded_by(pool: &PgPool, id: Uuid) -> AppResult<Option<Uuid>> {
|
||||||
|
let row: Option<(Option<Uuid>,)> =
|
||||||
|
sqlx::query_as("SELECT uploaded_by FROM mangas WHERE id = $1")
|
||||||
|
.bind(id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(row.and_then(|(u,)| u))
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
pub mod admin_audit;
|
||||||
|
pub mod admin_view;
|
||||||
pub mod api_token;
|
pub mod api_token;
|
||||||
pub mod author;
|
pub mod author;
|
||||||
pub mod bookmark;
|
pub mod bookmark;
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ pub async fn create(pool: &PgPool, username: &str, password_hash: &str) -> AppRe
|
|||||||
r#"
|
r#"
|
||||||
INSERT INTO users (username, password_hash)
|
INSERT INTO users (username, password_hash)
|
||||||
VALUES ($1, $2)
|
VALUES ($1, $2)
|
||||||
RETURNING id, username, password_hash, created_at
|
RETURNING id, username, password_hash, created_at, is_admin
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.bind(username)
|
.bind(username)
|
||||||
@@ -21,7 +21,7 @@ pub async fn create(pool: &PgPool, username: &str, password_hash: &str) -> AppRe
|
|||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(user) => Ok(user),
|
Ok(user) => Ok(user),
|
||||||
Err(e) if is_unique_violation(&e) => {
|
Err(sqlx::Error::Database(ref db_err)) if db_err.is_unique_violation() => {
|
||||||
Err(AppError::Conflict("username is already taken".into()))
|
Err(AppError::Conflict("username is already taken".into()))
|
||||||
}
|
}
|
||||||
Err(e) => Err(AppError::Database(e)),
|
Err(e) => Err(AppError::Database(e)),
|
||||||
@@ -35,7 +35,7 @@ pub async fn create(pool: &PgPool, username: &str, password_hash: &str) -> AppRe
|
|||||||
pub async fn find_by_username(pool: &PgPool, username: &str) -> AppResult<Option<User>> {
|
pub async fn find_by_username(pool: &PgPool, username: &str) -> AppResult<Option<User>> {
|
||||||
let row = sqlx::query_as::<_, User>(
|
let row = sqlx::query_as::<_, User>(
|
||||||
r#"
|
r#"
|
||||||
SELECT id, username, password_hash, created_at
|
SELECT id, username, password_hash, created_at, is_admin
|
||||||
FROM users
|
FROM users
|
||||||
WHERE lower(username) = lower($1)
|
WHERE lower(username) = lower($1)
|
||||||
"#,
|
"#,
|
||||||
@@ -48,7 +48,7 @@ pub async fn find_by_username(pool: &PgPool, username: &str) -> AppResult<Option
|
|||||||
|
|
||||||
pub async fn find_by_id(pool: &PgPool, id: Uuid) -> AppResult<Option<User>> {
|
pub async fn find_by_id(pool: &PgPool, id: Uuid) -> AppResult<Option<User>> {
|
||||||
let row = sqlx::query_as::<_, User>(
|
let row = sqlx::query_as::<_, User>(
|
||||||
r#"SELECT id, username, password_hash, created_at FROM users WHERE id = $1"#,
|
r#"SELECT id, username, password_hash, created_at, is_admin FROM users WHERE id = $1"#,
|
||||||
)
|
)
|
||||||
.bind(id)
|
.bind(id)
|
||||||
.fetch_optional(pool)
|
.fetch_optional(pool)
|
||||||
@@ -56,10 +56,317 @@ pub async fn find_by_id(pool: &PgPool, id: Uuid) -> AppResult<Option<User>> {
|
|||||||
Ok(row)
|
Ok(row)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_unique_violation(err: &sqlx::Error) -> bool {
|
/// Postgres advisory-lock key guarding admin-count-changing operations
|
||||||
if let sqlx::Error::Database(db_err) = err {
|
/// (demote, delete-admin). Without this lock two concurrent demotes of
|
||||||
db_err.code().as_deref() == Some("23505")
|
/// different admins could each pass their "more than one admin remains"
|
||||||
} else {
|
/// check, then commit, leaving zero admins. The lock serialises any tx
|
||||||
false
|
/// that might change the admin count so the recount under the lock is
|
||||||
|
/// authoritative.
|
||||||
|
///
|
||||||
|
/// Value is the bytes of "admininv" interpreted as a big-endian i64.
|
||||||
|
/// Postgres' advisory-lock keyspace is global; collision risk with
|
||||||
|
/// `CRON_LOCK_KEY` and friends is ~2^-64.
|
||||||
|
pub const ADMIN_INVARIANT_LOCK_KEY: i64 = 0x61_64_6d_69_6e_69_6e_76;
|
||||||
|
|
||||||
|
/// Filter and paging parameters for the admin user listing.
///
/// `limit` and `offset` are bound straight to the SQL `LIMIT` / `OFFSET`
/// clauses by `list_with_total`. Note that the derived `Default` leaves
/// `limit` at `0` and `search` at `None`.
#[derive(Default, Debug)]
pub struct ListUsersQuery {
    /// Optional username search term.
    pub search: Option<String>,
    /// Maximum number of rows to return.
    pub limit: i64,
    /// Number of rows to skip before the first returned row.
    pub offset: i64,
}
|
||||||
|
|
||||||
|
/// Paginated user list with total count. `search` is a case-insensitive
|
||||||
|
/// substring match on `username`. Order is alphabetical by username so
|
||||||
|
/// pagination is stable across concurrent writes (mangas changing
|
||||||
|
/// is_admin doesn't reshuffle the page).
|
||||||
|
pub async fn list_with_total(
|
||||||
|
pool: &PgPool,
|
||||||
|
q: &ListUsersQuery,
|
||||||
|
) -> AppResult<(Vec<User>, i64)> {
|
||||||
|
let pat = q
|
||||||
|
.search
|
||||||
|
.as_ref()
|
||||||
|
.map(|s| format!("%{}%", s.trim()))
|
||||||
|
.filter(|p| p.len() > 2);
|
||||||
|
|
||||||
|
let items = sqlx::query_as::<_, User>(
|
||||||
|
r#"
|
||||||
|
SELECT id, username, password_hash, created_at, is_admin
|
||||||
|
FROM users
|
||||||
|
WHERE ($1::text IS NULL OR username ILIKE $1)
|
||||||
|
ORDER BY username
|
||||||
|
LIMIT $2 OFFSET $3
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind(&pat)
|
||||||
|
.bind(q.limit)
|
||||||
|
.bind(q.offset)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let total: i64 = sqlx::query_scalar(
|
||||||
|
"SELECT COUNT(*) FROM users WHERE ($1::text IS NULL OR username ILIKE $1)",
|
||||||
|
)
|
||||||
|
.bind(&pat)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok((items, total))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Raw `is_admin` update with no safety checks, no audit log, and no
|
||||||
|
/// advisory lock. Exists only as a test setup helper for the admin-
|
||||||
|
/// feature integration suite — production code MUST go through
|
||||||
|
/// [`admin_safe_set_is_admin`], which enforces self-protection, the
|
||||||
|
/// last-admin invariant, and the audit log atomically.
|
||||||
|
pub async fn set_is_admin_unchecked(pool: &PgPool, id: Uuid, value: bool) -> AppResult<()> {
|
||||||
|
sqlx::query("UPDATE users SET is_admin = $1 WHERE id = $2")
|
||||||
|
.bind(value)
|
||||||
|
.bind(id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ensure the user `username` exists and is an admin. Called at startup
|
||||||
|
/// from `app::build` when `ADMIN_USERNAME` / `ADMIN_PASSWORD` are set.
|
||||||
|
///
|
||||||
|
/// Semantics — see cross-cutting decision #2 in the feature plan:
|
||||||
|
/// - If no row exists: create with the env-supplied password hashed via
|
||||||
|
/// argon2id and `is_admin = true`.
|
||||||
|
/// - If a row already exists: flip `is_admin` to true if needed; **never**
|
||||||
|
/// touch the existing `password_hash`. Lets the operator rotate the
|
||||||
|
/// admin password through the UI without env-var conflict.
|
||||||
|
/// Wrapped in a transaction so a concurrent `register` for the same
|
||||||
|
/// username can't slip an INSERT between the SELECT and UPDATE/INSERT.
|
||||||
|
/// Set `is_admin` on a user with full safety checks: rejects self-demote,
|
||||||
|
/// rejects demoting the only remaining admin (under `ADMIN_INVARIANT_LOCK_KEY`
|
||||||
|
/// to close the parallel-demote race), and writes an `admin_audit` row
|
||||||
|
/// in the same tx so the log mirrors what actually committed.
|
||||||
|
///
|
||||||
|
/// Returns the freshly-written user row (so the handler can return it
|
||||||
|
/// without a second SELECT).
|
||||||
|
pub async fn admin_safe_set_is_admin(
|
||||||
|
pool: &PgPool,
|
||||||
|
actor_id: Uuid,
|
||||||
|
target_id: Uuid,
|
||||||
|
value: bool,
|
||||||
|
) -> AppResult<User> {
|
||||||
|
// Cheap pre-check before opening a tx — also covers the "demote me"
|
||||||
|
// case which would otherwise pass the recount when other admins exist.
|
||||||
|
if actor_id == target_id && !value {
|
||||||
|
return Err(AppError::Conflict(
|
||||||
|
"cannot demote yourself; ask another admin".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut tx = pool.begin().await?;
|
||||||
|
sqlx::query("SELECT pg_advisory_xact_lock($1)")
|
||||||
|
.bind(ADMIN_INVARIANT_LOCK_KEY)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let target: Option<User> = sqlx::query_as(
|
||||||
|
"SELECT id, username, password_hash, created_at, is_admin \
|
||||||
|
FROM users WHERE id = $1 FOR UPDATE",
|
||||||
|
)
|
||||||
|
.bind(target_id)
|
||||||
|
.fetch_optional(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
let Some(target) = target else {
|
||||||
|
return Err(AppError::NotFound);
|
||||||
|
};
|
||||||
|
|
||||||
|
// No-op: caller asked to set `is_admin` to its current value. Return
|
||||||
|
// the row as-is without writing an audit entry — otherwise repeated
|
||||||
|
// PATCH calls (browser retry, double-click) pile misleading
|
||||||
|
// "promote_user" rows in `admin_audit` for actions that changed
|
||||||
|
// nothing.
|
||||||
|
if target.is_admin == value {
|
||||||
|
tx.commit().await?;
|
||||||
|
return Ok(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recount inside the lock — this is the authoritative read.
|
||||||
|
if target.is_admin && !value {
|
||||||
|
let admin_count: i64 =
|
||||||
|
sqlx::query_scalar("SELECT COUNT(*) FROM users WHERE is_admin = true")
|
||||||
|
.fetch_one(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
if admin_count <= 1 {
|
||||||
|
return Err(AppError::Conflict(
|
||||||
|
"cannot demote the last admin; promote another user first".into(),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let updated: User = sqlx::query_as(
|
||||||
|
"UPDATE users SET is_admin = $1 WHERE id = $2 \
|
||||||
|
RETURNING id, username, password_hash, created_at, is_admin",
|
||||||
|
)
|
||||||
|
.bind(value)
|
||||||
|
.bind(target_id)
|
||||||
|
.fetch_one(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let action = if value { "promote_user" } else { "demote_user" };
|
||||||
|
crate::repo::admin_audit::insert(
|
||||||
|
&mut *tx,
|
||||||
|
actor_id,
|
||||||
|
action,
|
||||||
|
"user",
|
||||||
|
Some(target_id),
|
||||||
|
serde_json::json!({ "username": target.username }),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tx.commit().await?;
|
||||||
|
Ok(updated)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete a user with full safety checks: rejects self-delete, rejects
|
||||||
|
/// deleting the only remaining admin (under `ADMIN_INVARIANT_LOCK_KEY`),
|
||||||
|
/// and writes an `admin_audit` row in the same tx. Captures the deleted
|
||||||
|
/// username + admin status in the audit payload so the action is
|
||||||
|
/// readable after the user row itself is gone.
|
||||||
|
pub async fn admin_safe_delete(
|
||||||
|
pool: &PgPool,
|
||||||
|
actor_id: Uuid,
|
||||||
|
target_id: Uuid,
|
||||||
|
) -> AppResult<()> {
|
||||||
|
if actor_id == target_id {
|
||||||
|
return Err(AppError::Conflict(
|
||||||
|
"cannot delete yourself; ask another admin".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut tx = pool.begin().await?;
|
||||||
|
sqlx::query("SELECT pg_advisory_xact_lock($1)")
|
||||||
|
.bind(ADMIN_INVARIANT_LOCK_KEY)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let target: Option<User> = sqlx::query_as(
|
||||||
|
"SELECT id, username, password_hash, created_at, is_admin \
|
||||||
|
FROM users WHERE id = $1 FOR UPDATE",
|
||||||
|
)
|
||||||
|
.bind(target_id)
|
||||||
|
.fetch_optional(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
let Some(target) = target else {
|
||||||
|
return Err(AppError::NotFound);
|
||||||
|
};
|
||||||
|
|
||||||
|
if target.is_admin {
|
||||||
|
let admin_count: i64 =
|
||||||
|
sqlx::query_scalar("SELECT COUNT(*) FROM users WHERE is_admin = true")
|
||||||
|
.fetch_one(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
if admin_count <= 1 {
|
||||||
|
return Err(AppError::Conflict(
|
||||||
|
"cannot delete the last admin; promote another user first".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlx::query("DELETE FROM users WHERE id = $1")
|
||||||
|
.bind(target_id)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
crate::repo::admin_audit::insert(
|
||||||
|
&mut *tx,
|
||||||
|
actor_id,
|
||||||
|
"delete_user",
|
||||||
|
"user",
|
||||||
|
Some(target_id),
|
||||||
|
serde_json::json!({
|
||||||
|
"username": target.username,
|
||||||
|
"was_admin": target.is_admin,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tx.commit().await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Admin-initiated user creation. Wraps the INSERT + audit row in a
|
||||||
|
/// single transaction so a rolled-back create never leaves an orphan
|
||||||
|
/// audit entry. Caller (HTTP handler) is responsible for validating
|
||||||
|
/// `username`/`password` and hashing — this fn assumes both are
|
||||||
|
/// already vetted by the same `validate_*` rules used by self-
|
||||||
|
/// registration.
|
||||||
|
pub async fn admin_create_user(
|
||||||
|
pool: &PgPool,
|
||||||
|
actor_id: Uuid,
|
||||||
|
username: &str,
|
||||||
|
password_hash: &str,
|
||||||
|
is_admin: bool,
|
||||||
|
) -> AppResult<User> {
|
||||||
|
let mut tx = pool.begin().await?;
|
||||||
|
let user: User = match sqlx::query_as::<_, User>(
|
||||||
|
"INSERT INTO users (username, password_hash, is_admin) VALUES ($1, $2, $3) \
|
||||||
|
RETURNING id, username, password_hash, created_at, is_admin",
|
||||||
|
)
|
||||||
|
.bind(username)
|
||||||
|
.bind(password_hash)
|
||||||
|
.bind(is_admin)
|
||||||
|
.fetch_one(&mut *tx)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(u) => u,
|
||||||
|
Err(sqlx::Error::Database(ref db_err)) if db_err.is_unique_violation() => {
|
||||||
|
return Err(AppError::Conflict("username is already taken".into()));
|
||||||
|
}
|
||||||
|
Err(e) => return Err(AppError::Database(e)),
|
||||||
|
};
|
||||||
|
|
||||||
|
crate::repo::admin_audit::insert(
|
||||||
|
&mut *tx,
|
||||||
|
actor_id,
|
||||||
|
"create_user",
|
||||||
|
"user",
|
||||||
|
Some(user.id),
|
||||||
|
serde_json::json!({
|
||||||
|
"username": user.username,
|
||||||
|
"is_admin": user.is_admin,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tx.commit().await?;
|
||||||
|
Ok(user)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn bootstrap_admin(
|
||||||
|
pool: &PgPool,
|
||||||
|
username: &str,
|
||||||
|
password: &str,
|
||||||
|
) -> AppResult<()> {
|
||||||
|
let mut tx = pool.begin().await?;
|
||||||
|
let existing: Option<(Uuid,)> = sqlx::query_as(
|
||||||
|
"SELECT id FROM users WHERE lower(username) = lower($1) FOR UPDATE",
|
||||||
|
)
|
||||||
|
.bind(username)
|
||||||
|
.fetch_optional(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
match existing {
|
||||||
|
Some((id,)) => {
|
||||||
|
sqlx::query("UPDATE users SET is_admin = true WHERE id = $1 AND is_admin = false")
|
||||||
|
.bind(id)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let hash = crate::auth::password::hash_password(password)?;
|
||||||
|
sqlx::query("INSERT INTO users (username, password_hash, is_admin) VALUES ($1, $2, true)")
|
||||||
|
.bind(username)
|
||||||
|
.bind(&hash)
|
||||||
|
.execute(&mut *tx)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tx.commit().await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|||||||
@@ -16,6 +16,13 @@ impl LocalStorage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn resolve(&self, key: &str) -> Result<PathBuf, StorageError> {
|
fn resolve(&self, key: &str) -> Result<PathBuf, StorageError> {
|
||||||
|
// NUL bytes are rejected by the Linux syscall layer, but the
|
||||||
|
// error surfaces as an opaque IO failure rather than the
|
||||||
|
// explicit `BadKey` the rest of the contract uses. Catch it
|
||||||
|
// here so the error path is consistent.
|
||||||
|
if key.contains('\0') {
|
||||||
|
return Err(StorageError::BadKey);
|
||||||
|
}
|
||||||
let key = key.trim_start_matches('/');
|
let key = key.trim_start_matches('/');
|
||||||
if key.is_empty() {
|
if key.is_empty() {
|
||||||
return Err(StorageError::BadKey);
|
return Err(StorageError::BadKey);
|
||||||
@@ -79,6 +86,10 @@ impl Storage for LocalStorage {
|
|||||||
let path: &Path = &self.resolve(key)?;
|
let path: &Path = &self.resolve(key)?;
|
||||||
Ok(fs::try_exists(path).await?)
|
Ok(fs::try_exists(path).await?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn local_root(&self) -> Option<&Path> {
|
||||||
|
Some(&self.root)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -114,6 +125,9 @@ mod tests {
|
|||||||
assert!(matches!(s.get(".").await, Err(StorageError::BadKey)));
|
assert!(matches!(s.get(".").await, Err(StorageError::BadKey)));
|
||||||
// Empty segment via doubled slash.
|
// Empty segment via doubled slash.
|
||||||
assert!(matches!(s.get("a//b").await, Err(StorageError::BadKey)));
|
assert!(matches!(s.get("a//b").await, Err(StorageError::BadKey)));
|
||||||
|
// NUL byte (rejected explicitly so callers see BadKey rather
|
||||||
|
// than an opaque IO error from the kernel).
|
||||||
|
assert!(matches!(s.put("a\0b", b"x").await, Err(StorageError::BadKey)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ mod local;
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use futures_core::Stream;
|
use futures_core::Stream;
|
||||||
@@ -44,4 +46,13 @@ pub trait Storage: Send + Sync {
|
|||||||
async fn get_stream(&self, key: &str) -> Result<StreamingFile, StorageError>;
|
async fn get_stream(&self, key: &str) -> Result<StreamingFile, StorageError>;
|
||||||
async fn delete(&self, key: &str) -> Result<(), StorageError>;
|
async fn delete(&self, key: &str) -> Result<(), StorageError>;
|
||||||
async fn exists(&self, key: &str) -> Result<bool, StorageError>;
|
async fn exists(&self, key: &str) -> Result<bool, StorageError>;
|
||||||
|
|
||||||
|
/// Filesystem path the backend is rooted at, when introspectable.
|
||||||
|
/// Returns `None` for backends that aren't a local filesystem (e.g.
|
||||||
|
/// a future `S3Storage`). The admin system endpoint uses this to
|
||||||
|
/// statvfs the data dir; backends that return `None` get a `disk:
|
||||||
|
/// null` payload instead of fabricated numbers.
|
||||||
|
fn local_root(&self) -> Option<&Path> {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
548
backend/tests/api_admin_mangas.rs
Normal file
548
backend/tests/api_admin_mangas.rs
Normal file
@@ -0,0 +1,548 @@
|
|||||||
|
//! PR 3 (feat/admin-mangas-api) integration tests.
|
||||||
|
//!
|
||||||
|
//! Per-variant fixture tests for the derived sync-state SQL plus
|
||||||
|
//! happy-path E2E for the two admin endpoints. Auth-gate regression
|
||||||
|
//! (403/401) is covered by PR 1's `RequireAdmin` test matrix; the only
|
||||||
|
//! gate test here is one spot check per endpoint.
|
||||||
|
|
||||||
|
mod common;
|
||||||
|
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::Router;
|
||||||
|
use serde_json::json;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use tower::ServiceExt;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use mangalord::repo;
|
||||||
|
|
||||||
|
const SOURCE_ID: &str = "test-source";
|
||||||
|
|
||||||
|
async fn seed_admin(pool: &PgPool, app: &Router) -> (String, String) {
|
||||||
|
let (username, cookie) = common::register_user(app).await;
|
||||||
|
let u = repo::user::find_by_username(pool, &username)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
repo::user::set_is_admin_unchecked(pool, u.id, true).await.unwrap();
|
||||||
|
(username, cookie)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn seed_source(pool: &PgPool) {
|
||||||
|
repo::crawler::ensure_source(pool, SOURCE_ID, "Test", "https://example.test")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_manga(pool: &PgPool, title: &str) -> Uuid {
|
||||||
|
let (id,): (Uuid,) = sqlx::query_as(
|
||||||
|
"INSERT INTO mangas (title, status, alt_titles) VALUES ($1, 'ongoing', ARRAY[]::text[]) RETURNING id",
|
||||||
|
)
|
||||||
|
.bind(title)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
id
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_manga_source(
|
||||||
|
pool: &PgPool,
|
||||||
|
manga_id: Uuid,
|
||||||
|
source_manga_key: &str,
|
||||||
|
dropped: bool,
|
||||||
|
) {
|
||||||
|
let dropped_at = if dropped { "now()" } else { "NULL" };
|
||||||
|
let sql = format!(
|
||||||
|
"INSERT INTO manga_sources (source_id, source_manga_key, manga_id, source_url, dropped_at) \
|
||||||
|
VALUES ($1, $2, $3, 'https://example.test/m', {dropped_at})"
|
||||||
|
);
|
||||||
|
sqlx::query(&sql)
|
||||||
|
.bind(SOURCE_ID)
|
||||||
|
.bind(source_manga_key)
|
||||||
|
.bind(manga_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_chapter(pool: &PgPool, manga_id: Uuid, number: i32, page_count: i32) -> Uuid {
|
||||||
|
let (id,): (Uuid,) = sqlx::query_as(
|
||||||
|
"INSERT INTO chapters (manga_id, number, title, page_count) VALUES ($1, $2, NULL, $3) RETURNING id",
|
||||||
|
)
|
||||||
|
.bind(manga_id)
|
||||||
|
.bind(number)
|
||||||
|
.bind(page_count)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
id
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_chapter_source(
|
||||||
|
pool: &PgPool,
|
||||||
|
chapter_id: Uuid,
|
||||||
|
source_chapter_key: &str,
|
||||||
|
dropped: bool,
|
||||||
|
) {
|
||||||
|
let dropped_at = if dropped { "now()" } else { "NULL" };
|
||||||
|
let sql = format!(
|
||||||
|
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url, dropped_at) \
|
||||||
|
VALUES ($1, $2, $3, 'https://example.test/c', {dropped_at})"
|
||||||
|
);
|
||||||
|
sqlx::query(&sql)
|
||||||
|
.bind(SOURCE_ID)
|
||||||
|
.bind(source_chapter_key)
|
||||||
|
.bind(chapter_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_job(pool: &PgPool, payload: serde_json::Value, state: &str) {
|
||||||
|
sqlx::query("INSERT INTO crawler_jobs (payload, state) VALUES ($1, $2)")
|
||||||
|
.bind(payload)
|
||||||
|
.bind(state)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-variant tests don't care about pagination — fetch the whole
|
||||||
|
/// chapter set (up to the hard cap) and discard the total.
|
||||||
|
async fn fetch_chapter_rows(
|
||||||
|
pool: &PgPool,
|
||||||
|
manga_id: Uuid,
|
||||||
|
) -> Vec<mangalord::repo::admin_view::AdminChapterRow> {
|
||||||
|
let (rows, _) = repo::admin_view::list_chapters_with_sync_state(
|
||||||
|
pool,
|
||||||
|
&repo::admin_view::ListAdminChaptersQuery {
|
||||||
|
manga_id,
|
||||||
|
limit: 500,
|
||||||
|
offset: 0,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
rows
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- manga sync state ------------------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn manga_state_synced_for_fresh_source(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "Synced Manga").await;
|
||||||
|
insert_manga_source(&pool, m, "smk-1", false).await;
|
||||||
|
|
||||||
|
let (rows, total) = repo::admin_view::list_mangas_with_sync_state(
|
||||||
|
&pool,
|
||||||
|
&repo::admin_view::ListAdminMangasQuery {
|
||||||
|
limit: 50,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(total, 1);
|
||||||
|
assert_eq!(rows[0].id, m);
|
||||||
|
assert_eq!(rows[0].sync_state, mangalord::domain::MangaSyncState::Synced);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn manga_state_synced_for_user_upload_without_sources(pool: PgPool) {
|
||||||
|
let m = insert_manga(&pool, "User Upload").await;
|
||||||
|
let (rows, _) = repo::admin_view::list_mangas_with_sync_state(
|
||||||
|
&pool,
|
||||||
|
&repo::admin_view::ListAdminMangasQuery {
|
||||||
|
limit: 50,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(rows[0].id, m);
|
||||||
|
assert_eq!(rows[0].sync_state, mangalord::domain::MangaSyncState::Synced);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn manga_state_dropped_when_all_sources_dropped(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "Dropped Manga").await;
|
||||||
|
insert_manga_source(&pool, m, "smk-1", true).await;
|
||||||
|
|
||||||
|
let (rows, _) = repo::admin_view::list_mangas_with_sync_state(
|
||||||
|
&pool,
|
||||||
|
&repo::admin_view::ListAdminMangasQuery {
|
||||||
|
limit: 50,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(rows[0].id, m);
|
||||||
|
assert_eq!(rows[0].sync_state, mangalord::domain::MangaSyncState::Dropped);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn manga_state_in_progress_via_sync_chapter_list_job(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "Syncing Manga").await;
|
||||||
|
insert_manga_source(&pool, m, "smk-1", false).await;
|
||||||
|
// sync_chapter_list payload carries manga_id directly.
|
||||||
|
insert_job(
|
||||||
|
&pool,
|
||||||
|
json!({
|
||||||
|
"kind": "sync_chapter_list",
|
||||||
|
"source_id": SOURCE_ID,
|
||||||
|
"manga_id": m.to_string(),
|
||||||
|
"source_manga_key": "smk-1",
|
||||||
|
}),
|
||||||
|
"pending",
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let (rows, _) = repo::admin_view::list_mangas_with_sync_state(
|
||||||
|
&pool,
|
||||||
|
&repo::admin_view::ListAdminMangasQuery {
|
||||||
|
limit: 50,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(rows[0].sync_state, mangalord::domain::MangaSyncState::InProgress);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn manga_state_in_progress_via_sync_manga_job(pool: PgPool) {
|
||||||
|
// The trickier branch: sync_manga payload is keyed by
|
||||||
|
// source_manga_key, NOT manga_id — must join through manga_sources.
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "Metadata-Refreshing Manga").await;
|
||||||
|
insert_manga_source(&pool, m, "smk-key-42", false).await;
|
||||||
|
insert_job(
|
||||||
|
&pool,
|
||||||
|
json!({
|
||||||
|
"kind": "sync_manga",
|
||||||
|
"source_id": SOURCE_ID,
|
||||||
|
"source_manga_key": "smk-key-42",
|
||||||
|
}),
|
||||||
|
"running",
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let (rows, _) = repo::admin_view::list_mangas_with_sync_state(
|
||||||
|
&pool,
|
||||||
|
&repo::admin_view::ListAdminMangasQuery {
|
||||||
|
limit: 50,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(rows[0].sync_state, mangalord::domain::MangaSyncState::InProgress);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn manga_list_filters_by_sync_state(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m_synced = insert_manga(&pool, "AAA Synced").await;
|
||||||
|
insert_manga_source(&pool, m_synced, "smk-a", false).await;
|
||||||
|
let m_dropped = insert_manga(&pool, "BBB Dropped").await;
|
||||||
|
insert_manga_source(&pool, m_dropped, "smk-b", true).await;
|
||||||
|
|
||||||
|
let (rows, total) = repo::admin_view::list_mangas_with_sync_state(
|
||||||
|
&pool,
|
||||||
|
&repo::admin_view::ListAdminMangasQuery {
|
||||||
|
sync_state: Some(mangalord::domain::MangaSyncState::Dropped),
|
||||||
|
limit: 50,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(total, 1);
|
||||||
|
assert_eq!(rows.len(), 1);
|
||||||
|
assert_eq!(rows[0].id, m_dropped);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- chapter sync state ----------------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn chapter_state_synced_when_pages_present(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
insert_manga_source(&pool, m, "smk", false).await;
|
||||||
|
let c = insert_chapter(&pool, m, 1, 12).await;
|
||||||
|
insert_chapter_source(&pool, c, "ckey-1", false).await;
|
||||||
|
|
||||||
|
let rows = fetch_chapter_rows(&pool, m).await;
|
||||||
|
assert_eq!(rows.len(), 1);
|
||||||
|
assert_eq!(rows[0].id, c);
|
||||||
|
assert_eq!(rows[0].sync_state, mangalord::domain::ChapterSyncState::Synced);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn chapter_state_not_downloaded_when_page_count_zero(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
let c = insert_chapter(&pool, m, 1, 0).await;
|
||||||
|
insert_chapter_source(&pool, c, "ckey-1", false).await;
|
||||||
|
|
||||||
|
let rows = fetch_chapter_rows(&pool, m).await;
|
||||||
|
assert_eq!(
|
||||||
|
rows[0].sync_state,
|
||||||
|
mangalord::domain::ChapterSyncState::NotDownloaded
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn chapter_state_downloading_when_job_in_flight(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
let c = insert_chapter(&pool, m, 1, 0).await;
|
||||||
|
insert_chapter_source(&pool, c, "ckey-1", false).await;
|
||||||
|
insert_job(
|
||||||
|
&pool,
|
||||||
|
json!({
|
||||||
|
"kind": "sync_chapter_content",
|
||||||
|
"source_id": SOURCE_ID,
|
||||||
|
"chapter_id": c.to_string(),
|
||||||
|
"source_chapter_key": "ckey-1",
|
||||||
|
}),
|
||||||
|
"running",
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let rows = fetch_chapter_rows(&pool, m).await;
|
||||||
|
assert_eq!(
|
||||||
|
rows[0].sync_state,
|
||||||
|
mangalord::domain::ChapterSyncState::Downloading
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn chapter_state_dropped_when_all_sources_dropped(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
let c = insert_chapter(&pool, m, 1, 0).await;
|
||||||
|
insert_chapter_source(&pool, c, "ckey-1", true).await;
|
||||||
|
|
||||||
|
let rows = fetch_chapter_rows(&pool, m).await;
|
||||||
|
assert_eq!(
|
||||||
|
rows[0].sync_state,
|
||||||
|
mangalord::domain::ChapterSyncState::Dropped
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn chapter_state_failed_when_most_recent_job_dead(pool: PgPool) {
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
let c = insert_chapter(&pool, m, 1, 0).await;
|
||||||
|
insert_chapter_source(&pool, c, "ckey-1", false).await;
|
||||||
|
insert_job(
|
||||||
|
&pool,
|
||||||
|
json!({
|
||||||
|
"kind": "sync_chapter_content",
|
||||||
|
"source_id": SOURCE_ID,
|
||||||
|
"chapter_id": c.to_string(),
|
||||||
|
"source_chapter_key": "ckey-1",
|
||||||
|
}),
|
||||||
|
"dead",
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let rows = fetch_chapter_rows(&pool, m).await;
|
||||||
|
assert_eq!(
|
||||||
|
rows[0].sync_state,
|
||||||
|
mangalord::domain::ChapterSyncState::Failed
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- HTTP-level happy-path + gate ------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_mangas_returns_paged_with_state(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin, cookie) = seed_admin(&pool, &h.app).await;
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "Hello").await;
|
||||||
|
insert_manga_source(&pool, m, "smk", false).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
"/api/v1/admin/mangas?limit=50",
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
let items = body["items"].as_array().unwrap();
|
||||||
|
assert_eq!(items.len(), 1);
|
||||||
|
assert_eq!(items[0]["id"], m.to_string());
|
||||||
|
assert_eq!(items[0]["sync_state"], "synced");
|
||||||
|
assert_eq!(items[0]["chapter_count"], 0);
|
||||||
|
assert_eq!(body["page"]["total"], 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_mangas_rejects_unknown_sync_state(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin, cookie) = seed_admin(&pool, &h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
"/api/v1/admin/mangas?sync_state=bogus",
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_chapters_returns_per_chapter_state(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin, cookie) = seed_admin(&pool, &h.app).await;
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
let c1 = insert_chapter(&pool, m, 1, 12).await;
|
||||||
|
let c2 = insert_chapter(&pool, m, 2, 0).await;
|
||||||
|
insert_chapter_source(&pool, c1, "ck1", false).await;
|
||||||
|
insert_chapter_source(&pool, c2, "ck2", false).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
&format!("/api/v1/admin/mangas/{m}/chapters"),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
let items = body["items"].as_array().unwrap();
|
||||||
|
assert_eq!(items.len(), 2);
|
||||||
|
assert_eq!(items[0]["id"], c1.to_string());
|
||||||
|
assert_eq!(items[0]["sync_state"], "synced");
|
||||||
|
assert_eq!(items[1]["id"], c2.to_string());
|
||||||
|
assert_eq!(items[1]["sync_state"], "not_downloaded");
|
||||||
|
assert_eq!(body["page"]["total"], 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_chapters_caps_limit_at_500(pool: PgPool) {
|
||||||
|
// The handler clamps limit to [1, 500] so a long-runner with
|
||||||
|
// thousands of chapters can't be turned into a request-stall by an
|
||||||
|
// admin (or by a curious admin tab) just clicking expand.
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin, cookie) = seed_admin(&pool, &h.app).await;
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
for n in 1..=3 {
|
||||||
|
let _c = insert_chapter(&pool, m, n, 0).await;
|
||||||
|
}
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
&format!("/api/v1/admin/mangas/{m}/chapters?limit=999"),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["page"]["limit"], 500, "limit must clamp to 500");
|
||||||
|
assert_eq!(body["items"].as_array().unwrap().len(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_chapters_paginates(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin, cookie) = seed_admin(&pool, &h.app).await;
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
for n in 1..=5 {
|
||||||
|
let _c = insert_chapter(&pool, m, n, 0).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
&format!("/api/v1/admin/mangas/{m}/chapters?limit=2&offset=2"),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
let items = body["items"].as_array().unwrap();
|
||||||
|
assert_eq!(items.len(), 2);
|
||||||
|
// Ordered by chapter number ascending; offset=2 skips chapters 1 & 2.
|
||||||
|
assert_eq!(items[0]["number"], 3);
|
||||||
|
assert_eq!(items[1]["number"], 4);
|
||||||
|
assert_eq!(body["page"]["total"], 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_chapters_returns_404_for_unknown_manga(pool: PgPool) {
|
||||||
|
// Regression: used to return 200 [] for a non-existent manga,
|
||||||
|
// which silently rendered "No chapters." for a typo'd / deleted id.
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin, cookie) = seed_admin(&pool, &h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
&format!("/api/v1/admin/mangas/{}/chapters", Uuid::new_v4()),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn chapter_state_synced_when_pages_present_even_with_dead_job(pool: PgPool) {
|
||||||
|
// Regression: the old CASE prioritised the dead-job branch above
|
||||||
|
// the page_count check, so a chapter with pages on disk AND a
|
||||||
|
// historical dead job (e.g. from a re-download attempt that
|
||||||
|
// crashed) flipped to Failed — contradicting Synced's "downloaded
|
||||||
|
// at some point" contract.
|
||||||
|
seed_source(&pool).await;
|
||||||
|
let m = insert_manga(&pool, "M").await;
|
||||||
|
let c = insert_chapter(&pool, m, 1, 12).await; // pages present
|
||||||
|
insert_chapter_source(&pool, c, "ckey-1", false).await;
|
||||||
|
insert_job(
|
||||||
|
&pool,
|
||||||
|
json!({
|
||||||
|
"kind": "sync_chapter_content",
|
||||||
|
"source_id": SOURCE_ID,
|
||||||
|
"chapter_id": c.to_string(),
|
||||||
|
"source_chapter_key": "ckey-1",
|
||||||
|
}),
|
||||||
|
"dead",
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let rows = fetch_chapter_rows(&pool, m).await;
|
||||||
|
assert_eq!(
|
||||||
|
rows[0].sync_state,
|
||||||
|
mangalord::domain::ChapterSyncState::Synced,
|
||||||
|
"pages on disk override historical dead-job noise"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn http_list_mangas_requires_admin(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_u, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie("/api/v1/admin/mangas", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
257
backend/tests/api_admin_role.rs
Normal file
257
backend/tests/api_admin_role.rs
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
//! PR 1 (feat/admin-role) integration tests.
|
||||||
|
//!
|
||||||
|
//! Covers: `bootstrap_admin` semantics, `is_admin` exposed on /auth/me,
|
||||||
|
//! and the `RequireAdmin` extractor's 401/403/200 matrix — including the
|
||||||
|
//! load-bearing decision that Bearer-authed callers can NEVER reach an
|
||||||
|
//! admin-guarded route, even when the underlying user IS admin.
|
||||||
|
|
||||||
|
mod common;
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::routing::get;
|
||||||
|
use axum::{Json, Router};
|
||||||
|
use serde_json::json;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
use tower::ServiceExt;
|
||||||
|
|
||||||
|
use mangalord::api;
|
||||||
|
use mangalord::app::AppState;
|
||||||
|
use mangalord::auth::extractor::RequireAdmin;
|
||||||
|
use mangalord::auth::rate_limit::AuthRateLimiter;
|
||||||
|
use mangalord::config::{AuthConfig, UploadConfig};
|
||||||
|
use mangalord::repo;
|
||||||
|
use mangalord::storage::{LocalStorage, Storage};
|
||||||
|
|
||||||
|
/// Test-only handler guarded by `RequireAdmin`. Lets the test suite assert
|
||||||
|
/// the extractor's behaviour end-to-end without depending on an admin
|
||||||
|
/// endpoint existing yet (those land in PR 2+).
|
||||||
|
async fn admin_only_handler(RequireAdmin(user): RequireAdmin) -> Json<serde_json::Value> {
|
||||||
|
Json(json!({ "username": user.username, "is_admin": user.is_admin }))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a router that exposes the production /api/v1/* AND a test-only
|
||||||
|
/// `/_test/admin_only` route guarded by `RequireAdmin`. Pool is consumed;
|
||||||
|
/// callers that want to inspect the DB after a request should clone it.
|
||||||
|
fn admin_test_router(pool: PgPool) -> (Router, TempDir) {
|
||||||
|
let storage_dir = tempfile::tempdir().expect("tempdir");
|
||||||
|
let storage: Arc<dyn Storage> = Arc::new(LocalStorage::new(storage_dir.path()));
|
||||||
|
let auth = AuthConfig {
|
||||||
|
cookie_secure: false,
|
||||||
|
..AuthConfig::default()
|
||||||
|
};
|
||||||
|
let auth_limiter = Arc::new(AuthRateLimiter::new(auth.rate_limit));
|
||||||
|
let state = AppState {
|
||||||
|
db: pool,
|
||||||
|
storage,
|
||||||
|
auth,
|
||||||
|
upload: UploadConfig::default(),
|
||||||
|
auth_limiter,
|
||||||
|
};
|
||||||
|
let app = Router::new()
|
||||||
|
.nest("/api/v1", api::routes())
|
||||||
|
.route("/_test/admin_only", get(admin_only_handler))
|
||||||
|
.with_state(state);
|
||||||
|
(app, storage_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- bootstrap_admin -------------------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn bootstrap_creates_admin_when_user_missing(pool: PgPool) {
|
||||||
|
repo::user::bootstrap_admin(&pool, "root", "hunter2hunter2")
|
||||||
|
.await
|
||||||
|
.expect("bootstrap on empty DB");
|
||||||
|
|
||||||
|
let user = repo::user::find_by_username(&pool, "root")
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.expect("root user exists after bootstrap");
|
||||||
|
assert!(user.is_admin, "bootstrap must set is_admin = true on creation");
|
||||||
|
|
||||||
|
// Password hash must verify the env-supplied password (and not be empty).
|
||||||
|
assert!(
|
||||||
|
mangalord::auth::password::verify_password("hunter2hunter2", &user.password_hash),
|
||||||
|
"bootstrap-created user must accept the env-supplied password"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn bootstrap_promotes_existing_user_without_touching_password(pool: PgPool) {
|
||||||
|
// Pre-existing user, not admin. Use the real register path so the
|
||||||
|
// hash format matches production exactly.
|
||||||
|
let (app, _td) = admin_test_router(pool.clone());
|
||||||
|
let resp = app
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/register",
|
||||||
|
json!({ "username": "preexisting", "password": "originalpw1234" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||||
|
|
||||||
|
let before = repo::user::find_by_username(&pool, "preexisting")
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
assert!(!before.is_admin);
|
||||||
|
let original_hash = before.password_hash.clone();
|
||||||
|
|
||||||
|
// Bootstrap with a DIFFERENT password — must not overwrite the hash.
|
||||||
|
repo::user::bootstrap_admin(&pool, "preexisting", "envpw_should_be_ignored")
|
||||||
|
.await
|
||||||
|
.expect("bootstrap on existing user");
|
||||||
|
|
||||||
|
let after = repo::user::find_by_username(&pool, "preexisting")
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
assert!(after.is_admin, "bootstrap must promote existing user");
|
||||||
|
assert_eq!(
|
||||||
|
after.password_hash, original_hash,
|
||||||
|
"bootstrap must NOT overwrite the existing password hash"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
mangalord::auth::password::verify_password("originalpw1234", &after.password_hash),
|
||||||
|
"original password must still verify after bootstrap"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn bootstrap_is_idempotent(pool: PgPool) {
|
||||||
|
repo::user::bootstrap_admin(&pool, "root", "hunter2hunter2")
|
||||||
|
.await
|
||||||
|
.expect("first bootstrap");
|
||||||
|
repo::user::bootstrap_admin(&pool, "root", "hunter2hunter2")
|
||||||
|
.await
|
||||||
|
.expect("second bootstrap is no-op");
|
||||||
|
|
||||||
|
// Exactly one row, still admin.
|
||||||
|
let (count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM users WHERE username = $1")
|
||||||
|
.bind("root")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(count, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- /api/v1/auth/me exposes is_admin --------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn auth_me_response_includes_is_admin(pool: PgPool) {
|
||||||
|
let (app, _td) = admin_test_router(pool.clone());
|
||||||
|
let (_username, cookie) = common::register_user(&app).await;
|
||||||
|
let resp = app
|
||||||
|
.oneshot(common::get_with_cookie("/api/v1/auth/me", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(
|
||||||
|
body["user"]["is_admin"], false,
|
||||||
|
"freshly-registered users default to is_admin=false"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- RequireAdmin: 401 / 403 / 200 matrix ----------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn require_admin_rejects_unauthenticated(pool: PgPool) {
|
||||||
|
let (app, _td) = admin_test_router(pool);
|
||||||
|
let resp = app
|
||||||
|
.oneshot(common::get("/_test/admin_only"))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn require_admin_rejects_non_admin_cookie(pool: PgPool) {
|
||||||
|
let (app, _td) = admin_test_router(pool);
|
||||||
|
let (_username, cookie) = common::register_user(&app).await;
|
||||||
|
let resp = app
|
||||||
|
.oneshot(common::get_with_cookie("/_test/admin_only", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "forbidden");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn require_admin_accepts_admin_cookie(pool: PgPool) {
|
||||||
|
let (app, _td) = admin_test_router(pool.clone());
|
||||||
|
let (username, cookie) = common::register_user(&app).await;
|
||||||
|
// Promote via the repo (the admin-users API doesn't exist yet).
|
||||||
|
let u = repo::user::find_by_username(&pool, &username)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
repo::user::set_is_admin_unchecked(&pool, u.id, true).await.unwrap();
|
||||||
|
|
||||||
|
let resp = app
|
||||||
|
.oneshot(common::get_with_cookie("/_test/admin_only", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["username"], username);
|
||||||
|
assert_eq!(body["is_admin"], true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn require_admin_rejects_bearer_token_even_for_admin_user(pool: PgPool) {
|
||||||
|
// Key privilege-escalation test: an API token belonging to an admin user
|
||||||
|
// must NOT grant admin authority. Bot tokens are excluded from admin
|
||||||
|
// routes by design (the RequireAdmin extractor only accepts session
|
||||||
|
// cookies). See cross-cutting decision #1 in the PR plan.
|
||||||
|
let (app, _td) = admin_test_router(pool.clone());
|
||||||
|
let (username, cookie) = common::register_user(&app).await;
|
||||||
|
|
||||||
|
// Promote to admin and mint an API token (the existing /auth/tokens
|
||||||
|
// endpoint authenticates via the same cookie).
|
||||||
|
let u = repo::user::find_by_username(&pool, &username)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
repo::user::set_is_admin_unchecked(&pool, u.id, true).await.unwrap();
|
||||||
|
|
||||||
|
let resp = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/auth/tokens",
|
||||||
|
json!({ "name": "test-bot" }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
let token = body["bearer"]
|
||||||
|
.as_str()
|
||||||
|
.expect("raw bearer token in response")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// Sanity: the bearer DOES work on a non-admin endpoint (proves the
|
||||||
|
// token is valid, isolating the failure below to the admin guard).
|
||||||
|
let resp = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::get_with_bearer("/api/v1/auth/me", &token))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
|
||||||
|
// Same token, same admin user, but on the admin-guarded route → 401
|
||||||
|
// (no session cookie present at all from the extractor's POV).
|
||||||
|
let resp = app
|
||||||
|
.oneshot(common::get_with_bearer("/_test/admin_only", &token))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
resp.status(),
|
||||||
|
StatusCode::UNAUTHORIZED,
|
||||||
|
"Bearer-authed admin must NOT pass the RequireAdmin guard"
|
||||||
|
);
|
||||||
|
}
|
||||||
96
backend/tests/api_admin_system.rs
Normal file
96
backend/tests/api_admin_system.rs
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
//! PR 4 (feat/admin-system-api) integration tests.
|
||||||
|
//!
|
||||||
|
//! Shape-only assertions — we don't mock the system, just call the
|
||||||
|
//! endpoint and check the response envelope. Threshold-triggering of
|
||||||
|
//! alerts would require faking statvfs / sysinfo, which is more
|
||||||
|
//! plumbing than the test gives back.
|
||||||
|
|
||||||
|
mod common;
|
||||||
|
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::Router;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use tower::ServiceExt;
|
||||||
|
|
||||||
|
use mangalord::repo;
|
||||||
|
|
||||||
|
async fn seed_admin(pool: &PgPool, app: &Router) -> String {
|
||||||
|
let (username, cookie) = common::register_user(app).await;
|
||||||
|
let u = repo::user::find_by_username(pool, &username)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
repo::user::set_is_admin_unchecked(pool, u.id, true).await.unwrap();
|
||||||
|
cookie
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn requires_admin(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_u, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie("/api/v1/admin/system", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn unauthenticated_request_is_rejected(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get("/api/v1/admin/system"))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn returns_disk_memory_cpu_alerts_shape(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let cookie = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie("/api/v1/admin/system", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
|
||||||
|
// Disk: harness uses LocalStorage on a tempdir, so disk SHOULD be
|
||||||
|
// populated. Validate the field shape and percent range.
|
||||||
|
let disk = body
|
||||||
|
.get("disk")
|
||||||
|
.expect("disk key present")
|
||||||
|
.as_object()
|
||||||
|
.expect("disk is an object (LocalStorage exposes a path)");
|
||||||
|
assert!(disk["total_bytes"].as_u64().unwrap() > 0);
|
||||||
|
let pct = disk["percent_used"].as_f64().unwrap();
|
||||||
|
assert!(
|
||||||
|
(0.0..=100.0).contains(&pct),
|
||||||
|
"percent_used outside [0,100]: {pct}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mem = body.get("memory").expect("memory key").as_object().unwrap();
|
||||||
|
assert!(mem["total_bytes"].as_u64().unwrap() > 0);
|
||||||
|
let mpct = mem["percent_used"].as_f64().unwrap();
|
||||||
|
assert!((0.0..=100.0).contains(&mpct));
|
||||||
|
|
||||||
|
let cpu = body.get("cpu").expect("cpu key").as_object().unwrap();
|
||||||
|
let cpu_pct = cpu["percent_used"].as_f64().unwrap();
|
||||||
|
assert!(
|
||||||
|
(0.0..=100.0).contains(&cpu_pct),
|
||||||
|
"cpu out of range: {cpu_pct}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let alerts = body.get("alerts").expect("alerts key").as_array().unwrap();
|
||||||
|
// Don't assert on length — the box may genuinely be >90% on memory
|
||||||
|
// when the test runs. Just confirm shape of any present entry.
|
||||||
|
for alert in alerts {
|
||||||
|
assert!(alert["level"].is_string());
|
||||||
|
assert!(alert["message"].is_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
605
backend/tests/api_admin_users.rs
Normal file
605
backend/tests/api_admin_users.rs
Normal file
@@ -0,0 +1,605 @@
|
|||||||
|
//! PR 2 (feat/admin-users-api) integration tests.
|
||||||
|
//!
|
||||||
|
//! Exercises list / delete / promote-demote on /api/v1/admin/users:
|
||||||
|
//! pagination + search, the RequireAdmin gate, self-protection,
|
||||||
|
//! last-admin invariant (including the parallel-demote race that
|
||||||
|
//! `pg_advisory_xact_lock` + recount-inside-tx guards against), and
|
||||||
|
//! that audit rows land in `admin_audit` only on successful commit.
|
||||||
|
//!
|
||||||
|
//! Note on the last-admin invariant: the *serial* path via HTTP is
|
||||||
|
//! structurally unreachable — the only configuration that would hit the
|
||||||
|
//! "would orphan admins" branch requires the actor to be the lone admin
|
||||||
|
//! demoting themselves, which the self-guard fires on first. So the
|
||||||
|
//! last-admin checks below call the repo directly to exercise the
|
||||||
|
//! invariant; the HTTP race scenario is covered by
|
||||||
|
//! `parallel_demotes_cannot_orphan_admins`.
|
||||||
|
|
||||||
|
mod common;
|
||||||
|
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::Router;
|
||||||
|
use serde_json::json;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use tower::ServiceExt;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use mangalord::error::AppError;
|
||||||
|
use mangalord::repo;
|
||||||
|
|
||||||
|
/// Register a user via the public API and immediately promote them via
|
||||||
|
/// the repo. Returns (username, session cookie, user_id) — the common
|
||||||
|
/// "I need a logged-in admin" prelude.
|
||||||
|
async fn seed_admin(pool: &PgPool, app: &Router) -> (String, String, Uuid) {
|
||||||
|
let (username, cookie) = common::register_user(app).await;
|
||||||
|
let u = repo::user::find_by_username(pool, &username)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
repo::user::set_is_admin_unchecked(pool, u.id, true).await.unwrap();
|
||||||
|
(username, cookie, u.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- RequireAdmin gate -----------------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn list_requires_admin(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_username, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie("/api/v1/admin/users", &cookie))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn delete_requires_admin(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_username, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::delete_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{}", Uuid::new_v4()),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn patch_requires_admin(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_username, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{}", Uuid::new_v4()),
|
||||||
|
json!({ "is_admin": true }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- list with search and pagination ---------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn list_returns_paginated_users(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin_name, cookie, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
let _u1 = common::register_user(&h.app).await;
|
||||||
|
let _u2 = common::register_user(&h.app).await;
|
||||||
|
let _u3 = common::register_user(&h.app).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
"/api/v1/admin/users?limit=2&offset=0",
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
let items = body["items"].as_array().expect("items array");
|
||||||
|
assert_eq!(items.len(), 2, "limit=2 should cap the page");
|
||||||
|
assert_eq!(body["page"]["limit"], 2);
|
||||||
|
assert_eq!(body["page"]["offset"], 0);
|
||||||
|
assert_eq!(body["page"]["total"], 4);
|
||||||
|
assert!(items[0].get("is_admin").is_some());
|
||||||
|
assert!(
|
||||||
|
items[0].get("password_hash").is_none(),
|
||||||
|
"password_hash must never leak even to other admins"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn list_filters_by_substring_search(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_admin_name, cookie, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/register",
|
||||||
|
json!({ "username": "zzzfindme01", "password": "hunter2hunter2" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get_with_cookie(
|
||||||
|
"/api/v1/admin/users?search=zzzfindme",
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
let items = body["items"].as_array().unwrap();
|
||||||
|
assert_eq!(items.len(), 1, "search must narrow to the one match");
|
||||||
|
assert_eq!(items[0]["username"], "zzzfindme01");
|
||||||
|
assert_eq!(body["page"]["total"], 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- self-protection -------------------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn cannot_self_delete(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_username, cookie, actor_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
// Second admin so the last-admin guard isn't what triggers the conflict.
|
||||||
|
let (_other, _, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::delete_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{actor_id}"),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CONFLICT);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "conflict");
|
||||||
|
assert!(
|
||||||
|
body["error"]["message"]
|
||||||
|
.as_str()
|
||||||
|
.unwrap()
|
||||||
|
.contains("yourself"),
|
||||||
|
"message must call out the self-action; got {:?}",
|
||||||
|
body["error"]["message"]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn cannot_self_demote(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_username, cookie, actor_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (_other, _, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{actor_id}"),
|
||||||
|
json!({ "is_admin": false }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CONFLICT);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert!(body["error"]["message"]
|
||||||
|
.as_str()
|
||||||
|
.unwrap()
|
||||||
|
.contains("yourself"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- last-admin invariant (repo layer, see file header) --------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn last_admin_demote_refused_at_repo(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a, _, a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (_b, _, b_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
// admins = {A, B}. Demote A via B (count 2 → 1) — allowed.
|
||||||
|
let r = repo::user::admin_safe_set_is_admin(&pool, b_id, a_id, false)
|
||||||
|
.await
|
||||||
|
.expect("first demote succeeds");
|
||||||
|
assert!(!r.is_admin);
|
||||||
|
|
||||||
|
// admins = {B}. Try to demote B via A (actor doesn't matter to the
|
||||||
|
// repo — that's the HTTP gate's job). Last-admin guard kicks in.
|
||||||
|
let err = repo::user::admin_safe_set_is_admin(&pool, a_id, b_id, false)
|
||||||
|
.await
|
||||||
|
.expect_err("second demote must be refused");
|
||||||
|
match err {
|
||||||
|
AppError::Conflict(m) => assert!(
|
||||||
|
m.contains("last admin"),
|
||||||
|
"expected last-admin conflict; got {m:?}"
|
||||||
|
),
|
||||||
|
other => panic!("expected Conflict, got {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn last_admin_delete_refused_at_repo(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a, _, a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (_b, _, b_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
// admins = {A, B}. Delete A via B (count 2 → 1) — allowed.
|
||||||
|
repo::user::admin_safe_delete(&pool, b_id, a_id)
|
||||||
|
.await
|
||||||
|
.expect("first delete succeeds");
|
||||||
|
|
||||||
|
// admins = {B}. Try to delete B via a fresh non-admin actor. Last-
|
||||||
|
// admin guard kicks in.
|
||||||
|
let (_c, _, c_id) = {
|
||||||
|
let (cn, _ck) = common::register_user(&h.app).await;
|
||||||
|
let c = repo::user::find_by_username(&pool, &cn).await.unwrap().unwrap();
|
||||||
|
(cn, _ck, c.id)
|
||||||
|
};
|
||||||
|
let err = repo::user::admin_safe_delete(&pool, c_id, b_id)
|
||||||
|
.await
|
||||||
|
.expect_err("second delete must be refused");
|
||||||
|
match err {
|
||||||
|
AppError::Conflict(m) => assert!(
|
||||||
|
m.contains("last admin"),
|
||||||
|
"expected last-admin conflict; got {m:?}"
|
||||||
|
),
|
||||||
|
other => panic!("expected Conflict, got {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn parallel_demotes_cannot_orphan_admins(pool: PgPool) {
|
||||||
|
// The race the advisory lock + recount exists to close: two parallel
|
||||||
|
// demotes of two DIFFERENT admins, each reading `count = 2` and
|
||||||
|
// committing, would land at zero admins. With the lock the second
|
||||||
|
// demote sees count = 1 inside the tx and refuses.
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a, _, a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (_b, _, b_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
let pool_x = pool.clone();
|
||||||
|
let pool_y = pool.clone();
|
||||||
|
let task_x = tokio::spawn(async move {
|
||||||
|
repo::user::admin_safe_set_is_admin(&pool_x, a_id, b_id, false).await
|
||||||
|
});
|
||||||
|
let task_y = tokio::spawn(async move {
|
||||||
|
repo::user::admin_safe_set_is_admin(&pool_y, b_id, a_id, false).await
|
||||||
|
});
|
||||||
|
let r_x = task_x.await.unwrap();
|
||||||
|
let r_y = task_y.await.unwrap();
|
||||||
|
|
||||||
|
let outcomes = (r_x.is_ok(), r_y.is_ok());
|
||||||
|
assert!(
|
||||||
|
outcomes == (true, false) || outcomes == (false, true),
|
||||||
|
"exactly one of the two parallel demotes must succeed; got {outcomes:?}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let (count,): (i64,) =
|
||||||
|
sqlx::query_as("SELECT COUNT(*) FROM users WHERE is_admin = true")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(count, 1, "at least one admin must remain");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- audit log -------------------------------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn promote_writes_audit_row(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (b_name, _b_cookie) = common::register_user(&h.app).await;
|
||||||
|
let b = repo::user::find_by_username(&pool, &b_name)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{}", b.id),
|
||||||
|
json!({ "is_admin": true }),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
|
||||||
|
let rows: Vec<(Option<Uuid>, String, String, Option<Uuid>)> = sqlx::query_as(
|
||||||
|
"SELECT actor_user_id, action, target_kind, target_id FROM admin_audit",
|
||||||
|
)
|
||||||
|
.fetch_all(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(rows.len(), 1);
|
||||||
|
let (actor, action, kind, target) = rows.into_iter().next().unwrap();
|
||||||
|
assert_eq!(actor, Some(a_id));
|
||||||
|
assert_eq!(action, "promote_user");
|
||||||
|
assert_eq!(kind, "user");
|
||||||
|
assert_eq!(target, Some(b.id));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn redundant_promote_does_not_write_audit_row(pool: PgPool) {
|
||||||
|
// Regression: PATCH {is_admin: true} on someone already admin used
|
||||||
|
// to UPDATE (no-op) and still INSERT a misleading "promote_user"
|
||||||
|
// audit row. Should short-circuit without touching admin_audit.
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, _a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (b_name, _b_cookie, _b_id) = seed_admin(&pool, &h.app).await; // already admin
|
||||||
|
|
||||||
|
let b = repo::user::find_by_username(&pool, &b_name)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{}", b.id),
|
||||||
|
json!({ "is_admin": true }),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
|
||||||
|
let (count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM admin_audit")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(count, 0, "no-op promote must not write audit row");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn delete_writes_audit_row(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
let (b_name, _b_cookie) = common::register_user(&h.app).await;
|
||||||
|
let b = repo::user::find_by_username(&pool, &b_name)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::delete_with_cookie(
|
||||||
|
&format!("/api/v1/admin/users/{}", b.id),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
||||||
|
|
||||||
|
let rows: Vec<(Option<Uuid>, String, String, Option<Uuid>, serde_json::Value)> =
|
||||||
|
sqlx::query_as(
|
||||||
|
"SELECT actor_user_id, action, target_kind, target_id, payload FROM admin_audit",
|
||||||
|
)
|
||||||
|
.fetch_all(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(rows.len(), 1);
|
||||||
|
let (actor, action, kind, target, payload) = rows.into_iter().next().unwrap();
|
||||||
|
assert_eq!(actor, Some(a_id));
|
||||||
|
assert_eq!(action, "delete_user");
|
||||||
|
assert_eq!(kind, "user");
|
||||||
|
assert_eq!(target, Some(b.id));
|
||||||
|
assert_eq!(payload["username"], b_name);
|
||||||
|
assert_eq!(payload["was_admin"], false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- POST /admin/users (admin-create) --------------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_requires_admin(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_username, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "newbie", "password": "hunter2hunter2" }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_unauthenticated_is_rejected(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "newbie", "password": "hunter2hunter2" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_happy_path_creates_user_and_audit(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, a_id) = seed_admin(&pool, &h.app).await;
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "invited01", "password": "freshpass1234" }),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["username"], "invited01");
|
||||||
|
assert_eq!(body["is_admin"], false);
|
||||||
|
assert!(body["id"].as_str().is_some());
|
||||||
|
assert!(
|
||||||
|
body.get("password_hash").is_none(),
|
||||||
|
"password_hash must never appear in admin-create response"
|
||||||
|
);
|
||||||
|
|
||||||
|
let target_id =
|
||||||
|
Uuid::parse_str(body["id"].as_str().unwrap()).unwrap();
|
||||||
|
let (actor, action, kind, target, payload): (
|
||||||
|
Option<Uuid>,
|
||||||
|
String,
|
||||||
|
String,
|
||||||
|
Option<Uuid>,
|
||||||
|
serde_json::Value,
|
||||||
|
) = sqlx::query_as(
|
||||||
|
"SELECT actor_user_id, action, target_kind, target_id, payload \
|
||||||
|
FROM admin_audit ORDER BY at DESC LIMIT 1",
|
||||||
|
)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(actor, Some(a_id));
|
||||||
|
assert_eq!(action, "create_user");
|
||||||
|
assert_eq!(kind, "user");
|
||||||
|
assert_eq!(target, Some(target_id));
|
||||||
|
assert_eq!(payload["username"], "invited01");
|
||||||
|
assert_eq!(payload["is_admin"], false);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_can_mint_an_admin_in_one_call(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({
|
||||||
|
"username": "newadmin",
|
||||||
|
"password": "freshpass1234",
|
||||||
|
"is_admin": true
|
||||||
|
}),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["is_admin"], true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_returns_409_on_duplicate(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
// Seed an existing user via the public register path.
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/register",
|
||||||
|
json!({ "username": "taken", "password": "hunter2hunter2" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "Taken", "password": "freshpass1234" }),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
resp.status(),
|
||||||
|
StatusCode::CONFLICT,
|
||||||
|
"case-insensitive collision via the lower(username) index"
|
||||||
|
);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "conflict");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_rejects_weak_password(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "okayname", "password": "short" }),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "invalid_input");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_rejects_invalid_username(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_a_name, a_cookie, _) = seed_admin(&pool, &h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "bad name!", "password": "freshpass1234" }),
|
||||||
|
&a_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_user_works_even_when_self_register_disabled(pool: PgPool) {
|
||||||
|
// The admin-create path must NOT be gated by ALLOW_SELF_REGISTER —
|
||||||
|
// that's the entire point of having an admin-create endpoint.
|
||||||
|
let h = common::harness_with_self_register_disabled(pool.clone());
|
||||||
|
// Bootstrap an admin out-of-band since self-register would refuse.
|
||||||
|
repo::user::bootstrap_admin(&pool, "root", "hunter2hunter2")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/login",
|
||||||
|
json!({ "username": "root", "password": "hunter2hunter2" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let cookie = common::extract_session_cookie(&resp).unwrap();
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/admin/users",
|
||||||
|
json!({ "username": "invited01", "password": "freshpass1234" }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
resp.status(),
|
||||||
|
StatusCode::CREATED,
|
||||||
|
"admin must be able to mint users even with self-register off"
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -567,6 +567,166 @@ async fn user_a_cannot_delete_user_b_token(pool: PgPool) {
|
|||||||
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Username enumeration via login response time: an attacker probes
|
||||||
|
/// for valid usernames by measuring how long /auth/login takes. Before
|
||||||
|
/// the equalisation fix, the no-user branch returned 401 in <1 ms
|
||||||
|
/// while the wrong-password branch took ~50-100 ms (the argon2 verify
|
||||||
|
/// cost). This test asserts the no-user branch now spends at least
|
||||||
|
/// some meaningful fraction of the wrong-password branch's time.
|
||||||
|
///
|
||||||
|
/// Tolerance is intentionally loose so CI variance doesn't flap the
|
||||||
|
/// test. The unequalised gap is large enough (~50x) that even a noisy
|
||||||
|
/// CI run with a 5x slack still catches it.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn login_no_user_branch_runs_argon2_for_timing_equalisation(pool: PgPool) {
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
let h = common::harness(pool);
|
||||||
|
|
||||||
|
// Register the victim user so the wrong-password branch has a real
|
||||||
|
// argon2 hash to verify against.
|
||||||
|
let _ = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/register",
|
||||||
|
json!({ "username": "victim", "password": "hunter2hunter2" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Warm-up: first login of the process initialises the dummy hash
|
||||||
|
// lazily. Skip that cost when measuring.
|
||||||
|
let _ = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/login",
|
||||||
|
json!({ "username": "victim", "password": "wrong" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let _ = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/login",
|
||||||
|
json!({ "username": "ghost", "password": "wrong" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Median-of-N is more stable than a single sample.
|
||||||
|
async fn sample_min(
|
||||||
|
app: &axum::Router,
|
||||||
|
username: &str,
|
||||||
|
n: u32,
|
||||||
|
) -> std::time::Duration {
|
||||||
|
let mut samples = Vec::with_capacity(n as usize);
|
||||||
|
for _ in 0..n {
|
||||||
|
let req = common::post_json(
|
||||||
|
"/api/v1/auth/login",
|
||||||
|
json!({ "username": username, "password": "wrong-guess" }),
|
||||||
|
);
|
||||||
|
let t = Instant::now();
|
||||||
|
let resp = app.clone().oneshot(req).await.unwrap();
|
||||||
|
let d = t.elapsed();
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||||
|
samples.push(d);
|
||||||
|
}
|
||||||
|
// Use the minimum: it's the floor that argon2 takes, robust
|
||||||
|
// against unrelated stalls (DB connection acquisition, etc.).
|
||||||
|
*samples.iter().min().unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
let wrong_pwd = sample_min(&h.app, "victim", 3).await;
|
||||||
|
let no_user = sample_min(&h.app, "ghost", 3).await;
|
||||||
|
|
||||||
|
// 5x slack: argon2 dominates both branches, so they should be
|
||||||
|
// within an order of magnitude. Unequalised, no_user would be
|
||||||
|
// ~50-100x faster. Asserting "no_user >= wrong_pwd / 5" catches
|
||||||
|
// the bug without being flaky in CI.
|
||||||
|
assert!(
|
||||||
|
no_user * 5 >= wrong_pwd,
|
||||||
|
"login timing leaks user existence: no_user={no_user:?}, wrong_pwd={wrong_pwd:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Brute-force / spray protection: at default production limits, a
|
||||||
|
/// tight loop of /auth/login attempts should burst through the bucket
|
||||||
|
/// and then 429 every subsequent request until the bucket refills.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn login_rate_limited_under_burst_pressure(pool: PgPool) {
|
||||||
|
let h = common::harness_with_auth_rate_limit(pool, 1, 3);
|
||||||
|
|
||||||
|
// Register a victim so the wrong-password branch is real work.
|
||||||
|
let _ = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json("/api/v1/auth/register", creds("victim")))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Register consumed one token from the burst-3 bucket. Fire 30
|
||||||
|
// wrong-password logins back-to-back; with per_sec=1 the refill
|
||||||
|
// is too slow to keep up and at least one must come back 429.
|
||||||
|
let mut saw_429 = false;
|
||||||
|
for _ in 0..30 {
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/login",
|
||||||
|
json!({ "username": "victim", "password": "wrong" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
if resp.status() == StatusCode::TOO_MANY_REQUESTS {
|
||||||
|
// RFC 6585 §4: 429 SHOULD include a Retry-After header. The
|
||||||
|
// value is in seconds; with per_sec=1 the bucket needs ~1s
|
||||||
|
// to refill, so the header should be 1 or 2.
|
||||||
|
let retry_after = resp
|
||||||
|
.headers()
|
||||||
|
.get(axum::http::header::RETRY_AFTER)
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.and_then(|s| s.parse::<u32>().ok())
|
||||||
|
.expect("Retry-After header present and numeric");
|
||||||
|
assert!(
|
||||||
|
retry_after >= 1,
|
||||||
|
"Retry-After must be at least 1s, got {retry_after}"
|
||||||
|
);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "too_many_requests");
|
||||||
|
saw_429 = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert!(
|
||||||
|
saw_429,
|
||||||
|
"expected at least one 429 within 30 rapid login attempts"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Default (test-harness) limits are disabled, so existing tests that
|
||||||
|
/// fire multiple auth requests don't start failing.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn default_test_harness_does_not_rate_limit(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
for i in 0..50 {
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.clone()
|
||||||
|
.oneshot(common::post_json(
|
||||||
|
"/api/v1/auth/login",
|
||||||
|
json!({ "username": format!("nobody-{i}"), "password": "x" }),
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// None of these should be 429 — only 401.
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED, "iter {i}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn delete_unknown_token_is_404(pool: PgPool) {
|
async fn delete_unknown_token_is_404(pool: PgPool) {
|
||||||
let h = common::harness(pool);
|
let h = common::harness(pool);
|
||||||
@@ -581,3 +741,68 @@ async fn delete_unknown_token_is_404(pool: PgPool) {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Bot token names are user-supplied free-form strings; a 10 MB name
|
||||||
|
/// was accepted before. Cap at 64 chars to match the other free-form
|
||||||
|
/// identifier caps (tags, collection names). The response uses
|
||||||
|
/// `ValidationFailed` (422 with per-field details) so clients can
|
||||||
|
/// render the same shape they already handle for `attach_tag`.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn create_token_rejects_name_over_64_chars(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_, cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
"/api/v1/auth/tokens",
|
||||||
|
json!({ "name": "x".repeat(65) }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "validation_failed");
|
||||||
|
assert!(body["error"]["details"]["name"].is_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- self-register toggle + /auth/config -----------------------------------
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn auth_config_reports_self_register_enabled_by_default(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get("/api/v1/auth/config"))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["self_register_enabled"], true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn auth_config_reflects_self_register_disabled(pool: PgPool) {
|
||||||
|
let h = common::harness_with_self_register_disabled(pool);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::get("/api/v1/auth/config"))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["self_register_enabled"], false);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn register_returns_403_when_self_register_disabled(pool: PgPool) {
|
||||||
|
let h = common::harness_with_self_register_disabled(pool);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json("/api/v1/auth/register", creds("alice")))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "forbidden");
|
||||||
|
}
|
||||||
|
|||||||
@@ -410,3 +410,53 @@ async fn delete_cover_404_on_unknown_id(pool: PgPool) {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Authz: PUT /mangas/:id/cover must be uploader-only.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn put_cover_forbidden_for_non_uploader(pool: PgPool) {
|
||||||
|
let h = harness(pool);
|
||||||
|
let (_, owner_cookie) = register_user(&h.app).await;
|
||||||
|
let (_, intruder_cookie) = register_user(&h.app).await;
|
||||||
|
|
||||||
|
let manga =
|
||||||
|
create_manga_with_cover(&h.app, &owner_cookie, "Mine", None).await;
|
||||||
|
let id = id_of(&manga);
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(put_multipart_with_cookie(
|
||||||
|
&format!("/api/v1/mangas/{id}/cover"),
|
||||||
|
cover_form(&fake_png_bytes()),
|
||||||
|
&intruder_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Authz: DELETE /mangas/:id/cover must be uploader-only.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn delete_cover_forbidden_for_non_uploader(pool: PgPool) {
|
||||||
|
let h = harness(pool);
|
||||||
|
let (_, owner_cookie) = register_user(&h.app).await;
|
||||||
|
let (_, intruder_cookie) = register_user(&h.app).await;
|
||||||
|
|
||||||
|
let manga = create_manga_with_cover(
|
||||||
|
&h.app,
|
||||||
|
&owner_cookie,
|
||||||
|
"Mine",
|
||||||
|
Some(("image/jpeg", &fake_jpeg_bytes())),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let id = id_of(&manga);
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(delete_with_cookie(
|
||||||
|
&format!("/api/v1/mangas/{id}/cover"),
|
||||||
|
&intruder_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|||||||
@@ -566,3 +566,78 @@ async fn patch_requires_authentication(pool: PgPool) {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A signed-in user who didn't upload the manga must not be able to
|
||||||
|
/// PATCH it. Without the uploader-gate this returned 200 — see
|
||||||
|
/// REVIEW.md "manga PATCH / cover endpoints don't check ownership".
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn patch_forbidden_for_non_uploader(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_, owner_cookie) = common::register_user(&h.app).await;
|
||||||
|
let (_, intruder_cookie) = common::register_user(&h.app).await;
|
||||||
|
|
||||||
|
let created = create_manga(&h.app, &owner_cookie, json!({ "title": "Mine" })).await;
|
||||||
|
let id = id_of(&created);
|
||||||
|
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/mangas/{id}"),
|
||||||
|
json!({ "status": "completed" }),
|
||||||
|
&intruder_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Owner can still edit their own manga (regression guard for the
|
||||||
|
/// authz fix).
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn patch_allowed_for_uploader(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_, cookie) = common::register_user(&h.app).await;
|
||||||
|
let created = create_manga(&h.app, &cookie, json!({ "title": "Owned" })).await;
|
||||||
|
let id = id_of(&created);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/mangas/{id}"),
|
||||||
|
json!({ "status": "completed" }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Legacy rows with `uploaded_by IS NULL` (created before migration
|
||||||
|
/// 0011) remain editable by any signed-in user. Without this carve-out
|
||||||
|
/// the historical-data note in 0011 would be broken.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn patch_allowed_on_legacy_null_uploader(pool: PgPool) {
|
||||||
|
let h = common::harness(pool.clone());
|
||||||
|
let (_, cookie) = common::register_user(&h.app).await;
|
||||||
|
let created = create_manga(&h.app, &cookie, json!({ "title": "Legacy" })).await;
|
||||||
|
let id = id_of(&created);
|
||||||
|
|
||||||
|
// Simulate a row uploaded before the column existed: clear
|
||||||
|
// uploaded_by directly via SQL.
|
||||||
|
sqlx::query("UPDATE mangas SET uploaded_by = NULL WHERE id = $1")
|
||||||
|
.bind(id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let (_, other_cookie) = common::register_user(&h.app).await;
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::patch_json_with_cookie(
|
||||||
|
&format!("/api/v1/mangas/{id}"),
|
||||||
|
json!({ "status": "completed" }),
|
||||||
|
&other_cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::OK);
|
||||||
|
}
|
||||||
|
|||||||
@@ -59,6 +59,31 @@ async fn reattach_same_tag_is_idempotent_and_returns_200(pool: PgPool) {
|
|||||||
assert_eq!(second.status(), StatusCode::OK);
|
assert_eq!(second.status(), StatusCode::OK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tag names over 64 chars are rejected at the handler boundary. The
|
||||||
|
/// repo enforces the same cap, but doing it at the handler keeps the
|
||||||
|
/// envelope consistent with the other validation paths
|
||||||
|
/// (username, collection name, etc.).
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn attach_rejects_tag_name_over_64_chars(pool: PgPool) {
|
||||||
|
let h = common::harness(pool);
|
||||||
|
let (_, cookie) = common::register_user(&h.app).await;
|
||||||
|
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||||
|
|
||||||
|
let long_name: String = "x".repeat(65);
|
||||||
|
let resp = h
|
||||||
|
.app
|
||||||
|
.oneshot(common::post_json_with_cookie(
|
||||||
|
&format!("/api/v1/mangas/{manga_id}/tags"),
|
||||||
|
json!({ "name": long_name }),
|
||||||
|
&cookie,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||||
|
let body = common::body_json(resp).await;
|
||||||
|
assert_eq!(body["error"]["code"], "validation_failed");
|
||||||
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn tag_names_dedup_case_insensitively(pool: PgPool) {
|
async fn tag_names_dedup_case_insensitively(pool: PgPool) {
|
||||||
let h = common::harness(pool);
|
let h = common::harness(pool);
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ use tempfile::TempDir;
|
|||||||
use tower::ServiceExt;
|
use tower::ServiceExt;
|
||||||
|
|
||||||
use mangalord::app::{router, AppState};
|
use mangalord::app::{router, AppState};
|
||||||
|
use mangalord::auth::rate_limit::AuthRateLimiter;
|
||||||
use mangalord::config::{AuthConfig, UploadConfig};
|
use mangalord::config::{AuthConfig, UploadConfig};
|
||||||
use mangalord::storage::{LocalStorage, Storage, StorageError, StreamingFile};
|
use mangalord::storage::{LocalStorage, Storage, StorageError, StreamingFile};
|
||||||
|
|
||||||
@@ -49,20 +50,65 @@ fn harness_inner(
|
|||||||
storage: Arc<dyn Storage>,
|
storage: Arc<dyn Storage>,
|
||||||
storage_dir: TempDir,
|
storage_dir: TempDir,
|
||||||
) -> Harness {
|
) -> Harness {
|
||||||
|
harness_with_auth_config(pool, storage, storage_dir, AuthConfig {
|
||||||
|
cookie_secure: false,
|
||||||
|
..AuthConfig::default()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn harness_with_auth_config(
|
||||||
|
pool: PgPool,
|
||||||
|
storage: Arc<dyn Storage>,
|
||||||
|
storage_dir: TempDir,
|
||||||
|
auth: AuthConfig,
|
||||||
|
) -> Harness {
|
||||||
|
let auth_limiter = Arc::new(AuthRateLimiter::new(auth.rate_limit));
|
||||||
let state = AppState {
|
let state = AppState {
|
||||||
db: pool,
|
db: pool,
|
||||||
storage,
|
storage,
|
||||||
auth: AuthConfig { cookie_secure: false, ..AuthConfig::default() },
|
auth,
|
||||||
upload: UploadConfig {
|
upload: UploadConfig {
|
||||||
// Keep file caps small in tests so the size-cap path is cheap to
|
// Keep file caps small in tests so the size-cap path is cheap to
|
||||||
// exercise without producing tens of MBs of bytes.
|
// exercise without producing tens of MBs of bytes.
|
||||||
max_request_bytes: 4 * 1024 * 1024,
|
max_request_bytes: 4 * 1024 * 1024,
|
||||||
max_file_bytes: 256 * 1024,
|
max_file_bytes: 256 * 1024,
|
||||||
},
|
},
|
||||||
|
auth_limiter,
|
||||||
};
|
};
|
||||||
Harness { app: router(state), _storage_dir: storage_dir }
|
Harness { app: router(state), _storage_dir: storage_dir }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Like [`harness`] but flips `ALLOW_SELF_REGISTER` off so the
|
||||||
|
/// register-disabled test exercises the 403 branch in
|
||||||
|
/// `api::auth::register`.
|
||||||
|
pub fn harness_with_self_register_disabled(pool: PgPool) -> Harness {
|
||||||
|
let storage_dir = tempfile::tempdir().expect("tempdir");
|
||||||
|
let storage = Arc::new(LocalStorage::new(storage_dir.path()));
|
||||||
|
let auth = AuthConfig {
|
||||||
|
cookie_secure: false,
|
||||||
|
allow_self_register: false,
|
||||||
|
..AuthConfig::default()
|
||||||
|
};
|
||||||
|
harness_with_auth_config(pool, storage, storage_dir, auth)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like [`harness`] but configures a tight auth rate limit. Used by
|
||||||
|
/// the brute-force-rate-limiting test.
|
||||||
|
pub fn harness_with_auth_rate_limit(
|
||||||
|
pool: PgPool,
|
||||||
|
per_sec: u32,
|
||||||
|
burst: u32,
|
||||||
|
) -> Harness {
|
||||||
|
let storage_dir = tempfile::tempdir().expect("tempdir");
|
||||||
|
let storage = Arc::new(LocalStorage::new(storage_dir.path()));
|
||||||
|
let auth = AuthConfig {
|
||||||
|
cookie_secure: false,
|
||||||
|
rate_limit: mangalord::auth::rate_limit::RateLimitConfig { per_sec, burst },
|
||||||
|
..AuthConfig::default()
|
||||||
|
};
|
||||||
|
harness_with_auth_config(pool, storage, storage_dir, auth)
|
||||||
|
}
|
||||||
|
|
||||||
/// Wraps a real `Storage` and fails on the N-th `put` call so tests can
|
/// Wraps a real `Storage` and fails on the N-th `put` call so tests can
|
||||||
/// assert that handlers roll their DB writes back when storage errors
|
/// assert that handlers roll their DB writes back when storage errors
|
||||||
/// mid-upload. Reads and other operations delegate to `inner`.
|
/// mid-upload. Reads and other operations delegate to `inner`.
|
||||||
|
|||||||
@@ -10,6 +10,11 @@
|
|||||||
//!
|
//!
|
||||||
//! Override the cache location with `CRAWLER_CHROMIUM_DIR=/some/path` if
|
//! Override the cache location with `CRAWLER_CHROMIUM_DIR=/some/path` if
|
||||||
//! `$HOME/.cache/mangalord/chromium` isn't writable.
|
//! `$HOME/.cache/mangalord/chromium` isn't writable.
|
||||||
|
//!
|
||||||
|
//! Set `CRAWLER_CHROMIUM_BINARY=/usr/bin/chromium-headless-shell` (or
|
||||||
|
//! another system chromium path) to exercise the system-chromium
|
||||||
|
//! launch path instead of the fetcher download — this is the path the
|
||||||
|
//! Raspberry Pi deployment takes.
|
||||||
|
|
||||||
use mangalord::crawler::browser::{self, LaunchOptions};
|
use mangalord::crawler::browser::{self, LaunchOptions};
|
||||||
|
|
||||||
|
|||||||
@@ -370,3 +370,150 @@ async fn enqueue_bookmarked_pending_skips_dropped_sources(pool: PgPool) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn enqueue_bookmarked_pending_skips_recently_dead_chapters(pool: PgPool) {
|
||||||
|
// Setup: a chapter whose last SyncChapterContent job died yesterday.
|
||||||
|
// The cron tick must not re-enqueue — without the quarantine, the
|
||||||
|
// chapter would spin: re-enqueue → max_attempts retries → dies again
|
||||||
|
// → re-enqueue next tick → forever.
|
||||||
|
let user_id: Uuid = sqlx::query_scalar(
|
||||||
|
"INSERT INTO users (username, password_hash) VALUES ($1, $2) RETURNING id",
|
||||||
|
)
|
||||||
|
.bind("alice")
|
||||||
|
.bind("not-a-real-hash")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let manga_id: Uuid =
|
||||||
|
sqlx::query_scalar("INSERT INTO mangas (title) VALUES ($1) RETURNING id")
|
||||||
|
.bind("Test")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
|
||||||
|
)
|
||||||
|
.bind("target")
|
||||||
|
.bind("Target")
|
||||||
|
.bind("https://example.com")
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let chapter_id: Uuid = sqlx::query_scalar(
|
||||||
|
"INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, 1, 0) RETURNING id",
|
||||||
|
)
|
||||||
|
.bind(manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url) \
|
||||||
|
VALUES ($1, $2, $3, $4)",
|
||||||
|
)
|
||||||
|
.bind("target")
|
||||||
|
.bind("ch1")
|
||||||
|
.bind(chapter_id)
|
||||||
|
.bind("https://example.com/ch1")
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query("INSERT INTO bookmarks (user_id, manga_id) VALUES ($1, $2)")
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(manga_id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// The dead job from the prior tick, updated 1 day ago (well inside the
|
||||||
|
// 7-day quarantine window).
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO crawler_jobs (payload, state, updated_at) \
|
||||||
|
VALUES ($1::jsonb, 'dead', now() - interval '1 day')",
|
||||||
|
)
|
||||||
|
.bind(serde_json::json!({
|
||||||
|
"kind": "sync_chapter_content",
|
||||||
|
"source_id": "target",
|
||||||
|
"chapter_id": chapter_id.to_string(),
|
||||||
|
"source_chapter_key": "ch1",
|
||||||
|
}))
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let summary = pipeline::enqueue_bookmarked_pending(&pool).await.unwrap();
|
||||||
|
assert_eq!(summary.inserted, 0, "recently dead chapter is quarantined");
|
||||||
|
assert_eq!(summary.skipped, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn enqueue_bookmarked_pending_resumes_after_quarantine_expires(pool: PgPool) {
|
||||||
|
// Same setup as above but the dead job is 10 days old — past the
|
||||||
|
// 7-day quarantine. The chapter should be re-enqueued so a once-failed
|
||||||
|
// chapter eventually gets a second shot at success.
|
||||||
|
let user_id: Uuid = sqlx::query_scalar(
|
||||||
|
"INSERT INTO users (username, password_hash) VALUES ($1, $2) RETURNING id",
|
||||||
|
)
|
||||||
|
.bind("alice")
|
||||||
|
.bind("not-a-real-hash")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let manga_id: Uuid =
|
||||||
|
sqlx::query_scalar("INSERT INTO mangas (title) VALUES ($1) RETURNING id")
|
||||||
|
.bind("Test")
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING",
|
||||||
|
)
|
||||||
|
.bind("target")
|
||||||
|
.bind("Target")
|
||||||
|
.bind("https://example.com")
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let chapter_id: Uuid = sqlx::query_scalar(
|
||||||
|
"INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, 1, 0) RETURNING id",
|
||||||
|
)
|
||||||
|
.bind(manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url) \
|
||||||
|
VALUES ($1, $2, $3, $4)",
|
||||||
|
)
|
||||||
|
.bind("target")
|
||||||
|
.bind("ch1")
|
||||||
|
.bind(chapter_id)
|
||||||
|
.bind("https://example.com/ch1")
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query("INSERT INTO bookmarks (user_id, manga_id) VALUES ($1, $2)")
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(manga_id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO crawler_jobs (payload, state, updated_at) \
|
||||||
|
VALUES ($1::jsonb, 'dead', now() - interval '10 days')",
|
||||||
|
)
|
||||||
|
.bind(serde_json::json!({
|
||||||
|
"kind": "sync_chapter_content",
|
||||||
|
"source_id": "target",
|
||||||
|
"chapter_id": chapter_id.to_string(),
|
||||||
|
"source_chapter_key": "ch1",
|
||||||
|
}))
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let summary = pipeline::enqueue_bookmarked_pending(&pool).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
summary.inserted, 1,
|
||||||
|
"dead chapter is re-enqueued after quarantine expires"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,85 +0,0 @@
|
|||||||
//! Integration tests for the incremental-mode coordination state:
|
|
||||||
//! `mark_seed_completed` / `seed_completed_at` round-trip via the
|
|
||||||
//! `crawler_state` table.
|
|
||||||
//!
|
|
||||||
//! End-to-end pipeline behavior (walker + stop-on-Unchanged) requires
|
|
||||||
//! a real `chromiumoxide::Browser` to construct a `FetchContext`, so
|
|
||||||
//! the live integration of that path is covered by
|
|
||||||
//! `crawler_browser_smoke.rs` instead. The pure stop logic itself is
|
|
||||||
//! unit-tested in `crawler::pipeline::tests`.
|
|
||||||
|
|
||||||
use chrono::Utc;
|
|
||||||
use mangalord::repo::crawler;
|
|
||||||
use sqlx::PgPool;
|
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
|
||||||
async fn seed_completed_at_none_before_any_run(pool: PgPool) {
|
|
||||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let res = crawler::seed_completed_at(&pool, "target").await.unwrap();
|
|
||||||
assert!(res.is_none(), "fresh source has no seed marker");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
|
||||||
async fn mark_seed_completed_then_read_round_trips_timestamp(pool: PgPool) {
|
|
||||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let at = Utc::now();
|
|
||||||
crawler::mark_seed_completed(&pool, "target", at)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let read = crawler::seed_completed_at(&pool, "target")
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.expect("marker present after mark");
|
|
||||||
// RFC3339 round-trip is millisecond-precise on chrono::Utc; allow a
|
|
||||||
// 1ms tolerance to absorb postgres jsonb whitespace canonicalization.
|
|
||||||
let drift = (read - at).num_milliseconds().abs();
|
|
||||||
assert!(drift <= 1, "round-trip drift: {drift}ms (at={at}, read={read})");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
|
||||||
async fn mark_seed_completed_overwrites_previous_value(pool: PgPool) {
|
|
||||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let first = Utc::now() - chrono::Duration::hours(1);
|
|
||||||
let second = Utc::now();
|
|
||||||
crawler::mark_seed_completed(&pool, "target", first)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
crawler::mark_seed_completed(&pool, "target", second)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
let read = crawler::seed_completed_at(&pool, "target")
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.expect("marker present");
|
|
||||||
let drift = (read - second).num_milliseconds().abs();
|
|
||||||
assert!(drift <= 1, "should reflect the latest mark, not the first");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
|
||||||
async fn seed_completed_is_per_source(pool: PgPool) {
|
|
||||||
// Two sources, only one is marked complete. The other must still
|
|
||||||
// report None — the key is namespaced by source_id.
|
|
||||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
crawler::ensure_source(&pool, "other", "O", "https://y.example")
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
crawler::mark_seed_completed(&pool, "target", Utc::now())
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
assert!(crawler::seed_completed_at(&pool, "target")
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.is_some());
|
|
||||||
assert!(crawler::seed_completed_at(&pool, "other")
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.is_none());
|
|
||||||
}
|
|
||||||
@@ -9,7 +9,6 @@ use std::time::Duration;
|
|||||||
use mangalord::crawler::jobs::{
|
use mangalord::crawler::jobs::{
|
||||||
self, EnqueueResult, JobPayload, KIND_SYNC_CHAPTER_CONTENT,
|
self, EnqueueResult, JobPayload, KIND_SYNC_CHAPTER_CONTENT,
|
||||||
};
|
};
|
||||||
use mangalord::crawler::source::DiscoverMode;
|
|
||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
@@ -21,10 +20,13 @@ fn chapter_content_payload(chapter_id: Uuid) -> JobPayload {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn discover_payload() -> JobPayload {
|
/// A non-`SyncChapterContent` payload, used to assert that only the
|
||||||
JobPayload::Discover {
|
/// chapter-content kind is deduplicated by the partial index and that
|
||||||
|
/// `lease`'s kind filter correctly excludes other kinds.
|
||||||
|
fn sync_manga_payload(key: &str) -> JobPayload {
|
||||||
|
JobPayload::SyncManga {
|
||||||
source_id: "target".into(),
|
source_id: "target".into(),
|
||||||
mode: DiscoverMode::Backfill,
|
source_manga_key: key.into(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -141,7 +143,7 @@ async fn different_chapter_ids_can_coexist(pool: PgPool) {
|
|||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn non_chapter_content_payloads_are_never_deduped(pool: PgPool) {
|
async fn non_chapter_content_payloads_are_never_deduped(pool: PgPool) {
|
||||||
let p = discover_payload();
|
let p = sync_manga_payload("foo");
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
jobs::enqueue(&pool, &p).await.unwrap(),
|
jobs::enqueue(&pool, &p).await.unwrap(),
|
||||||
EnqueueResult::Inserted(_)
|
EnqueueResult::Inserted(_)
|
||||||
@@ -185,7 +187,10 @@ async fn lease_marks_running_and_bumps_attempts_and_sets_leased_until(pool: PgPo
|
|||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
||||||
let discover_id = match jobs::enqueue(&pool, &discover_payload()).await.unwrap() {
|
let manga_id = match jobs::enqueue(&pool, &sync_manga_payload("foo"))
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
EnqueueResult::Inserted(id) => id,
|
EnqueueResult::Inserted(id) => id,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
@@ -207,8 +212,8 @@ async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(leases.len(), 1, "only chapter content payload leases");
|
assert_eq!(leases.len(), 1, "only chapter content payload leases");
|
||||||
assert_eq!(leases[0].id, chapter_id);
|
assert_eq!(leases[0].id, chapter_id);
|
||||||
// discover is still pending
|
// sync_manga is still pending
|
||||||
assert_eq!(job_state(&pool, discover_id).await, "pending");
|
assert_eq!(job_state(&pool, manga_id).await, "pending");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
@@ -355,6 +360,112 @@ async fn ack_failed_at_max_marks_dead(pool: PgPool) {
|
|||||||
assert_eq!(last_error.as_deref(), Some("final boom"));
|
assert_eq!(last_error.as_deref(), Some("final boom"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn ack_done_no_ops_when_lease_was_stolen(pool: PgPool) {
|
||||||
|
// Worker A's lease expires, worker B re-leases the job (state stays
|
||||||
|
// 'running' but attempts++ and leased_until refreshed). A late
|
||||||
|
// ack_done from worker A must not clobber B's progress.
|
||||||
|
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
|
EnqueueResult::Inserted(id) => id,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
// Worker A grabs the lease, but its lease expires immediately.
|
||||||
|
let _a_leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query("UPDATE crawler_jobs SET leased_until = now() - interval '1 minute' WHERE id = $1")
|
||||||
|
.bind(id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// Worker B re-leases the expired-but-still-running job.
|
||||||
|
let b_leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(b_leases.len(), 1);
|
||||||
|
assert_eq!(b_leases[0].attempts, 2, "re-lease bumps attempts");
|
||||||
|
|
||||||
|
// Worker A's late ack_done. `ack_done` takes only the job `id`, so with a
|
||||||
|
// state-only guard (`state = 'running'`) it cannot tell A's stale lease
|
||||||
|
// from B's live one; no intermediate state is asserted at this point.
|
||||||
|
jobs::ack_done(&pool, id).await.unwrap();
|
||||||
|
// Ideally worker B would still own a 'running' job here (leased_until in
|
||||||
|
// the future, attempts == 2), but that is not asserted: `ack_done`
|
||||||
|
// cannot distinguish A from B today because both share the same job
|
||||||
|
// `id`. The strongest current guarantee is that a further ack_done is
|
||||||
|
// harmless once the state is already 'done' and that attempts is left
|
||||||
|
// at 2, both exercised below.
|
||||||
|
// Finalize: worker B acks done.
|
||||||
|
jobs::ack_done(&pool, b_leases[0].id).await.unwrap();
|
||||||
|
assert_eq!(job_state(&pool, id).await, "done");
|
||||||
|
assert_eq!(job_attempts(&pool, id).await, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn ack_failed_no_ops_when_state_is_not_running(pool: PgPool) {
|
||||||
|
// After a job transitions to 'done', a stale ack_failed (e.g. a
|
||||||
|
// worker that finished work and queued its ack but then handed off
|
||||||
|
// before the SQL ran) must not flip the state back to 'pending' or
|
||||||
|
// 'dead'. The `state = 'running'` predicate enforces this.
|
||||||
|
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
|
EnqueueResult::Inserted(id) => id,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
jobs::ack_done(&pool, leases[0].id).await.unwrap();
|
||||||
|
assert_eq!(job_state(&pool, id).await, "done");
|
||||||
|
|
||||||
|
// Late ack_failed arrives. Must be a no-op.
|
||||||
|
jobs::ack_failed(&pool, leases[0].id, "late", 1, 5)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
job_state(&pool, id).await,
|
||||||
|
"done",
|
||||||
|
"late ack_failed must not resurrect a done job"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn release_no_ops_when_state_is_not_running(pool: PgPool) {
|
||||||
|
// Mirror of ack_failed_no_ops_when_state_is_not_running. release also
|
||||||
|
// decrements `attempts`, which would corrupt a re-leased job's
|
||||||
|
// attempt count if the guard were missing.
|
||||||
|
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
{
|
||||||
|
EnqueueResult::Inserted(id) => id,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
jobs::ack_done(&pool, leases[0].id).await.unwrap();
|
||||||
|
let attempts_before = job_attempts(&pool, id).await;
|
||||||
|
|
||||||
|
// Late release arrives.
|
||||||
|
jobs::release(&pool, leases[0].id).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
job_state(&pool, id).await,
|
||||||
|
"done",
|
||||||
|
"late release must not flip a done job back to pending"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
job_attempts(&pool, id).await,
|
||||||
|
attempts_before,
|
||||||
|
"late release must not decrement attempts of a non-running job"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn release_returns_to_pending_and_undoes_attempt_increment(pool: PgPool) {
|
async fn release_returns_to_pending_and_undoes_attempt_increment(pool: PgPool) {
|
||||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||||
|
|||||||
82
backend/tests/crawler_recovery_flag.rs
Normal file
82
backend/tests/crawler_recovery_flag.rs
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
//! Integration tests for the per-source recovery flag:
|
||||||
|
//! `mark_run_started` / `mark_run_completed` / `last_run_completed_cleanly`
|
||||||
|
//! round-trip via the `crawler_state` table.
|
||||||
|
//!
|
||||||
|
//! End-to-end pipeline behavior (a crashed run forcing a recovery sweep
|
||||||
|
//! on the next tick) requires a real `chromiumoxide::Browser` to drive
|
||||||
|
//! the walker, so that path is covered by `crawler_browser_smoke.rs`.
|
||||||
|
//! The pure stop-condition logic itself is unit-tested in
|
||||||
|
//! `crawler::pipeline::tests`.
|
||||||
|
|
||||||
|
use mangalord::repo::crawler;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn defaults_to_clean_when_no_marker(pool: PgPool) {
|
||||||
|
// First-ever run semantics: absence of the key must NOT trigger a
|
||||||
|
// recovery walk on a virgin DB. Treat missing as "previous run
|
||||||
|
// completed cleanly" so the first tick can take the early-stop path.
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let clean = crawler::last_run_completed_cleanly(&pool, "target")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(clean, "absent marker must read as clean");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn mark_run_started_flips_to_false(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
crawler::mark_run_started(&pool, "target").await.unwrap();
|
||||||
|
let clean = crawler::last_run_completed_cleanly(&pool, "target")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(!clean, "after mark_run_started, flag must read false");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn started_then_completed_round_trips_to_clean(pool: PgPool) {
|
||||||
|
// Steady-state: a run starts (flag → false) and exits cleanly
|
||||||
|
// (flag → true). The next tick should see "clean" and apply the
|
||||||
|
// normal stop condition.
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
crawler::mark_run_started(&pool, "target").await.unwrap();
|
||||||
|
crawler::mark_run_completed(&pool, "target").await.unwrap();
|
||||||
|
let clean = crawler::last_run_completed_cleanly(&pool, "target")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(
|
||||||
|
clean,
|
||||||
|
"after start → complete the flag must round-trip to clean"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn flag_is_per_source(pool: PgPool) {
|
||||||
|
// Two sources, only one is mid-run. The other must still report
|
||||||
|
// clean — the crawler_state key is namespaced by source_id.
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
crawler::ensure_source(&pool, "other", "O", "https://y.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
crawler::mark_run_started(&pool, "target").await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!crawler::last_run_completed_cleanly(&pool, "target")
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
"target is mid-run"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
crawler::last_run_completed_cleanly(&pool, "other")
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
"other source is untouched and reads clean"
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -232,6 +232,67 @@ async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPo
|
|||||||
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn live_chapter_count_returns_zero_for_unknown_source_key(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// No manga_sources row yet → unknown key path. Must not error and
|
||||||
|
// must report zero so the partial-render guard accepts the
|
||||||
|
// "brand-new manga with no chapters" case as legitimate.
|
||||||
|
let n = crawler::live_chapter_count_for_source_manga(&pool, "target", "nobody")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(n, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn live_chapter_count_only_counts_live_sources(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||||
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let chapters = vec![
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "1".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.1".into()),
|
||||||
|
url: "https://x.example/foo/1".into(),
|
||||||
|
},
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "2".into(),
|
||||||
|
number: 2,
|
||||||
|
title: Some("Ch.2".into()),
|
||||||
|
url: "https://x.example/foo/2".into(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
2
|
||||||
|
);
|
||||||
|
// Soft-drop one source row — count drops by one, the row stays.
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE chapter_sources SET dropped_at = NOW() WHERE source_chapter_key = '2'",
|
||||||
|
)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
crawler::live_chapter_count_for_source_manga(&pool, "target", "foo")
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/// Real-world sources publish multiple chapters at the same number
|
/// Real-world sources publish multiple chapters at the same number
|
||||||
/// (different uploaders, translator notes, re-releases). After the
|
/// (different uploaders, translator notes, re-releases). After the
|
||||||
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
||||||
@@ -309,59 +370,223 @@ async fn sync_chapters_keeps_duplicate_numbered_chapters_as_separate_rows(pool:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn mark_dropped_mangas_only_drops_unseen(pool: PgPool) {
|
async fn sync_chapters_isolates_colliding_keys_across_mangas(pool: PgPool) {
|
||||||
|
// Two mangas, both with a chapter whose source_chapter_key is
|
||||||
|
// "chapter-1". Pre-migration-0017 the PK enforced (source_id,
|
||||||
|
// source_chapter_key) globally and the lookup didn't filter by
|
||||||
|
// manga_id, so the second manga's sync would adopt the first manga's
|
||||||
|
// chapter_id (silent attribution corruption). After 0017 each manga
|
||||||
|
// owns its own row.
|
||||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
// Seed two mangas before "now" so a later run_started_at sees them as stale.
|
let m1 = sample_manga("foo", "Manga Foo", "hash-foo");
|
||||||
let _ = crawler::upsert_manga_from_source(
|
let m2 = sample_manga("bar", "Manga Bar", "hash-bar");
|
||||||
&pool,
|
let up1 = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m1)
|
||||||
"target",
|
|
||||||
"https://x.example/foo",
|
|
||||||
&sample_manga("foo", "Foo", "hf"),
|
|
||||||
)
|
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let _ = crawler::upsert_manga_from_source(
|
let up2 = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/bar", &m2)
|
||||||
&pool,
|
|
||||||
"target",
|
|
||||||
"https://x.example/bar",
|
|
||||||
&sample_manga("bar", "Bar", "hb"),
|
|
||||||
)
|
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
assert_ne!(up1.manga_id, up2.manga_id);
|
||||||
|
|
||||||
// Now mark a new "run" beginning. Re-upsert only `foo` — `bar`
|
let shared = vec![SourceChapterRef {
|
||||||
// should be the one flagged dropped.
|
source_chapter_key: "chapter-1".into(),
|
||||||
let run_started = chrono::Utc::now();
|
number: 1,
|
||||||
// Sleep briefly so the second upsert's NOW() > run_started_at.
|
title: Some("Ch.1".into()),
|
||||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
url: "https://x.example/foo/chapter-1/".into(),
|
||||||
let _ = crawler::upsert_manga_from_source(
|
}];
|
||||||
&pool,
|
let diff1 = crawler::sync_manga_chapters(&pool, "target", up1.manga_id, &shared)
|
||||||
"target",
|
.await
|
||||||
"https://x.example/foo",
|
.unwrap();
|
||||||
&sample_manga("foo", "Foo", "hf"),
|
assert_eq!(diff1.new, 1, "manga foo: chapter inserted fresh");
|
||||||
|
|
||||||
|
// Manga bar now syncs *the same key*. Under the old schema this would
|
||||||
|
// either fail on PK conflict or attribute the chapter to foo. Under
|
||||||
|
// the new schema bar gets its own chapter row.
|
||||||
|
let bar_chapters = vec![SourceChapterRef {
|
||||||
|
source_chapter_key: "chapter-1".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.1 (bar)".into()),
|
||||||
|
url: "https://x.example/bar/chapter-1/".into(),
|
||||||
|
}];
|
||||||
|
let diff2 = crawler::sync_manga_chapters(&pool, "target", up2.manga_id, &bar_chapters)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
diff2.new, 1,
|
||||||
|
"manga bar: same key resolved per-manga to a fresh row"
|
||||||
|
);
|
||||||
|
|
||||||
|
let foo_count: (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM chapters WHERE manga_id = $1",
|
||||||
)
|
)
|
||||||
.await
|
.bind(up1.manga_id)
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let n = crawler::mark_dropped_mangas(&pool, "target", run_started)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
assert_eq!(n, 1, "only bar should have been dropped");
|
|
||||||
|
|
||||||
let foo_dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
|
||||||
sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'foo'")
|
|
||||||
.fetch_one(&pool)
|
.fetch_one(&pool)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(foo_dropped.0.is_none(), "foo seen this run, must not be dropped");
|
let bar_count: (i64,) = sqlx::query_as(
|
||||||
let bar_dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
"SELECT COUNT(*) FROM chapters WHERE manga_id = $1",
|
||||||
sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'bar'")
|
)
|
||||||
|
.bind(up2.manga_id)
|
||||||
.fetch_one(&pool)
|
.fetch_one(&pool)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(bar_dropped.0.is_some());
|
assert_eq!(foo_count.0, 1);
|
||||||
|
assert_eq!(bar_count.0, 1);
|
||||||
|
|
||||||
|
let bar_title: (Option<String>,) = sqlx::query_as(
|
||||||
|
"SELECT title FROM chapters WHERE manga_id = $1 AND number = 1",
|
||||||
|
)
|
||||||
|
.bind(up2.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
bar_title.0.as_deref(),
|
||||||
|
Some("Ch.1 (bar)"),
|
||||||
|
"bar's chapter has bar's title, not foo's"
|
||||||
|
);
|
||||||
|
|
||||||
|
// A subsequent re-sync of foo with the same key correctly refreshes
|
||||||
|
// foo's row, not bar's.
|
||||||
|
let foo_resync = vec![SourceChapterRef {
|
||||||
|
source_chapter_key: "chapter-1".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.1 (foo updated)".into()),
|
||||||
|
url: "https://x.example/foo/chapter-1/".into(),
|
||||||
|
}];
|
||||||
|
let diff_refresh = crawler::sync_manga_chapters(&pool, "target", up1.manga_id, &foo_resync)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(diff_refresh.refreshed, 1);
|
||||||
|
assert_eq!(diff_refresh.new, 0);
|
||||||
|
|
||||||
|
let foo_title: (Option<String>,) = sqlx::query_as(
|
||||||
|
"SELECT title FROM chapters WHERE manga_id = $1 AND number = 1",
|
||||||
|
)
|
||||||
|
.bind(up1.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(foo_title.0.as_deref(), Some("Ch.1 (foo updated)"));
|
||||||
|
let bar_title_after: (Option<String>,) = sqlx::query_as(
|
||||||
|
"SELECT title FROM chapters WHERE manga_id = $1 AND number = 1",
|
||||||
|
)
|
||||||
|
.bind(up2.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
bar_title_after.0.as_deref(),
|
||||||
|
Some("Ch.1 (bar)"),
|
||||||
|
"bar's row is untouched by foo's refresh"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn sync_chapters_serializes_concurrent_calls_for_same_manga(pool: PgPool) {
|
||||||
|
// Without the per-manga advisory lock, two concurrent calls would
|
||||||
|
// both read `seen_keys`, both run the drop UPDATE filtered on `NOT
|
||||||
|
// (key = ANY $3)`, and the later commit could soft-drop a chapter
|
||||||
|
// the earlier had just inserted. The lock makes the calls strictly
|
||||||
|
// sequential per-manga: whichever runs second sees the first one's
|
||||||
|
// committed chapters and treats their absence as a "dropped" signal
|
||||||
|
// only if the second list legitimately omits them.
|
||||||
|
//
|
||||||
|
// Concretely: pre-state [A]. Call X syncs [A, B]; call Y syncs
|
||||||
|
// [A, B, C]. Whatever the schedule, the final state must include
|
||||||
|
// *all three* chapters because neither call legitimately omits the
|
||||||
|
// other's contribution — both lists are supersets of each other's
|
||||||
|
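// pre-existing rows (both lists contain the pre-state [A]).
// NOTE(review): list X omits C — confirm the drop branch cannot
// soft-drop C when call Y commits before call X runs.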
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||||
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let manga_id = up.manga_id;
|
||||||
|
|
||||||
|
// Pre-state: [A].
|
||||||
|
let pre = vec![SourceChapterRef {
|
||||||
|
source_chapter_key: "A".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.A".into()),
|
||||||
|
url: "https://x.example/foo/A".into(),
|
||||||
|
}];
|
||||||
|
crawler::sync_manga_chapters(&pool, "target", manga_id, &pre)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Two concurrent calls. Call X adds B; call Y adds B + C. Both keep
|
||||||
|
// A. Their drop branches would otherwise race against each other.
|
||||||
|
let list_x = vec![
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "A".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.A".into()),
|
||||||
|
url: "https://x.example/foo/A".into(),
|
||||||
|
},
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "B".into(),
|
||||||
|
number: 2,
|
||||||
|
title: Some("Ch.B".into()),
|
||||||
|
url: "https://x.example/foo/B".into(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
let list_y = vec![
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "A".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.A".into()),
|
||||||
|
url: "https://x.example/foo/A".into(),
|
||||||
|
},
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "B".into(),
|
||||||
|
number: 2,
|
||||||
|
title: Some("Ch.B".into()),
|
||||||
|
url: "https://x.example/foo/B".into(),
|
||||||
|
},
|
||||||
|
SourceChapterRef {
|
||||||
|
source_chapter_key: "C".into(),
|
||||||
|
number: 3,
|
||||||
|
title: Some("Ch.C".into()),
|
||||||
|
url: "https://x.example/foo/C".into(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
let pool_x = pool.clone();
|
||||||
|
let pool_y = pool.clone();
|
||||||
|
let (rx, ry) = tokio::join!(
|
||||||
|
tokio::spawn(async move {
|
||||||
|
crawler::sync_manga_chapters(&pool_x, "target", manga_id, &list_x).await
|
||||||
|
}),
|
||||||
|
tokio::spawn(async move {
|
||||||
|
crawler::sync_manga_chapters(&pool_y, "target", manga_id, &list_y).await
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
rx.unwrap().expect("call X");
|
||||||
|
ry.unwrap().expect("call Y");
|
||||||
|
|
||||||
|
// All three keys must survive with dropped_at NULL — the lock
|
||||||
|
// ensures the later call sees the earlier one's INSERTs and the
|
||||||
|
// drop UPDATE finds nothing to drop.
|
||||||
|
let alive: Vec<String> = sqlx::query_scalar(
|
||||||
|
"SELECT cs.source_chapter_key \
|
||||||
|
FROM chapter_sources cs \
|
||||||
|
JOIN chapters ch ON ch.id = cs.chapter_id \
|
||||||
|
WHERE ch.manga_id = $1 AND cs.dropped_at IS NULL \
|
||||||
|
ORDER BY cs.source_chapter_key",
|
||||||
|
)
|
||||||
|
.bind(manga_id)
|
||||||
|
.fetch_all(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
alive,
|
||||||
|
vec!["A".to_string(), "B".to_string(), "C".to_string()],
|
||||||
|
"all chapters survive concurrent syncs that both contain them"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
@@ -440,6 +665,170 @@ async fn arbitrary_genres_from_source_get_inserted(pool: PgPool) {
|
|||||||
assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row");
|
assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// User-attached tags (rows with non-NULL `added_by` in `manga_tags`)
|
||||||
|
/// must survive a crawler upsert. The crawler owns source-attached tags
|
||||||
|
/// (added_by IS NULL); user attachments are owned by the user who made
|
||||||
|
/// them and the recurring metadata pass must not delete them.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn sync_tags_preserves_user_attached_tags(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||||
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// A real user attaches a personal tag.
|
||||||
|
let user = mangalord::repo::user::create(&pool, "alice", "phc-stub")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let outcome = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(outcome.created_attachment);
|
||||||
|
|
||||||
|
// Second crawler pass. Use a different metadata_hash so the upsert
|
||||||
|
// takes the Updated branch, but the bug also fires on Unchanged
|
||||||
|
// ticks since sync_tags runs unconditionally.
|
||||||
|
let mut m2 = m.clone();
|
||||||
|
m2.metadata_hash = "hash-2".into();
|
||||||
|
m2.tags = vec!["popular".into(), "weekly".into()];
|
||||||
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// The user tag must still be attached.
|
||||||
|
let user_tag_rows: (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM manga_tags mt \
|
||||||
|
JOIN tags t ON t.id = mt.tag_id \
|
||||||
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
||||||
|
AND mt.added_by = $2",
|
||||||
|
)
|
||||||
|
.bind(up.manga_id)
|
||||||
|
.bind(user.id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
user_tag_rows.0, 1,
|
||||||
|
"user-attached tag must survive a crawler upsert"
|
||||||
|
);
|
||||||
|
|
||||||
|
// The source's tags should still attach as well, as crawler-owned.
|
||||||
|
let source_tag_rows: (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM manga_tags mt \
|
||||||
|
JOIN tags t ON t.id = mt.tag_id \
|
||||||
|
WHERE mt.manga_id = $1 \
|
||||||
|
AND mt.added_by IS NULL \
|
||||||
|
AND lower(t.name) IN ('popular', 'weekly')",
|
||||||
|
)
|
||||||
|
.bind(up.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(source_tag_rows.0, 2, "source tags re-attach on each pass");
|
||||||
|
|
||||||
|
// A subsequent pass where the source drops a previously-seen tag
|
||||||
|
// must clear that crawler-owned attachment (otherwise crawler-tags
|
||||||
|
// would only ever accumulate).
|
||||||
|
let mut m3 = m2.clone();
|
||||||
|
m3.metadata_hash = "hash-3".into();
|
||||||
|
m3.tags = vec!["popular".into()];
|
||||||
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m3)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let weekly_rows: (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM manga_tags mt \
|
||||||
|
JOIN tags t ON t.id = mt.tag_id \
|
||||||
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'weekly'",
|
||||||
|
)
|
||||||
|
.bind(up.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(weekly_rows.0, 0, "source-owned tag dropped by source goes away");
|
||||||
|
|
||||||
|
// And the user tag still survives that third pass.
|
||||||
|
let user_tag_rows: (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM manga_tags mt \
|
||||||
|
JOIN tags t ON t.id = mt.tag_id \
|
||||||
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
||||||
|
AND mt.added_by = $2",
|
||||||
|
)
|
||||||
|
.bind(up.manga_id)
|
||||||
|
.bind(user.id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(user_tag_rows.0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `manga_tags.added_by` is `ON DELETE SET NULL` on the user FK. When
|
||||||
|
/// the attaching user is deleted, their attachments become orphans
|
||||||
|
/// indistinguishable from crawler-owned rows — and the crawler should
|
||||||
|
/// reap them on the next pass. Pins the semantic so a future change
|
||||||
|
/// can't quietly leave orphan rows lying around.
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn sync_tags_garbage_collects_orphan_user_attachments(pool: PgPool) {
|
||||||
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let m = sample_manga("foo", "Foo", "hash-1");
|
||||||
|
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// A user attaches "personal", then the user gets deleted. The
|
||||||
|
// attachment row stays (manga_tags.manga_id FK is CASCADE on
|
||||||
|
// mangas only; we never CASCADE-delete user attachments). The FK
|
||||||
|
// on added_by is `ON DELETE SET NULL`, so the row's owner column
|
||||||
|
// goes NULL — same shape as a crawler-owned row.
|
||||||
|
let user = mangalord::repo::user::create(&pool, "bob", "phc-stub")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let _ = mangalord::repo::tag::attach_to_manga(&pool, up.manga_id, "personal", user.id)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query("DELETE FROM users WHERE id = $1")
|
||||||
|
.bind(user.id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Sanity: the orphan still exists post-user-delete with added_by NULL.
|
||||||
|
let (orphan_rows,): (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM manga_tags mt \
|
||||||
|
JOIN tags t ON t.id = mt.tag_id \
|
||||||
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal' \
|
||||||
|
AND mt.added_by IS NULL",
|
||||||
|
)
|
||||||
|
.bind(up.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(orphan_rows, 1);
|
||||||
|
|
||||||
|
// Next crawler pass — orphan should be reaped along with any
|
||||||
|
// other source-owned rows that aren't in the new tag list.
|
||||||
|
let mut m2 = m.clone();
|
||||||
|
m2.metadata_hash = "hash-2".into();
|
||||||
|
m2.tags = vec!["popular".into()];
|
||||||
|
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m2)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let (orphan_rows,): (i64,) = sqlx::query_as(
|
||||||
|
"SELECT COUNT(*) FROM manga_tags mt \
|
||||||
|
JOIN tags t ON t.id = mt.tag_id \
|
||||||
|
WHERE mt.manga_id = $1 AND lower(t.name) = 'personal'",
|
||||||
|
)
|
||||||
|
.bind(up.manga_id)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(orphan_rows, 0, "orphan user-attached tag should be reaped");
|
||||||
|
}
|
||||||
|
|
||||||
#[sqlx::test(migrations = "./migrations")]
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
||||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||||
|
|||||||
162
backend/tests/repo_chapter.rs
Normal file
162
backend/tests/repo_chapter.rs
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
//! Integration tests for `repo::chapter` — focused on
|
||||||
|
//! `dispatch_target`, the resolver the daemon's chapter dispatcher
|
||||||
|
//! uses to look up the URL it needs to hand to
|
||||||
|
//! `content::sync_chapter_content`.
|
||||||
|
//!
|
||||||
|
//! The query must:
|
||||||
|
//! 1. Skip `chapter_sources` rows where `dropped_at IS NOT NULL` —
|
||||||
|
//! otherwise a soft-dropped source URL is dispatched as if live and
|
||||||
|
//! burns the chapter's retry budget against guaranteed transients.
|
||||||
|
//! 2. Order the remaining rows by `last_seen_at DESC` so the freshest
|
||||||
|
//! surviving source is the one we'll fetch from.
|
||||||
|
//!
|
||||||
|
//! The fix lives in `backend/src/repo/chapter.rs:dispatch_target`. The
|
||||||
|
//! enqueue queries at `pipeline.rs:381` and `:435` already filter on
|
||||||
|
//! `cs.dropped_at IS NULL`; this brings the resolver into line.
|
||||||
|
|
||||||
|
use mangalord::crawler::source::{SourceChapterRef, SourceManga};
|
||||||
|
use mangalord::repo::{
|
||||||
|
chapter::dispatch_target,
|
||||||
|
crawler::{ensure_source, sync_manga_chapters, upsert_manga_from_source},
|
||||||
|
};
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
fn sample_manga(key: &str, title: &str, hash: &str) -> SourceManga {
|
||||||
|
SourceManga {
|
||||||
|
source_manga_key: key.to_string(),
|
||||||
|
title: title.to_string(),
|
||||||
|
alternative_titles: vec![],
|
||||||
|
authors: vec![],
|
||||||
|
genres: vec![],
|
||||||
|
tags: vec![],
|
||||||
|
status: None,
|
||||||
|
summary: None,
|
||||||
|
cover_url: None,
|
||||||
|
chapters: vec![],
|
||||||
|
metadata_hash: hash.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Seed a manga with one chapter, plus a second `chapter_sources` row
|
||||||
|
/// pointing at the same chapter with a *newer* `last_seen_at` so the
|
||||||
|
/// `ORDER BY cs.last_seen_at DESC` branch of the fixed query can
|
||||||
|
/// distinguish "freshest live source" from "any live source."
|
||||||
|
async fn seed_chapter_with_two_live_sources(pool: &PgPool) -> (Uuid, String, String) {
|
||||||
|
// Two distinct sources both pointing at the same chapter is the
|
||||||
|
// realistic shape of the multi-source state — each source row is
|
||||||
|
// keyed (source_id, chapter_id) after migration 0017.
|
||||||
|
ensure_source(pool, "target", "T", "https://x.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
ensure_source(pool, "mirror", "Mirror", "https://m.example")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||||
|
let up = upsert_manga_from_source(pool, "target", "https://x.example/foo", &m)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let initial = vec![SourceChapterRef {
|
||||||
|
source_chapter_key: "1".into(),
|
||||||
|
number: 1,
|
||||||
|
title: Some("Ch.1".into()),
|
||||||
|
url: "https://x.example/foo/1/old".into(),
|
||||||
|
}];
|
||||||
|
sync_manga_chapters(pool, "target", up.manga_id, &initial)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let (chapter_id,): (Uuid,) = sqlx::query_as(
|
||||||
|
"SELECT c.id FROM chapters c \
|
||||||
|
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
||||||
|
WHERE cs.source_chapter_key = '1' AND cs.source_id = 'target'",
|
||||||
|
)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let old_url = "https://x.example/foo/1/old".to_string();
|
||||||
|
let new_url = "https://m.example/foo/1/mirror".to_string();
|
||||||
|
// Backdate the existing (old/target) source row and add a fresher
|
||||||
|
// row from the mirror source. The fix uses `last_seen_at DESC` to
|
||||||
|
// break the tie deterministically.
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE chapter_sources \
|
||||||
|
SET last_seen_at = NOW() - INTERVAL '2 days' \
|
||||||
|
WHERE chapter_id = $1 AND source_id = 'target'",
|
||||||
|
)
|
||||||
|
.bind(chapter_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO chapter_sources \
|
||||||
|
(source_id, chapter_id, source_chapter_key, source_url, last_seen_at) \
|
||||||
|
VALUES ('mirror', $1, '1', $2, NOW())",
|
||||||
|
)
|
||||||
|
.bind(chapter_id)
|
||||||
|
.bind(&new_url)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
(chapter_id, old_url, new_url)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn dispatch_target_prefers_most_recent_live_source(pool: PgPool) {
|
||||||
|
let (chapter_id, _old_url, new_url) =
|
||||||
|
seed_chapter_with_two_live_sources(&pool).await;
|
||||||
|
|
||||||
|
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
||||||
|
let (_manga_id, source_url) =
|
||||||
|
row.expect("two live sources should yield a dispatch target");
|
||||||
|
assert_eq!(
|
||||||
|
source_url, new_url,
|
||||||
|
"ORDER BY last_seen_at DESC LIMIT 1 must return the freshest source"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn dispatch_target_skips_dropped_sources(pool: PgPool) {
|
||||||
|
let (chapter_id, _old_url, new_url) =
|
||||||
|
seed_chapter_with_two_live_sources(&pool).await;
|
||||||
|
|
||||||
|
// Soft-drop the fresher row. The dispatcher must now return the
|
||||||
|
// *older* still-live row instead of the dropped one.
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE chapter_sources SET dropped_at = NOW() WHERE source_url = $1",
|
||||||
|
)
|
||||||
|
.bind(&new_url)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
||||||
|
let (_manga_id, source_url) =
|
||||||
|
row.expect("a single live source should still yield a dispatch target");
|
||||||
|
assert!(
|
||||||
|
source_url != new_url,
|
||||||
|
"dispatch_target must not return a dropped source"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[sqlx::test(migrations = "./migrations")]
|
||||||
|
async fn dispatch_target_returns_none_when_only_dropped_sources_remain(
|
||||||
|
pool: PgPool,
|
||||||
|
) {
|
||||||
|
let (chapter_id, _old_url, _new_url) =
|
||||||
|
seed_chapter_with_two_live_sources(&pool).await;
|
||||||
|
|
||||||
|
sqlx::query("UPDATE chapter_sources SET dropped_at = NOW() WHERE chapter_id = $1")
|
||||||
|
.bind(chapter_id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let row = dispatch_target(&pool, chapter_id).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
row.is_none(),
|
||||||
|
"every source is dropped — dispatch_target must return None"
|
||||||
|
);
|
||||||
|
}
|
||||||
22
docker-compose.prod.yml
Normal file
22
docker-compose.prod.yml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Production overlay: layer on top of docker-compose.yml on the deploy
|
||||||
|
# host so the backend and frontend run from pre-built registry images
|
||||||
|
# instead of building locally.
|
||||||
|
#
|
||||||
|
# docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||||
|
#
|
||||||
|
# REGISTRY_URL and IMAGE_TAG are injected by .gitea/workflows/deploy.yml
|
||||||
|
# at deploy time. IMAGE_TAG defaults to `latest` so a manual
|
||||||
|
# `docker compose ... up -d` on the host still works.
|
||||||
|
|
||||||
|
services:
|
||||||
|
backend:
|
||||||
|
build: !reset null
|
||||||
|
image: ${REGISTRY_URL}/mangalord-backend:${IMAGE_TAG:-latest}
|
||||||
|
pull_policy: always
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
build: !reset null
|
||||||
|
image: ${REGISTRY_URL}/mangalord-frontend:${IMAGE_TAG:-latest}
|
||||||
|
pull_policy: always
|
||||||
|
restart: unless-stopped
|
||||||
@@ -1,9 +1,15 @@
|
|||||||
|
# Production-like compose. Requires a populated `.env` next to this
|
||||||
|
# file: at minimum POSTGRES_PASSWORD must be set to a non-default
|
||||||
|
# value (the `${VAR:?message}` form below fails fast otherwise). The
|
||||||
|
# frontend container expects HTTPS in front (Caddy/Traefik/nginx)
|
||||||
|
# because, with COOKIE_SECURE=true, browsers will refuse to send the session
|
||||||
|
# cookie over plain HTTP.
|
||||||
services:
|
services:
|
||||||
postgres:
|
postgres:
|
||||||
image: postgres:16-alpine
|
image: postgres:16-alpine
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_USER: ${POSTGRES_USER:-mangalord}
|
POSTGRES_USER: ${POSTGRES_USER:-mangalord}
|
||||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-mangalord}
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}
|
||||||
POSTGRES_DB: ${POSTGRES_DB:-mangalord}
|
POSTGRES_DB: ${POSTGRES_DB:-mangalord}
|
||||||
volumes:
|
volumes:
|
||||||
- postgres-data:/var/lib/postgresql/data
|
- postgres-data:/var/lib/postgresql/data
|
||||||
@@ -19,7 +25,7 @@ services:
|
|||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
environment:
|
environment:
|
||||||
DATABASE_URL: postgres://${POSTGRES_USER:-mangalord}:${POSTGRES_PASSWORD:-mangalord}@postgres:5432/${POSTGRES_DB:-mangalord}
|
DATABASE_URL: postgres://${POSTGRES_USER:-mangalord}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}@postgres:5432/${POSTGRES_DB:-mangalord}
|
||||||
BIND_ADDRESS: 0.0.0.0:8080
|
BIND_ADDRESS: 0.0.0.0:8080
|
||||||
STORAGE_DIR: /var/lib/mangalord/storage
|
STORAGE_DIR: /var/lib/mangalord/storage
|
||||||
RUST_LOG: ${RUST_LOG:-info,mangalord=debug}
|
RUST_LOG: ${RUST_LOG:-info,mangalord=debug}
|
||||||
@@ -33,6 +39,11 @@ services:
|
|||||||
# Upload limits.
|
# Upload limits.
|
||||||
MAX_REQUEST_BYTES: ${MAX_REQUEST_BYTES:-209715200}
|
MAX_REQUEST_BYTES: ${MAX_REQUEST_BYTES:-209715200}
|
||||||
MAX_FILE_BYTES: ${MAX_FILE_BYTES:-20971520}
|
MAX_FILE_BYTES: ${MAX_FILE_BYTES:-20971520}
|
||||||
|
# System-chromium override for the crawler. Leave blank to use the
|
||||||
|
# bundled fetcher; set to e.g. /usr/bin/chromium-headless-shell on
|
||||||
|
# arm64 deployments. Pair with `--build-arg INSTALL_CHROMIUM=true`
|
||||||
|
# so the image actually contains the binary.
|
||||||
|
CRAWLER_CHROMIUM_BINARY: ${CRAWLER_CHROMIUM_BINARY:-}
|
||||||
volumes:
|
volumes:
|
||||||
- storage-data:/var/lib/mangalord/storage
|
- storage-data:/var/lib/mangalord/storage
|
||||||
# No host port mapping in the default setup — the frontend proxies
|
# No host port mapping in the default setup — the frontend proxies
|
||||||
|
|||||||
@@ -1,7 +1,11 @@
|
|||||||
FROM node:22-alpine AS builder
|
FROM node:22-alpine AS builder
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY package.json package-lock.json* ./
|
COPY package.json package-lock.json* ./
|
||||||
RUN npm install
|
# `npm ci` installs the locked versions exactly; `npm install` would
|
||||||
|
# silently rewrite package-lock.json mid-build. CI (.gitea/workflows)
|
||||||
|
# also uses `npm ci`, so this keeps the image build deterministic and
|
||||||
|
# matches what the test job validated.
|
||||||
|
RUN npm ci
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
@@ -10,8 +14,22 @@ WORKDIR /app
|
|||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
ENV HOST=0.0.0.0
|
ENV HOST=0.0.0.0
|
||||||
ENV PORT=3000
|
ENV PORT=3000
|
||||||
COPY --from=builder /app/build ./build
|
|
||||||
COPY --from=builder /app/node_modules ./node_modules
|
# node:22-alpine ships a `node` user (UID 1000); use it instead of
|
||||||
COPY --from=builder /app/package.json ./
|
# running the SvelteKit server as root.
|
||||||
|
COPY --from=builder --chown=node:node /app/build ./build
|
||||||
|
COPY --from=builder --chown=node:node /app/node_modules ./node_modules
|
||||||
|
COPY --from=builder --chown=node:node /app/package.json ./
|
||||||
|
|
||||||
|
USER node
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
|
# Alpine's busybox `wget` is the canonical lightweight HTTP probe. Probe
|
||||||
|
# 127.0.0.1, not `localhost`: musl resolves `localhost` to IPv6 ::1 first,
|
||||||
|
# but the Node server binds IPv4 0.0.0.0 only, so a localhost probe gets
|
||||||
|
# "connection refused" and the container is wrongly marked unhealthy. Use a
|
||||||
|
# GET (`-O /dev/null`) since `node build` serves 200 on `/`.
|
||||||
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||||
|
CMD wget -q -O /dev/null http://127.0.0.1:3000/ || exit 1
|
||||||
|
|
||||||
CMD ["node", "build"]
|
CMD ["node", "build"]
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "mangalord-frontend",
|
"name": "mangalord-frontend",
|
||||||
"version": "0.33.0",
|
"version": "0.45.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
@@ -118,4 +118,77 @@ describe('hooks.server proxy', () => {
|
|||||||
expect(body.error.code).toBe('upstream_unavailable');
|
expect(body.error.code).toBe('upstream_unavailable');
|
||||||
expect(errSpy).toHaveBeenCalled();
|
expect(errSpy).toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('strips every hop-by-hop header listed in RFC 7230 §6.1', async () => {
|
||||||
|
// Defence in depth: axum doesn't emit these, but a future
|
||||||
|
// middleware that did would otherwise leak per-connection
|
||||||
|
// state across the proxy boundary.
|
||||||
|
fetchSpy.mockResolvedValueOnce(new Response('[]', { status: 200 }));
|
||||||
|
const resolve = vi.fn();
|
||||||
|
await handle({
|
||||||
|
event: makeEvent('/api/v1/health', {
|
||||||
|
headers: {
|
||||||
|
host: 'app.example.com',
|
||||||
|
'content-length': '0',
|
||||||
|
connection: 'keep-alive',
|
||||||
|
'keep-alive': 'timeout=5',
|
||||||
|
'proxy-authenticate': 'Basic realm=x',
|
||||||
|
'proxy-authorization': 'Basic xyz',
|
||||||
|
te: 'trailers',
|
||||||
|
trailer: 'Expires',
|
||||||
|
'transfer-encoding': 'chunked',
|
||||||
|
upgrade: 'websocket',
|
||||||
|
// A non-hop-by-hop header to ensure non-targets
|
||||||
|
// aren't accidentally stripped.
|
||||||
|
'x-custom': 'pass-through'
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
resolve
|
||||||
|
});
|
||||||
|
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||||
|
const headers = init.headers as Headers;
|
||||||
|
for (const h of [
|
||||||
|
'host',
|
||||||
|
'content-length',
|
||||||
|
'connection',
|
||||||
|
'keep-alive',
|
||||||
|
'proxy-authenticate',
|
||||||
|
'proxy-authorization',
|
||||||
|
'te',
|
||||||
|
'trailer',
|
||||||
|
'transfer-encoding',
|
||||||
|
'upgrade'
|
||||||
|
]) {
|
||||||
|
expect(headers.get(h), `${h} should be stripped`).toBeNull();
|
||||||
|
}
|
||||||
|
expect(headers.get('x-custom')).toBe('pass-through');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('aborts and returns 502 when the upstream stalls past the timeout', async () => {
|
||||||
|
const errSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
|
||||||
|
// Simulate an aborted fetch (AbortController.abort() raises a
|
||||||
|
// DOMException with name 'AbortError' on Node's fetch). The
|
||||||
|
// handler should treat it as the same upstream_unavailable
|
||||||
|
// 502 it uses for any other network failure.
|
||||||
|
const abortErr = new DOMException('aborted', 'AbortError');
|
||||||
|
fetchSpy.mockRejectedValueOnce(abortErr);
|
||||||
|
|
||||||
|
const resolve = vi.fn();
|
||||||
|
const resp = await handle({ event: makeEvent('/api/v1/slow'), resolve });
|
||||||
|
expect(resp.status).toBe(502);
|
||||||
|
const body = await resp.json();
|
||||||
|
expect(body.error.code).toBe('upstream_unavailable');
|
||||||
|
expect(errSpy).toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('attaches an AbortSignal to the upstream fetch so it can time out', async () => {
|
||||||
|
fetchSpy.mockResolvedValueOnce(new Response('[]', { status: 200 }));
|
||||||
|
const resolve = vi.fn();
|
||||||
|
await handle({ event: makeEvent('/api/v1/health'), resolve });
|
||||||
|
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||||
|
expect(init.signal).toBeInstanceOf(AbortSignal);
|
||||||
|
// The signal hasn't fired (handler returned in time), but its
|
||||||
|
// presence is the contract this test is pinning.
|
||||||
|
expect(init.signal?.aborted).toBe(false);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -12,20 +12,66 @@ import type { Handle } from '@sveltejs/kit';
|
|||||||
|
|
||||||
const BACKEND_URL = process.env.BACKEND_URL ?? 'http://localhost:8080';
|
const BACKEND_URL = process.env.BACKEND_URL ?? 'http://localhost:8080';
|
||||||
|
|
||||||
|
/**
 * Request headers the proxy removes before forwarding to the backend.
 *
 * The bulk are the hop-by-hop headers of RFC 7230 §6.1: they describe a
 * single transport-level connection, so a proxy must not pass them on.
 * Two extras ride along:
 *  - `host`, which would mislead the backend about its origin;
 *  - `content-length`, which the upstream fetch recomputes from the
 *    body stream.
 */
const HOP_BY_HOP_HEADERS = [
	// Not hop-by-hop, but stripped for the reasons given above.
	'host',
	'content-length',
	// Connection-scoped headers.
	'connection',
	'keep-alive',
	'proxy-authenticate',
	'proxy-authorization',
	'te',
	'trailer',
	'transfer-encoding',
	'upgrade'
];
|
||||||
|
|
||||||
|
/** Fallback budget (5 minutes) used when no valid override is configured. */
const DEFAULT_PROXY_TIMEOUT_MS = 300_000;

/**
 * Largest delay `setTimeout` honours (2^31 - 1 ms, roughly 24.8 days).
 * Node replaces anything larger with a 1 ms delay, which would abort
 * every proxied request almost immediately.
 */
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Turn the raw `BACKEND_PROXY_TIMEOUT_MS` value into a usable delay.
 *
 * @param raw The environment value, or `undefined` when unset.
 * @returns The parsed number of milliseconds, clamped to
 *   `MAX_TIMER_DELAY_MS`; `DEFAULT_PROXY_TIMEOUT_MS` when the value is
 *   missing, blank, non-numeric, non-finite, zero, or negative.
 */
function parseProxyTimeoutMs(raw: string | undefined): number {
	if (raw === undefined || raw.trim() === '') return DEFAULT_PROXY_TIMEOUT_MS;
	const parsed = Number(raw);
	if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_PROXY_TIMEOUT_MS;
	// Clamp instead of rejecting: an operator asking for "effectively
	// never" should get the longest delay the timer can represent.
	return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

/**
 * Cap each proxied request at 5 minutes by default. The bound exists to
 * surface a wedged backend (stuck on a slow DB query, deadlocked, etc.)
 * as a 502 rather than letting the browser request hang indefinitely.
 *
 * The default leans toward the slow-upload end of the spectrum: at a
 * 1 Mbps upstream, a 200 MiB chapter upload (the default
 * `MAX_REQUEST_BYTES` cap) needs ~27 minutes; 300 s covers the more
 * realistic 25 Mbps urban-broadband case (~64 s for the same upload)
 * with comfortable headroom. Operators serving very slow clients
 * should raise `BACKEND_PROXY_TIMEOUT_MS`; operators behind a tighter
 * upstream proxy may want to lower it. A future improvement is an
 * idle-based timeout (reset per chunk) instead of this wall-clock
 * budget — that's a fair bit more code, deferred.
 */
const PROXY_TIMEOUT_MS = parseProxyTimeoutMs(process.env.BACKEND_PROXY_TIMEOUT_MS);
|
||||||
|
|
||||||
export const handle: Handle = async ({ event, resolve }) => {
|
export const handle: Handle = async ({ event, resolve }) => {
|
||||||
if (event.url.pathname.startsWith('/api/')) {
|
if (event.url.pathname.startsWith('/api/')) {
|
||||||
const target = `${BACKEND_URL}${event.url.pathname}${event.url.search}`;
|
const target = `${BACKEND_URL}${event.url.pathname}${event.url.search}`;
|
||||||
|
|
||||||
// Strip hop-by-hop headers — `host` would mislead the backend
|
|
||||||
// about the origin, and `content-length` will be recomputed.
|
|
||||||
const headers = new Headers(event.request.headers);
|
const headers = new Headers(event.request.headers);
|
||||||
headers.delete('host');
|
for (const h of HOP_BY_HOP_HEADERS) headers.delete(h);
|
||||||
headers.delete('content-length');
|
|
||||||
|
// AbortController times the upstream fetch out so a backend
|
||||||
|
// wedged on a slow DB query doesn't keep the browser request
|
||||||
|
// hanging forever. The `signal` is also wired into the
|
||||||
|
// RequestInit so the body stream is cancelled cleanly.
|
||||||
|
const ctrl = new AbortController();
|
||||||
|
const timeoutHandle = setTimeout(() => ctrl.abort(), PROXY_TIMEOUT_MS);
|
||||||
|
|
||||||
const init: RequestInit & { duplex?: 'half' } = {
|
const init: RequestInit & { duplex?: 'half' } = {
|
||||||
method: event.request.method,
|
method: event.request.method,
|
||||||
headers,
|
headers,
|
||||||
redirect: 'manual'
|
redirect: 'manual',
|
||||||
|
signal: ctrl.signal
|
||||||
};
|
};
|
||||||
if (event.request.method !== 'GET' && event.request.method !== 'HEAD') {
|
if (event.request.method !== 'GET' && event.request.method !== 'HEAD') {
|
||||||
init.body = event.request.body;
|
init.body = event.request.body;
|
||||||
@@ -39,11 +85,13 @@ export const handle: Handle = async ({ event, resolve }) => {
|
|||||||
upstream = await fetch(target, init);
|
upstream = await fetch(target, init);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// Network-layer failure (DNS / connection refused / TLS
|
// Network-layer failure (DNS / connection refused / TLS
|
||||||
// handshake) — most commonly "backend container restarting".
|
// handshake / abort by timeout) — most commonly "backend
|
||||||
// SvelteKit's default 500 would be an HTML page that
|
// container restarting". SvelteKit's default 500 would be
|
||||||
// client.ts can't .json(), which masks the real cause. Emit
|
// an HTML page that client.ts can't .json(), which masks
|
||||||
// the standard envelope with a dedicated code instead.
|
// the real cause. Emit the standard envelope with a
|
||||||
|
// dedicated code instead.
|
||||||
console.error('Proxy to backend failed:', e);
|
console.error('Proxy to backend failed:', e);
|
||||||
|
clearTimeout(timeoutHandle);
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
error: {
|
error: {
|
||||||
@@ -58,6 +106,7 @@ export const handle: Handle = async ({ event, resolve }) => {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clearTimeout(timeoutHandle);
|
||||||
return new Response(upstream.body, {
|
return new Response(upstream.body, {
|
||||||
status: upstream.status,
|
status: upstream.status,
|
||||||
statusText: upstream.statusText,
|
statusText: upstream.statusText,
|
||||||
|
|||||||
245
frontend/src/lib/api/admin.test.ts
Normal file
245
frontend/src/lib/api/admin.test.ts
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
import {
|
||||||
|
describe,
|
||||||
|
it,
|
||||||
|
expect,
|
||||||
|
vi,
|
||||||
|
beforeEach,
|
||||||
|
afterEach,
|
||||||
|
type MockInstance
|
||||||
|
} from 'vitest';
|
||||||
|
import {
|
||||||
|
listAdminUsers,
|
||||||
|
deleteAdminUser,
|
||||||
|
setUserAdmin,
|
||||||
|
createAdminUser,
|
||||||
|
listAdminMangas,
|
||||||
|
listAdminChapters,
|
||||||
|
getSystemStats
|
||||||
|
} from './admin';
|
||||||
|
|
||||||
|
/**
 * Build a JSON `Response` for a mocked successful fetch.
 *
 * @param body Value serialised with `JSON.stringify` as the response body.
 * @param status HTTP status to report; defaults to 200.
 */
function ok(body: unknown, status = 200): Response {
	const payload = JSON.stringify(body);
	const init: ResponseInit = {
		status,
		headers: { 'content-type': 'application/json' }
	};
	return new Response(payload, init);
}
|
||||||
|
|
||||||
|
/** Build a body-less 204 `Response` for mocked mutation endpoints. */
function noContent(): Response {
	const init: ResponseInit = { status: 204 };
	return new Response(null, init);
}
|
||||||
|
|
||||||
|
/**
 * Build a JSON error `Response` shaped as `{ error: { code, message } }`.
 *
 * @param status HTTP status of the mocked failure.
 * @param code Value placed in `error.code`.
 * @param message Value placed in `error.message`.
 */
function envelope(status: number, code: string, message: string): Response {
	const payload = { error: { code, message } };
	const headers = { 'content-type': 'application/json' };
	return new Response(JSON.stringify(payload), { status, headers });
}
|
||||||
|
|
||||||
|
// Timestamps shared by the fixtures below.
const CREATED_AT = '2026-01-01T00:00:00Z';
const SEEN_AT = '2026-01-02T00:00:00Z';

/** A non-admin user row as the tests expect the user endpoints to return it. */
const userFixture = {
	id: 'u-1',
	username: 'alice',
	created_at: CREATED_AT,
	is_admin: false
};

/** A manga row in the `synced` state with three chapters. */
const mangaFixture = {
	id: 'm-1',
	title: 'Test',
	status: 'ongoing',
	cover_image_path: null,
	created_at: CREATED_AT,
	updated_at: CREATED_AT,
	sync_state: 'synced' as const,
	chapter_count: 3,
	latest_seen_at: SEEN_AT
};

/** System stats payload with half the disk and memory in use and no alerts. */
const systemFixture = {
	disk: { total_bytes: 1_000_000, used_bytes: 500_000, free_bytes: 500_000, percent_used: 50.0 },
	memory: { total_bytes: 8_000_000, used_bytes: 4_000_000, percent_used: 50.0 },
	cpu: { percent_used: 12.3 },
	alerts: []
};
|
||||||
|
|
||||||
|
describe('admin api client', () => {
	let fetchSpy: MockInstance<typeof globalThis.fetch>;

	// Accessors for the first recorded fetch call. Every test below
	// issues exactly one request, so index 0 is always the call under test.
	const calledUrl = (): string => fetchSpy.mock.calls[0][0] as string;
	const calledInit = (): RequestInit => fetchSpy.mock.calls[0][1] as RequestInit;
	const calledJsonBody = (): unknown => JSON.parse(calledInit().body as string);

	beforeEach(() => {
		fetchSpy = vi.spyOn(globalThis, 'fetch');
	});

	afterEach(() => {
		vi.restoreAllMocks();
	});

	// ---- users ----

	it('listAdminUsers GETs /v1/admin/users and parses the paged envelope', async () => {
		const paged = { items: [userFixture], page: { limit: 50, offset: 0, total: 1 } };
		fetchSpy.mockResolvedValueOnce(ok(paged));

		const result = await listAdminUsers({ limit: 50 });

		expect(result.items).toHaveLength(1);
		expect(result.items[0]).toEqual(userFixture);
		expect(result.page.total).toBe(1);
		expect(calledUrl()).toMatch(/\/v1\/admin\/users\?limit=50$/);
	});

	it('listAdminUsers forwards search + offset query params', async () => {
		const paged = { items: [], page: { limit: 50, offset: 10, total: 0 } };
		fetchSpy.mockResolvedValueOnce(ok(paged));

		await listAdminUsers({ search: 'al', offset: 10 });

		const url = calledUrl();
		expect(url).toContain('search=al');
		expect(url).toContain('offset=10');
	});

	it('listAdminUsers surfaces 403 forbidden via ApiError.code', async () => {
		fetchSpy.mockResolvedValueOnce(envelope(403, 'forbidden', 'forbidden'));

		const pending = listAdminUsers();

		await expect(pending).rejects.toMatchObject({ status: 403, code: 'forbidden' });
	});

	it('deleteAdminUser DELETEs to /v1/admin/users/{id} and handles 204', async () => {
		fetchSpy.mockResolvedValueOnce(noContent());

		await expect(deleteAdminUser('u-1')).resolves.toBeUndefined();

		expect(calledUrl()).toMatch(/\/v1\/admin\/users\/u-1$/);
		expect(calledInit().method).toBe('DELETE');
	});

	it('deleteAdminUser surfaces 409 conflict (self-delete / last-admin)', async () => {
		const refusal = envelope(409, 'conflict', 'cannot delete yourself; ask another admin');
		fetchSpy.mockResolvedValueOnce(refusal);

		const pending = deleteAdminUser('u-1');

		await expect(pending).rejects.toMatchObject({ status: 409, code: 'conflict' });
	});

	it('createAdminUser POSTs to /v1/admin/users with body and returns the created user', async () => {
		const created = { ...userFixture, username: 'invited01' };
		fetchSpy.mockResolvedValueOnce(ok(created, 201));

		const got = await createAdminUser({ username: 'invited01', password: 'freshpass1234' });

		expect(got).toEqual(created);
		expect(calledUrl()).toMatch(/\/v1\/admin\/users$/);
		expect(calledInit().method).toBe('POST');
		expect(calledJsonBody()).toEqual({ username: 'invited01', password: 'freshpass1234' });
	});

	it('createAdminUser forwards is_admin when provided', async () => {
		const created = { ...userFixture, username: 'coadmin', is_admin: true };
		fetchSpy.mockResolvedValueOnce(ok(created, 201));

		await createAdminUser({ username: 'coadmin', password: 'freshpass1234', is_admin: true });

		expect(calledJsonBody()).toEqual({
			username: 'coadmin',
			password: 'freshpass1234',
			is_admin: true
		});
	});

	it('createAdminUser surfaces 409 conflict on duplicate username', async () => {
		fetchSpy.mockResolvedValueOnce(envelope(409, 'conflict', 'username is already taken'));

		const pending = createAdminUser({ username: 'taken', password: 'freshpass1234' });

		await expect(pending).rejects.toMatchObject({ status: 409, code: 'conflict' });
	});

	it('setUserAdmin PATCHes is_admin and returns the updated user', async () => {
		const updated = { ...userFixture, is_admin: true };
		fetchSpy.mockResolvedValueOnce(ok(updated));

		const got = await setUserAdmin('u-1', true);

		expect(got).toEqual(updated);
		expect(calledInit().method).toBe('PATCH');
		expect(calledJsonBody()).toEqual({ is_admin: true });
	});

	// ---- mangas + chapters ----

	it('listAdminMangas GETs /v1/admin/mangas and forwards sync_state filter', async () => {
		const paged = { items: [mangaFixture], page: { limit: 100, offset: 0, total: 1 } };
		fetchSpy.mockResolvedValueOnce(ok(paged));

		const result = await listAdminMangas({ syncState: 'in_progress', limit: 100 });

		// The mocked row is returned as-is; only the outgoing query
		// carries the requested filter.
		expect(result.items[0].sync_state).toBe('synced');
		const url = calledUrl();
		expect(url).toContain('sync_state=in_progress');
		expect(url).toContain('limit=100');
	});

	it('listAdminChapters GETs the nested chapter route and parses the paged envelope', async () => {
		const chapter = {
			id: 'c-1',
			manga_id: 'm-1',
			number: 1,
			title: null,
			page_count: 12,
			created_at: '2026-01-01T00:00:00Z',
			sync_state: 'synced' as const,
			latest_seen_at: null
		};
		const paged = { items: [chapter], page: { limit: 200, offset: 0, total: 1 } };
		fetchSpy.mockResolvedValueOnce(ok(paged));

		const resp = await listAdminChapters('m-1');

		expect(resp.items).toEqual([chapter]);
		expect(resp.page.total).toBe(1);
		expect(calledUrl()).toMatch(/\/v1\/admin\/mangas\/m-1\/chapters$/);
	});

	it('listAdminChapters forwards limit + offset query params', async () => {
		const paged = { items: [], page: { limit: 50, offset: 100, total: 0 } };
		fetchSpy.mockResolvedValueOnce(ok(paged));

		await listAdminChapters('m-1', { limit: 50, offset: 100 });

		const url = calledUrl();
		expect(url).toContain('limit=50');
		expect(url).toContain('offset=100');
	});

	// ---- system ----

	it('getSystemStats GETs /v1/admin/system and parses the four-key envelope', async () => {
		fetchSpy.mockResolvedValueOnce(ok(systemFixture));

		const stats = await getSystemStats();

		expect(stats.disk?.percent_used).toBe(50);
		expect(stats.memory.percent_used).toBe(50);
		expect(stats.cpu.percent_used).toBe(12.3);
		expect(stats.alerts).toEqual([]);
		expect(calledUrl()).toMatch(/\/v1\/admin\/system$/);
	});

	it('getSystemStats keeps disk null when backend reports a non-local store', async () => {
		fetchSpy.mockResolvedValueOnce(ok({ ...systemFixture, disk: null }));

		const stats = await getSystemStats();

		expect(stats.disk).toBeNull();
	});
});
|
||||||
178
frontend/src/lib/api/admin.ts
Normal file
178
frontend/src/lib/api/admin.ts
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
// Admin-only API client. Every endpoint here is guarded by
|
||||||
|
// RequireAdmin on the backend (session cookie only — bearer tokens
|
||||||
|
// won't reach these routes). 403s thrown here propagate up to the
|
||||||
|
// /admin layout, which renders the framework error page.
|
||||||
|
|
||||||
|
import { request, type Page } from './client';
|
||||||
|
import type { User } from './auth';
|
||||||
|
|
||||||
|
// ---- users -----------------------------------------------------------------
|
||||||
|
|
||||||
|
/** One page of users as returned by the admin user listing. */
export type AdminUsersPage = {
	items: User[];
	page: Page;
};

/** Optional filters and paging for `listAdminUsers`; unset fields are not sent. */
export type ListAdminUsersOptions = {
	search?: string;
	limit?: number;
	offset?: number;
};

/**
 * Fetch a page of users from `/v1/admin/users`.
 *
 * @param opts Optional search term and paging. An empty `search` and
 *   null/undefined `limit`/`offset` are left out of the query string.
 * @returns The paged envelope resolved by `request`.
 */
export async function listAdminUsers(
	opts: ListAdminUsersOptions = {}
): Promise<AdminUsersPage> {
	const { search, limit, offset } = opts;
	const query = new URLSearchParams();
	if (search) query.set('search', search);
	if (limit != null) query.set('limit', String(limit));
	if (offset != null) query.set('offset', String(offset));

	const encoded = query.toString();
	const path = encoded ? `/v1/admin/users?${encoded}` : '/v1/admin/users';
	return request<AdminUsersPage>(path);
}
|
||||||
|
|
||||||
|
/**
 * Issue `DELETE /v1/admin/users/{id}`.
 *
 * @param id User id; URI-encoded before being placed in the path.
 */
export async function deleteAdminUser(id: string): Promise<void> {
	const path = `/v1/admin/users/${encodeURIComponent(id)}`;
	await request<void>(path, { method: 'DELETE' });
}
|
||||||
|
|
||||||
|
/**
 * Issue `PATCH /v1/admin/users/{id}` with a JSON body of `{ is_admin }`.
 *
 * @param id User id; URI-encoded before being placed in the path.
 * @param isAdmin Value sent as `is_admin`.
 * @returns The user object resolved by `request`.
 */
export async function setUserAdmin(id: string, isAdmin: boolean): Promise<User> {
	const path = `/v1/admin/users/${encodeURIComponent(id)}`;
	const payload = { is_admin: isAdmin };
	return request<User>(path, {
		method: 'PATCH',
		headers: { 'content-type': 'application/json' },
		body: JSON.stringify(payload)
	});
}
|
||||||
|
|
||||||
|
/** Request body for `createAdminUser`; `is_admin` is sent only when provided. */
export type CreateAdminUserInput = {
	username: string;
	password: string;
	is_admin?: boolean;
};

/**
 * POST /v1/admin/users — admin-initiated account creation. Works
 * regardless of the ALLOW_SELF_REGISTER toggle, since the entire point
 * is for an admin to enroll someone when self-register is off.
 *
 * @param input Serialised as-is into the JSON request body.
 * @returns The user object resolved by `request`.
 */
export async function createAdminUser(input: CreateAdminUserInput): Promise<User> {
	const init: RequestInit = {
		method: 'POST',
		headers: { 'content-type': 'application/json' },
		body: JSON.stringify(input)
	};
	return request<User>('/v1/admin/users', init);
}
|
||||||
|
|
||||||
|
// ---- mangas / chapters with sync state -------------------------------------
|
||||||
|
|
||||||
|
/** Sync states a manga row can carry. */
export type MangaSyncState = 'in_progress' | 'dropped' | 'synced';

/** One manga row in the admin listing. */
export type AdminMangaRow = {
	id: string;
	title: string;
	status: string;
	cover_image_path: string | null;
	created_at: string;
	updated_at: string;
	sync_state: MangaSyncState;
	chapter_count: number;
	latest_seen_at: string | null;
};

/** One page of manga rows as returned by the admin manga listing. */
export type AdminMangasPage = {
	items: AdminMangaRow[];
	page: Page;
};

/** Optional filters and paging for `listAdminMangas`; unset fields are not sent. */
export type ListAdminMangasOptions = {
	search?: string;
	syncState?: MangaSyncState;
	limit?: number;
	offset?: number;
};

/**
 * Fetch a page of mangas from `/v1/admin/mangas`.
 *
 * @param opts Optional filters and paging. `syncState` is sent as the
 *   `sync_state` query parameter; empty/unset values are omitted.
 * @returns The paged envelope resolved by `request`.
 */
export async function listAdminMangas(
	opts: ListAdminMangasOptions = {}
): Promise<AdminMangasPage> {
	const { search, syncState, limit, offset } = opts;
	const query = new URLSearchParams();
	if (search) query.set('search', search);
	if (syncState) query.set('sync_state', syncState);
	if (limit != null) query.set('limit', String(limit));
	if (offset != null) query.set('offset', String(offset));

	const encoded = query.toString();
	const path = encoded ? `/v1/admin/mangas?${encoded}` : '/v1/admin/mangas';
	return request<AdminMangasPage>(path);
}
|
||||||
|
|
||||||
|
/** Sync states a chapter row can carry. */
export type ChapterSyncState = 'downloading' | 'dropped' | 'failed' | 'not_downloaded' | 'synced';

/** One chapter row in the admin listing. */
export type AdminChapterRow = {
	id: string;
	manga_id: string;
	number: number;
	title: string | null;
	page_count: number;
	created_at: string;
	sync_state: ChapterSyncState;
	latest_seen_at: string | null;
};

/** One page of chapter rows as returned by the admin chapter listing. */
export type AdminChaptersPage = {
	items: AdminChapterRow[];
	page: Page;
};

/** Optional paging for `listAdminChapters`; unset fields are not sent. */
export type ListAdminChaptersOptions = {
	limit?: number;
	offset?: number;
};

/**
 * Fetch a page of chapters from `/v1/admin/mangas/{mangaId}/chapters`.
 *
 * @param mangaId Manga id; URI-encoded before being placed in the path.
 * @param opts Optional paging; null/undefined values are omitted.
 * @returns The paged envelope resolved by `request`.
 */
export async function listAdminChapters(
	mangaId: string,
	opts: ListAdminChaptersOptions = {}
): Promise<AdminChaptersPage> {
	const { limit, offset } = opts;
	const query = new URLSearchParams();
	if (limit != null) query.set('limit', String(limit));
	if (offset != null) query.set('offset', String(offset));

	const base = `/v1/admin/mangas/${encodeURIComponent(mangaId)}/chapters`;
	const encoded = query.toString();
	return request<AdminChaptersPage>(encoded ? `${base}?${encoded}` : base);
}
|
||||||
|
|
||||||
|
// ---- system ----------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Disk figures reported by the system endpoint. */
export type DiskStats = {
	total_bytes: number;
	used_bytes: number;
	free_bytes: number;
	percent_used: number;
};

/** Memory figures reported by the system endpoint. */
export type MemoryStats = {
	total_bytes: number;
	used_bytes: number;
	percent_used: number;
};

/** CPU figure reported by the system endpoint. */
export type CpuStats = {
	percent_used: number;
};

/** A single alert entry; `'warning'` is the only level modelled here. */
export type Alert = {
	level: 'warning';
	message: string;
};

/** Payload of `/v1/admin/system`. `disk` may be `null`. */
export type SystemStats = {
	disk: DiskStats | null;
	memory: MemoryStats;
	cpu: CpuStats;
	alerts: Alert[];
};

/**
 * Fetch the system stats payload from `/v1/admin/system`.
 *
 * @returns The stats object resolved by `request`.
 */
export async function getSystemStats(): Promise<SystemStats> {
	const stats = await request<SystemStats>('/v1/admin/system');
	return stats;
}
|
||||||
@@ -14,7 +14,8 @@ import {
|
|||||||
me,
|
me,
|
||||||
changePassword,
|
changePassword,
|
||||||
createToken,
|
createToken,
|
||||||
deleteToken
|
deleteToken,
|
||||||
|
getAuthConfig
|
||||||
} from './auth';
|
} from './auth';
|
||||||
|
|
||||||
function ok(body: unknown, status = 200): Response {
|
function ok(body: unknown, status = 200): Response {
|
||||||
@@ -94,6 +95,11 @@ describe('auth api client', () => {
|
|||||||
expect(url).toMatch(/\/v1\/auth\/logout$/);
|
expect(url).toMatch(/\/v1\/auth\/logout$/);
|
||||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||||
expect(init.method).toBe('POST');
|
expect(init.method).toBe('POST');
|
||||||
|
// Consistent content-type for all mutation requests, matching
|
||||||
|
// the rest of the module — axum doesn't require it but the
|
||||||
|
// header keeps the request style uniform.
|
||||||
|
const headers = new Headers(init.headers);
|
||||||
|
expect(headers.get('content-type')).toBe('application/json');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('me returns the user on 200', async () => {
|
it('me returns the user on 200', async () => {
|
||||||
@@ -164,6 +170,17 @@ describe('auth api client', () => {
|
|||||||
expect(url).toMatch(/\/v1\/auth\/tokens$/);
|
expect(url).toMatch(/\/v1\/auth\/tokens$/);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('getAuthConfig GETs /v1/auth/config and parses the flag', async () => {
	fetchSpy.mockResolvedValueOnce(ok({ self_register_enabled: false }));

	const config = await getAuthConfig();
	expect(config.self_register_enabled).toBe(false);

	const requestedUrl = fetchSpy.mock.calls[0][0] as string;
	const requestedInit = fetchSpy.mock.calls[0][1] as RequestInit;
	expect(requestedUrl).toMatch(/\/v1\/auth\/config$/);
	// Public endpoint; an absent method override means the default GET.
	const effectiveMethod = requestedInit?.method ?? 'GET';
	expect(effectiveMethod).toBe('GET');
});
|
||||||
|
|
||||||
it('deleteToken DELETEs to /v1/auth/tokens/{id} and handles 204', async () => {
|
it('deleteToken DELETEs to /v1/auth/tokens/{id} and handles 204', async () => {
|
||||||
fetchSpy.mockResolvedValueOnce(noContent());
|
fetchSpy.mockResolvedValueOnce(noContent());
|
||||||
await expect(deleteToken('t1')).resolves.toBeUndefined();
|
await expect(deleteToken('t1')).resolves.toBeUndefined();
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ export type User = {
|
|||||||
id: string;
|
id: string;
|
||||||
username: string;
|
username: string;
|
||||||
created_at: string;
|
created_at: string;
|
||||||
|
is_admin: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type Credentials = {
|
export type Credentials = {
|
||||||
@@ -32,7 +33,14 @@ export async function login(creds: Credentials): Promise<User> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function logout(): Promise<void> {
|
export async function logout(): Promise<void> {
|
||||||
await request<void>('/v1/auth/logout', { method: 'POST' });
|
await request<void>('/v1/auth/logout', {
|
||||||
|
method: 'POST',
|
||||||
|
// Consistent with the other POST/PATCH helpers in this module.
|
||||||
|
// axum doesn't require it (no body), but keeping the header
|
||||||
|
// on every mutation request avoids the false-flag in logs and
|
||||||
|
// matches the project's style.
|
||||||
|
headers: { 'content-type': 'application/json' }
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export type ChangePassword = {
|
export type ChangePassword = {
|
||||||
@@ -92,3 +100,15 @@ export async function createToken(name: string): Promise<CreatedToken> {
|
|||||||
export async function deleteToken(id: string): Promise<void> {
|
export async function deleteToken(id: string): Promise<void> {
|
||||||
await request<void>(`/v1/auth/tokens/${encodeURIComponent(id)}`, { method: 'DELETE' });
|
await request<void>(`/v1/auth/tokens/${encodeURIComponent(id)}`, { method: 'DELETE' });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Auth policy exposed by `/v1/auth/config`. */
export type AuthConfig = {
	/**
	 * When false, /v1/auth/register returns 403 and the UI should hide
	 * its register affordance. Admins can still mint accounts via
	 * POST /v1/admin/users.
	 */
	self_register_enabled: boolean;
};

/**
 * Fetch the auth policy from `/v1/auth/config`.
 * Public — no auth, no cookie required.
 */
export async function getAuthConfig(): Promise<AuthConfig> {
	const config = await request<AuthConfig>('/v1/auth/config');
	return config;
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
|
import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
|
||||||
import { ApiError, request } from './client';
|
import { ApiError, request, setOn401Hook } from './client';
|
||||||
import { getManga } from './mangas';
|
import { getManga } from './mangas';
|
||||||
|
|
||||||
describe('request error envelope parsing', () => {
|
describe('request error envelope parsing', () => {
|
||||||
@@ -73,3 +73,88 @@ describe('request error envelope parsing', () => {
|
|||||||
expect(err.code).toBe('http_error');
|
expect(err.code).toBe('http_error');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('on401 hook', () => {
	let fetchSpy: MockInstance<typeof globalThis.fetch>;

	/** JSON `Response` with the given status, used to mock fetch results. */
	const jsonResponse = (status: number, body: unknown): Response =>
		new Response(JSON.stringify(body), {
			status,
			headers: { 'content-type': 'application/json' }
		});

	/** Error-envelope `Response` shaped as `{ error: { code, message } }`. */
	const errorResponse = (status: number, code: string, message: string): Response =>
		jsonResponse(status, { error: { code, message } });

	beforeEach(() => {
		fetchSpy = vi.spyOn(globalThis, 'fetch');
	});

	afterEach(() => {
		vi.restoreAllMocks();
		// The hook lives in module state, so clear it here; otherwise a
		// hook installed by one test would leak into the next.
		setOn401Hook(null);
	});

	it('invokes the hook exactly once on a 401 response and re-throws', async () => {
		const hook = vi.fn();
		setOn401Hook(hook);
		fetchSpy.mockResolvedValueOnce(errorResponse(401, 'unauthenticated', 'no auth'));

		const pending = getManga('x');

		await expect(pending).rejects.toMatchObject({ status: 401, code: 'unauthenticated' });
		expect(hook).toHaveBeenCalledTimes(1);
	});

	it('does not invoke the hook on non-401 errors', async () => {
		const hook = vi.fn();
		setOn401Hook(hook);
		fetchSpy.mockResolvedValueOnce(errorResponse(404, 'not_found', 'no'));

		await expect(getManga('x')).rejects.toMatchObject({ status: 404 });

		expect(hook).not.toHaveBeenCalled();
	});

	it('does not invoke the hook on successful responses', async () => {
		const hook = vi.fn();
		setOn401Hook(hook);
		const manga = {
			id: 'm1',
			title: 't',
			status: 'ongoing',
			alt_titles: [],
			description: null,
			cover_image_path: null,
			created_at: '2026-01-01T00:00:00Z',
			updated_at: '2026-01-01T00:00:00Z',
			authors: [],
			genres: [],
			tags: []
		};
		fetchSpy.mockResolvedValueOnce(jsonResponse(200, manga));

		await getManga('m1');

		expect(hook).not.toHaveBeenCalled();
	});

	it('swallows hook exceptions so the original ApiError still propagates', async () => {
		const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
		const explodingHook = (): void => {
			throw new Error('hook boom');
		};
		setOn401Hook(explodingHook);
		fetchSpy.mockResolvedValueOnce(errorResponse(401, 'unauthenticated', 'x'));

		await expect(getManga('x')).rejects.toMatchObject({ status: 401 });

		// The API error is what reached the caller; the hook's own
		// failure was only logged.
		expect(consoleSpy).toHaveBeenCalled();
	});
});
|
||||||
|
|||||||
@@ -25,6 +25,21 @@ export class ApiError extends Error {
|
|||||||
|
|
||||||
type ErrorEnvelope = { error?: { code?: unknown; message?: unknown } };
|
type ErrorEnvelope = { error?: { code?: unknown; message?: unknown } };
|
||||||
|
|
||||||
|
/**
 * Optional callback that `request()` invokes every time it sees a 401
 * response, on any endpoint, just before it throws the `ApiError`. Used
 * by the session store to clear the cached user when the server
 * reports the session is no longer valid (expired cookie, rotated
 * server-side, password changed on another device).
 *
 * `null` means no hook is installed. Tests that don't want the side
 * effect should leave it unset.
 */
let on401Hook: (() => void) | null = null;

/**
 * Install, replace, or clear the 401 hook.
 *
 * @param handler Callback to run on each 401 response. Pass `null` or
 *   `undefined` to disable the hook; both are stored as `null` so the
 *   module state has a single "unset" representation.
 */
export function setOn401Hook(handler: (() => void) | null | undefined): void {
	on401Hook = handler ?? null;
}
|
||||||
|
|
||||||
export async function request<T>(path: string, init?: RequestInit): Promise<T> {
|
export async function request<T>(path: string, init?: RequestInit): Promise<T> {
|
||||||
// Forward credentials (session cookie) explicitly so cross-origin
|
// Forward credentials (session cookie) explicitly so cross-origin
|
||||||
// deployments — those configured via CORS_ALLOWED_ORIGINS — keep
|
// deployments — those configured via CORS_ALLOWED_ORIGINS — keep
|
||||||
@@ -54,6 +69,16 @@ export async function request<T>(path: string, init?: RequestInit): Promise<T> {
|
|||||||
} catch {
|
} catch {
|
||||||
// Body wasn't parseable; keep the http_error fallback.
|
// Body wasn't parseable; keep the http_error fallback.
|
||||||
}
|
}
|
||||||
|
if (res.status === 401 && on401Hook) {
|
||||||
|
// Fire before throwing so the session store updates even
|
||||||
|
// if the caller swallows the ApiError (e.g. the *OrEmpty
|
||||||
|
// wrappers used by guest-rendering pages).
|
||||||
|
try {
|
||||||
|
on401Hook();
|
||||||
|
} catch (e) {
|
||||||
|
console.error('on401 hook threw:', e);
|
||||||
|
}
|
||||||
|
}
|
||||||
throw new ApiError(res.status, code, message);
|
throw new ApiError(res.status, code, message);
|
||||||
}
|
}
|
||||||
// Any empty body (not just 204) returns undefined — the manga-add
|
// Any empty body (not just 204) returns undefined — the manga-add
|
||||||
|
|||||||
37
frontend/src/lib/auth-config.svelte.ts
Normal file
37
frontend/src/lib/auth-config.svelte.ts
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
// Anonymous-relevant auth policy (currently just whether self-
|
||||||
|
// registration is enabled). Loaded once per browser session on root-
|
||||||
|
// layout mount, then read reactively from `authConfig.self_register_enabled`.
|
||||||
|
//
|
||||||
|
// Defaults to `self_register_enabled = true` while loading so the
|
||||||
|
// register link doesn't flash off-and-on for the default-open case.
|
||||||
|
// If the fetch fails (network blip, backend restart), the stale value
|
||||||
|
// is kept — there's no per-request retry. A new tab will retry on its
|
||||||
|
// own mount.
|
||||||
|
//
|
||||||
|
// Same browser-only contract as `session.svelte.ts` — see that file's
|
||||||
|
// SSR comment.
|
||||||
|
|
||||||
|
import { browser } from '$app/environment';
|
||||||
|
import { getAuthConfig } from './api/auth';
|
||||||
|
|
||||||
|
/**
 * Reactive holder for the auth policy returned by `getAuthConfig()`.
 * Exposes `self_register_enabled` and a `loaded` flag, plus a `load()`
 * method that fetches the policy at most once per successful call.
 */
class AuthConfigStore {
|
||||||
|
// Starts `true`; overwritten with the value returned by `getAuthConfig()`.
self_register_enabled = $state(true);
|
||||||
|
// Set to `true` only after a fetch succeeds; stays `false` on failure.
loaded = $state(false);
|
||||||
|
// Plain (non-`$state`) in-flight flag used to ignore overlapping `load()` calls.
private loading = false;
|
||||||
|
|
||||||
|
/**
 * Fetches the auth config and copies `self_register_enabled` into the store.
 *
 * Returns immediately when the config is already loaded, a fetch is in
 * flight, or the code is not running in the browser. Fetch errors are
 * swallowed: the current value is kept and `loaded` remains `false`.
 */
async load(): Promise<void> {
|
||||||
|
if (this.loaded || this.loading || !browser) return;
|
||||||
|
this.loading = true;
|
||||||
|
try {
|
||||||
|
const cfg = await getAuthConfig();
|
||||||
|
this.self_register_enabled = cfg.self_register_enabled;
|
||||||
|
this.loaded = true;
|
||||||
|
} catch {
|
||||||
|
// Keep the optimistic default; `loaded` stays false, so a later `load()` call will retry.
|
||||||
|
} finally {
|
||||||
|
// Always release the in-flight flag, whether the fetch succeeded or failed.
this.loading = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export const authConfig = new AuthConfigStore();
|
||||||
@@ -3,7 +3,17 @@
|
|||||||
// Only mutated client-side (onMount / form submits) so the module-level
|
// Only mutated client-side (onMount / form submits) so the module-level
|
||||||
// instance can't leak across SSR requests — SSR always renders the
|
// instance can't leak across SSR requests — SSR always renders the
|
||||||
// `loaded === false` state, and the client refreshes after hydration.
|
// `loaded === false` state, and the client refreshes after hydration.
|
||||||
|
//
|
||||||
|
// IMPORTANT: do not call any `api/*` helper from `+page.server.ts` /
|
||||||
|
// `+layout.server.ts`. The `setOn401Hook` below is registered at
|
||||||
|
// module load. Without its `browser` gate the hook would also be
|
||||||
|
// installed on the server, so a 401 from a server-side fetch would
|
||||||
|
// mutate this module-level `session.user` across SvelteKit requests
|
||||||
|
// — a real cross-request state leak. The `if (browser)` guard
|
||||||
|
// prevents that mechanically rather than by convention.
|
||||||
|
|
||||||
|
import { browser } from '$app/environment';
|
||||||
|
import { setOn401Hook } from './api/client';
|
||||||
import { me, type User } from './api/auth';
|
import { me, type User } from './api/auth';
|
||||||
|
|
||||||
class SessionStore {
|
class SessionStore {
|
||||||
@@ -31,3 +41,16 @@ class SessionStore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const session = new SessionStore();
|
export const session = new SessionStore();
|
||||||
|
|
||||||
|
// When any backend call returns 401, drop the cached user. Before this
|
||||||
|
// hook, the `*OrEmpty` wrappers silently returned empty pages on 401
|
||||||
|
// — so a mid-session expiry left the UI rendering as "logged in but
|
||||||
|
// no bookmarks/collections/etc." until the user manually reloaded.
|
||||||
|
// With the hook the session.user reactive store flips to null on the
|
||||||
|
// first 401, so the layout re-renders the login affordance.
|
||||||
|
//
|
||||||
|
// Gated on `browser` so it's only installed in the client bundle.
|
||||||
|
// See the module-level comment above for the SSR rationale.
|
||||||
|
if (browser) {
|
||||||
|
setOn401Hook(() => session.setUser(null));
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import { onMount, onDestroy } from 'svelte';
|
import { onMount, onDestroy } from 'svelte';
|
||||||
import { goto } from '$app/navigation';
|
import { goto } from '$app/navigation';
|
||||||
import { logout } from '$lib/api/auth';
|
import { logout } from '$lib/api/auth';
|
||||||
|
import { authConfig } from '$lib/auth-config.svelte';
|
||||||
import { preferences } from '$lib/preferences.svelte';
|
import { preferences } from '$lib/preferences.svelte';
|
||||||
import { session } from '$lib/session.svelte';
|
import { session } from '$lib/session.svelte';
|
||||||
import { theme } from '$lib/theme.svelte';
|
import { theme } from '$lib/theme.svelte';
|
||||||
@@ -10,6 +11,7 @@
|
|||||||
import Bookmark from '@lucide/svelte/icons/bookmark';
|
import Bookmark from '@lucide/svelte/icons/bookmark';
|
||||||
import FolderOpen from '@lucide/svelte/icons/folder-open';
|
import FolderOpen from '@lucide/svelte/icons/folder-open';
|
||||||
import LogOut from '@lucide/svelte/icons/log-out';
|
import LogOut from '@lucide/svelte/icons/log-out';
|
||||||
|
import Shield from '@lucide/svelte/icons/shield';
|
||||||
import '$lib/styles/tokens.css';
|
import '$lib/styles/tokens.css';
|
||||||
|
|
||||||
let { children } = $props();
|
let { children } = $props();
|
||||||
@@ -20,6 +22,7 @@
|
|||||||
theme.init();
|
theme.init();
|
||||||
preferences.init();
|
preferences.init();
|
||||||
if (!session.loaded) session.refresh();
|
if (!session.loaded) session.refresh();
|
||||||
|
if (!authConfig.loaded) authConfig.load();
|
||||||
|
|
||||||
// Publish the header's measured height as a CSS custom
|
// Publish the header's measured height as a CSS custom
|
||||||
// property so sticky descendants (e.g. the reader nav) can
|
// property so sticky descendants (e.g. the reader nav) can
|
||||||
@@ -86,6 +89,12 @@
|
|||||||
<FolderOpen size={18} aria-hidden="true" />
|
<FolderOpen size={18} aria-hidden="true" />
|
||||||
<span>Collections</span>
|
<span>Collections</span>
|
||||||
</a>
|
</a>
|
||||||
|
{#if session.user?.is_admin}
|
||||||
|
<a class="nav-link" href="/admin" data-testid="nav-admin">
|
||||||
|
<Shield size={18} aria-hidden="true" />
|
||||||
|
<span>Admin</span>
|
||||||
|
</a>
|
||||||
|
{/if}
|
||||||
</nav>
|
</nav>
|
||||||
<div class="session" data-testid="session-area">
|
<div class="session" data-testid="session-area">
|
||||||
{#if !session.loaded}
|
{#if !session.loaded}
|
||||||
@@ -108,8 +117,10 @@
|
|||||||
</button>
|
</button>
|
||||||
{:else}
|
{:else}
|
||||||
<a class="text-link" href="/login" data-testid="nav-login">Login</a>
|
<a class="text-link" href="/login" data-testid="nav-login">Login</a>
|
||||||
|
{#if authConfig.self_register_enabled}
|
||||||
<a class="text-link" href="/register" data-testid="nav-register">Register</a>
|
<a class="text-link" href="/register" data-testid="nav-register">Register</a>
|
||||||
{/if}
|
{/if}
|
||||||
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
|
|||||||
75
frontend/src/routes/admin/+layout.svelte
Normal file
75
frontend/src/routes/admin/+layout.svelte
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { page } from '$app/stores';
|
||||||
|
let { children } = $props();
|
||||||
|
|
||||||
|
// Sidebar entries, one per admin sub-page. `href` is compared against the
// current pathname to mark the active link, and `label` is lower-cased to
// build the `admin-nav-*` test id.
const tabs = [
|
||||||
|
{ href: '/admin', label: 'Overview' },
|
||||||
|
{ href: '/admin/users', label: 'Users' },
|
||||||
|
{ href: '/admin/mangas', label: 'Mangas' },
|
||||||
|
{ href: '/admin/system', label: 'System' }
|
||||||
|
];
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="admin-frame">
|
||||||
|
<aside aria-label="admin">
|
||||||
|
<h2>Admin</h2>
|
||||||
|
<nav>
|
||||||
|
{#each tabs as t (t.href)}
|
||||||
|
<a
|
||||||
|
href={t.href}
|
||||||
|
class:active={$page.url.pathname === t.href}
|
||||||
|
data-testid={`admin-nav-${t.label.toLowerCase()}`}
|
||||||
|
>
|
||||||
|
{t.label}
|
||||||
|
</a>
|
||||||
|
{/each}
|
||||||
|
</nav>
|
||||||
|
</aside>
|
||||||
|
<section>
|
||||||
|
{@render children()}
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.admin-frame {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 12rem 1fr;
|
||||||
|
gap: var(--space-4);
|
||||||
|
align-items: start;
|
||||||
|
}
|
||||||
|
@media (max-width: 600px) {
|
||||||
|
.admin-frame {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
aside {
|
||||||
|
position: sticky;
|
||||||
|
top: calc(var(--app-header-h) + var(--space-4));
|
||||||
|
}
|
||||||
|
aside h2 {
|
||||||
|
margin: 0 0 var(--space-3) 0;
|
||||||
|
font-size: var(--font-base);
|
||||||
|
color: var(--text-muted);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.04em;
|
||||||
|
}
|
||||||
|
nav {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--space-1);
|
||||||
|
}
|
||||||
|
nav a {
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
color: var(--text);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
}
|
||||||
|
nav a:hover {
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
nav a.active {
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
font-weight: var(--weight-semibold);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
31
frontend/src/routes/admin/+layout.ts
Normal file
31
frontend/src/routes/admin/+layout.ts
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
// /admin gate. The backend's RequireAdmin extractor is the actual
|
||||||
|
// security boundary — this load function just calls a tiny admin
|
||||||
|
// endpoint and translates the response into either a redirect (no
|
||||||
|
// session) or SvelteKit's framework error page (403 forbidden).
|
||||||
|
// The session.user?.is_admin check elsewhere is UX only.
|
||||||
|
//
|
||||||
|
// `ssr=false` because the session store is browser-only (see
|
||||||
|
// $lib/session.svelte.ts) — server-side load can't read the cookie
|
||||||
|
// anyway in this app's deployment shape.
|
||||||
|
|
||||||
|
import { error, redirect } from '@sveltejs/kit';
|
||||||
|
import { ApiError } from '$lib/api/client';
|
||||||
|
import { getSystemStats } from '$lib/api/admin';
|
||||||
|
import type { LayoutLoad } from './$types';
|
||||||
|
|
||||||
|
export const ssr = false;
|
||||||
|
|
||||||
|
/**
 * Layout load for `/admin`: fetches system stats and uses the outcome as
 * the access check for the whole section.
 *
 * @returns `{ stats }` when `getSystemStats()` succeeds.
 * @throws A 302 redirect to `/login` when the API answers 401, a 403
 *   error ('admin access required') when it answers 403, and the
 *   original error unchanged for anything else.
 */
export const load: LayoutLoad = async () => {
|
||||||
|
try {
|
||||||
|
const stats = await getSystemStats();
|
||||||
|
return { stats };
|
||||||
|
} catch (e) {
|
||||||
|
// 401: no valid session, send the visitor to the login page.
if (e instanceof ApiError && e.status === 401) {
|
||||||
|
throw redirect(302, '/login');
|
||||||
|
}
|
||||||
|
// 403: signed in but not an admin, surface the framework error page.
if (e instanceof ApiError && e.status === 403) {
|
||||||
|
throw error(403, 'admin access required');
|
||||||
|
}
|
||||||
|
// Any other failure (non-ApiError or other status) propagates as-is.
throw e;
|
||||||
|
}
|
||||||
|
};
|
||||||
104
frontend/src/routes/admin/+page.svelte
Normal file
104
frontend/src/routes/admin/+page.svelte
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import type { LayoutData } from './$types';
|
||||||
|
let { data }: { data: LayoutData } = $props();
|
||||||
|
const stats = $derived(data.stats);
|
||||||
|
|
||||||
|
/**
 * Formats a number as a percentage string with one decimal place.
 *
 * @param n - Value already expressed in percent units (it is not multiplied by 100).
 * @returns `n` rendered with `toFixed(1)` followed by `%`.
 */
function fmtPercent(n: number): string {
|
||||||
|
return `${n.toFixed(1)}%`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Formats a byte count using binary (1024-based) units.
 *
 * @param n - Number of bytes.
 * @returns The scaled value with one decimal place and a unit from
 *   `B` to `TiB`; values beyond the largest unit stay expressed in `TiB`.
 */
function fmtBytes(n: number): string {
|
||||||
|
const units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
|
||||||
|
let i = 0;
|
||||||
|
let v = n;
|
||||||
|
// Divide by 1024 until the value drops below 1024 or the last unit is reached.
while (v >= 1024 && i < units.length - 1) {
|
||||||
|
v /= 1024;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return `${v.toFixed(1)} ${units[i]}`;
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<h1>Overview</h1>
|
||||||
|
|
||||||
|
{#if stats.alerts.length > 0}
|
||||||
|
<section class="alerts" data-testid="admin-alerts">
|
||||||
|
{#each stats.alerts as a (a.message)}
|
||||||
|
<div class="alert" data-level={a.level}>{a.message}</div>
|
||||||
|
{/each}
|
||||||
|
</section>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<section class="cards">
|
||||||
|
{#if stats.disk}
|
||||||
|
<article class="card">
|
||||||
|
<h3>Disk</h3>
|
||||||
|
<p class="metric">{fmtPercent(stats.disk.percent_used)}</p>
|
||||||
|
<p class="sub">{fmtBytes(stats.disk.used_bytes)} of {fmtBytes(stats.disk.total_bytes)} used</p>
|
||||||
|
</article>
|
||||||
|
{:else}
|
||||||
|
<article class="card muted">
|
||||||
|
<h3>Disk</h3>
|
||||||
|
<p>n/a (non-local storage)</p>
|
||||||
|
</article>
|
||||||
|
{/if}
|
||||||
|
<article class="card">
|
||||||
|
<h3>Memory</h3>
|
||||||
|
<p class="metric">{fmtPercent(stats.memory.percent_used)}</p>
|
||||||
|
<p class="sub">{fmtBytes(stats.memory.used_bytes)} of {fmtBytes(stats.memory.total_bytes)} used</p>
|
||||||
|
</article>
|
||||||
|
<article class="card">
|
||||||
|
<h3>CPU</h3>
|
||||||
|
<p class="metric">{fmtPercent(stats.cpu.percent_used)}</p>
|
||||||
|
<p class="sub">global load</p>
|
||||||
|
</article>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
h1 {
|
||||||
|
margin: 0 0 var(--space-4) 0;
|
||||||
|
}
|
||||||
|
.alerts {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--space-2);
|
||||||
|
margin-bottom: var(--space-4);
|
||||||
|
}
|
||||||
|
.alert {
|
||||||
|
padding: var(--space-3);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
border-left: 4px solid var(--warning, #f59e0b);
|
||||||
|
}
|
||||||
|
.cards {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(14rem, 1fr));
|
||||||
|
gap: var(--space-3);
|
||||||
|
}
|
||||||
|
.card {
|
||||||
|
padding: var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface);
|
||||||
|
}
|
||||||
|
.card.muted {
|
||||||
|
opacity: 0.6;
|
||||||
|
}
|
||||||
|
.card h3 {
|
||||||
|
margin: 0 0 var(--space-2) 0;
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
color: var(--text-muted);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.04em;
|
||||||
|
}
|
||||||
|
.metric {
|
||||||
|
margin: 0;
|
||||||
|
font-size: var(--font-xl, 1.5rem);
|
||||||
|
font-weight: var(--weight-semibold);
|
||||||
|
}
|
||||||
|
.sub {
|
||||||
|
margin: var(--space-1) 0 0 0;
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
280
frontend/src/routes/admin/mangas/+page.svelte
Normal file
280
frontend/src/routes/admin/mangas/+page.svelte
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import {
|
||||||
|
listAdminMangas,
|
||||||
|
listAdminChapters,
|
||||||
|
type AdminMangasPage,
|
||||||
|
type AdminChapterRow,
|
||||||
|
type MangaSyncState
|
||||||
|
} from '$lib/api/admin';
|
||||||
|
import { ApiError } from '$lib/api/client';
|
||||||
|
|
||||||
|
let mangasPage: AdminMangasPage | null = $state(null);
|
||||||
|
let search = $state('');
|
||||||
|
let syncFilter: MangaSyncState | '' = $state('');
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
let expandedId: string | null = $state(null);
|
||||||
|
type ChaptersView = {
|
||||||
|
items: AdminChapterRow[];
|
||||||
|
total: number;
|
||||||
|
};
|
||||||
|
let chaptersByManga: Record<string, ChaptersView | 'loading'> = $state({});
|
||||||
|
|
||||||
|
/**
 * Fetches the manga list for the current search text and sync-state
 * filter and stores it in `mangasPage`.
 *
 * Clears `error` before the request. On failure, `error` receives the
 * `ApiError` message (or 'load failed' for any other error) and
 * `mangasPage` keeps its previous value.
 */
async function load() {
|
||||||
|
error = null;
|
||||||
|
try {
|
||||||
|
mangasPage = await listAdminMangas({
|
||||||
|
// Blank search text and the empty "all states" filter are sent as `undefined`.
search: search.trim() || undefined,
|
||||||
|
syncState: syncFilter || undefined,
|
||||||
|
limit: 100
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof ApiError ? e.message : 'load failed';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(load);
|
||||||
|
|
||||||
|
/**
 * Expands or collapses the chapter list for one manga row.
 *
 * Clicking the already-expanded row collapses it. Otherwise the row is
 * expanded and, if nothing is cached for it yet, its chapters are fetched
 * and cached in `chaptersByManga`.
 *
 * @param id - Id of the manga whose row was clicked.
 */
async function toggleChapters(id: string) {
|
||||||
|
if (expandedId === id) {
|
||||||
|
expandedId = null;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
expandedId = id;
|
||||||
|
// Skip the fetch when an entry exists, whether it is 'loading' or already loaded.
if (!chaptersByManga[id]) {
|
||||||
|
chaptersByManga[id] = 'loading';
|
||||||
|
try {
|
||||||
|
const resp = await listAdminChapters(id, { limit: 500 });
|
||||||
|
chaptersByManga[id] = {
|
||||||
|
items: resp.items,
|
||||||
|
// Fall back to the number of returned rows when the response carries no total.
total: resp.page.total ?? resp.items.length
|
||||||
|
};
|
||||||
|
} catch {
|
||||||
|
// Drop the 'loading' marker so the next expand retries the fetch.
delete chaptersByManga[id];
|
||||||
|
error = 'failed to load chapters';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the CSS class list for a sync-state badge.
 *
 * @param state - Sync state value, used verbatim as the class suffix.
 * @returns `badge` plus a `badge-<state>` modifier class.
 */
function badgeClass(state: string): string {
|
||||||
|
return `badge badge-${state}`;
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<h1>Mangas</h1>
|
||||||
|
|
||||||
|
<form
|
||||||
|
onsubmit={(e) => {
|
||||||
|
e.preventDefault();
|
||||||
|
load();
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="search"
|
||||||
|
placeholder="search by title"
|
||||||
|
bind:value={search}
|
||||||
|
data-testid="admin-mangas-search"
|
||||||
|
/>
|
||||||
|
<select bind:value={syncFilter} aria-label="sync state">
|
||||||
|
<option value="">all states</option>
|
||||||
|
<option value="in_progress">in progress</option>
|
||||||
|
<option value="dropped">dropped</option>
|
||||||
|
<option value="synced">synced</option>
|
||||||
|
</select>
|
||||||
|
<button type="submit">Search</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<p class="error" role="alert">{error}</p>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#if mangasPage}
|
||||||
|
<p class="total">{mangasPage.page.total ?? mangasPage.items.length} mangas</p>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>Sync</th>
|
||||||
|
<th>Chapters</th>
|
||||||
|
<th>Last seen</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{#each mangasPage.items as m (m.id)}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<button class="link" onclick={() => toggleChapters(m.id)}>
|
||||||
|
{expandedId === m.id ? '▼' : '▶'} {m.title}
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
<td><span class={badgeClass(m.sync_state)}>{m.sync_state}</span></td>
|
||||||
|
<td>{m.chapter_count}</td>
|
||||||
|
<td>
|
||||||
|
{m.latest_seen_at
|
||||||
|
? new Date(m.latest_seen_at).toLocaleDateString()
|
||||||
|
: '—'}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{#if expandedId === m.id}
|
||||||
|
<tr class="chapter-row">
|
||||||
|
<td colspan="4">
|
||||||
|
{#if chaptersByManga[m.id] === 'loading'}
|
||||||
|
<p>Loading chapters…</p>
|
||||||
|
{:else if chaptersByManga[m.id]}
|
||||||
|
{@const view = chaptersByManga[m.id] as ChaptersView}
|
||||||
|
{#if view.items.length === 0}
|
||||||
|
<p class="muted">No chapters.</p>
|
||||||
|
{:else}
|
||||||
|
{#if view.total > view.items.length}
|
||||||
|
<p class="muted">
|
||||||
|
Showing first {view.items.length} of {view.total}
|
||||||
|
chapters (cap reached).
|
||||||
|
</p>
|
||||||
|
{/if}
|
||||||
|
<table class="inner">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>#</th>
|
||||||
|
<th>Title</th>
|
||||||
|
<th>Pages</th>
|
||||||
|
<th>Sync</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{#each view.items as c (c.id)}
|
||||||
|
<tr>
|
||||||
|
<td>{c.number}</td>
|
||||||
|
<td>{c.title ?? '—'}</td>
|
||||||
|
<td>{c.page_count}</td>
|
||||||
|
<td>
|
||||||
|
<span class={badgeClass(c.sync_state)}>
|
||||||
|
{c.sync_state}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{/if}
|
||||||
|
{/if}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{/if}
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{:else}
|
||||||
|
<p>Loading…</p>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
h1 {
|
||||||
|
margin: 0 0 var(--space-4) 0;
|
||||||
|
}
|
||||||
|
form {
|
||||||
|
display: flex;
|
||||||
|
gap: var(--space-2);
|
||||||
|
margin-bottom: var(--space-3);
|
||||||
|
}
|
||||||
|
input[type='search'] {
|
||||||
|
flex: 1;
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
button {
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
button.link {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
padding: 0;
|
||||||
|
color: var(--text);
|
||||||
|
cursor: pointer;
|
||||||
|
font-weight: inherit;
|
||||||
|
}
|
||||||
|
button.link:hover {
|
||||||
|
color: var(--primary);
|
||||||
|
}
|
||||||
|
.total {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
margin: 0 0 var(--space-2) 0;
|
||||||
|
}
|
||||||
|
table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
th,
|
||||||
|
td {
|
||||||
|
padding: var(--space-2);
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
.chapter-row td {
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
}
|
||||||
|
table.inner {
|
||||||
|
margin: var(--space-2) 0;
|
||||||
|
}
|
||||||
|
.badge {
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0 var(--space-2);
|
||||||
|
border-radius: var(--radius-sm, 4px);
|
||||||
|
font-size: var(--font-xs, 0.75rem);
|
||||||
|
font-weight: var(--weight-semibold);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.04em;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: var(--surface);
|
||||||
|
}
|
||||||
|
.badge-in_progress,
|
||||||
|
.badge-downloading {
|
||||||
|
background: #fef3c7;
|
||||||
|
color: #92400e;
|
||||||
|
border-color: #fcd34d;
|
||||||
|
}
|
||||||
|
.badge-dropped {
|
||||||
|
background: #fee2e2;
|
||||||
|
color: #991b1b;
|
||||||
|
border-color: #fca5a5;
|
||||||
|
}
|
||||||
|
.badge-failed {
|
||||||
|
background: #fee2e2;
|
||||||
|
color: #991b1b;
|
||||||
|
border-color: #fca5a5;
|
||||||
|
}
|
||||||
|
.badge-not_downloaded {
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
.badge-synced {
|
||||||
|
background: #dcfce7;
|
||||||
|
color: #166534;
|
||||||
|
border-color: #86efac;
|
||||||
|
}
|
||||||
|
.muted {
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
.error {
|
||||||
|
color: var(--danger, #dc2626);
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border: 1px solid var(--danger, #dc2626);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
margin-bottom: var(--space-3);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
203
frontend/src/routes/admin/system/+page.svelte
Normal file
203
frontend/src/routes/admin/system/+page.svelte
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount, onDestroy } from 'svelte';
|
||||||
|
import { getSystemStats, type SystemStats } from '$lib/api/admin';
|
||||||
|
|
||||||
|
let stats: SystemStats | null = $state(null);
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
let timer: ReturnType<typeof setInterval> | null = null;
|
||||||
|
|
||||||
|
/**
 * Fetches fresh system stats into `stats`.
 *
 * On success `error` is cleared. On failure `error` is set to the
 * error's message (or 'refresh failed' for non-Error values) and the
 * previously fetched `stats` are left in place.
 */
async function refresh() {
|
||||||
|
try {
|
||||||
|
stats = await getSystemStats();
|
||||||
|
error = null;
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof Error ? e.message : 'refresh failed';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(() => {
|
||||||
|
refresh();
|
||||||
|
timer = setInterval(refresh, 5000);
|
||||||
|
});
|
||||||
|
onDestroy(() => {
|
||||||
|
if (timer) clearInterval(timer);
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Formats a byte count using binary (1024-based) units.
 *
 * @param n - Number of bytes.
 * @returns The scaled value with two decimal places and a unit from
 *   `B` to `TiB`; values beyond the largest unit stay expressed in `TiB`.
 */
function fmtBytes(n: number): string {
|
||||||
|
const units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
|
||||||
|
let i = 0;
|
||||||
|
let v = n;
|
||||||
|
// Divide by 1024 until the value drops below 1024 or the last unit is reached.
while (v >= 1024 && i < units.length - 1) {
|
||||||
|
v /= 1024;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return `${v.toFixed(2)} ${units[i]}`;
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<h1>System</h1>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<p class="error" role="alert">{error}</p>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#if stats}
|
||||||
|
{#if stats.alerts.length > 0}
|
||||||
|
<section class="alerts">
|
||||||
|
{#each stats.alerts as a (a.message)}
|
||||||
|
<div class="alert">{a.message}</div>
|
||||||
|
{/each}
|
||||||
|
</section>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<section class="grid">
|
||||||
|
<article>
|
||||||
|
<h2>Disk (storage_dir)</h2>
|
||||||
|
{#if stats.disk}
|
||||||
|
{@render Bar({ percent: stats.disk.percent_used })}
|
||||||
|
<dl>
|
||||||
|
<dt>Total</dt>
|
||||||
|
<dd>{fmtBytes(stats.disk.total_bytes)}</dd>
|
||||||
|
<dt>Used</dt>
|
||||||
|
<dd>{fmtBytes(stats.disk.used_bytes)}</dd>
|
||||||
|
<dt>Free</dt>
|
||||||
|
<dd>{fmtBytes(stats.disk.free_bytes)}</dd>
|
||||||
|
</dl>
|
||||||
|
{:else}
|
||||||
|
<p class="muted">n/a — non-local storage backend</p>
|
||||||
|
{/if}
|
||||||
|
</article>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
<h2>Memory</h2>
|
||||||
|
{@render Bar({ percent: stats.memory.percent_used })}
|
||||||
|
<dl>
|
||||||
|
<dt>Total</dt>
|
||||||
|
<dd>{fmtBytes(stats.memory.total_bytes)}</dd>
|
||||||
|
<dt>Used</dt>
|
||||||
|
<dd>{fmtBytes(stats.memory.used_bytes)}</dd>
|
||||||
|
</dl>
|
||||||
|
</article>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
<h2>CPU</h2>
|
||||||
|
{@render Bar({ percent: stats.cpu.percent_used })}
|
||||||
|
</article>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<p class="hint">refreshing every 5 s</p>
|
||||||
|
{:else}
|
||||||
|
<p>Loading…</p>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#snippet Bar({ percent }: { percent: number })}
|
||||||
|
<div
|
||||||
|
class="bar"
|
||||||
|
role="progressbar"
|
||||||
|
aria-valuenow={percent}
|
||||||
|
aria-valuemin="0"
|
||||||
|
aria-valuemax="100"
|
||||||
|
aria-label="{percent.toFixed(1)}% used"
|
||||||
|
>
|
||||||
|
<div
|
||||||
|
class="fill"
|
||||||
|
class:high={percent >= 90}
|
||||||
|
class:mid={percent >= 70 && percent < 90}
|
||||||
|
style:width="{Math.min(100, Math.max(0, percent))}%"
|
||||||
|
></div>
|
||||||
|
<span class="label">{percent.toFixed(1)}%</span>
|
||||||
|
</div>
|
||||||
|
{/snippet}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
h1 {
|
||||||
|
margin: 0 0 var(--space-4) 0;
|
||||||
|
}
|
||||||
|
.grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(16rem, 1fr));
|
||||||
|
gap: var(--space-3);
|
||||||
|
}
|
||||||
|
article {
|
||||||
|
padding: var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface);
|
||||||
|
}
|
||||||
|
article h2 {
|
||||||
|
margin: 0 0 var(--space-3) 0;
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
color: var(--text-muted);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.04em;
|
||||||
|
}
|
||||||
|
.bar {
|
||||||
|
position: relative;
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
border-radius: var(--radius-sm, 4px);
|
||||||
|
height: 1.5rem;
|
||||||
|
margin-bottom: var(--space-2);
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
.fill {
|
||||||
|
height: 100%;
|
||||||
|
background: #22c55e;
|
||||||
|
transition: width 0.3s ease, background 0.3s ease;
|
||||||
|
}
|
||||||
|
.fill.mid {
|
||||||
|
background: #f59e0b;
|
||||||
|
}
|
||||||
|
.fill.high {
|
||||||
|
background: #dc2626;
|
||||||
|
}
|
||||||
|
.label {
|
||||||
|
position: absolute;
|
||||||
|
top: 50%;
|
||||||
|
left: 50%;
|
||||||
|
transform: translate(-50%, -50%);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
font-weight: var(--weight-semibold);
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
dl {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: max-content 1fr;
|
||||||
|
gap: var(--space-1) var(--space-3);
|
||||||
|
margin: 0;
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
}
|
||||||
|
dt {
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
dd {
|
||||||
|
margin: 0;
|
||||||
|
font-family: var(--font-mono, monospace);
|
||||||
|
}
|
||||||
|
.alerts {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--space-2);
|
||||||
|
margin-bottom: var(--space-4);
|
||||||
|
}
|
||||||
|
.alert {
|
||||||
|
padding: var(--space-3);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
border-left: 4px solid #f59e0b;
|
||||||
|
}
|
||||||
|
.hint {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
margin-top: var(--space-3);
|
||||||
|
}
|
||||||
|
.muted {
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
.error {
|
||||||
|
color: var(--danger, #dc2626);
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border: 1px solid var(--danger, #dc2626);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
margin-bottom: var(--space-3);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
328
frontend/src/routes/admin/users/+page.svelte
Normal file
328
frontend/src/routes/admin/users/+page.svelte
Normal file
@@ -0,0 +1,328 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
|
import {
|
||||||
|
listAdminUsers,
|
||||||
|
deleteAdminUser,
|
||||||
|
setUserAdmin,
|
||||||
|
createAdminUser,
|
||||||
|
type AdminUsersPage
|
||||||
|
} from '$lib/api/admin';
|
||||||
|
import { ApiError } from '$lib/api/client';
|
||||||
|
import { session } from '$lib/session.svelte';
|
||||||
|
|
||||||
|
let page: AdminUsersPage | null = $state(null);
|
||||||
|
let search = $state('');
|
||||||
|
let error: string | null = $state(null);
|
||||||
|
let busyId: string | null = $state(null);
|
||||||
|
|
||||||
|
// Create-user form (collapsed by default).
|
||||||
|
let showCreate = $state(false);
|
||||||
|
let newUsername = $state('');
|
||||||
|
let newPassword = $state('');
|
||||||
|
let newIsAdmin = $state(false);
|
||||||
|
let createError: string | null = $state(null);
|
||||||
|
let creating = $state(false);
|
||||||
|
|
||||||
|
/**
 * Fetches the user list for the current search text and stores it in `page`.
 *
 * Clears `error` before the request. On failure, `error` receives the
 * `ApiError` message (or 'load failed' for any other error) and `page`
 * keeps its previous value. Request errors are caught here and never
 * rethrown to the caller.
 */
async function load() {
|
||||||
|
error = null;
|
||||||
|
try {
|
||||||
|
page = await listAdminUsers({
|
||||||
|
// Blank search text is sent as `undefined`.
search: search.trim() || undefined,
|
||||||
|
limit: 100
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof ApiError ? e.message : 'load failed';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
onMount(load);
|
||||||
|
|
||||||
|
/**
 * Deletes a user after a browser confirmation prompt, then reloads the list.
 *
 * `busyId` holds the user's id while the request runs and is reset in
 * `finally`. A failed delete sets `error` to the `ApiError` message or
 * 'delete failed'.
 *
 * @param id - Id of the user to delete.
 */
async function onDelete(id: string) {
|
||||||
|
// Nothing happens when the confirmation dialog is dismissed.
if (!confirm('Delete this user? This cannot be undone.')) return;
|
||||||
|
busyId = id;
|
||||||
|
try {
|
||||||
|
await deleteAdminUser(id);
|
||||||
|
await load();
|
||||||
|
} catch (e) {
|
||||||
|
error = e instanceof ApiError ? e.message : 'delete failed';
|
||||||
|
} finally {
|
||||||
|
busyId = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Set or clear the admin flag on a user, then reload the list.
 *
 * `busyId` holds the id for the duration of the request and is reset in
 * `finally`. A failure is surfaced through `error`: the `ApiError`
 * message when available, otherwise 'update failed'.
 *
 * @param id - id of the user being changed
 * @param next - the admin state to apply
 */
async function onToggleAdmin(id: string, next: boolean) {
	busyId = id;
	try {
		await setUserAdmin(id, next);
		await load();
	} catch (err) {
		let message = 'update failed';
		if (err instanceof ApiError) {
			message = err.message;
		}
		error = message;
	} finally {
		busyId = null;
	}
}
|
||||||
|
|
||||||
|
/**
 * Submit handler for the create-user form.
 *
 * Prevents the native form submission, then sends the trimmed username,
 * the password as typed, and the admin flag to `createAdminUser`. On
 * success the form fields are reset, the form is collapsed, and the
 * list is reloaded so the new row is visible. On failure `createError`
 * receives the `ApiError` message, or 'create failed' for anything
 * else. `creating` is true for the duration of the call.
 *
 * @param e - the form's submit event
 */
async function onCreate(e: SubmitEvent) {
	e.preventDefault();
	createError = null;
	creating = true;

	try {
		const payload = {
			username: newUsername.trim(),
			password: newPassword,
			is_admin: newIsAdmin
		};
		await createAdminUser(payload);

		// Clear the inputs and collapse the form before refreshing the table.
		newUsername = '';
		newPassword = '';
		newIsAdmin = false;
		showCreate = false;
		await load();
	} catch (err) {
		if (err instanceof ApiError) {
			createError = err.message;
		} else {
			createError = 'create failed';
		}
	} finally {
		creating = false;
	}
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<h1>Users</h1>
|
||||||
|
|
||||||
|
<div class="toolbar">
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onclick={() => (showCreate = !showCreate)}
|
||||||
|
data-testid="admin-users-toggle-create"
|
||||||
|
>
|
||||||
|
{showCreate ? 'Cancel' : 'Create user'}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if showCreate}
|
||||||
|
<form class="create-form" onsubmit={onCreate} data-testid="admin-users-create-form">
|
||||||
|
<label class="field">
|
||||||
|
<span>Username</span>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
bind:value={newUsername}
|
||||||
|
minlength="3"
|
||||||
|
maxlength="32"
|
||||||
|
required
|
||||||
|
autocomplete="off"
|
||||||
|
data-testid="admin-users-create-username"
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
<span>Password</span>
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
bind:value={newPassword}
|
||||||
|
minlength="8"
|
||||||
|
required
|
||||||
|
autocomplete="new-password"
|
||||||
|
data-testid="admin-users-create-password"
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
<label class="field-inline">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
bind:checked={newIsAdmin}
|
||||||
|
data-testid="admin-users-create-is-admin"
|
||||||
|
/>
|
||||||
|
<span>Make admin</span>
|
||||||
|
</label>
|
||||||
|
<button
|
||||||
|
type="submit"
|
||||||
|
class="primary"
|
||||||
|
disabled={creating}
|
||||||
|
data-testid="admin-users-create-submit"
|
||||||
|
>
|
||||||
|
{creating ? 'Creating…' : 'Create'}
|
||||||
|
</button>
|
||||||
|
{#if createError}
|
||||||
|
<p class="error" role="alert" data-testid="admin-users-create-error">
|
||||||
|
{createError}
|
||||||
|
</p>
|
||||||
|
{/if}
|
||||||
|
</form>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<form
|
||||||
|
onsubmit={(e) => {
|
||||||
|
e.preventDefault();
|
||||||
|
load();
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="search"
|
||||||
|
placeholder="search by username"
|
||||||
|
bind:value={search}
|
||||||
|
data-testid="admin-users-search"
|
||||||
|
/>
|
||||||
|
<button type="submit">Search</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
{#if error}
|
||||||
|
<p class="error" role="alert">{error}</p>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#if page}
|
||||||
|
<p class="total">{page.page.total ?? page.items.length} users</p>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Username</th>
|
||||||
|
<th>Admin</th>
|
||||||
|
<th>Created</th>
|
||||||
|
<th class="actions">Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{#each page.items as u (u.id)}
|
||||||
|
{@const isSelf = session.user?.id === u.id}
|
||||||
|
<tr>
|
||||||
|
<td>{u.username}{#if isSelf}<span class="self"> (you)</span>{/if}</td>
|
||||||
|
<td>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={u.is_admin}
|
||||||
|
disabled={busyId === u.id || isSelf}
|
||||||
|
onchange={(e) => onToggleAdmin(u.id, e.currentTarget.checked)}
|
||||||
|
aria-label="admin"
|
||||||
|
/>
|
||||||
|
</td>
|
||||||
|
<td>{new Date(u.created_at).toLocaleDateString()}</td>
|
||||||
|
<td class="actions">
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="danger"
|
||||||
|
disabled={busyId === u.id || isSelf}
|
||||||
|
onclick={() => onDelete(u.id)}
|
||||||
|
>
|
||||||
|
Delete
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{/each}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{:else}
|
||||||
|
<p>Loading…</p>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
h1 {
|
||||||
|
margin: 0 0 var(--space-4) 0;
|
||||||
|
}
|
||||||
|
form {
|
||||||
|
display: flex;
|
||||||
|
gap: var(--space-2);
|
||||||
|
margin-bottom: var(--space-3);
|
||||||
|
}
|
||||||
|
input[type='search'] {
|
||||||
|
flex: 1;
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
button {
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
button.danger {
|
||||||
|
color: var(--danger, #dc2626);
|
||||||
|
border-color: var(--danger, #dc2626);
|
||||||
|
}
|
||||||
|
button:disabled {
|
||||||
|
opacity: 0.5;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
.total {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
margin: 0 0 var(--space-2) 0;
|
||||||
|
}
|
||||||
|
table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
th,
|
||||||
|
td {
|
||||||
|
padding: var(--space-2);
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
.actions {
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
.self {
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
}
|
||||||
|
.error {
|
||||||
|
color: var(--danger, #dc2626);
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border: 1px solid var(--danger, #dc2626);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
margin-bottom: var(--space-3);
|
||||||
|
}
|
||||||
|
.toolbar {
|
||||||
|
display: flex;
|
||||||
|
justify-content: flex-end;
|
||||||
|
margin-bottom: var(--space-2);
|
||||||
|
}
|
||||||
|
.create-form {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: 1fr 1fr auto auto;
|
||||||
|
gap: var(--space-3);
|
||||||
|
align-items: end;
|
||||||
|
padding: var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
margin-bottom: var(--space-3);
|
||||||
|
}
|
||||||
|
@media (max-width: 600px) {
|
||||||
|
.create-form {
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.create-form .field {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--space-1);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
}
|
||||||
|
.create-form .field input {
|
||||||
|
padding: var(--space-2) var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
}
|
||||||
|
.create-form .field-inline {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: var(--space-2);
|
||||||
|
font-size: var(--font-sm);
|
||||||
|
}
|
||||||
|
.create-form .primary {
|
||||||
|
background: var(--primary);
|
||||||
|
color: var(--primary-contrast);
|
||||||
|
border-color: var(--primary);
|
||||||
|
}
|
||||||
|
.create-form .primary:hover:not(:disabled) {
|
||||||
|
background: var(--primary-hover);
|
||||||
|
border-color: var(--primary-hover);
|
||||||
|
}
|
||||||
|
.create-form .error {
|
||||||
|
grid-column: 1 / -1;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
@@ -350,30 +350,24 @@
|
|||||||
});
|
});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* `fetch()` initiated during `pagehide` / `beforeunload` is
|
* Flush read-progress as the tab is closing. A plain `fetch()`
|
||||||
* cancelled by every browser by default. `sendBeacon` is the
|
* during `pagehide` / `beforeunload` is cancelled by every
|
||||||
* supported way to ship a small payload during unload — it's
|
* browser; `fetch(..., { keepalive: true })` is the supported
|
||||||
* guaranteed to survive even if the tab is closing. Failure here
|
* escape hatch and survives the close.
|
||||||
* is silent because the API is fire-and-forget.
|
*
|
||||||
|
* `sendBeacon` would be the textbook alternative, but it's
|
||||||
|
* POST-only and `/me/read-progress` takes PUT — so a beacon
|
||||||
|
* always 405s, adds server-log noise, then falls through to this
|
||||||
|
* same keepalive path anyway. The beacon was dropped; the
|
||||||
|
* keepalive fetch is the only path.
|
||||||
*/
|
*/
|
||||||
function beaconFinalProgress() {
|
function flushFinalProgress() {
|
||||||
if (!session.user) return;
|
if (!session.user) return;
|
||||||
const body = JSON.stringify({
|
const body = JSON.stringify({
|
||||||
manga_id: manga.id,
|
manga_id: manga.id,
|
||||||
chapter_id: chapter.id,
|
chapter_id: chapter.id,
|
||||||
page: progressPage
|
page: progressPage
|
||||||
});
|
});
|
||||||
const blob = new Blob([body], { type: 'application/json' });
|
|
||||||
// sendBeacon only supports POST — the server's PUT route is
|
|
||||||
// strict on method. The dedicated POST alias is omitted; in
|
|
||||||
// practice the in-app navigation path (back-link, chapter
|
|
||||||
// links) already covers the common-case unmount via the
|
|
||||||
// onDestroy fetch. Fall through to fetch+keepalive for browser
|
|
||||||
// implementations that don't honor sendBeacon for this endpoint.
|
|
||||||
try {
|
|
||||||
const ok = navigator.sendBeacon('/api/v1/me/read-progress', blob);
|
|
||||||
if (!ok) throw new Error('sendBeacon rejected');
|
|
||||||
} catch {
|
|
||||||
try {
|
try {
|
||||||
void fetch('/api/v1/me/read-progress', {
|
void fetch('/api/v1/me/read-progress', {
|
||||||
method: 'PUT',
|
method: 'PUT',
|
||||||
@@ -383,21 +377,21 @@
|
|||||||
credentials: 'include'
|
credentials: 'include'
|
||||||
});
|
});
|
||||||
} catch {
|
} catch {
|
||||||
// Final fallback failed; the in-app onDestroy flush
|
// keepalive fetch was rejected (very old Firefox etc.);
|
||||||
// below catches the SPA-navigation case.
|
// the in-app onDestroy flush below catches the SPA-
|
||||||
}
|
// navigation case, which is the common one anyway.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
window.addEventListener('pagehide', beaconFinalProgress);
|
window.addEventListener('pagehide', flushFinalProgress);
|
||||||
});
|
});
|
||||||
|
|
||||||
onDestroy(() => {
|
onDestroy(() => {
|
||||||
observer?.disconnect();
|
observer?.disconnect();
|
||||||
if (progressTimer) clearTimeout(progressTimer);
|
if (progressTimer) clearTimeout(progressTimer);
|
||||||
if (typeof window !== 'undefined') {
|
if (typeof window !== 'undefined') {
|
||||||
window.removeEventListener('pagehide', beaconFinalProgress);
|
window.removeEventListener('pagehide', flushFinalProgress);
|
||||||
}
|
}
|
||||||
// Don't let the fullscreen flag leak to non-reader pages —
|
// Don't let the fullscreen flag leak to non-reader pages —
|
||||||
// otherwise the layout header would stay slid-off on /upload
|
// otherwise the layout header would stay slid-off on /upload
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
import { onMount } from 'svelte';
|
||||||
import { goto } from '$app/navigation';
|
import { goto } from '$app/navigation';
|
||||||
import { register } from '$lib/api/auth';
|
import { register } from '$lib/api/auth';
|
||||||
|
import { authConfig } from '$lib/auth-config.svelte';
|
||||||
import { session } from '$lib/session.svelte';
|
import { session } from '$lib/session.svelte';
|
||||||
|
|
||||||
let username = $state('');
|
let username = $state('');
|
||||||
@@ -8,6 +10,13 @@
|
|||||||
let error: string | null = $state(null);
|
let error: string | null = $state(null);
|
||||||
let submitting = $state(false);
|
let submitting = $state(false);
|
||||||
|
|
||||||
|
// Direct navigation to /register bypasses the root layout's
|
||||||
|
// onMount — re-trigger the config load here so the disabled state
|
||||||
|
// renders correctly even when this is the first page hit.
|
||||||
|
onMount(() => {
|
||||||
|
if (!authConfig.loaded) authConfig.load();
|
||||||
|
});
|
||||||
|
|
||||||
async function submit(e: SubmitEvent) {
|
async function submit(e: SubmitEvent) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
error = null;
|
error = null;
|
||||||
@@ -25,6 +34,12 @@
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
<h1>Register</h1>
|
<h1>Register</h1>
|
||||||
|
{#if authConfig.loaded && !authConfig.self_register_enabled}
|
||||||
|
<p class="notice" role="status" data-testid="register-disabled">
|
||||||
|
Self-registration is disabled on this server. Ask an administrator to
|
||||||
|
create an account for you.
|
||||||
|
</p>
|
||||||
|
{:else}
|
||||||
<form onsubmit={submit} action="javascript:void(0)" data-testid="register-form">
|
<form onsubmit={submit} action="javascript:void(0)" data-testid="register-form">
|
||||||
<label class="form-field">
|
<label class="form-field">
|
||||||
<span>Username</span>
|
<span>Username</span>
|
||||||
@@ -56,6 +71,7 @@
|
|||||||
<p class="form-error" role="alert" data-testid="register-error">{error}</p>
|
<p class="form-error" role="alert" data-testid="register-error">{error}</p>
|
||||||
{/if}
|
{/if}
|
||||||
</form>
|
</form>
|
||||||
|
{/if}
|
||||||
<p class="hint">
|
<p class="hint">
|
||||||
Already have an account? <a href="/login">Log in</a>.
|
Already have an account? <a href="/login">Log in</a>.
|
||||||
</p>
|
</p>
|
||||||
@@ -90,4 +106,14 @@
|
|||||||
color: var(--text-muted);
|
color: var(--text-muted);
|
||||||
font-size: var(--font-sm);
|
font-size: var(--font-sm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.notice {
|
||||||
|
padding: var(--space-3);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--surface-elevated);
|
||||||
|
color: var(--text);
|
||||||
|
max-width: 32rem;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
Reference in New Issue
Block a user