Compare commits
26 Commits
5e92a2c450
...
bugfix/api
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8667f8b957 | ||
|
|
e7662d18d6 | ||
|
|
45ce0d8f12 | ||
|
|
51f42b03e9 | ||
|
|
fa0a7da311 | ||
|
|
9ff49166a5 | ||
|
|
b845d88766 | ||
|
|
9fe0f26d75 | ||
|
|
93c7fd63fc | ||
|
|
89b84252a5 | ||
|
|
728d704a66 | ||
|
|
d24e68c78d | ||
|
|
51346227dd | ||
|
|
c51353ead3 | ||
|
|
b1a3a4e9d3 | ||
|
|
26eccd0abe | ||
|
|
89b8785a40 | ||
|
|
64ccc0ba84 | ||
|
|
215325ad2f | ||
|
|
7aa6e7e6d9 | ||
|
|
c95c1805df | ||
|
|
21f44cea3f | ||
|
|
58e637085d | ||
|
|
19c1276490 | ||
|
|
7560d59616 | ||
|
|
274cc819ca |
71
.gitea/README.md
Normal file
71
.gitea/README.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# Gitea Actions
|
||||
|
||||
The [`deploy`](workflows/deploy.yml) workflow runs on every push to `main`
|
||||
(and via manual `workflow_dispatch`). It tests, builds, pushes the images
|
||||
to a private registry, and rolls the stack over by SSH on the target host.
|
||||
|
||||
## Required secrets
|
||||
|
||||
Set under *Repo Settings → Actions → Secrets*:
|
||||
|
||||
| Name | Example | Purpose |
|
||||
| -------------------- | ------------------------ | ---------------------------------------------------------------- |
|
||||
| `REGISTRY_URL` | `registry.example.com` | Registry host. No scheme, no trailing slash. |
|
||||
| `REGISTRY_USERNAME` | `mangalord-ci` | `docker login` user. |
|
||||
| `REGISTRY_PASSWORD` | `<token>` | `docker login` token/password. |
|
||||
| `SSH_HOST` | `mangalord.example.com` | Deploy target hostname/IP. |
|
||||
| `SSH_USER` | `deploy` | SSH user on the target (must be in the `docker` group). |
|
||||
| `SSH_PRIVATE_KEY` | `-----BEGIN OPENSSH...` | Private key authorised in the target user's `authorized_keys`. |
|
||||
| `SSH_PORT` | `22` | Optional. Defaults to `22` if unset. |
|
||||
|
||||
## Required variables
|
||||
|
||||
Set under *Repo Settings → Actions → Variables* (plain variables, not secrets — their values appear
|
||||
in logs):
|
||||
|
||||
| Name | Example | Purpose |
|
||||
| ------------- | ------------------------ | ---------------------------------------------------------------------- |
|
||||
| `DEPLOY_PATH` | `/srv/mangalord` | Directory on target holding `docker-compose.yml`, `.env`, and the prod overlay. |
|
||||
|
||||
## One-time host setup
|
||||
|
||||
The workflow assumes the deploy target already has:
|
||||
|
||||
1. Docker + Docker Compose v2 installed and the `SSH_USER` in the `docker` group.
|
||||
2. `$DEPLOY_PATH/docker-compose.yml` (copy of the repo's [docker-compose.yml](../docker-compose.yml)).
|
||||
3. `$DEPLOY_PATH/docker-compose.prod.yml` (copy of the repo's [docker-compose.prod.yml](../docker-compose.prod.yml)).
|
||||
4. `$DEPLOY_PATH/.env` populated from [.env.example](../.env.example) with production values (real `POSTGRES_PASSWORD`, `COOKIE_SECURE=true`, etc.).
|
||||
|
||||
Bootstrap once:
|
||||
|
||||
```bash
|
||||
ssh deploy@mangalord.example.com
|
||||
sudo mkdir -p /srv/mangalord && sudo chown deploy:deploy /srv/mangalord
|
||||
cd /srv/mangalord
|
||||
# place docker-compose.yml, docker-compose.prod.yml, and .env here
|
||||
```
|
||||
|
||||
The first workflow run will pull the images, bring the stack up, and run
|
||||
the embedded migrations on startup.
|
||||
|
||||
## Image tags
|
||||
|
||||
Every workflow run (a push to `main` or a manual dispatch) produces three tags per image:
|
||||
|
||||
- `mangalord-{backend,frontend}:latest`
|
||||
- `mangalord-{backend,frontend}:<git-sha>` — used by the deploy job; lets
|
||||
you pin a deploy to a specific commit
|
||||
- `mangalord-{backend,frontend}:<version>` — the version from
|
||||
[backend/Cargo.toml](../backend/Cargo.toml) (verified in lockstep with
|
||||
[frontend/package.json](../frontend/package.json))
|
||||
|
||||
## Rollback
|
||||
|
||||
SSH to the target, set `IMAGE_TAG` to a previous commit SHA, and re-up:
|
||||
|
||||
```bash
|
||||
cd /srv/mangalord
|
||||
export REGISTRY_URL=registry.example.com
|
||||
export IMAGE_TAG=<previous-sha>
|
||||
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
```
|
||||
144
.gitea/workflows/deploy.yml
Normal file
144
.gitea/workflows/deploy.yml
Normal file
@@ -0,0 +1,144 @@
|
||||
name: deploy
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
test-backend:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: rust:1-slim
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
env:
|
||||
POSTGRES_USER: mangalord
|
||||
POSTGRES_PASSWORD: mangalord
|
||||
POSTGRES_DB: mangalord
|
||||
options: >-
|
||||
--health-cmd "pg_isready -U mangalord"
|
||||
--health-interval 5s
|
||||
--health-timeout 5s
|
||||
--health-retries 10
|
||||
env:
|
||||
DATABASE_URL: postgres://mangalord:mangalord@postgres:5432/mangalord
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install build deps
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates
|
||||
- name: Cache cargo registry and target
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
backend/target
|
||||
key: cargo-${{ runner.os }}-${{ hashFiles('backend/Cargo.lock') }}
|
||||
restore-keys: |
|
||||
cargo-${{ runner.os }}-
|
||||
- name: cargo test
|
||||
working-directory: backend
|
||||
run: cargo test --locked
|
||||
|
||||
test-frontend:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22'
|
||||
cache: npm
|
||||
cache-dependency-path: frontend/package-lock.json
|
||||
- name: npm ci
|
||||
working-directory: frontend
|
||||
run: npm ci
|
||||
- name: vitest
|
||||
working-directory: frontend
|
||||
run: npm test
|
||||
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-backend, test-frontend]
|
||||
outputs:
|
||||
image_tag: ${{ steps.meta.outputs.image_tag }}
|
||||
version: ${{ steps.meta.outputs.version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Resolve image tags
|
||||
id: meta
|
||||
run: |
|
||||
version="$(grep -m1 '^version' backend/Cargo.toml | cut -d'"' -f2)"
|
||||
frontend_version="$(grep -m1 '"version"' frontend/package.json | cut -d'"' -f4)"
|
||||
if [ "$version" != "$frontend_version" ]; then
|
||||
echo "Version mismatch: backend=$version frontend=$frontend_version" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "version=${version}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: docker login
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ secrets.REGISTRY_URL }}
|
||||
username: ${{ secrets.REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Build & push backend
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./backend
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-backend:latest
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.image_tag }}
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.version }}
|
||||
cache-from: type=gha,scope=backend
|
||||
cache-to: type=gha,mode=max,scope=backend
|
||||
|
||||
- name: Build & push frontend
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./frontend
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-frontend:latest
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.image_tag }}
|
||||
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.version }}
|
||||
cache-from: type=gha,scope=frontend
|
||||
cache-to: type=gha,mode=max,scope=frontend
|
||||
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-and-push
|
||||
steps:
|
||||
- name: SSH deploy
|
||||
uses: appleboy/ssh-action@v1.0.3
|
||||
with:
|
||||
host: ${{ secrets.SSH_HOST }}
|
||||
username: ${{ secrets.SSH_USER }}
|
||||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
port: ${{ secrets.SSH_PORT || 22 }}
|
||||
envs: REGISTRY_URL,REGISTRY_USERNAME,REGISTRY_PASSWORD,IMAGE_TAG,DEPLOY_PATH
|
||||
script_stop: true
|
||||
script: |
|
||||
set -euo pipefail
|
||||
cd "$DEPLOY_PATH"
|
||||
echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" -u "$REGISTRY_USERNAME" --password-stdin
|
||||
export REGISTRY_URL IMAGE_TAG
|
||||
docker compose -f docker-compose.yml -f docker-compose.prod.yml pull
|
||||
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
docker image prune -f
|
||||
docker logout "$REGISTRY_URL"
|
||||
env:
|
||||
REGISTRY_URL: ${{ secrets.REGISTRY_URL }}
|
||||
REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
|
||||
REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
|
||||
IMAGE_TAG: ${{ needs.build-and-push.outputs.image_tag }}
|
||||
DEPLOY_PATH: ${{ vars.DEPLOY_PATH }}
|
||||
1381
backend/Cargo.lock
generated
1381
backend/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,8 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.16.0"
|
||||
version = "0.34.1"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
[lib]
|
||||
path = "src/lib.rs"
|
||||
@@ -10,6 +11,10 @@ path = "src/lib.rs"
|
||||
name = "mangalord"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "crawler"
|
||||
path = "src/bin/crawler.rs"
|
||||
|
||||
[dependencies]
|
||||
axum = { version = "0.7", features = ["macros", "multipart"] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
@@ -18,6 +23,7 @@ serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
uuid = { version = "1", features = ["v4", "serde"] }
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
chrono-tz = "0.9"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
tower = { version = "0.5", features = ["util"] }
|
||||
@@ -36,7 +42,11 @@ time = "0.3"
|
||||
infer = "0.16"
|
||||
tokio-util = { version = "0.7", features = ["io"] }
|
||||
futures-core = "0.3"
|
||||
futures-util = "0.3"
|
||||
bytes = "1"
|
||||
chromiumoxide = { version = "0.7", features = ["tokio-runtime", "_fetcher-rusttls-tokio"], default-features = false }
|
||||
scraper = "0.20"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "socks", "cookies"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
@@ -44,3 +54,4 @@ tower = { version = "0.5", features = ["util"] }
|
||||
http-body-util = "0.1"
|
||||
mime = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1", features = ["test-util"] }
|
||||
|
||||
31
backend/migrations/0010_collections.sql
Normal file
31
backend/migrations/0010_collections.sql
Normal file
@@ -0,0 +1,31 @@
|
||||
-- User-owned manga collections. Each user can curate any number of
|
||||
-- named lists (e.g., "Favorites", "Reading list"); mangas can belong
|
||||
-- to many collections of many users without restriction.
|
||||
|
||||
CREATE TABLE collections (
|
||||
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
user_id uuid NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||||
name text NOT NULL,
|
||||
description text,
|
||||
created_at timestamptz NOT NULL DEFAULT now(),
|
||||
updated_at timestamptz NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- Per-user case-insensitive name uniqueness so "Favorites" and
|
||||
-- "favorites" don't both end up in someone's sidebar.
|
||||
CREATE UNIQUE INDEX collections_user_name_lower_uniq
|
||||
ON collections (user_id, lower(name));
|
||||
|
||||
CREATE INDEX collections_user_idx ON collections (user_id, created_at DESC);
|
||||
|
||||
CREATE TABLE collection_mangas (
|
||||
collection_id uuid NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
|
||||
manga_id uuid NOT NULL REFERENCES mangas(id) ON DELETE CASCADE,
|
||||
added_at timestamptz NOT NULL DEFAULT now(),
|
||||
PRIMARY KEY (collection_id, manga_id)
|
||||
);
|
||||
|
||||
-- Reverse lookup: which collections contain this manga? Used by the
|
||||
-- "Add to collection" modal to pre-check the boxes for the user's
|
||||
-- collections this manga is already in.
|
||||
CREATE INDEX collection_mangas_manga_idx ON collection_mangas (manga_id);
|
||||
39
backend/migrations/0011_history.sql
Normal file
39
backend/migrations/0011_history.sql
Normal file
@@ -0,0 +1,39 @@
|
||||
-- Per-user reading progress and uploader attribution.
|
||||
--
|
||||
-- Reading progress is the simplest shape that supports "jump to last
|
||||
-- read chapter" — one row per (user, manga). The reader writes
|
||||
-- through on chapter open and on page advance (debounced); the
|
||||
-- history view shows them sorted by most-recently-touched.
|
||||
--
|
||||
-- Uploader attribution adds nullable `uploaded_by` columns to the two
|
||||
-- upload sinks. Historical rows have NULL because the original
|
||||
-- handlers didn't track this; new uploads stamp the current user.
|
||||
|
||||
CREATE TABLE read_progress (
|
||||
user_id uuid NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||||
manga_id uuid NOT NULL REFERENCES mangas(id) ON DELETE CASCADE,
|
||||
-- Chapter is nullable so a deleted chapter doesn't blow away
|
||||
-- the user's progress row entirely — they just see "(chapter
|
||||
-- removed)" in the history UI.
|
||||
chapter_id uuid REFERENCES chapters(id) ON DELETE SET NULL,
|
||||
page integer NOT NULL DEFAULT 1 CHECK (page >= 1),
|
||||
updated_at timestamptz NOT NULL DEFAULT now(),
|
||||
PRIMARY KEY (user_id, manga_id)
|
||||
);
|
||||
|
||||
-- Most queries on this table want "most recent first" per user; the
|
||||
-- composite index lets one index scan satisfy both the filter and the sort.
|
||||
CREATE INDEX read_progress_user_idx
|
||||
ON read_progress (user_id, updated_at DESC);
|
||||
|
||||
ALTER TABLE mangas
|
||||
ADD COLUMN uploaded_by uuid REFERENCES users(id) ON DELETE SET NULL;
|
||||
CREATE INDEX mangas_uploaded_by_idx
|
||||
ON mangas (uploaded_by, created_at DESC)
|
||||
WHERE uploaded_by IS NOT NULL;
|
||||
|
||||
ALTER TABLE chapters
|
||||
ADD COLUMN uploaded_by uuid REFERENCES users(id) ON DELETE SET NULL;
|
||||
CREATE INDEX chapters_uploaded_by_idx
|
||||
ON chapters (uploaded_by, created_at DESC)
|
||||
WHERE uploaded_by IS NOT NULL;
|
||||
72
backend/migrations/0012_crawler.sql
Normal file
72
backend/migrations/0012_crawler.sql
Normal file
@@ -0,0 +1,72 @@
|
||||
-- Crawler tables.
|
||||
--
|
||||
-- Same philosophy as 0001_init.sql: new concepts go in new tables
|
||||
-- joined to existing ones, not jammed onto `mangas`/`chapters`. A
|
||||
-- crawled manga IS a manga; the only thing the source-link tables
|
||||
-- carry is "where did this come from and when did we last see it".
|
||||
-- That keeps the API and frontend source-agnostic.
|
||||
|
||||
-- 1. Source registry. One row per site the crawler knows about.
|
||||
-- `config` carries per-site knobs (base URL, rate limits, custom
|
||||
-- selectors) so adding a source is a row insert plus a `Source`
|
||||
-- trait impl — no schema change.
|
||||
CREATE TABLE sources (
|
||||
id text PRIMARY KEY,
|
||||
name text NOT NULL,
|
||||
base_url text NOT NULL,
|
||||
enabled boolean NOT NULL DEFAULT true,
|
||||
config jsonb NOT NULL DEFAULT '{}'::jsonb,
|
||||
created_at timestamptz NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- 2. Link tables. `(source_id, source_*_key)` is the natural key the
|
||||
-- source itself exposes; the FK to `mangas`/`chapters` is what
|
||||
-- threads it back into our domain. `metadata_hash` is the signal
|
||||
-- used by `crawler::diff` to detect updates without re-comparing
|
||||
-- every field. `last_seen_at` + `dropped_at` is the soft-drop pair.
|
||||
CREATE TABLE manga_sources (
|
||||
source_id text NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
|
||||
source_manga_key text NOT NULL,
|
||||
manga_id uuid NOT NULL REFERENCES mangas(id) ON DELETE CASCADE,
|
||||
source_url text NOT NULL,
|
||||
metadata_hash text,
|
||||
first_seen_at timestamptz NOT NULL DEFAULT now(),
|
||||
last_seen_at timestamptz NOT NULL DEFAULT now(),
|
||||
dropped_at timestamptz,
|
||||
PRIMARY KEY (source_id, source_manga_key)
|
||||
);
|
||||
CREATE INDEX manga_sources_manga_idx ON manga_sources (manga_id);
|
||||
CREATE INDEX manga_sources_last_seen_idx ON manga_sources (source_id, last_seen_at);
|
||||
|
||||
CREATE TABLE chapter_sources (
|
||||
source_id text NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
|
||||
source_chapter_key text NOT NULL,
|
||||
chapter_id uuid NOT NULL REFERENCES chapters(id) ON DELETE CASCADE,
|
||||
source_url text NOT NULL,
|
||||
first_seen_at timestamptz NOT NULL DEFAULT now(),
|
||||
last_seen_at timestamptz NOT NULL DEFAULT now(),
|
||||
dropped_at timestamptz,
|
||||
PRIMARY KEY (source_id, source_chapter_key)
|
||||
);
|
||||
CREATE INDEX chapter_sources_chapter_idx ON chapter_sources (chapter_id);
|
||||
|
||||
-- 3. Persistent job queue. Workers lease with
|
||||
-- `FOR UPDATE SKIP LOCKED`, heartbeat via `leased_until`, and ack
|
||||
-- by transitioning state. The partial index keeps the hot path
|
||||
-- (pick the next ready job) off the bulk of done/dead rows.
|
||||
CREATE TABLE crawler_jobs (
|
||||
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
payload jsonb NOT NULL,
|
||||
state text NOT NULL DEFAULT 'pending'
|
||||
CHECK (state IN ('pending','running','done','failed','dead')),
|
||||
attempts integer NOT NULL DEFAULT 0,
|
||||
max_attempts integer NOT NULL DEFAULT 5,
|
||||
scheduled_at timestamptz NOT NULL DEFAULT now(),
|
||||
leased_until timestamptz,
|
||||
last_error text,
|
||||
created_at timestamptz NOT NULL DEFAULT now(),
|
||||
updated_at timestamptz NOT NULL DEFAULT now()
|
||||
);
|
||||
CREATE INDEX crawler_jobs_ready_idx
|
||||
ON crawler_jobs (scheduled_at)
|
||||
WHERE state IN ('pending', 'failed');
|
||||
18
backend/migrations/0013_drop_chapters_unique_number.sql
Normal file
18
backend/migrations/0013_drop_chapters_unique_number.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- Real-world sources publish multiple chapters at the same number:
|
||||
-- different uploaders, translator notices/farewells, paid-vs-free
|
||||
-- re-uploads, and our own users can legitimately have two versions of
|
||||
-- "Ch.52" with different scanlations. The (manga_id, number) UNIQUE
|
||||
-- from 0001_init silently collapses all of those into a single row via
|
||||
-- ON CONFLICT, dropping data. Drop the constraint and lean on the
|
||||
-- chapter id (UUID) as the only chapter identity going forward.
|
||||
|
||||
ALTER TABLE chapters DROP CONSTRAINT chapters_manga_id_number_key;
|
||||
|
||||
-- The UNIQUE was also our only index on (manga_id, number) since
|
||||
-- 0007 dropped the redundant explicit one. Chapter list pages
|
||||
-- ORDER BY number ASC and the manga page is a hot read path, so put
|
||||
-- the index back without the uniqueness. Secondary sort by created_at
|
||||
-- so duplicate-numbered chapters have a stable order in lists and
|
||||
-- prev/next navigation.
|
||||
CREATE INDEX chapters_manga_id_number_idx
|
||||
ON chapters (manga_id, number, created_at);
|
||||
15
backend/migrations/0014_crawler_jobs_dedup_index.sql
Normal file
15
backend/migrations/0014_crawler_jobs_dedup_index.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
-- Dedup SyncChapterContent jobs in flight.
|
||||
--
|
||||
-- Without this, the daemon's bookmark/cron enqueue paths would have to do a
|
||||
-- pre-check followed by an insert, which is racy under concurrency. The partial
|
||||
-- unique index lets both producers use plain `INSERT ... ON CONFLICT DO
|
||||
-- NOTHING`: at most one (pending|running) job per chapter_id exists, and the
|
||||
-- slot frees again as soon as the job transitions to done/failed/dead so a
|
||||
-- re-enqueue is possible after the row is reaped or a force-refetch is wanted.
|
||||
--
|
||||
-- Scoped to sync_chapter_content payloads only so Discover / SyncManga /
|
||||
-- SyncChapterList jobs (which don't carry a chapter_id) remain un-deduped.
|
||||
CREATE UNIQUE INDEX crawler_jobs_chapter_content_dedup_idx
|
||||
ON crawler_jobs ((payload->>'chapter_id'))
|
||||
WHERE state IN ('pending', 'running')
|
||||
AND payload->>'kind' = 'sync_chapter_content';
|
||||
12
backend/migrations/0015_crawler_state.sql
Normal file
12
backend/migrations/0015_crawler_state.sql
Normal file
@@ -0,0 +1,12 @@
|
||||
-- Small key-value table for daemon state that needs to survive restarts.
|
||||
--
|
||||
-- Used so far only by the cron scheduler (`last_metadata_tick_at`) so it can
|
||||
-- detect that the most recent slot was missed (e.g. the backend was down at
|
||||
-- midnight) and fire immediately on startup before resuming the regular
|
||||
-- schedule. JSONB on the value column lets future keys carry richer payloads
|
||||
-- without another migration.
|
||||
CREATE TABLE crawler_state (
|
||||
key text PRIMARY KEY,
|
||||
value jsonb NOT NULL,
|
||||
updated_at timestamptz NOT NULL DEFAULT now()
|
||||
);
|
||||
@@ -230,8 +230,24 @@ async fn create_token(
|
||||
Json(input): Json<CreateTokenInput>,
|
||||
) -> AppResult<impl IntoResponse> {
|
||||
let name = input.name.trim();
|
||||
// Both arms use `ValidationFailed` (422 with field details) to
|
||||
// match the structured-error shape `attach_tag` returns for the
|
||||
// same kind of free-form-identifier validation. The other
|
||||
// /auth/* handlers in this file use `InvalidInput` (400); the
|
||||
// divergence is pre-existing and would warrant a project-wide
|
||||
// pass to flip them all if the client side wants uniform per-
|
||||
// field error rendering.
|
||||
if name.is_empty() {
|
||||
return Err(AppError::InvalidInput("token name is required".into()));
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "token name is required".into(),
|
||||
details: serde_json::json!({ "name": "required" }),
|
||||
});
|
||||
}
|
||||
if name.chars().count() > 64 {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "token name too long".into(),
|
||||
details: serde_json::json!({ "name": "max 64 characters" }),
|
||||
});
|
||||
}
|
||||
let (raw, hash) = generate_token();
|
||||
let token = repo::api_token::create(&state.db, user.id, name, &hash).await?;
|
||||
|
||||
@@ -13,6 +13,7 @@ use uuid::Uuid;
|
||||
use crate::api::pagination::PagedResponse;
|
||||
use crate::app::AppState;
|
||||
use crate::auth::extractor::CurrentUser;
|
||||
use crate::crawler::pipeline;
|
||||
use crate::domain::{Bookmark, BookmarkSummary};
|
||||
use crate::error::{AppError, AppResult};
|
||||
use crate::repo;
|
||||
@@ -86,6 +87,29 @@ async fn create(
|
||||
input.page,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Fire-and-forget: kick off content syncs for any pending chapters of
|
||||
// the newly-bookmarked manga. The dedup index makes this idempotent
|
||||
// across repeated bookmarks of the same manga; failure here must not
|
||||
// surface to the user (the daily cron sweeps anything missed).
|
||||
let pool = state.db.clone();
|
||||
let manga_id = input.manga_id;
|
||||
tokio::spawn(async move {
|
||||
match pipeline::enqueue_pending_for_manga(&pool, manga_id).await {
|
||||
Ok(summary) => tracing::info!(
|
||||
%manga_id,
|
||||
inserted = summary.inserted,
|
||||
skipped = summary.skipped,
|
||||
failed = summary.failed,
|
||||
"bookmark hook: enqueued pending chapters"
|
||||
),
|
||||
Err(e) => tracing::warn!(
|
||||
%manga_id, error = ?e,
|
||||
"bookmark hook: enqueue_pending_for_manga failed"
|
||||
),
|
||||
}
|
||||
});
|
||||
|
||||
Ok((StatusCode::CREATED, Json(bookmark)))
|
||||
}
|
||||
|
||||
@@ -111,6 +135,7 @@ async fn list_me(
|
||||
) -> AppResult<Json<PagedResponse<BookmarkSummary>>> {
|
||||
let limit = params.limit.clamp(1, 200);
|
||||
let offset = params.offset.max(0);
|
||||
let items = repo::bookmark::list_for_user(&state.db, user.id, limit, offset).await?;
|
||||
Ok(Json(PagedResponse::new(items, limit, offset)))
|
||||
let (items, total) =
|
||||
repo::bookmark::list_for_user(&state.db, user.id, limit, offset).await?;
|
||||
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||
}
|
||||
|
||||
@@ -26,9 +26,9 @@ use crate::upload::{parse_image, UploadedImage};
|
||||
pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/mangas/:manga_id/chapters", get(list).post(create))
|
||||
.route("/mangas/:manga_id/chapters/:number", get(get_one))
|
||||
.route("/mangas/:manga_id/chapters/:chapter_id", get(get_one))
|
||||
.route(
|
||||
"/mangas/:manga_id/chapters/:number/pages",
|
||||
"/mangas/:manga_id/chapters/:chapter_id/pages",
|
||||
get(list_pages),
|
||||
)
|
||||
}
|
||||
@@ -60,10 +60,10 @@ async fn list(
|
||||
|
||||
async fn get_one(
|
||||
State(state): State<AppState>,
|
||||
Path((manga_id, number)): Path<(Uuid, i32)>,
|
||||
Path((manga_id, chapter_id)): Path<(Uuid, Uuid)>,
|
||||
) -> AppResult<Json<Chapter>> {
|
||||
repo::manga::get(&state.db, manga_id).await?;
|
||||
let chapter = repo::chapter::find_by_manga_and_number(&state.db, manga_id, number)
|
||||
let chapter = repo::chapter::find_by_id_in_manga(&state.db, manga_id, chapter_id)
|
||||
.await?
|
||||
.ok_or(AppError::NotFound)?;
|
||||
Ok(Json(chapter))
|
||||
@@ -71,7 +71,7 @@ async fn get_one(
|
||||
|
||||
async fn create(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(_user): CurrentUser,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(manga_id): Path<Uuid>,
|
||||
mut multipart: Multipart,
|
||||
) -> AppResult<(StatusCode, Json<Chapter>)> {
|
||||
@@ -133,6 +133,7 @@ async fn create(
|
||||
manga_id,
|
||||
metadata.number,
|
||||
metadata.title.as_deref(),
|
||||
Some(user.id),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -163,10 +164,10 @@ struct PagesResponse {
|
||||
|
||||
async fn list_pages(
|
||||
State(state): State<AppState>,
|
||||
Path((manga_id, number)): Path<(Uuid, i32)>,
|
||||
Path((manga_id, chapter_id)): Path<(Uuid, Uuid)>,
|
||||
) -> AppResult<Json<PagesResponse>> {
|
||||
repo::manga::get(&state.db, manga_id).await?;
|
||||
let chapter = repo::chapter::find_by_manga_and_number(&state.db, manga_id, number)
|
||||
let chapter = repo::chapter::find_by_id_in_manga(&state.db, manga_id, chapter_id)
|
||||
.await?
|
||||
.ok_or(AppError::NotFound)?;
|
||||
let pages = repo::page::list_for_chapter(&state.db, chapter.id).await?;
|
||||
|
||||
247
backend/src/api/collections.rs
Normal file
247
backend/src/api/collections.rs
Normal file
@@ -0,0 +1,247 @@
|
||||
use axum::extract::{Path, Query, State};
|
||||
use axum::http::StatusCode;
|
||||
use axum::routing::{delete, get, post};
|
||||
use axum::{Json, Router};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::api::pagination::PagedResponse;
|
||||
use crate::app::AppState;
|
||||
use crate::auth::extractor::CurrentUser;
|
||||
use crate::domain::collection::{
|
||||
Collection, CollectionPatch, CollectionSummary, NewCollection,
|
||||
};
|
||||
use crate::domain::manga::Manga;
|
||||
use crate::domain::patch::Patch;
|
||||
use crate::error::{AppError, AppResult};
|
||||
use crate::repo;
|
||||
|
||||
pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/collections", post(create))
|
||||
.route("/me/collections", get(list_mine))
|
||||
.route("/collections/:id", get(get_one).patch(update).delete(delete_one))
|
||||
.route("/collections/:id/mangas", get(list_mangas).post(add_manga))
|
||||
.route(
|
||||
"/collections/:id/mangas/:manga_id",
|
||||
delete(remove_manga),
|
||||
)
|
||||
.route(
|
||||
"/mangas/:id/my-collections",
|
||||
get(list_my_collections_containing),
|
||||
)
|
||||
}
|
||||
|
||||
const MAX_NAME_LEN: usize = 64;
|
||||
const MAX_DESCRIPTION_LEN: usize = 1024;
|
||||
const DEFAULT_LIMIT: i64 = 50;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ListParams {
|
||||
#[serde(default = "default_limit")]
|
||||
pub limit: i64,
|
||||
#[serde(default)]
|
||||
pub offset: i64,
|
||||
}
|
||||
|
||||
fn default_limit() -> i64 {
|
||||
DEFAULT_LIMIT
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct AddMangaBody {
|
||||
pub manga_id: Uuid,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct MangaCollectionIds {
|
||||
pub collection_ids: Vec<Uuid>,
|
||||
}
|
||||
|
||||
fn validate_name(name: &str) -> AppResult<()> {
|
||||
let trimmed = name.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "name is required".into(),
|
||||
details: json!({ "name": "required" }),
|
||||
});
|
||||
}
|
||||
if trimmed.chars().count() > MAX_NAME_LEN {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "name too long".into(),
|
||||
details: json!({ "name": format!("max {MAX_NAME_LEN} characters") }),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_description(desc: Option<&str>) -> AppResult<()> {
|
||||
if let Some(d) = desc {
|
||||
if d.chars().count() > MAX_DESCRIPTION_LEN {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "description too long".into(),
|
||||
details: json!({ "description": format!("max {MAX_DESCRIPTION_LEN} characters") }),
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Json(input): Json<NewCollection>,
|
||||
) -> AppResult<(StatusCode, Json<Collection>)> {
|
||||
validate_name(&input.name)?;
|
||||
validate_description(input.description.as_deref())?;
|
||||
let row = repo::collection::create(
|
||||
&state.db,
|
||||
user.id,
|
||||
&input.name,
|
||||
input.description.as_deref(),
|
||||
)
|
||||
.await?;
|
||||
Ok((StatusCode::CREATED, Json(row)))
|
||||
}
|
||||
|
||||
async fn list_mine(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Query(params): Query<ListParams>,
|
||||
) -> AppResult<Json<PagedResponse<CollectionSummary>>> {
|
||||
let limit = params.limit.clamp(1, 200);
|
||||
let offset = params.offset.max(0);
|
||||
let (items, total) =
|
||||
repo::collection::list_for_user(&state.db, user.id, limit, offset).await?;
|
||||
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||
}
|
||||
|
||||
async fn get_one(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
) -> AppResult<Json<Collection>> {
|
||||
let row = require_owner(&state, user.id, id).await?;
|
||||
Ok(Json(row))
|
||||
}
|
||||
|
||||
async fn update(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
Json(patch): Json<CollectionPatch>,
|
||||
) -> AppResult<Json<Collection>> {
|
||||
require_owner_id(&state, user.id, id).await?;
|
||||
if let Some(ref n) = patch.name {
|
||||
validate_name(n)?;
|
||||
}
|
||||
if let Patch::Set(ref d) = patch.description {
|
||||
validate_description(Some(d.as_str()))?;
|
||||
}
|
||||
// Three-state semantics via `Patch<T>`: omitted → Unchanged
|
||||
// (column untouched), explicit `null` → Clear (NULL), value → Set.
|
||||
let description_provided = patch.description.is_provided();
|
||||
let description_value: Option<&str> = match &patch.description {
|
||||
Patch::Set(s) => Some(s.as_str()),
|
||||
Patch::Clear | Patch::Unchanged => None,
|
||||
};
|
||||
let updated = repo::collection::update(
|
||||
&state.db,
|
||||
id,
|
||||
patch.name.as_deref(),
|
||||
description_provided,
|
||||
description_value,
|
||||
)
|
||||
.await?;
|
||||
Ok(Json(updated))
|
||||
}
|
||||
|
||||
async fn delete_one(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
) -> AppResult<StatusCode> {
|
||||
require_owner_id(&state, user.id, id).await?;
|
||||
repo::collection::delete(&state.db, id).await?;
|
||||
Ok(StatusCode::NO_CONTENT)
|
||||
}
|
||||
|
||||
async fn list_mangas(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
Query(params): Query<ListParams>,
|
||||
) -> AppResult<Json<PagedResponse<Manga>>> {
|
||||
require_owner_id(&state, user.id, id).await?;
|
||||
let limit = params.limit.clamp(1, 200);
|
||||
let offset = params.offset.max(0);
|
||||
let (items, total) =
|
||||
repo::collection::list_mangas(&state.db, id, limit, offset).await?;
|
||||
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||
}
|
||||
|
||||
async fn add_manga(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
Json(body): Json<AddMangaBody>,
|
||||
) -> AppResult<StatusCode> {
|
||||
require_owner_id(&state, user.id, id).await?;
|
||||
if !repo::manga::exists(&state.db, body.manga_id).await? {
|
||||
return Err(AppError::NotFound);
|
||||
}
|
||||
let created = repo::collection::add_manga(&state.db, id, body.manga_id).await?;
|
||||
Ok(if created { StatusCode::CREATED } else { StatusCode::OK })
|
||||
}
|
||||
|
||||
async fn remove_manga(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path((collection_id, manga_id)): Path<(Uuid, Uuid)>,
|
||||
) -> AppResult<StatusCode> {
|
||||
require_owner_id(&state, user.id, collection_id).await?;
|
||||
repo::collection::remove_manga(&state.db, collection_id, manga_id).await?;
|
||||
Ok(StatusCode::NO_CONTENT)
|
||||
}
|
||||
|
||||
async fn list_my_collections_containing(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(manga_id): Path<Uuid>,
|
||||
) -> AppResult<Json<MangaCollectionIds>> {
|
||||
// No 404 if the manga doesn't exist — the empty list is the
|
||||
// correct answer ("you have it in zero of your collections") and
|
||||
// keeps the request side-effect-free.
|
||||
let ids =
|
||||
repo::collection::list_collections_containing(&state.db, user.id, manga_id).await?;
|
||||
Ok(Json(MangaCollectionIds { collection_ids: ids }))
|
||||
}
|
||||
|
||||
/// Returns the row iff the caller owns it. Both "doesn't exist" and
|
||||
/// "exists but belongs to someone else" surface as `NotFound` so the
|
||||
/// API doesn't disclose collection existence to non-owners — the
|
||||
/// frontend already does this funnelling for URLs, and consistency at
|
||||
/// the API matters because the same identifiers travel through bots
|
||||
/// and shared links.
|
||||
async fn require_owner(
|
||||
state: &AppState,
|
||||
user_id: Uuid,
|
||||
id: Uuid,
|
||||
) -> AppResult<Collection> {
|
||||
match repo::collection::get(&state.db, id).await {
|
||||
Ok(row) if row.user_id == user_id => Ok(row),
|
||||
// Either the row doesn't exist (NotFound from `get`) or it
|
||||
// belongs to someone else — both collapse to NotFound.
|
||||
Ok(_) | Err(AppError::NotFound) => Err(AppError::NotFound),
|
||||
Err(other) => Err(other),
|
||||
}
|
||||
}
|
||||
|
||||
async fn require_owner_id(state: &AppState, user_id: Uuid, id: Uuid) -> AppResult<()> {
|
||||
match repo::collection::find_owner(&state.db, id).await? {
|
||||
Some(owner) if owner == user_id => Ok(()),
|
||||
// Same non-leakage rationale as `require_owner` above.
|
||||
_ => Err(AppError::NotFound),
|
||||
}
|
||||
}
|
||||
145
backend/src/api/history.rs
Normal file
145
backend/src/api/history.rs
Normal file
@@ -0,0 +1,145 @@
|
||||
//! Reading-progress and upload-history endpoints (Phase 5).
|
||||
//!
|
||||
//! All routes live under `/me/...` and require `CurrentUser`. They
|
||||
//! never expose another user's data — the user id is taken from the
|
||||
//! auth extractor, not from the path or body.
|
||||
|
||||
use axum::extract::{Path, Query, State};
|
||||
use axum::http::StatusCode;
|
||||
use axum::routing::{get, put};
|
||||
use axum::{Json, Router};
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::api::pagination::PagedResponse;
|
||||
use crate::app::AppState;
|
||||
use crate::auth::extractor::CurrentUser;
|
||||
use crate::domain::read_progress::{
|
||||
ReadProgress, ReadProgressForManga, ReadProgressSummary, UpsertReadProgress,
|
||||
};
|
||||
use crate::domain::upload_entry::UploadEntry;
|
||||
use crate::error::{AppError, AppResult};
|
||||
use crate::repo;
|
||||
|
||||
pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/me/read-progress", put(upsert).get(list))
|
||||
.route(
|
||||
"/me/read-progress/:manga_id",
|
||||
get(get_one).delete(delete_one),
|
||||
)
|
||||
.route("/me/uploads", get(uploads))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ListParams {
|
||||
#[serde(default = "default_limit")]
|
||||
pub limit: i64,
|
||||
#[serde(default)]
|
||||
pub offset: i64,
|
||||
}
|
||||
|
||||
/// Page size used when the `limit` query parameter is omitted.
fn default_limit() -> i64 {
    const DEFAULT_PAGE_SIZE: i64 = 50;
    DEFAULT_PAGE_SIZE
}
|
||||
|
||||
async fn upsert(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Json(input): Json<UpsertReadProgress>,
|
||||
) -> AppResult<Json<ReadProgress>> {
|
||||
let page = input.page.unwrap_or(1);
|
||||
if page < 1 {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "page must be 1 or greater".into(),
|
||||
details: json!({ "page": "must be >= 1" }),
|
||||
});
|
||||
}
|
||||
// Cross-link guard: the FKs on read_progress accept any valid
|
||||
// (manga_id, chapter_id), even when they refer to unrelated mangas.
|
||||
// Reject mismatched pairs so history can't end up rendering a
|
||||
// chapter number from the wrong manga.
|
||||
if let Some(chapter_id) = input.chapter_id {
|
||||
let belongs = repo::read_progress::chapter_belongs_to_manga(
|
||||
&state.db,
|
||||
input.manga_id,
|
||||
chapter_id,
|
||||
)
|
||||
.await?;
|
||||
if !belongs {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "chapter does not belong to this manga".into(),
|
||||
details: json!({ "chapter_id": "must reference a chapter of the supplied manga" }),
|
||||
});
|
||||
}
|
||||
}
|
||||
let row = repo::read_progress::upsert(
|
||||
&state.db,
|
||||
user.id,
|
||||
input.manga_id,
|
||||
input.chapter_id,
|
||||
page,
|
||||
)
|
||||
.await?;
|
||||
Ok(Json(row))
|
||||
}
|
||||
|
||||
async fn list(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Query(params): Query<ListParams>,
|
||||
) -> AppResult<Json<PagedResponse<ReadProgressSummary>>> {
|
||||
let limit = params.limit.clamp(1, 200);
|
||||
let offset = params.offset.max(0);
|
||||
let (items, total) =
|
||||
repo::read_progress::list_for_user(&state.db, user.id, limit, offset).await?;
|
||||
Ok(Json(PagedResponse::with_total(items, limit, offset, total)))
|
||||
}
|
||||
|
||||
async fn get_one(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(manga_id): Path<Uuid>,
|
||||
) -> AppResult<Json<ReadProgressForManga>> {
|
||||
// Enriched with `chapter_number` so the manga page's Continue
|
||||
// CTA doesn't need to resolve the chapter id against the paged
|
||||
// chapters list.
|
||||
Ok(Json(
|
||||
repo::read_progress::get_for_manga(&state.db, user.id, manga_id).await?,
|
||||
))
|
||||
}
|
||||
|
||||
async fn delete_one(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Path(manga_id): Path<Uuid>,
|
||||
) -> AppResult<StatusCode> {
|
||||
repo::read_progress::delete(&state.db, user.id, manga_id).await?;
|
||||
Ok(StatusCode::NO_CONTENT)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct UploadListParams {
|
||||
#[serde(default = "default_uploads_limit")]
|
||||
pub limit: i64,
|
||||
}
|
||||
|
||||
/// Number of upload entries returned when the `limit` query parameter is
/// omitted.
fn default_uploads_limit() -> i64 {
    const DEFAULT_UPLOADS_PAGE_SIZE: i64 = 50;
    DEFAULT_UPLOADS_PAGE_SIZE
}
|
||||
|
||||
async fn uploads(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(user): CurrentUser,
|
||||
Query(params): Query<UploadListParams>,
|
||||
) -> AppResult<Json<PagedResponse<UploadEntry>>> {
|
||||
// Limit-only pagination for now — keyset across two unrelated
|
||||
// tables is a future enhancement. Total comes from a fast count
|
||||
// query so the UI can show "N total" without dragging the rows
|
||||
// across the wire.
|
||||
let limit = params.limit.clamp(1, 200);
|
||||
let (items, total) =
|
||||
repo::upload_history::list_for_user(&state.db, user.id, limit).await?;
|
||||
Ok(Json(PagedResponse::with_total(items, limit, 0, total)))
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
use axum::extract::{Multipart, Path, Query, State};
|
||||
use axum::http::StatusCode;
|
||||
use axum::routing::{delete, get, post};
|
||||
use axum::routing::{delete, get, post, put};
|
||||
use axum::{Json, Router};
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
@@ -9,16 +9,19 @@ use uuid::Uuid;
|
||||
use crate::api::pagination::PagedResponse;
|
||||
use crate::app::AppState;
|
||||
use crate::auth::extractor::CurrentUser;
|
||||
use crate::domain::manga::{MangaCard, MangaDetail, MangaPatch, NewManga, Patch};
|
||||
use crate::domain::manga::{MangaCard, MangaDetail, MangaPatch, NewManga};
|
||||
use crate::domain::patch::Patch;
|
||||
use crate::domain::tag::TagRef;
|
||||
use crate::error::{AppError, AppResult};
|
||||
use crate::repo;
|
||||
use crate::storage::StorageError;
|
||||
use crate::upload::{parse_image, UploadedImage};
|
||||
|
||||
pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/mangas", get(list).post(create))
|
||||
.route("/mangas/:id", get(get_one).patch(update))
|
||||
.route("/mangas/:id/cover", put(put_cover).delete(delete_cover))
|
||||
.route("/mangas/:id/tags", post(attach_tag))
|
||||
.route("/mangas/:id/tags/:tag_id", delete(detach_tag))
|
||||
}
|
||||
@@ -168,6 +171,7 @@ async fn create(
|
||||
&status,
|
||||
metadata.description.as_deref(),
|
||||
&alt_titles,
|
||||
Some(_user.id),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -257,6 +261,82 @@ async fn update(
|
||||
Ok(Json(repo::manga::get_detail(&state.db, id).await?))
|
||||
}
|
||||
|
||||
/// `PUT /api/v1/mangas/:id/cover` is multipart/form-data with a single
|
||||
/// required `cover` part containing image bytes. MIME is sniffed by
|
||||
/// magic bytes (jpeg/png/webp/gif/avif); filename and Content-Type from
|
||||
/// the client are ignored. Replaces any existing cover, deleting the
|
||||
/// previous blob if its extension differs. Returns the refreshed
|
||||
/// `MangaDetail`.
|
||||
async fn put_cover(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(_user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
mut multipart: Multipart,
|
||||
) -> AppResult<Json<MangaDetail>> {
|
||||
// TODO(auth): until uploaders are tracked (Phase 5), any signed-in
|
||||
// user can edit any manga's cover. Restrict to uploader + admin
|
||||
// once that column lands.
|
||||
if !repo::manga::exists(&state.db, id).await? {
|
||||
return Err(AppError::NotFound);
|
||||
}
|
||||
|
||||
let mut cover: Option<UploadedImage> = None;
|
||||
while let Some(field) = next_field(&mut multipart).await? {
|
||||
if field.name() == Some("cover") {
|
||||
let bytes = read_field_bytes(field).await?.to_vec();
|
||||
cover = Some(parse_image(bytes, state.upload.max_file_bytes, "cover")?);
|
||||
}
|
||||
}
|
||||
let img = cover.ok_or_else(|| AppError::ValidationFailed {
|
||||
message: "cover part is required".into(),
|
||||
details: json!({ "cover": "required" }),
|
||||
})?;
|
||||
|
||||
// Read the old key BEFORE writing so we can clean up an orphan if
|
||||
// the extension changed (e.g., .png → .jpg). Same-extension is a
|
||||
// `put` overwrite — no delete needed.
|
||||
let old_key = repo::manga::get(&state.db, id).await?.cover_image_path;
|
||||
let new_key = format!("mangas/{}/cover.{}", id, img.ext);
|
||||
state.storage.put(&new_key, &img.bytes).await?;
|
||||
|
||||
if let Some(prev) = old_key.as_deref() {
|
||||
if prev != new_key {
|
||||
// Swallow NotFound — AppError maps it to a client 404,
|
||||
// which would be wrong here. The DB row can outlive a
|
||||
// manually-deleted blob.
|
||||
match state.storage.delete(prev).await {
|
||||
Ok(()) | Err(StorageError::NotFound) => {}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
repo::manga::set_cover_image_path(&state.db, id, &new_key).await?;
|
||||
Ok(Json(repo::manga::get_detail(&state.db, id).await?))
|
||||
}
|
||||
|
||||
/// `DELETE /api/v1/mangas/:id/cover` clears `cover_image_path` and
|
||||
/// removes the blob. Idempotent: removing a non-existent cover succeeds
|
||||
/// with the unchanged detail.
|
||||
async fn delete_cover(
|
||||
State(state): State<AppState>,
|
||||
CurrentUser(_user): CurrentUser,
|
||||
Path(id): Path<Uuid>,
|
||||
) -> AppResult<Json<MangaDetail>> {
|
||||
// TODO(auth): same caveat as put_cover.
|
||||
if !repo::manga::exists(&state.db, id).await? {
|
||||
return Err(AppError::NotFound);
|
||||
}
|
||||
if let Some(key) = repo::manga::get(&state.db, id).await?.cover_image_path {
|
||||
match state.storage.delete(&key).await {
|
||||
Ok(()) | Err(StorageError::NotFound) => {}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
repo::manga::clear_cover_image_path(&state.db, id).await?;
|
||||
}
|
||||
Ok(Json(repo::manga::get_detail(&state.db, id).await?))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct AttachTagBody {
|
||||
pub name: String,
|
||||
@@ -268,6 +348,7 @@ async fn attach_tag(
|
||||
Path(id): Path<Uuid>,
|
||||
Json(body): Json<AttachTagBody>,
|
||||
) -> AppResult<(StatusCode, Json<TagRef>)> {
|
||||
validate_tag_name(&body.name)?;
|
||||
if !repo::manga::exists(&state.db, id).await? {
|
||||
return Err(AppError::NotFound);
|
||||
}
|
||||
@@ -314,6 +395,27 @@ async fn detach_tag(
|
||||
}
|
||||
}
|
||||
|
||||
/// Request-side validation for `POST /mangas/:id/tags` body. Mirrors
|
||||
/// the repo-level cap in `repo::tag::upsert_by_name` (max 64 chars
|
||||
/// after trim) but surfaces the failure at the handler boundary with
|
||||
/// the same envelope shape other validations use.
|
||||
fn validate_tag_name(name: &str) -> AppResult<()> {
|
||||
let trimmed = name.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "tag name cannot be empty".into(),
|
||||
details: json!({ "name": "required" }),
|
||||
});
|
||||
}
|
||||
if trimmed.chars().count() > 64 {
|
||||
return Err(AppError::ValidationFailed {
|
||||
message: "tag name too long".into(),
|
||||
details: json!({ "name": "max 64 characters" }),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_new_manga(input: &NewManga) -> AppResult<()> {
|
||||
if input.title.trim().is_empty() {
|
||||
return Err(AppError::ValidationFailed {
|
||||
|
||||
@@ -2,9 +2,11 @@ pub mod auth;
|
||||
pub mod authors;
|
||||
pub mod bookmarks;
|
||||
pub mod chapters;
|
||||
pub mod collections;
|
||||
pub mod files;
|
||||
pub mod genres;
|
||||
pub mod health;
|
||||
pub mod history;
|
||||
pub mod mangas;
|
||||
pub mod pagination;
|
||||
pub mod tags;
|
||||
@@ -24,4 +26,6 @@ pub fn routes() -> Router<AppState> {
|
||||
.merge(genres::routes())
|
||||
.merge(tags::routes())
|
||||
.merge(authors::routes())
|
||||
.merge(collections::routes())
|
||||
.merge(history::routes())
|
||||
}
|
||||
|
||||
@@ -1,14 +1,27 @@
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use axum::extract::DefaultBodyLimit;
|
||||
use axum::http::{HeaderName, HeaderValue, Method};
|
||||
use axum::Router;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use sqlx::PgPool;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tower_http::cors::{AllowOrigin, CorsLayer};
|
||||
use tower_http::trace::TraceLayer;
|
||||
|
||||
use crate::config::{AuthConfig, Config, UploadConfig};
|
||||
use crate::config::{AuthConfig, Config, CrawlerConfig, CrawlerModePref, UploadConfig};
|
||||
use crate::crawler::browser_manager::{self, BrowserManager};
|
||||
use crate::crawler::content::{self, SyncOutcome};
|
||||
use crate::crawler::daemon::{self, ChapterDispatcher, DaemonConfig, MetadataPass};
|
||||
use crate::crawler::jobs::JobPayload;
|
||||
use crate::crawler::pipeline::{self, MetadataStats};
|
||||
use crate::crawler::rate_limit::HostRateLimiters;
|
||||
use crate::crawler::session;
|
||||
use crate::crawler::source::{target as target_source, DiscoverMode};
|
||||
use crate::repo;
|
||||
use crate::storage::{LocalStorage, Storage};
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -19,7 +32,23 @@ pub struct AppState {
|
||||
pub upload: UploadConfig,
|
||||
}
|
||||
|
||||
pub async fn build(config: Config) -> anyhow::Result<Router> {
|
||||
/// Bundle returned by [`build`]. The router is what `axum::serve` consumes;
|
||||
/// the daemon (when enabled) outlives the HTTP server and is awaited via
|
||||
/// [`AppHandle::shutdown`] after the listener has finished gracefully.
|
||||
pub struct AppHandle {
|
||||
pub router: Router,
|
||||
pub daemon: Option<daemon::DaemonHandle>,
|
||||
}
|
||||
|
||||
impl AppHandle {
|
||||
pub async fn shutdown(self) {
|
||||
if let Some(d) = self.daemon {
|
||||
d.shutdown().await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn build(config: Config) -> anyhow::Result<AppHandle> {
|
||||
let db = PgPoolOptions::new()
|
||||
.max_connections(10)
|
||||
.connect(&config.database_url)
|
||||
@@ -28,13 +57,291 @@ pub async fn build(config: Config) -> anyhow::Result<Router> {
|
||||
|
||||
let storage: Arc<dyn Storage> = Arc::new(LocalStorage::new(config.storage_dir.clone()));
|
||||
|
||||
let daemon = if config.crawler.daemon_enabled {
|
||||
Some(spawn_crawler_daemon(db.clone(), Arc::clone(&storage), &config.crawler).await?)
|
||||
} else {
|
||||
tracing::info!("crawler daemon disabled (CRAWLER_DAEMON=false)");
|
||||
None
|
||||
};
|
||||
|
||||
let state = AppState {
|
||||
db,
|
||||
storage,
|
||||
auth: config.auth.clone(),
|
||||
upload: config.upload.clone(),
|
||||
};
|
||||
Ok(router(state).layer(cors_layer(&config.cors_allowed_origins)))
|
||||
let router = router(state).layer(cors_layer(&config.cors_allowed_origins));
|
||||
Ok(AppHandle { router, daemon })
|
||||
}
|
||||
|
||||
async fn spawn_crawler_daemon(
|
||||
db: PgPool,
|
||||
storage: Arc<dyn Storage>,
|
||||
cfg: &CrawlerConfig,
|
||||
) -> anyhow::Result<daemon::DaemonHandle> {
|
||||
// Reqwest client with cookie jar pre-seeded so CDN image fetches
|
||||
// include PHPSESSID. Same shape as bin/crawler.rs main().
|
||||
let cookie_jar = Arc::new(reqwest::cookie::Jar::default());
|
||||
if let (Some(sid), Some(domain), Some(start_url)) =
|
||||
(&cfg.phpsessid, &cfg.cookie_domain, &cfg.start_url)
|
||||
{
|
||||
let cookie_str = format!("PHPSESSID={sid}; Domain={domain}; Path=/");
|
||||
let seed_url = reqwest::Url::parse(start_url)
|
||||
.context("parse CRAWLER_START_URL for cookie seed")?;
|
||||
cookie_jar.add_cookie_str(&cookie_str, &seed_url);
|
||||
}
|
||||
let mut http_builder = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.no_proxy()
|
||||
.cookie_provider(cookie_jar);
|
||||
if let Some(ua) = &cfg.user_agent {
|
||||
http_builder = http_builder.user_agent(ua);
|
||||
}
|
||||
if let Some(proxy) = &cfg.proxy {
|
||||
http_builder = http_builder
|
||||
.proxy(reqwest::Proxy::all(proxy).with_context(|| format!("parse proxy: {proxy}"))?);
|
||||
}
|
||||
let http = http_builder.build().context("build crawler reqwest")?;
|
||||
|
||||
let mut rate = HostRateLimiters::new(std::time::Duration::from_millis(cfg.rate_ms));
|
||||
if let Some(host) = &cfg.cdn_host {
|
||||
rate = rate.with_override(host, std::time::Duration::from_millis(cfg.cdn_rate_ms));
|
||||
}
|
||||
let rate = Arc::new(rate);
|
||||
|
||||
// Browser manager. on_launch re-injects PHPSESSID on every fresh
|
||||
// chromium spawn so an idle teardown followed by re-launch stays
|
||||
// authenticated without operator action.
|
||||
let mut launch_opts = cfg.browser.clone();
|
||||
if let Some(proxy) = &cfg.proxy {
|
||||
launch_opts.extra_args.push(format!("--proxy-server={proxy}"));
|
||||
}
|
||||
let on_launch = match (&cfg.phpsessid, &cfg.cookie_domain, &cfg.start_url) {
|
||||
(Some(sid), Some(domain), Some(start_url)) => {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url = start_url.clone();
|
||||
let on_launch: browser_manager::OnLaunch = Arc::new(move |browser| {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url = start_url.clone();
|
||||
Box::pin(async move {
|
||||
session::inject_phpsessid(&browser, &sid, &domain)
|
||||
.await
|
||||
.context("on_launch: inject_phpsessid")?;
|
||||
session::verify_session(&browser, &start_url)
|
||||
.await
|
||||
.context("on_launch: verify_session")?;
|
||||
Ok(())
|
||||
})
|
||||
});
|
||||
on_launch
|
||||
}
|
||||
_ => browser_manager::noop_on_launch(),
|
||||
};
|
||||
let browser_manager = BrowserManager::new(launch_opts, cfg.idle_timeout, on_launch);
|
||||
|
||||
let session_expired = Arc::new(AtomicBool::new(false));
|
||||
|
||||
let metadata_pass: Option<Arc<dyn MetadataPass>> = cfg.start_url.as_ref().map(|url| {
|
||||
let m: Arc<dyn MetadataPass> = Arc::new(RealMetadataPass {
|
||||
browser_manager: Arc::clone(&browser_manager),
|
||||
db: db.clone(),
|
||||
storage: Arc::clone(&storage),
|
||||
http: http.clone(),
|
||||
rate: Arc::clone(&rate),
|
||||
start_url: url.clone(),
|
||||
mode_pref: cfg.mode,
|
||||
incremental_stop_after: cfg.incremental_stop_after,
|
||||
});
|
||||
m
|
||||
});
|
||||
|
||||
let dispatcher: Arc<dyn ChapterDispatcher> = Arc::new(RealChapterDispatcher {
|
||||
browser_manager: Arc::clone(&browser_manager),
|
||||
db: db.clone(),
|
||||
storage: Arc::clone(&storage),
|
||||
http,
|
||||
rate: Arc::clone(&rate),
|
||||
});
|
||||
|
||||
// Shared cancellation: daemon shutdown cancels the BrowserManager's
|
||||
// idle reaper too. Reaper itself is added to the daemon's extra_tasks
|
||||
// so DaemonHandle::shutdown awaits its completion.
|
||||
let cancel = CancellationToken::new();
|
||||
let reaper_task = browser_manager::spawn_idle_reaper(
|
||||
Arc::clone(&browser_manager),
|
||||
cancel.clone(),
|
||||
);
|
||||
// Also close the browser explicitly on shutdown so we don't rely on
|
||||
// kill-on-drop when other Arc<Browser> holders may still exist.
|
||||
let shutdown_task = {
|
||||
let cancel = cancel.clone();
|
||||
let mgr = Arc::clone(&browser_manager);
|
||||
tokio::spawn(async move {
|
||||
cancel.cancelled().await;
|
||||
mgr.shutdown().await;
|
||||
})
|
||||
};
|
||||
|
||||
let daemon_handle = daemon::spawn(
|
||||
db,
|
||||
cancel,
|
||||
DaemonConfig {
|
||||
metadata_pass,
|
||||
dispatcher,
|
||||
chapter_workers: cfg.chapter_workers,
|
||||
daily_at: cfg.daily_at,
|
||||
tz: cfg.tz,
|
||||
retention_days: cfg.retention_days,
|
||||
session_expired,
|
||||
extra_tasks: vec![reaper_task, shutdown_task],
|
||||
},
|
||||
);
|
||||
|
||||
Ok(daemon_handle)
|
||||
}
|
||||
|
||||
// Real impls of the daemon traits, owning the browser manager + I/O. Kept
|
||||
// in app.rs because they need the same builder-side env wiring that
|
||||
// AppState gets — the daemon module itself stays free of reqwest / storage
|
||||
// details so its tests don't pull them in.
|
||||
|
||||
struct RealMetadataPass {
|
||||
browser_manager: Arc<BrowserManager>,
|
||||
db: PgPool,
|
||||
storage: Arc<dyn Storage>,
|
||||
http: reqwest::Client,
|
||||
rate: Arc<HostRateLimiters>,
|
||||
start_url: String,
|
||||
mode_pref: CrawlerModePref,
|
||||
incremental_stop_after: usize,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl MetadataPass for RealMetadataPass {
|
||||
async fn run(&self) -> anyhow::Result<MetadataStats> {
|
||||
let mode = resolve_mode(
|
||||
&self.db,
|
||||
target_source::SOURCE_ID,
|
||||
self.mode_pref,
|
||||
self.incremental_stop_after,
|
||||
)
|
||||
.await?;
|
||||
pipeline::run_metadata_pass(
|
||||
&self.browser_manager,
|
||||
&self.db,
|
||||
self.storage.as_ref(),
|
||||
&self.http,
|
||||
&self.rate,
|
||||
&self.start_url,
|
||||
0,
|
||||
false,
|
||||
mode,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// Pick the active mode for this tick. `Explicit` short-circuits the
|
||||
/// DB lookup. `Auto` reads `seed_completed_at`: missing → Backfill
|
||||
/// (initial seed for this source), present → Incremental with the
|
||||
/// configured threshold.
|
||||
///
|
||||
/// A DB error during the Auto lookup propagates as `Err` rather than
|
||||
/// silently degrading to Backfill — the daemon's `run_tick` catches
|
||||
/// the error, logs, and skips the tick. That's safer than running a
|
||||
/// full re-backfill (including a drop pass against stale-looking rows)
|
||||
/// when the DB is flaky.
|
||||
async fn resolve_mode(
|
||||
db: &PgPool,
|
||||
source_id: &str,
|
||||
pref: CrawlerModePref,
|
||||
incremental_stop_after: usize,
|
||||
) -> anyhow::Result<DiscoverMode> {
|
||||
match pref {
|
||||
CrawlerModePref::Explicit(m) => {
|
||||
tracing::info!(?m, "crawler mode: explicit (CRAWLER_MODE override)");
|
||||
Ok(m)
|
||||
}
|
||||
CrawlerModePref::Auto => {
|
||||
let seeded = repo::crawler::seed_completed_at(db, source_id)
|
||||
.await
|
||||
.context("seed_completed_at lookup for mode auto-detection")?;
|
||||
match seeded {
|
||||
Some(at) => {
|
||||
tracing::info!(
|
||||
seed_completed_at = %at.to_rfc3339(),
|
||||
"crawler mode: auto → incremental (seed previously completed)"
|
||||
);
|
||||
Ok(DiscoverMode::Incremental {
|
||||
stop_after_unchanged: incremental_stop_after,
|
||||
})
|
||||
}
|
||||
None => {
|
||||
tracing::info!("crawler mode: auto → backfill (no seed marker for source)");
|
||||
Ok(DiscoverMode::Backfill)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct RealChapterDispatcher {
|
||||
browser_manager: Arc<BrowserManager>,
|
||||
db: PgPool,
|
||||
storage: Arc<dyn Storage>,
|
||||
http: reqwest::Client,
|
||||
rate: Arc<HostRateLimiters>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ChapterDispatcher for RealChapterDispatcher {
|
||||
async fn dispatch(&self, payload: JobPayload) -> anyhow::Result<SyncOutcome> {
|
||||
match payload {
|
||||
JobPayload::SyncChapterContent {
|
||||
source_id: _,
|
||||
chapter_id,
|
||||
source_chapter_key: _,
|
||||
} => {
|
||||
// Look up manga_id + source_url for this chapter.
|
||||
let row: Option<(uuid::Uuid, String)> = sqlx::query_as(
|
||||
"SELECT c.manga_id, cs.source_url \
|
||||
FROM chapters c \
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id \
|
||||
WHERE c.id = $1 \
|
||||
LIMIT 1",
|
||||
)
|
||||
.bind(chapter_id)
|
||||
.fetch_optional(&self.db)
|
||||
.await
|
||||
.context("look up chapter for dispatch")?;
|
||||
let Some((manga_id, source_url)) = row else {
|
||||
// Chapter (or its source row) is gone — ack done.
|
||||
return Ok(SyncOutcome::Skipped);
|
||||
};
|
||||
let lease = self.browser_manager.acquire().await?;
|
||||
let outcome = content::sync_chapter_content(
|
||||
&lease,
|
||||
&self.db,
|
||||
self.storage.as_ref(),
|
||||
&self.http,
|
||||
&self.rate,
|
||||
chapter_id,
|
||||
manga_id,
|
||||
&source_url,
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
drop(lease);
|
||||
Ok(outcome)
|
||||
}
|
||||
// Other payload kinds aren't dispatched by this daemon yet —
|
||||
// metadata-driven jobs (Discover/SyncManga/SyncChapterList)
|
||||
// are handled inline by the cron's metadata pass.
|
||||
_ => Ok(SyncOutcome::Skipped),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a router from a pre-assembled state. Used by integration tests
|
||||
|
||||
498
backend/src/bin/crawler.rs
Normal file
498
backend/src/bin/crawler.rs
Normal file
@@ -0,0 +1,498 @@
|
||||
//! Crawler binary.
|
||||
//!
|
||||
//! Now an ops escape hatch sitting alongside the in-process daemon: walks
|
||||
//! the source's manga listing (all pages), fetches each manga's metadata +
|
||||
//! chapter list, downloads covers, reconciles chapters — and then, for any
|
||||
//! chapter belonging to a bookmarked manga whose `page_count` is still 0,
|
||||
//! fetches the chapter pages inline. The daemon does the same work through
|
||||
//! `crawler_jobs`; the CLI is kept around for force-refetches and manual
|
||||
//! backfills.
|
||||
//!
|
||||
//! Configuration mirrors the daemon's `CRAWLER_*` env vars (see
|
||||
//! `crate::config::CrawlerConfig`) plus the CLI-only:
|
||||
//! - **Start URL**: first CLI positional arg, else `$CRAWLER_START_URL`.
|
||||
//! - **Skip chapters / chapter content / force re-fetch / keep browser**:
|
||||
//! `CRAWLER_SKIP_CHAPTERS`, `CRAWLER_SKIP_CHAPTER_CONTENT`,
|
||||
//! `CRAWLER_FORCE_REFETCH_CHAPTERS`, `CRAWLER_KEEP_BROWSER_OPEN`.
|
||||
//! - **Limit**: `CRAWLER_LIMIT` (max manga detail fetches per run).
|
||||
//!
|
||||
//! See `crawler::pipeline::run_metadata_pass` for the shared metadata
|
||||
//! flow.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use futures_util::stream::{self, StreamExt};
|
||||
use mangalord::crawler::browser::{BrowserMode, LaunchOptions};
|
||||
use mangalord::crawler::browser_manager::{self, BrowserManager};
|
||||
use mangalord::crawler::content::{self, SyncOutcome};
|
||||
use mangalord::crawler::pipeline;
|
||||
use mangalord::crawler::rate_limit::HostRateLimiters;
|
||||
use mangalord::crawler::session;
|
||||
use mangalord::crawler::source::DiscoverMode;
|
||||
use mangalord::storage::{LocalStorage, Storage};
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use sqlx::PgPool;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
dotenvy::dotenv().ok();
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| {
|
||||
"info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off"
|
||||
.into()
|
||||
}),
|
||||
)
|
||||
.init();
|
||||
|
||||
let start_url = resolve_start_url()?;
|
||||
let database_url = std::env::var("DATABASE_URL")
|
||||
.map_err(|_| anyhow!("DATABASE_URL must be set"))?;
|
||||
let storage_dir: PathBuf = std::env::var("STORAGE_DIR")
|
||||
.unwrap_or_else(|_| "./data/storage".to_string())
|
||||
.into();
|
||||
let rate_ms = env_u64("CRAWLER_RATE_MS", 1000);
|
||||
let cdn_host = std::env::var("CRAWLER_CDN_HOST")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let cdn_rate_ms = env_u64("CRAWLER_CDN_RATE_MS", rate_ms);
|
||||
let limit = env_u64("CRAWLER_LIMIT", 0) as usize;
|
||||
let skip_chapters = env_bool("CRAWLER_SKIP_CHAPTERS", false);
|
||||
let incremental_stop_after = env_u64("CRAWLER_INCREMENTAL_STOP_AFTER", 20).max(1) as usize;
|
||||
let mode = parse_crawler_mode(incremental_stop_after)?;
|
||||
let skip_chapter_content = env_bool("CRAWLER_SKIP_CHAPTER_CONTENT", false);
|
||||
let chapter_workers = env_u64("CRAWLER_CHAPTER_WORKERS", 1).max(1) as usize;
|
||||
let force_refetch_chapters = env_bool("CRAWLER_FORCE_REFETCH_CHAPTERS", false);
|
||||
let phpsessid = std::env::var("CRAWLER_PHPSESSID")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let cookie_domain = std::env::var("CRAWLER_COOKIE_DOMAIN")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty())
|
||||
.or_else(|| session::registrable_domain(&start_url));
|
||||
let user_agent = std::env::var("CRAWLER_USER_AGENT")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let proxy_url = std::env::var("CRAWLER_PROXY")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let keep_browser_open = env_bool("CRAWLER_KEEP_BROWSER_OPEN", false);
|
||||
|
||||
let db = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&database_url)
|
||||
.await
|
||||
.context("connect to database")?;
|
||||
sqlx::migrate!("./migrations").run(&db).await?;
|
||||
|
||||
let storage: Arc<dyn Storage> = Arc::new(LocalStorage::new(&storage_dir));
|
||||
|
||||
let cookie_jar = Arc::new(reqwest::cookie::Jar::default());
|
||||
if let (Some(sid), Some(domain)) = (&phpsessid, &cookie_domain) {
|
||||
let cookie_str = format!("PHPSESSID={sid}; Domain={domain}; Path=/");
|
||||
let seed_url =
|
||||
reqwest::Url::parse(&start_url).context("parse start URL for cookie seed")?;
|
||||
cookie_jar.add_cookie_str(&cookie_str, &seed_url);
|
||||
tracing::info!(domain, "seeded PHPSESSID into reqwest cookie jar");
|
||||
}
|
||||
let mut http_builder = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.no_proxy()
|
||||
.cookie_provider(cookie_jar);
|
||||
if let Some(ua) = &user_agent {
|
||||
http_builder = http_builder.user_agent(ua);
|
||||
}
|
||||
if let Some(proxy) = &proxy_url {
|
||||
http_builder = http_builder
|
||||
.proxy(reqwest::Proxy::all(proxy).with_context(|| format!("parse proxy URL: {proxy}"))?);
|
||||
}
|
||||
let http = http_builder.build().context("build http client")?;
|
||||
|
||||
let mut options = LaunchOptions::from_env();
|
||||
if let Some(proxy) = &proxy_url {
|
||||
options.extra_args.push(format!("--proxy-server={proxy}"));
|
||||
}
|
||||
let keep_open = match (keep_browser_open, options.mode) {
|
||||
(true, BrowserMode::Headed) => true,
|
||||
(true, BrowserMode::Headless) => {
|
||||
tracing::warn!(
|
||||
"CRAWLER_KEEP_BROWSER_OPEN ignored in headless mode (no window to inspect)"
|
||||
);
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
tracing::info!(
|
||||
?options,
|
||||
%start_url,
|
||||
rate_ms,
|
||||
cdn_host = ?cdn_host,
|
||||
cdn_rate_ms,
|
||||
limit,
|
||||
skip_chapters,
|
||||
skip_chapter_content,
|
||||
chapter_workers,
|
||||
force_refetch_chapters,
|
||||
phpsessid_set = phpsessid.is_some(),
|
||||
cookie_domain = ?cookie_domain,
|
||||
user_agent = ?user_agent,
|
||||
proxy = ?proxy_url,
|
||||
keep_open,
|
||||
?mode,
|
||||
storage_dir = %storage_dir.display(),
|
||||
"starting crawler"
|
||||
);
|
||||
|
||||
// BrowserManager with idle_timeout = ZERO so the CLI keeps Chromium
|
||||
// alive for the entire run — same lifecycle as the old direct
|
||||
// `browser::launch()` flow. on_launch re-injects PHPSESSID + runs the
|
||||
// session probe; bad cookies fail fast before any real work happens.
|
||||
let on_launch: browser_manager::OnLaunch = match (&phpsessid, &cookie_domain) {
|
||||
(Some(sid), Some(domain)) => {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url_clone = start_url.clone();
|
||||
Arc::new(move |browser| {
|
||||
let sid = sid.clone();
|
||||
let domain = domain.clone();
|
||||
let start_url = start_url_clone.clone();
|
||||
Box::pin(async move {
|
||||
session::inject_phpsessid(&browser, &sid, &domain)
|
||||
.await
|
||||
.context("inject_phpsessid")?;
|
||||
session::verify_session(&browser, &start_url)
|
||||
.await
|
||||
.context("verify_session")?;
|
||||
Ok(())
|
||||
})
|
||||
})
|
||||
}
|
||||
_ => browser_manager::noop_on_launch(),
|
||||
};
|
||||
let session_ready = phpsessid.is_some() && cookie_domain.is_some();
|
||||
let manager = BrowserManager::new(options, Duration::ZERO, on_launch);
|
||||
|
||||
let result = run(
|
||||
Arc::clone(&manager),
|
||||
&db,
|
||||
Arc::clone(&storage),
|
||||
&http,
|
||||
&start_url,
|
||||
rate_ms,
|
||||
cdn_host.as_deref(),
|
||||
cdn_rate_ms,
|
||||
limit,
|
||||
skip_chapters,
|
||||
skip_chapter_content || !session_ready,
|
||||
chapter_workers,
|
||||
force_refetch_chapters,
|
||||
mode,
|
||||
)
|
||||
.await;
|
||||
|
||||
if keep_open {
|
||||
tracing::info!(
|
||||
"crawler finished; browser kept open. Press Ctrl+C to close and exit."
|
||||
);
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
tracing::info!("Ctrl+C received; closing browser");
|
||||
}
|
||||
manager.shutdown().await;
|
||||
result
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn run(
|
||||
manager: Arc<BrowserManager>,
|
||||
db: &PgPool,
|
||||
storage: Arc<dyn Storage>,
|
||||
http: &reqwest::Client,
|
||||
start_url: &str,
|
||||
rate_ms: u64,
|
||||
cdn_host: Option<&str>,
|
||||
cdn_rate_ms: u64,
|
||||
limit: usize,
|
||||
skip_chapters: bool,
|
||||
skip_chapter_content: bool,
|
||||
chapter_workers: usize,
|
||||
force_refetch_chapters: bool,
|
||||
mode: DiscoverMode,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut rate = HostRateLimiters::new(Duration::from_millis(rate_ms));
|
||||
if let Some(host) = cdn_host {
|
||||
rate = rate.with_override(host, Duration::from_millis(cdn_rate_ms));
|
||||
}
|
||||
let rate = Arc::new(rate);
|
||||
|
||||
let stats = pipeline::run_metadata_pass(
|
||||
manager.as_ref(),
|
||||
db,
|
||||
storage.as_ref(),
|
||||
http,
|
||||
rate.as_ref(),
|
||||
start_url,
|
||||
limit,
|
||||
skip_chapters,
|
||||
mode,
|
||||
)
|
||||
.await?;
|
||||
tracing::info!(?stats, "metadata pass complete");
|
||||
|
||||
if !skip_chapter_content {
|
||||
sync_bookmarked_chapter_content(
|
||||
Arc::clone(&manager),
|
||||
db,
|
||||
Arc::clone(&storage),
|
||||
http,
|
||||
Arc::clone(&rate),
|
||||
"target",
|
||||
chapter_workers,
|
||||
force_refetch_chapters,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find every chapter whose manga is bookmarked by at least one user and
|
||||
/// that hasn't been content-synced yet, then fan them out across `workers`
|
||||
/// concurrent tasks. Same as before except the browser comes from a
|
||||
/// BrowserManager lease so it interleaves cleanly with the metadata pass.
|
||||
///
|
||||
/// A `SessionExpired` result aborts the phase.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn sync_bookmarked_chapter_content(
|
||||
manager: Arc<BrowserManager>,
|
||||
db: &PgPool,
|
||||
storage: Arc<dyn Storage>,
|
||||
http: &reqwest::Client,
|
||||
rate: Arc<HostRateLimiters>,
|
||||
source_id: &str,
|
||||
workers: usize,
|
||||
force_refetch: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
let pending: Vec<(Uuid, Uuid, String)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT id, manga_id, source_url FROM (
|
||||
SELECT DISTINCT c.id, c.manga_id, c.created_at, cs.source_url
|
||||
FROM chapters c
|
||||
JOIN bookmarks b ON b.manga_id = c.manga_id
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id
|
||||
WHERE cs.source_id = $1
|
||||
AND cs.dropped_at IS NULL
|
||||
AND (c.page_count = 0 OR $2)
|
||||
) sub
|
||||
ORDER BY manga_id, created_at ASC
|
||||
"#,
|
||||
)
|
||||
.bind(source_id)
|
||||
.bind(force_refetch)
|
||||
.fetch_all(db)
|
||||
.await
|
||||
.context("query pending chapter content")?;
|
||||
|
||||
if pending.is_empty() {
|
||||
tracing::info!("chapter content: nothing pending");
|
||||
return Ok(());
|
||||
}
|
||||
tracing::info!(count = pending.len(), workers, "chapter content phase starting");
|
||||
|
||||
let session_expired = Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let stats = std::sync::Mutex::new(WorkerStats::default());
|
||||
|
||||
stream::iter(pending.into_iter())
|
||||
.for_each_concurrent(workers.max(1), |(chapter_id, manga_id, source_url)| {
|
||||
let session_expired = Arc::clone(&session_expired);
|
||||
let storage = Arc::clone(&storage);
|
||||
let rate = Arc::clone(&rate);
|
||||
let manager = Arc::clone(&manager);
|
||||
let stats = &stats;
|
||||
async move {
|
||||
if session_expired.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
return;
|
||||
}
|
||||
let lease = match manager.acquire().await {
|
||||
Ok(l) => l,
|
||||
Err(e) => {
|
||||
tracing::error!(%chapter_id, error = ?e, "browser acquire failed");
|
||||
let mut s = stats.lock().unwrap();
|
||||
s.failed += 1;
|
||||
return;
|
||||
}
|
||||
};
|
||||
let outcome = content::sync_chapter_content(
|
||||
&lease,
|
||||
db,
|
||||
storage.as_ref(),
|
||||
http,
|
||||
rate.as_ref(),
|
||||
chapter_id,
|
||||
manga_id,
|
||||
&source_url,
|
||||
force_refetch,
|
||||
)
|
||||
.await;
|
||||
drop(lease);
|
||||
let mut s = stats.lock().unwrap();
|
||||
match outcome {
|
||||
Ok(SyncOutcome::Fetched { pages }) => {
|
||||
tracing::info!(%chapter_id, pages, "chapter content fetched");
|
||||
s.fetched += 1;
|
||||
}
|
||||
Ok(SyncOutcome::Skipped) => s.skipped += 1,
|
||||
Ok(SyncOutcome::SessionExpired) => {
|
||||
tracing::error!(
|
||||
%chapter_id,
|
||||
"session expired mid-run — refresh CRAWLER_PHPSESSID and re-run"
|
||||
);
|
||||
session_expired
|
||||
.store(true, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
%chapter_id, error = ?e, "chapter content sync failed"
|
||||
);
|
||||
s.failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
let total = stats.into_inner().unwrap();
|
||||
tracing::info!(
|
||||
fetched = total.fetched,
|
||||
skipped = total.skipped,
|
||||
failed = total.failed,
|
||||
"chapter content phase done"
|
||||
);
|
||||
|
||||
if session_expired.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
anyhow::bail!("session expired during chapter content phase");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Outcome counters for the chapter-content phase. All start at zero.
#[derive(Default, Clone, Copy)]
struct WorkerStats {
    /// Chapters whose content was fetched.
    fetched: usize,
    /// Chapters the sync reported as skipped.
    skipped: usize,
    /// Chapters that failed (sync error or browser acquire failure).
    failed: usize,
}
|
||||
|
||||
fn resolve_start_url() -> anyhow::Result<String> {
|
||||
if let Some(arg) = std::env::args().nth(1) {
|
||||
return Ok(arg);
|
||||
}
|
||||
std::env::var("CRAWLER_START_URL").map_err(|_| {
|
||||
anyhow!(
|
||||
"start URL is required — pass as first CLI arg or set $CRAWLER_START_URL"
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse the CLI's `CRAWLER_MODE`. Defaults to `backfill` because the
|
||||
/// binary is operator-driven (manual reseeds, force-refetches) — the
|
||||
/// auto-detect logic lives in the daemon. `auto` is rejected because
|
||||
/// the CLI has no DB state to consult before the run.
|
||||
fn parse_crawler_mode(incremental_stop_after: usize) -> anyhow::Result<DiscoverMode> {
|
||||
parse_crawler_mode_str(
|
||||
std::env::var("CRAWLER_MODE").ok().as_deref(),
|
||||
incremental_stop_after,
|
||||
)
|
||||
}
|
||||
|
||||
/// Pure variant of [`parse_crawler_mode`] — testable without env-var
|
||||
/// mutation.
|
||||
fn parse_crawler_mode_str(
|
||||
raw: Option<&str>,
|
||||
incremental_stop_after: usize,
|
||||
) -> anyhow::Result<DiscoverMode> {
|
||||
match raw.map(|s| s.trim().to_ascii_lowercase()).as_deref() {
|
||||
None | Some("") | Some("backfill") => Ok(DiscoverMode::Backfill),
|
||||
Some("incremental") => Ok(DiscoverMode::Incremental {
|
||||
stop_after_unchanged: incremental_stop_after,
|
||||
}),
|
||||
Some("auto") => Err(anyhow!(
|
||||
"CRAWLER_MODE=auto isn't supported by the CLI (use backfill or incremental); \
|
||||
the daemon does auto-detection"
|
||||
)),
|
||||
Some(other) => Err(anyhow!(
|
||||
"CRAWLER_MODE must be one of: backfill, incremental (got {other:?})"
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads `name` from the environment as a `u64`.
///
/// Surrounding whitespace is ignored, so a value such as `" 500 "` (easy
/// to produce from `.env` files or YAML) parses instead of silently
/// falling back. Returns `default` when the variable is unset, is not
/// valid Unicode, or does not parse as an unsigned integer.
fn env_u64(name: &str, default: u64) -> u64 {
    std::env::var(name)
        .ok()
        .and_then(|raw| raw.trim().parse().ok())
        .unwrap_or(default)
}
|
||||
|
||||
/// Reads `name` from the environment as a boolean flag.
///
/// The value is trimmed and compared case-insensitively, matching how
/// `CRAWLER_MODE` is parsed: `1` / `true` / `yes` yield `true`, and
/// `0` / `false` / `no` yield `false`. Returns `default` when the variable
/// is unset, is not valid Unicode, or holds any other value.
fn env_bool(name: &str, default: bool) -> bool {
    match std::env::var(name)
        .ok()
        .map(|raw| raw.trim().to_ascii_lowercase())
        .as_deref()
    {
        Some("1" | "true" | "yes") => true,
        Some("0" | "false" | "no") => false,
        _ => default,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Unset and empty values both select the CLI default.
    #[test]
    fn cli_mode_defaults_to_backfill_when_unset_or_blank() {
        for raw in [None, Some("")] {
            let mode = parse_crawler_mode_str(raw, 20).unwrap();
            assert!(matches!(mode, DiscoverMode::Backfill));
        }
    }

    // Both supported keywords map to their variants; the threshold is
    // carried into `Incremental`.
    #[test]
    fn cli_mode_recognizes_backfill_and_incremental() {
        assert!(matches!(
            parse_crawler_mode_str(Some("backfill"), 20).unwrap(),
            DiscoverMode::Backfill
        ));
        assert!(matches!(
            parse_crawler_mode_str(Some("incremental"), 9).unwrap(),
            DiscoverMode::Incremental { stop_after_unchanged: 9 }
        ));
    }

    // `auto` is a daemon-only mode; the error must say so.
    #[test]
    fn cli_mode_rejects_auto_explicitly() {
        let msg = parse_crawler_mode_str(Some("auto"), 20)
            .unwrap_err()
            .to_string();
        assert!(
            msg.contains("daemon"),
            "rejection should point operator at the daemon: {msg}"
        );
    }

    // Unknown values fail and the message lists the accepted ones.
    #[test]
    fn cli_mode_rejects_unknown_value() {
        let msg = parse_crawler_mode_str(Some("garbage"), 20)
            .unwrap_err()
            .to_string();
        for keyword in ["backfill", "incremental"] {
            assert!(msg.contains(keyword));
        }
    }

    // Matching ignores case and surrounding whitespace.
    #[test]
    fn cli_mode_is_case_insensitive_and_trims() {
        let parsed = parse_crawler_mode_str(Some(" Incremental "), 4).unwrap();
        assert!(matches!(
            parsed,
            DiscoverMode::Incremental { stop_after_unchanged: 4 }
        ));
    }
}
|
||||
|
||||
@@ -1,4 +1,20 @@
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::NaiveTime;
|
||||
use chrono_tz::Tz;
|
||||
|
||||
use crate::crawler::browser::LaunchOptions;
|
||||
use crate::crawler::source::DiscoverMode;
|
||||
|
||||
/// What `CRAWLER_MODE` was set to. `Auto` is the daemon's default —
|
||||
/// pick Backfill until `seed_completed_at` is written, then flip to
|
||||
/// Incremental. `Explicit` forces a single mode regardless.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum CrawlerModePref {
|
||||
Auto,
|
||||
Explicit(DiscoverMode),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AuthConfig {
|
||||
@@ -45,6 +61,62 @@ pub struct Config {
|
||||
pub auth: AuthConfig,
|
||||
pub upload: UploadConfig,
|
||||
pub cors_allowed_origins: Vec<String>,
|
||||
pub crawler: CrawlerConfig,
|
||||
}
|
||||
|
||||
/// All crawler-daemon knobs read from env. Mirrors the env vars the
|
||||
/// `bin/crawler` binary already reads, plus the new daemon-only knobs
|
||||
/// (daily_at, tz, idle_timeout, retention_days, daemon_enabled).
|
||||
///
|
||||
/// `daemon_enabled = false` skips the daemon spawn entirely — used by
|
||||
/// integration tests and dev runs that don't want background activity.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CrawlerConfig {
|
||||
pub daemon_enabled: bool,
|
||||
pub daily_at: NaiveTime,
|
||||
pub tz: Tz,
|
||||
pub idle_timeout: Duration,
|
||||
pub chapter_workers: usize,
|
||||
pub retention_days: u32,
|
||||
pub start_url: Option<String>,
|
||||
pub rate_ms: u64,
|
||||
pub cdn_host: Option<String>,
|
||||
pub cdn_rate_ms: u64,
|
||||
pub phpsessid: Option<String>,
|
||||
pub cookie_domain: Option<String>,
|
||||
pub user_agent: Option<String>,
|
||||
pub proxy: Option<String>,
|
||||
pub browser: LaunchOptions,
|
||||
/// Mode preference for the metadata pass. Daemon default is `Auto`
|
||||
/// (Backfill until `seed_completed_at` is written, then Incremental).
|
||||
pub mode: CrawlerModePref,
|
||||
/// `stop_after_unchanged` threshold supplied to Incremental in both
|
||||
/// `Auto` (post-seed) and `Explicit(Incremental)` modes.
|
||||
pub incremental_stop_after: usize,
|
||||
}
|
||||
|
||||
impl Default for CrawlerConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
daemon_enabled: false,
|
||||
daily_at: NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
|
||||
tz: Tz::UTC,
|
||||
idle_timeout: Duration::from_secs(600),
|
||||
chapter_workers: 1,
|
||||
retention_days: 7,
|
||||
start_url: None,
|
||||
rate_ms: 1000,
|
||||
cdn_host: None,
|
||||
cdn_rate_ms: 1000,
|
||||
phpsessid: None,
|
||||
cookie_domain: None,
|
||||
user_agent: None,
|
||||
proxy: None,
|
||||
browser: LaunchOptions::headless(),
|
||||
mode: CrawlerModePref::Auto,
|
||||
incremental_stop_after: 20,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -77,10 +149,96 @@ impl Config {
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
crawler: CrawlerConfig::from_env()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl CrawlerConfig {
|
||||
pub fn from_env() -> anyhow::Result<Self> {
|
||||
// Parse CRAWLER_DAILY_AT (HH:MM, 24h). Invalid → fail fast.
|
||||
let daily_at = match std::env::var("CRAWLER_DAILY_AT").ok().as_deref() {
|
||||
None | Some("") => NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
|
||||
Some(raw) => NaiveTime::parse_from_str(raw, "%H:%M").map_err(|e| {
|
||||
anyhow::anyhow!("CRAWLER_DAILY_AT must be HH:MM (got {raw:?}): {e}")
|
||||
})?,
|
||||
};
|
||||
let tz: Tz = match std::env::var("CRAWLER_TZ").ok().as_deref() {
|
||||
None | Some("") => Tz::UTC,
|
||||
Some(raw) => raw
|
||||
.parse()
|
||||
.map_err(|e| anyhow::anyhow!("CRAWLER_TZ must be a valid IANA TZ (got {raw:?}): {e}"))?,
|
||||
};
|
||||
let incremental_stop_after =
|
||||
env_u64("CRAWLER_INCREMENTAL_STOP_AFTER", 20).max(1) as usize;
|
||||
let mode = parse_mode_env(incremental_stop_after)?;
|
||||
Ok(Self {
|
||||
daemon_enabled: env_bool("CRAWLER_DAEMON", true),
|
||||
daily_at,
|
||||
tz,
|
||||
idle_timeout: Duration::from_secs(env_u64("CRAWLER_IDLE_TIMEOUT_S", 600)),
|
||||
chapter_workers: env_u64("CRAWLER_CHAPTER_WORKERS", 1).max(1) as usize,
|
||||
retention_days: env_u64("CRAWLER_JOB_RETENTION_DAYS", 7) as u32,
|
||||
start_url: std::env::var("CRAWLER_START_URL")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
rate_ms: env_u64("CRAWLER_RATE_MS", 1000),
|
||||
cdn_host: std::env::var("CRAWLER_CDN_HOST")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
cdn_rate_ms: env_u64("CRAWLER_CDN_RATE_MS", env_u64("CRAWLER_RATE_MS", 1000)),
|
||||
phpsessid: std::env::var("CRAWLER_PHPSESSID")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
cookie_domain: std::env::var("CRAWLER_COOKIE_DOMAIN")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
user_agent: std::env::var("CRAWLER_USER_AGENT")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
proxy: std::env::var("CRAWLER_PROXY")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty()),
|
||||
browser: LaunchOptions::from_env(),
|
||||
mode,
|
||||
incremental_stop_after,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse `CRAWLER_MODE`. Empty/unset → `Auto`. Recognized values are
|
||||
/// `auto`, `backfill`, and `incremental` (case-insensitive). Anything
|
||||
/// else is a hard error so a typo can't silently fall through to the
|
||||
/// default and mask itself.
|
||||
fn parse_mode_env(incremental_stop_after: usize) -> anyhow::Result<CrawlerModePref> {
|
||||
parse_mode_str(std::env::var("CRAWLER_MODE").ok().as_deref(), incremental_stop_after)
|
||||
}
|
||||
|
||||
/// Pure variant of [`parse_mode_env`] — testable without env-var
|
||||
/// mutation. Takes the raw value (or `None` if unset).
|
||||
pub(crate) fn parse_mode_str(
|
||||
raw: Option<&str>,
|
||||
incremental_stop_after: usize,
|
||||
) -> anyhow::Result<CrawlerModePref> {
|
||||
match raw.map(|s| s.trim().to_ascii_lowercase()).as_deref() {
|
||||
None | Some("") | Some("auto") => Ok(CrawlerModePref::Auto),
|
||||
Some("backfill") => Ok(CrawlerModePref::Explicit(DiscoverMode::Backfill)),
|
||||
Some("incremental") => Ok(CrawlerModePref::Explicit(DiscoverMode::Incremental {
|
||||
stop_after_unchanged: incremental_stop_after,
|
||||
})),
|
||||
Some(other) => Err(anyhow::anyhow!(
|
||||
"CRAWLER_MODE must be one of: auto, backfill, incremental (got {other:?})"
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads `name` from the environment as a `u64`.
///
/// Surrounding whitespace is ignored, so a value such as `" 500 "` parses
/// instead of silently falling back. Returns `default` when the variable
/// is unset, is not valid Unicode, or does not parse as an unsigned
/// integer.
fn env_u64(name: &str, default: u64) -> u64 {
    std::env::var(name)
        .ok()
        .and_then(|raw| raw.trim().parse().ok())
        .unwrap_or(default)
}
|
||||
|
||||
fn env_bool(name: &str, default: bool) -> bool {
|
||||
match std::env::var(name).ok().as_deref() {
|
||||
Some("1") | Some("true") | Some("TRUE") | Some("yes") => true,
|
||||
@@ -102,3 +260,63 @@ fn env_usize(name: &str, default: usize) -> usize {
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(default)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Unset, empty and whitespace-only values all select `Auto`.
    #[test]
    fn parse_mode_str_defaults_to_auto_when_unset_or_blank() {
        for raw in [None, Some(""), Some(" ")] {
            let pref = parse_mode_str(raw, 20).unwrap();
            assert!(matches!(pref, CrawlerModePref::Auto));
        }
    }

    // Each keyword maps to its own preference; the threshold is carried
    // into `Incremental`.
    #[test]
    fn parse_mode_str_recognizes_each_keyword() {
        assert!(matches!(
            parse_mode_str(Some("auto"), 20).unwrap(),
            CrawlerModePref::Auto
        ));
        assert!(matches!(
            parse_mode_str(Some("backfill"), 20).unwrap(),
            CrawlerModePref::Explicit(DiscoverMode::Backfill)
        ));
        assert!(matches!(
            parse_mode_str(Some("incremental"), 7).unwrap(),
            CrawlerModePref::Explicit(DiscoverMode::Incremental {
                stop_after_unchanged: 7
            })
        ));
    }

    // Matching ignores case and surrounding whitespace.
    #[test]
    fn parse_mode_str_is_case_insensitive_and_trims_whitespace() {
        let padded = parse_mode_str(Some(" Incremental "), 5).unwrap();
        assert!(matches!(
            padded,
            CrawlerModePref::Explicit(DiscoverMode::Incremental {
                stop_after_unchanged: 5
            })
        ));

        let shouted = parse_mode_str(Some("BACKFILL"), 5).unwrap();
        assert!(matches!(
            shouted,
            CrawlerModePref::Explicit(DiscoverMode::Backfill)
        ));
    }

    // A typo is an error, and the message lists the valid keywords.
    #[test]
    fn parse_mode_str_hard_errors_on_unknown_value() {
        let msg = parse_mode_str(Some("backfil"), 20)
            .unwrap_err()
            .to_string();
        assert!(msg.contains("backfill"), "error should list valid values: {msg}");
        assert!(msg.contains("auto"));
        assert!(msg.contains("incremental"));
    }
}
|
||||
|
||||
264
backend/src/crawler/browser.rs
Normal file
264
backend/src/crawler/browser.rs
Normal file
@@ -0,0 +1,264 @@
|
||||
//! Chromium launcher and lifecycle.
|
||||
//!
|
||||
//! Uses `chromiumoxide`'s `fetcher` feature so we don't depend on a
|
||||
//! system Chrome install — first call downloads a known-good revision
|
||||
//! into a cache dir and reuses it forever after. `BrowserMode` toggles
|
||||
//! headed vs headless; the headed path needs a display (real `$DISPLAY`
|
||||
//! or `xvfb-run`).
|
||||
//!
|
||||
//! Extra Chromium command-line flags can be supplied through
|
||||
//! [`LaunchOptions::extra_args`] in code, or via the
|
||||
//! `CRAWLER_BROWSER_ARGS` env var (whitespace-separated) when going
|
||||
//! through [`LaunchOptions::from_env`]. The launcher always also
|
||||
//! injects `--no-sandbox` and `--disable-dev-shm-usage` because they're
|
||||
//! near-mandatory for containerized Chromium; everything else is
|
||||
//! caller-provided.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use chromiumoxide::browser::{Browser, BrowserConfig};
|
||||
use chromiumoxide::error::CdpError;
|
||||
use chromiumoxide::fetcher::{BrowserFetcher, BrowserFetcherOptions};
|
||||
use futures_util::StreamExt;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
/// Whether Chromium runs with a visible window.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BrowserMode {
    /// Windowed. Requires a display — a real `$DISPLAY`, or the binary
    /// wrapped in `xvfb-run`. Selected with `CRAWLER_BROWSER_MODE=headed`;
    /// intended for visually debugging a flow or for sites that
    /// fingerprint headless Chrome. Not used in production.
    Headed,
    /// Windowless. Faster, lighter on resources, and needs no display.
    /// Both `from_env()` and `Default` pick this.
    Headless,
}
|
||||
|
||||
/// Configuration for a single browser launch.
|
||||
///
|
||||
/// Public fields rather than a builder — there are only two of them
|
||||
/// and callers benefit from struct literal syntax for clarity.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LaunchOptions {
|
||||
pub mode: BrowserMode,
|
||||
/// Extra Chromium flags, appended after the launcher's own
|
||||
/// defaults. Example: `vec!["--lang=de-DE".into(),
|
||||
/// "--window-size=1280,800".into()]`.
|
||||
pub extra_args: Vec<String>,
|
||||
}
|
||||
|
||||
impl LaunchOptions {
|
||||
pub fn headed() -> Self {
|
||||
Self {
|
||||
mode: BrowserMode::Headed,
|
||||
extra_args: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn headless() -> Self {
|
||||
Self {
|
||||
mode: BrowserMode::Headless,
|
||||
extra_args: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads `CRAWLER_BROWSER_MODE` (`headless`|`headed`, default
|
||||
/// `headless`) and `CRAWLER_BROWSER_ARGS` (whitespace-separated
|
||||
/// Chromium flags). Flags containing whitespace aren't supported
|
||||
/// through the env var — use the programmatic API for those.
|
||||
pub fn from_env() -> Self {
|
||||
let mode = match std::env::var("CRAWLER_BROWSER_MODE").as_deref() {
|
||||
Ok("headed") => BrowserMode::Headed,
|
||||
_ => BrowserMode::Headless,
|
||||
};
|
||||
let extra_args = std::env::var("CRAWLER_BROWSER_ARGS")
|
||||
.map(|s| parse_args(&s))
|
||||
.unwrap_or_default();
|
||||
Self { mode, extra_args }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LaunchOptions {
|
||||
fn default() -> Self {
|
||||
Self::headless()
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits a `CRAWLER_BROWSER_ARGS`-style string into individual flags on
/// any run of whitespace. Kept apart from `from_env` so it can be
/// unit-tested without reading the process environment.
pub(crate) fn parse_args(s: &str) -> Vec<String> {
    let mut flags = Vec::new();
    for token in s.split_whitespace() {
        flags.push(token.to_owned());
    }
    flags
}
|
||||
|
||||
/// Owned browser plus the spawned task that drives its CDP event loop.
|
||||
/// Dropping `Handle` without calling `close` leaks the Chromium process
|
||||
/// — always call `close().await` in production paths.
|
||||
///
|
||||
/// The browser is stored behind an `Arc` so it can be shared across
|
||||
/// worker tasks (via [`Handle::shared`]) without copying. `Browser::new_page`
|
||||
/// only needs `&self`, so multiple workers can drive the same browser
|
||||
/// concurrently as long as the manager keeps the `Arc` alive.
|
||||
pub struct Handle {
|
||||
browser: Arc<Browser>,
|
||||
driver: JoinHandle<()>,
|
||||
}
|
||||
|
||||
impl Handle {
|
||||
/// Borrow the browser. Equivalent to `&*handle.shared()`.
|
||||
pub fn browser(&self) -> &Browser {
|
||||
&self.browser
|
||||
}
|
||||
|
||||
/// Clone the shared handle. Workers hold these to call `new_page`
|
||||
/// concurrently. The browser only exits when the last `Arc<Browser>`
|
||||
/// is dropped (kill-on-drop), or when `close()` is called on the
|
||||
/// originating `Handle` while it is the sole holder.
|
||||
pub fn shared(&self) -> Arc<Browser> {
|
||||
Arc::clone(&self.browser)
|
||||
}
|
||||
|
||||
/// Closes the browser and awaits the driver task. If other Arcs to
|
||||
/// the browser are still alive we fall back to drop-kills-Chromium
|
||||
/// semantics and just join the driver — this is the rare case where
|
||||
/// shutdown raced an outstanding worker; the OS-level kill is the
|
||||
/// safety net.
|
||||
pub async fn close(self) -> anyhow::Result<()> {
|
||||
match Arc::try_unwrap(self.browser) {
|
||||
Ok(mut owned) => {
|
||||
let _ = owned.close().await;
|
||||
let _ = owned.wait().await;
|
||||
}
|
||||
Err(shared) => {
|
||||
tracing::warn!(
|
||||
strong_count = Arc::strong_count(&shared),
|
||||
"Handle::close while Arc<Browser> still shared — relying on kill-on-drop"
|
||||
);
|
||||
drop(shared);
|
||||
}
|
||||
}
|
||||
let _ = self.driver.await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Launches Chromium. Downloads it on first run via the `fetcher`
|
||||
/// feature; subsequent runs hit the cache. The cache dir is
|
||||
/// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`,
|
||||
/// else `./.chromium-cache` as a last-resort repo-local fallback.
|
||||
pub async fn launch(options: LaunchOptions) -> anyhow::Result<Handle> {
|
||||
let cache = cache_dir()?;
|
||||
tokio::fs::create_dir_all(&cache)
|
||||
.await
|
||||
.with_context(|| format!("create cache dir {}", cache.display()))?;
|
||||
|
||||
let fetcher = BrowserFetcher::new(
|
||||
BrowserFetcherOptions::builder()
|
||||
.with_path(&cache)
|
||||
.build()
|
||||
.map_err(|e| anyhow::anyhow!("fetcher options: {e}"))?,
|
||||
);
|
||||
tracing::info!(path = %cache.display(), "ensuring chromium revision is present");
|
||||
let info = fetcher
|
||||
.fetch()
|
||||
.await
|
||||
.context("download chromium via fetcher")?;
|
||||
tracing::info!(executable = %info.executable_path.display(), "chromium ready");
|
||||
|
||||
let mut builder = BrowserConfig::builder()
|
||||
.chrome_executable(info.executable_path)
|
||||
// Linux containers / CI commonly lack the user namespaces
|
||||
// Chromium's sandbox wants. Disable it; the crawler runs in its
|
||||
// own container anyway.
|
||||
.arg("--no-sandbox")
|
||||
.arg("--disable-dev-shm-usage");
|
||||
for arg in &options.extra_args {
|
||||
builder = builder.arg(arg);
|
||||
}
|
||||
if matches!(options.mode, BrowserMode::Headed) {
|
||||
builder = builder.with_head();
|
||||
}
|
||||
tracing::info!(
|
||||
mode = ?options.mode,
|
||||
extra_args = ?options.extra_args,
|
||||
"building browser config"
|
||||
);
|
||||
let config = builder
|
||||
.build()
|
||||
.map_err(|e| anyhow::anyhow!("browser config: {e}"))?;
|
||||
|
||||
let (browser, mut handler) = Browser::launch(config)
|
||||
.await
|
||||
.context("launch chromium")?;
|
||||
|
||||
let driver = tokio::spawn(async move {
|
||||
while let Some(event) = handler.next().await {
|
||||
match event {
|
||||
Ok(_) => {}
|
||||
// chromiumoxide 0.7 ships fixed CDP type bindings, so any
|
||||
// CDP event Chrome added later fails to deserialize. The
|
||||
// connection is unaffected — these are noise. Suppress
|
||||
// them so real failures stay visible.
|
||||
Err(CdpError::Serde(_)) => {
|
||||
tracing::trace!("chromium emitted an unrecognized CDP event");
|
||||
}
|
||||
Err(err) => tracing::warn!(?err, "chromium handler event error"),
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Handle {
|
||||
browser: Arc::new(browser),
|
||||
driver,
|
||||
})
|
||||
}
|
||||
|
||||
fn cache_dir() -> anyhow::Result<PathBuf> {
|
||||
if let Ok(dir) = std::env::var("CRAWLER_CHROMIUM_DIR") {
|
||||
return Ok(PathBuf::from(dir));
|
||||
}
|
||||
if let Ok(home) = std::env::var("HOME") {
|
||||
return Ok(PathBuf::from(home).join(".cache/mangalord/chromium"));
|
||||
}
|
||||
Ok(PathBuf::from("./.chromium-cache"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Two flags separated by a single space come back as two entries.
    #[test]
    fn parse_args_splits_on_whitespace() {
        let flags = parse_args("--lang=de-DE --window-size=1280,800");
        assert_eq!(flags, vec!["--lang=de-DE", "--window-size=1280,800"]);
    }

    // Tabs, repeated spaces and leading/trailing whitespace all collapse.
    #[test]
    fn parse_args_tolerates_irregular_whitespace() {
        let flags = parse_args(" --a\t--b --c=1\n");
        assert_eq!(flags, vec!["--a", "--b", "--c=1"]);
    }

    // Nothing but whitespace means no flags at all.
    #[test]
    fn parse_args_empty_string_yields_empty_vec() {
        for input in ["", " \t\n"] {
            assert!(parse_args(input).is_empty());
        }
    }

    // Headless is the production-safe default: no display needed and a
    // smaller resource footprint. `Headed` remains opt-in for debugging
    // via CRAWLER_BROWSER_MODE=headed.
    #[test]
    fn default_launch_options_are_headless() {
        assert_eq!(LaunchOptions::default().mode, BrowserMode::Headless);
        assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
        assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
    }
}
|
||||
262
backend/src/crawler/browser_manager.rs
Normal file
262
backend/src/crawler/browser_manager.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
//! Lazy-launch / idle-teardown Chromium manager for the daemon.
|
||||
//!
|
||||
//! The first worker that calls [`BrowserManager::acquire`] triggers a real
|
||||
//! Chromium launch (and the `on_launch` hook — used to re-inject the
|
||||
//! PHPSESSID cookie on every fresh process). Each acquire bumps an active
|
||||
//! counter; the returned [`BrowserLease`] decrements it on drop.
|
||||
//!
|
||||
//! When the active counter hits zero, a background reaper task waits
|
||||
//! `idle_timeout`. If still zero on wake, it closes Chromium and clears the
|
||||
//! cached handle. The next acquire re-launches.
|
||||
//!
|
||||
//! `idle_timeout = Duration::ZERO` disables the reaper — Chromium stays alive
|
||||
//! until [`BrowserManager::shutdown`].
|
||||
|
||||
use std::ops::Deref;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use chromiumoxide::browser::Browser;
|
||||
use futures_util::future::BoxFuture;
|
||||
use tokio::sync::{Mutex, Notify};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::crawler::browser::{self, LaunchOptions};
|
||||
|
||||
/// Hook invoked on every fresh launch with the new browser. Typically used
|
||||
/// to re-inject PHPSESSID + run the session probe. Errors abort the
|
||||
/// `acquire` that triggered the launch — the next acquire will re-launch.
|
||||
pub type OnLaunch =
|
||||
Arc<dyn Fn(Arc<Browser>) -> BoxFuture<'static, anyhow::Result<()>> + Send + Sync>;
|
||||
|
||||
/// Returns an `OnLaunch` that does nothing — useful when no session is
|
||||
/// configured (e.g. CLI metadata-only runs).
|
||||
pub fn noop_on_launch() -> OnLaunch {
|
||||
Arc::new(|_| Box::pin(async { Ok(()) }))
|
||||
}
|
||||
|
||||
/// Decoupled active-lease tracker. Owns the atomic counter and the idle
|
||||
/// notifier so the wiring is unit-testable without standing up a real
|
||||
/// `BrowserManager` (which would require launching Chromium).
|
||||
#[derive(Default)]
|
||||
pub(crate) struct ActiveTracker {
|
||||
counter: AtomicUsize,
|
||||
idle_signal: Notify,
|
||||
}
|
||||
|
||||
impl ActiveTracker {
|
||||
pub(crate) fn new() -> Arc<Self> {
|
||||
Arc::new(Self::default())
|
||||
}
|
||||
|
||||
pub(crate) fn acquire(self: &Arc<Self>) {
|
||||
self.counter.fetch_add(1, Ordering::AcqRel);
|
||||
}
|
||||
|
||||
pub(crate) fn release(self: &Arc<Self>) {
|
||||
if self.counter.fetch_sub(1, Ordering::AcqRel) == 1 {
|
||||
self.idle_signal.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn current(&self) -> usize {
|
||||
self.counter.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
pub(crate) fn idle_signal(&self) -> &Notify {
|
||||
&self.idle_signal
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BrowserManager {
|
||||
inner: Mutex<Inner>,
|
||||
active: Arc<ActiveTracker>,
|
||||
launch_opts: LaunchOptions,
|
||||
idle_timeout: Duration,
|
||||
on_launch: OnLaunch,
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
handle: Option<browser::Handle>,
|
||||
shared: Option<Arc<Browser>>,
|
||||
}
|
||||
|
||||
impl BrowserManager {
|
||||
pub fn new(
|
||||
launch_opts: LaunchOptions,
|
||||
idle_timeout: Duration,
|
||||
on_launch: OnLaunch,
|
||||
) -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
inner: Mutex::new(Inner {
|
||||
handle: None,
|
||||
shared: None,
|
||||
}),
|
||||
active: ActiveTracker::new(),
|
||||
launch_opts,
|
||||
idle_timeout,
|
||||
on_launch,
|
||||
})
|
||||
}
|
||||
|
||||
/// Acquire a shared browser lease. The first acquire after a teardown
|
||||
/// launches a fresh Chromium (and runs `on_launch`); subsequent acquires
|
||||
/// while a process is alive just bump the counter and clone the `Arc`.
|
||||
pub async fn acquire(&self) -> anyhow::Result<BrowserLease> {
|
||||
let mut guard = self.inner.lock().await;
|
||||
if guard.handle.is_none() {
|
||||
let handle = browser::launch(self.launch_opts.clone())
|
||||
.await
|
||||
.context("BrowserManager: launch chromium")?;
|
||||
let shared = handle.shared();
|
||||
// Run the on-launch hook before publishing the handle so a session
|
||||
// probe failure doesn't leave a half-initialized browser behind.
|
||||
if let Err(e) = (self.on_launch)(Arc::clone(&shared)).await {
|
||||
// Close the just-launched browser since we won't be using it.
|
||||
let _ = handle.close().await;
|
||||
return Err(e.context("BrowserManager: on_launch hook failed"));
|
||||
}
|
||||
guard.handle = Some(handle);
|
||||
guard.shared = Some(shared);
|
||||
}
|
||||
let browser = guard
|
||||
.shared
|
||||
.as_ref()
|
||||
.expect("shared set above")
|
||||
.clone();
|
||||
self.active.acquire();
|
||||
Ok(BrowserLease {
|
||||
browser,
|
||||
active: Arc::clone(&self.active),
|
||||
})
|
||||
}
|
||||
|
||||
/// Forcefully close the cached browser regardless of active count.
|
||||
/// Used on daemon shutdown. After this returns the next acquire will
|
||||
/// re-launch from scratch.
|
||||
pub async fn shutdown(&self) {
|
||||
let mut guard = self.inner.lock().await;
|
||||
guard.shared = None;
|
||||
if let Some(handle) = guard.handle.take() {
|
||||
let _ = handle.close().await;
|
||||
}
|
||||
}
|
||||
|
||||
fn idle_timeout(&self) -> Duration {
|
||||
self.idle_timeout
|
||||
}
|
||||
|
||||
fn active(&self) -> Arc<ActiveTracker> {
|
||||
Arc::clone(&self.active)
|
||||
}
|
||||
}
|
||||
|
||||
/// Background reaper. Returns immediately when `idle_timeout == 0`.
|
||||
/// Otherwise spawns a task that:
|
||||
/// 1. Waits on `idle_signal` (woken when active hits zero).
|
||||
/// 2. Sleeps `idle_timeout`.
|
||||
/// 3. Re-checks the counter under the mutex — if still zero, takes the
|
||||
/// handle and closes it.
|
||||
///
|
||||
/// Repeats forever until `cancel` fires.
|
||||
pub fn spawn_idle_reaper(mgr: Arc<BrowserManager>, cancel: CancellationToken) -> JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
if mgr.idle_timeout().is_zero() {
|
||||
// Block until cancellation, then exit.
|
||||
cancel.cancelled().await;
|
||||
return;
|
||||
}
|
||||
let active = mgr.active();
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => return,
|
||||
_ = active.idle_signal().notified() => {}
|
||||
}
|
||||
if active.current() > 0 {
|
||||
continue;
|
||||
}
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => return,
|
||||
_ = tokio::time::sleep(mgr.idle_timeout()) => {}
|
||||
}
|
||||
let mut guard = mgr.inner.lock().await;
|
||||
if active.current() > 0 {
|
||||
// A worker grabbed a lease during the sleep — abort teardown.
|
||||
continue;
|
||||
}
|
||||
let handle = guard.handle.take();
|
||||
guard.shared = None;
|
||||
drop(guard);
|
||||
if let Some(h) = handle {
|
||||
let _ = h.close().await;
|
||||
tracing::info!("BrowserManager: idle teardown — Chromium closed");
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// A worker-side handle that keeps the browser alive while in scope.
|
||||
/// `Deref<Target = Browser>` so callers can pass `&*lease` to APIs that
|
||||
/// expect `&Browser`.
|
||||
pub struct BrowserLease {
|
||||
browser: Arc<Browser>,
|
||||
active: Arc<ActiveTracker>,
|
||||
}
|
||||
|
||||
impl Deref for BrowserLease {
|
||||
type Target = Browser;
|
||||
fn deref(&self) -> &Browser {
|
||||
&self.browser
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for BrowserLease {
|
||||
fn drop(&mut self) {
|
||||
self.active.release();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::AtomicBool;

    /// The no-op hook must be usable from any worker thread.
    #[test]
    fn noop_on_launch_is_send_sync() {
        fn require_send_sync<T: Send + Sync>(_value: &T) {}

        let hook = noop_on_launch();
        require_send_sync(&hook);
    }

    /// The idle notifier fires on the 1 -> 0 transition and not before.
    #[tokio::test]
    async fn active_tracker_signals_idle_only_on_zero_transition() {
        let tracker = ActiveTracker::new();
        let fired = Arc::new(AtomicBool::new(false));

        // Background watcher: flips `fired` once the idle signal arrives.
        let watcher_flag = Arc::clone(&fired);
        let watcher_tracker = Arc::clone(&tracker);
        tokio::spawn(async move {
            watcher_tracker.idle_signal().notified().await;
            watcher_flag.store(true, Ordering::Release);
        });

        tracker.acquire();
        tracker.acquire();
        assert_eq!(tracker.current(), 2);

        // 2 -> 1: must not signal.
        tracker.release();
        assert_eq!(tracker.current(), 1);
        tokio::time::sleep(Duration::from_millis(20)).await;
        let fired_early = fired.load(Ordering::Acquire);
        assert!(!fired_early, "no idle signal at count 1");

        // 1 -> 0: must signal.
        tracker.release();
        tokio::time::sleep(Duration::from_millis(20)).await;
        assert_eq!(tracker.current(), 0);
        let fired_at_zero = fired.load(Ordering::Acquire);
        assert!(fired_at_zero, "idle signal fires on 1 -> 0 transition");
    }
}
|
||||
268
backend/src/crawler/content.rs
Normal file
268
backend/src/crawler/content.rs
Normal file
@@ -0,0 +1,268 @@
|
||||
//! Chapter content sync — fetch a logged-in chapter page, extract its
|
||||
//! image URLs in `pageN` order, download each to storage, and atomically
|
||||
//! persist a `pages` row per image plus the chapter's `page_count`.
|
||||
//!
|
||||
//! Only chapters belonging to a manga someone has bookmarked are
|
||||
//! candidates. The crawler scans bookmarks at the start of each run and
|
||||
//! enqueues unfetched chapters; the API also enqueues at bookmark-time
|
||||
//! so users get instant feedback. Both feed into the same queue and
|
||||
//! dedup by chapter id.
|
||||
|
||||
// Implementation lands in the next commits in this branch. Module is
|
||||
// declared so other crates can `use crawler::content` without breaking
|
||||
// builds while iteration is in progress.
|
||||
|
||||
use anyhow::Context;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::crawler::detect::PageError;
|
||||
use crate::crawler::rate_limit::HostRateLimiters;
|
||||
use crate::crawler::session;
|
||||
use crate::storage::Storage;
|
||||
|
||||
/// Parse the chapter page DOM and return the page images in `pageN`
|
||||
/// order. Filters out the loader `<img class="loading">` and any
|
||||
/// `<img>` without a numeric `id="pageN"`.
|
||||
///
|
||||
/// Reader pages don't render the site's `#logo` element, so the
|
||||
/// universal logo-sentinel can't apply here — instead we assert
|
||||
/// `a#pic_container` is present. Its absence means the response is the
|
||||
/// transient broken-page response (or a redirect to some other layout)
|
||||
/// and the caller should retry.
|
||||
pub fn parse_chapter_pages(html: &str) -> Result<Vec<ChapterImage>, PageError> {
|
||||
let doc = scraper::Html::parse_document(html);
|
||||
let container_sel = scraper::Selector::parse("a#pic_container").unwrap();
|
||||
if doc.select(&container_sel).next().is_none() {
|
||||
return Err(PageError::transient("reader: a#pic_container missing"));
|
||||
}
|
||||
let sel = scraper::Selector::parse("a#pic_container img:not(.loading)").unwrap();
|
||||
let mut pages: Vec<ChapterImage> = doc
|
||||
.select(&sel)
|
||||
.filter_map(|img| {
|
||||
let id = img.value().id()?;
|
||||
let n: i32 = id.strip_prefix("page")?.parse().ok()?;
|
||||
let src = img.value().attr("src")?.trim().to_string();
|
||||
if src.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(ChapterImage { page_number: n, url: src })
|
||||
})
|
||||
.collect();
|
||||
pages.sort_by_key(|p| p.page_number);
|
||||
Ok(pages)
|
||||
}
|
||||
|
||||
/// One page image discovered on a chapter page.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChapterImage {
    /// Number taken from the element's `id="pageN"`.
    pub page_number: i32,
    /// Trimmed `src` attribute, exactly as written in the page.
    pub url: String,
}
|
||||
|
||||
/// Result of syncing one chapter, reported to callers for logging and
/// exit-code decisions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SyncOutcome {
    /// Every image was downloaded and stored and the chapter row updated.
    Fetched { pages: usize },
    /// The chapter already had `page_count > 0` and `force_refetch` was
    /// not set, so nothing was done.
    Skipped,
    /// The session probe failed during the sync (avatar selector missing
    /// on the chapter page). The caller should abort the whole crawler run.
    SessionExpired,
}
|
||||
|
||||
/// Fetch all images for one chapter and persist them atomically. On
|
||||
/// any error after the first storage put, the DB transaction rolls
|
||||
/// back so the chapter stays at `page_count = 0` and is retried on the
|
||||
/// next run. Bytes already written to storage become orphans; a future
|
||||
/// reaper sweeps them.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn sync_chapter_content(
|
||||
browser: &chromiumoxide::Browser,
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
rate: &HostRateLimiters,
|
||||
chapter_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
source_url: &str,
|
||||
force_refetch: bool,
|
||||
) -> anyhow::Result<SyncOutcome> {
|
||||
// Skip if already fetched, unless caller explicitly forces.
|
||||
if !force_refetch {
|
||||
let (page_count,): (i32,) =
|
||||
sqlx::query_as("SELECT page_count FROM chapters WHERE id = $1")
|
||||
.bind(chapter_id)
|
||||
.fetch_one(db)
|
||||
.await
|
||||
.context("read chapter page_count")?;
|
||||
if page_count > 0 {
|
||||
return Ok(SyncOutcome::Skipped);
|
||||
}
|
||||
}
|
||||
|
||||
// Nav to chapter page (rate-limited per host).
|
||||
rate.wait_for(source_url).await?;
|
||||
let page = browser
|
||||
.new_page(source_url)
|
||||
.await
|
||||
.with_context(|| format!("open chapter page {source_url}"))?;
|
||||
page.wait_for_navigation().await.context("wait for chapter nav")?;
|
||||
|
||||
// Session probe: avatar present == still logged in. Missing means
|
||||
// PHPSESSID expired; bail the entire crawler run.
|
||||
if page.find_element("#avatar_menu").await.is_err() {
|
||||
page.close().await.ok();
|
||||
return Ok(SyncOutcome::SessionExpired);
|
||||
}
|
||||
|
||||
let html = page.content().await.context("read chapter html")?;
|
||||
page.close().await.ok();
|
||||
|
||||
let images = parse_chapter_pages(&html)
|
||||
.with_context(|| format!("parse chapter pages at {source_url}"))?;
|
||||
if images.is_empty() {
|
||||
anyhow::bail!("no page images parsed from {source_url}");
|
||||
}
|
||||
|
||||
// Resolve image URLs against the chapter URL (they may be relative).
|
||||
let base = reqwest::Url::parse(source_url).context("parse chapter URL")?;
|
||||
|
||||
// Fetch every image bytes-first into memory before writing
|
||||
// anything. Lets us bail the whole chapter cleanly if any image
|
||||
// fails — DB stays at page_count=0, no partial rows persisted.
|
||||
let mut fetched: Vec<(i32, Vec<u8>, &'static str)> = Vec::with_capacity(images.len());
|
||||
for img in &images {
|
||||
let url = base.join(&img.url).with_context(|| {
|
||||
format!("join image URL {} onto {source_url}", img.url)
|
||||
})?;
|
||||
rate.wait_for(url.as_str()).await?;
|
||||
let resp = http
|
||||
.get(url.clone())
|
||||
// Source CDNs commonly check Referer. Set it to the
|
||||
// chapter page — matches what the browser would send.
|
||||
.header(reqwest::header::REFERER, source_url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("GET {url}"))?
|
||||
.error_for_status()
|
||||
.with_context(|| format!("non-2xx for {url}"))?;
|
||||
let bytes = resp.bytes().await.context("read image body")?.to_vec();
|
||||
let ext = infer::get(&bytes).map(|k| k.extension()).unwrap_or("bin");
|
||||
fetched.push((img.page_number, bytes, ext));
|
||||
}
|
||||
|
||||
// Atomic write: storage puts + page row inserts + page_count
|
||||
// update, all in one transaction. If anything fails, rollback +
|
||||
// the chapter is retried next run. Storage orphans the bytes; a
|
||||
// reaper sweeps them later.
|
||||
let mut tx = db.begin().await.context("open chapter sync tx")?;
|
||||
for (page_number, bytes, ext) in &fetched {
|
||||
let key = format!(
|
||||
"mangas/{manga_id}/chapters/{chapter_id}/pages/{:04}.{ext}",
|
||||
page_number
|
||||
);
|
||||
storage
|
||||
.put(&key, bytes)
|
||||
.await
|
||||
.with_context(|| format!("put {key}"))?;
|
||||
// (chapter_id, page_number) is unique — re-runs idempotent.
|
||||
sqlx::query(
|
||||
"INSERT INTO pages (chapter_id, page_number, storage_key, content_type)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
ON CONFLICT (chapter_id, page_number) DO UPDATE
|
||||
SET storage_key = EXCLUDED.storage_key,
|
||||
content_type = EXCLUDED.content_type",
|
||||
)
|
||||
.bind(chapter_id)
|
||||
.bind(page_number)
|
||||
.bind(&key)
|
||||
.bind(format!("image/{ext}"))
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.with_context(|| format!("insert page row {page_number}"))?;
|
||||
}
|
||||
sqlx::query("UPDATE chapters SET page_count = $1 WHERE id = $2")
|
||||
.bind(fetched.len() as i32)
|
||||
.bind(chapter_id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.context("update page_count")?;
|
||||
tx.commit().await.context("commit chapter sync")?;
|
||||
|
||||
Ok(SyncOutcome::Fetched { pages: fetched.len() })
|
||||
}
|
||||
|
||||
// Suppress unused-import warning for `session` until the bin/crawler
|
||||
// wiring lands in this branch and uses it through this module.
|
||||
#[allow(dead_code)]
|
||||
fn _keep_session_in_scope() {
|
||||
let _ = session::registrable_domain;
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The loader image and images without a `pageN` id are ignored; the
    /// remaining pages come back ordered by number.
    #[test]
    fn parse_chapter_pages_skips_loader_and_sorts_by_id() {
        // Loader image, two real pages out of order, and one with no id.
        let html = r#"
            <html><body id="body"><a id="pic_container">
            <img class="loading" src="/images/ajax-loader2.gif">
            <img id="page2" class="page2" src="https://cdn/2.jpg">
            <img id="page1" class="page1" src="https://cdn/1.jpg">
            <img src="https://cdn/orphan.jpg">
            <img id="not-a-page" src="https://cdn/not-a-page.jpg">
            </a></body></html>
        "#;
        let pages = parse_chapter_pages(html).expect("parse");

        assert_eq!(pages.len(), 2);
        let first = &pages[0];
        let second = &pages[1];
        assert_eq!(first.page_number, 1);
        assert_eq!(first.url, "https://cdn/1.jpg");
        assert_eq!(second.page_number, 2);
        assert_eq!(second.url, "https://cdn/2.jpg");
    }

    /// An image whose `src` is empty is not reported as a page.
    #[test]
    fn parse_chapter_pages_drops_images_without_src() {
        let html = r#"
            <a id="pic_container">
            <img id="page1" src="">
            <img id="page2" src="https://cdn/2.jpg">
            </a>
        "#;
        let pages = parse_chapter_pages(html).expect("parse");

        assert_eq!(pages.len(), 1);
        assert_eq!(pages[0].page_number, 2);
    }

    /// Ordering is numeric, not lexicographic: 9 < 50 < 126.
    #[test]
    fn parse_chapter_pages_handles_three_digit_page_ids() {
        let html = r#"
            <a id="pic_container">
            <img id="page126" src="https://cdn/126.jpg">
            <img id="page9" src="https://cdn/9.jpg">
            <img id="page50" src="https://cdn/50.jpg">
            </a>
        "#;
        let pages = parse_chapter_pages(html).expect("parse");

        let numbers: Vec<_> = pages.iter().map(|page| page.page_number).collect();
        assert_eq!(numbers, vec![9, 50, 126]);
    }

    /// Without `a#pic_container` the result is a transient error.
    #[test]
    fn parse_chapter_pages_returns_transient_when_container_missing() {
        // Reader doesn't render #logo, so the universal logo sentinel
        // can't be used here — a#pic_container is the reader-specific
        // marker. Broken-page response trips this.
        let html = "<html><body>\
            <p>we're sorry, the request file are not found.</p>\
            </body></html>";
        let err = parse_chapter_pages(html).expect_err("expected Transient");

        assert!(err.is_transient(), "got non-transient: {err}");
    }
}
|
||||
633
backend/src/crawler/daemon.rs
Normal file
633
backend/src/crawler/daemon.rs
Normal file
@@ -0,0 +1,633 @@
|
||||
//! In-process crawler daemon.
|
||||
//!
|
||||
//! Owns a cron task that fires a daily metadata pass and N worker tasks
|
||||
//! that drain `SyncChapterContent` jobs from `crawler_jobs`. The dispatch
|
||||
//! seams ([`MetadataPass`], [`ChapterDispatcher`]) are traits so tests can
|
||||
//! inject stubs without standing up a real Chromium / `Source` impl.
|
||||
//!
|
||||
//! ## Cron
|
||||
//!
|
||||
//! Each tick:
|
||||
//! 1. Acquire a Postgres advisory lock on a dedicated pool connection
|
||||
//! (multi-replica safety). Skip the tick on contention.
|
||||
//! 2. Call [`MetadataPass::run`] (typically `pipeline::run_metadata_pass`).
|
||||
//! 3. Enqueue `SyncChapterContent` jobs for any bookmarked manga whose
|
||||
//! chapters still have `page_count = 0`.
|
||||
//! 4. Reap `done` jobs older than `retention_days`.
|
||||
//! 5. Persist `last_metadata_tick_at` and release the lock.
|
||||
//!
|
||||
//! If the last persisted tick is older than the most recent scheduled slot
|
||||
//! (e.g. backend was down at midnight), the daemon fires immediately on
|
||||
//! startup before resuming the regular schedule.
|
||||
//!
|
||||
//! ## Workers
|
||||
//!
|
||||
//! Each worker leases one chapter-content job at a time, dispatches via the
|
||||
//! [`ChapterDispatcher`], and acks `done` / `failed` / re-`pending` based on
|
||||
//! the outcome. A `SessionExpired` outcome flips the sticky
|
||||
//! `session_expired` flag — all workers idle while it's set (until operator
|
||||
//! restart with a refreshed PHPSESSID).
|
||||
//!
|
||||
//! Worker dispatch is wrapped in `catch_unwind` so a panicking handler
|
||||
//! marks the job failed instead of taking down the worker task.
|
||||
|
||||
use std::panic::AssertUnwindSafe;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Datelike, NaiveTime, TimeZone, Timelike, Utc};
|
||||
use chrono_tz::Tz;
|
||||
use futures_util::FutureExt;
|
||||
use serde_json::json;
|
||||
use sqlx::PgPool;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::crawler::content::SyncOutcome;
|
||||
use crate::crawler::jobs::{self, JobPayload, Lease, KIND_SYNC_CHAPTER_CONTENT};
|
||||
use crate::crawler::pipeline;
|
||||
|
||||
/// Key used with `pg_try_advisory_lock` for the cron tick: the ASCII bytes
/// "MANGALRD" read as a big-endian i64 (`0x4D414E47414C5244`). Hardcoded
/// so every replica agrees on the lock identity without consulting config.
pub const CRON_LOCK_KEY: i64 = i64::from_be_bytes(*b"MANGALRD");

/// State key under which the time of the last cron tick is persisted.
const STATE_KEY_LAST_TICK: &str = "last_metadata_tick_at";
|
||||
|
||||
#[async_trait]
|
||||
pub trait MetadataPass: Send + Sync {
|
||||
async fn run(&self) -> anyhow::Result<pipeline::MetadataStats>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ChapterDispatcher: Send + Sync {
|
||||
async fn dispatch(&self, payload: JobPayload) -> anyhow::Result<SyncOutcome>;
|
||||
}
|
||||
|
||||
/// Configuration for [`spawn`]. Use `None` for `metadata_pass` to disable
|
||||
/// the cron entirely (worker-pool-only mode — useful when only the
|
||||
/// bookmark-triggered enqueue path is wanted).
|
||||
pub struct DaemonConfig {
|
||||
pub metadata_pass: Option<Arc<dyn MetadataPass>>,
|
||||
pub dispatcher: Arc<dyn ChapterDispatcher>,
|
||||
pub chapter_workers: usize,
|
||||
pub daily_at: NaiveTime,
|
||||
pub tz: Tz,
|
||||
pub retention_days: u32,
|
||||
pub session_expired: Arc<AtomicBool>,
|
||||
/// Tasks that should run alongside the cron + workers and be cancelled
|
||||
/// on shutdown. Used to hand the daemon ownership of the browser
|
||||
/// manager's idle reaper.
|
||||
pub extra_tasks: Vec<tokio::task::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
pub struct DaemonHandle {
|
||||
cancel: CancellationToken,
|
||||
join: JoinSet<()>,
|
||||
extra: Vec<tokio::task::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl DaemonHandle {
|
||||
/// Trigger shutdown and await all worker / cron / extra tasks.
|
||||
pub async fn shutdown(mut self) {
|
||||
self.cancel.cancel();
|
||||
while self.join.join_next().await.is_some() {}
|
||||
for task in self.extra.drain(..) {
|
||||
let _ = task.await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Cancellation token that drives shutdown — exposed so callers
|
||||
/// (`app::spawn_crawler_daemon`) can hand the same token to auxiliary
|
||||
/// tasks (e.g. the BrowserManager idle reaper) and have them stop on
|
||||
/// the daemon's signal.
|
||||
pub fn cancel_token(&self) -> CancellationToken {
|
||||
self.cancel.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the daemon. Returns immediately; tasks run in the background.
|
||||
/// Pass an external [`CancellationToken`] so auxiliary tasks (e.g. a
|
||||
/// BrowserManager idle reaper) can share the same shutdown signal —
|
||||
/// typically created in the caller, cloned into both spawns.
|
||||
pub fn spawn(pool: PgPool, cancel: CancellationToken, cfg: DaemonConfig) -> DaemonHandle {
|
||||
let mut join = JoinSet::new();
|
||||
|
||||
let DaemonConfig {
|
||||
metadata_pass,
|
||||
dispatcher,
|
||||
chapter_workers,
|
||||
daily_at,
|
||||
tz,
|
||||
retention_days,
|
||||
session_expired,
|
||||
extra_tasks,
|
||||
} = cfg;
|
||||
|
||||
if let Some(metadata) = metadata_pass {
|
||||
let ctx = CronContext {
|
||||
pool: pool.clone(),
|
||||
cancel: cancel.clone(),
|
||||
daily_at,
|
||||
tz,
|
||||
retention_days,
|
||||
metadata,
|
||||
};
|
||||
join.spawn(async move { ctx.run().await });
|
||||
} else {
|
||||
tracing::info!("crawler daemon: no metadata_pass — cron disabled");
|
||||
}
|
||||
|
||||
for worker_id in 0..chapter_workers.max(1) {
|
||||
let ctx = WorkerContext {
|
||||
pool: pool.clone(),
|
||||
cancel: cancel.clone(),
|
||||
dispatcher: Arc::clone(&dispatcher),
|
||||
session_expired: Arc::clone(&session_expired),
|
||||
id: worker_id,
|
||||
};
|
||||
join.spawn(async move { ctx.run().await });
|
||||
}
|
||||
|
||||
DaemonHandle {
|
||||
cancel,
|
||||
join,
|
||||
extra: extra_tasks,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cron
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
struct CronContext {
|
||||
pool: PgPool,
|
||||
cancel: CancellationToken,
|
||||
daily_at: NaiveTime,
|
||||
tz: Tz,
|
||||
retention_days: u32,
|
||||
metadata: Arc<dyn MetadataPass>,
|
||||
}
|
||||
|
||||
impl CronContext {
|
||||
async fn run(self) {
|
||||
// On startup, fire immediately if the most recent slot has already
|
||||
// passed and we never recorded a tick for it.
|
||||
let now = Utc::now();
|
||||
let mut catchup = match read_last_tick(&self.pool).await {
|
||||
Ok(Some(last)) => previous_fire(now, self.daily_at, self.tz) > last,
|
||||
Ok(None) => true,
|
||||
Err(e) => {
|
||||
tracing::warn!(?e, "cron: read_last_tick failed; assuming no catch-up");
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
loop {
|
||||
if catchup {
|
||||
tracing::info!("cron: catch-up tick (missed scheduled slot)");
|
||||
self.run_tick().await;
|
||||
catchup = false;
|
||||
continue;
|
||||
}
|
||||
// Recompute next-fire from now() each iteration so clock jumps
|
||||
// (NTP step, suspend/resume) don't strand us on a stale instant.
|
||||
let next = next_fire(Utc::now(), self.daily_at, self.tz);
|
||||
let wait = (next - Utc::now()).to_std().unwrap_or(Duration::ZERO);
|
||||
tracing::info!(
|
||||
next_fire_utc = %next.to_rfc3339(),
|
||||
wait_seconds = wait.as_secs(),
|
||||
"cron: sleeping until next slot"
|
||||
);
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(wait) => {}
|
||||
_ = self.cancel.cancelled() => {
|
||||
tracing::info!("cron: shutdown");
|
||||
return;
|
||||
}
|
||||
}
|
||||
self.run_tick().await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_tick(&self) {
|
||||
let mut conn = match self.pool.acquire().await {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::error!(?e, "cron: acquire conn failed; skipping tick");
|
||||
return;
|
||||
}
|
||||
};
|
||||
// pg_try_advisory_lock is session-scoped — we must hold the same
|
||||
// connection for the unlock or the call silently no-ops on a
|
||||
// different connection from the pool.
|
||||
let acquired: bool = sqlx::query_scalar("SELECT pg_try_advisory_lock($1)")
|
||||
.bind(CRON_LOCK_KEY)
|
||||
.fetch_one(&mut *conn)
|
||||
.await
|
||||
.unwrap_or(false);
|
||||
if !acquired {
|
||||
tracing::info!("cron: tick skipped — another replica holds the lock");
|
||||
return;
|
||||
}
|
||||
|
||||
match self.metadata.run().await {
|
||||
Ok(stats) => tracing::info!(?stats, "cron: metadata pass done"),
|
||||
Err(e) => tracing::error!(?e, "cron: metadata pass failed"),
|
||||
}
|
||||
|
||||
match pipeline::enqueue_bookmarked_pending(&self.pool).await {
|
||||
Ok(summary) => tracing::info!(?summary, "cron: enqueued bookmarked-pending"),
|
||||
Err(e) => tracing::error!(?e, "cron: enqueue_bookmarked_pending failed"),
|
||||
}
|
||||
|
||||
match jobs::reap_done(&self.pool, self.retention_days).await {
|
||||
Ok(n) => tracing::info!(reaped = n, "cron: done-job reaper finished"),
|
||||
Err(e) => tracing::error!(?e, "cron: done-job reaper failed"),
|
||||
}
|
||||
|
||||
if let Err(e) = write_last_tick(&self.pool, Utc::now()).await {
|
||||
tracing::warn!(?e, "cron: persist last_metadata_tick_at failed");
|
||||
}
|
||||
|
||||
let _ = sqlx::query("SELECT pg_advisory_unlock($1)")
|
||||
.bind(CRON_LOCK_KEY)
|
||||
.execute(&mut *conn)
|
||||
.await;
|
||||
drop(conn);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Workers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
struct WorkerContext {
|
||||
pool: PgPool,
|
||||
cancel: CancellationToken,
|
||||
dispatcher: Arc<dyn ChapterDispatcher>,
|
||||
session_expired: Arc<AtomicBool>,
|
||||
id: usize,
|
||||
}
|
||||
|
||||
impl WorkerContext {
|
||||
async fn run(self) {
|
||||
loop {
|
||||
if self.cancel.is_cancelled() {
|
||||
tracing::info!(worker = self.id, "worker: shutdown");
|
||||
return;
|
||||
}
|
||||
if self.session_expired.load(Ordering::Acquire) {
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(Duration::from_secs(30)) => continue,
|
||||
_ = self.cancel.cancelled() => return,
|
||||
}
|
||||
}
|
||||
let leases = match jobs::lease(
|
||||
&self.pool,
|
||||
Some(KIND_SYNC_CHAPTER_CONTENT),
|
||||
1,
|
||||
Duration::from_secs(60),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
tracing::warn!(worker = self.id, ?e, "worker: lease failed");
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(Duration::from_secs(5)) => continue,
|
||||
_ = self.cancel.cancelled() => return,
|
||||
}
|
||||
}
|
||||
};
|
||||
let Some(lease) = leases.into_iter().next() else {
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(Duration::from_secs(1)) => continue,
|
||||
_ = self.cancel.cancelled() => return,
|
||||
}
|
||||
};
|
||||
self.process_lease(lease).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_lease(&self, lease: Lease) {
|
||||
// Consumer-side dedup safety net: if the chapter already has pages
|
||||
// (because a force-refetch race or a job that was re-enqueued
|
||||
// after a previous one finished), ack done without re-fetching.
|
||||
if let JobPayload::SyncChapterContent { chapter_id, .. } = &lease.payload {
|
||||
let page_count: Option<i32> = sqlx::query_scalar(
|
||||
"SELECT page_count FROM chapters WHERE id = $1",
|
||||
)
|
||||
.bind(chapter_id)
|
||||
.fetch_optional(&self.pool)
|
||||
.await
|
||||
.ok()
|
||||
.flatten();
|
||||
if matches!(page_count, Some(n) if n > 0) {
|
||||
let _ = jobs::ack_done(&self.pool, lease.id).await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let outcome = AssertUnwindSafe(self.dispatcher.dispatch(lease.payload.clone()))
|
||||
.catch_unwind()
|
||||
.await;
|
||||
match outcome {
|
||||
Ok(Ok(SyncOutcome::Fetched { .. } | SyncOutcome::Skipped)) => {
|
||||
let _ = jobs::ack_done(&self.pool, lease.id).await;
|
||||
}
|
||||
Ok(Ok(SyncOutcome::SessionExpired)) => {
|
||||
tracing::error!(
|
||||
worker = self.id,
|
||||
lease_id = %lease.id,
|
||||
"session expired — workers will idle until restart"
|
||||
);
|
||||
self.session_expired.store(true, Ordering::Release);
|
||||
let _ = jobs::release(&self.pool, lease.id).await;
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
tracing::warn!(
|
||||
worker = self.id,
|
||||
lease_id = %lease.id,
|
||||
error = ?e,
|
||||
"worker: dispatch error — ack failed"
|
||||
);
|
||||
let _ = jobs::ack_failed(
|
||||
&self.pool,
|
||||
lease.id,
|
||||
&format!("{e:#}"),
|
||||
lease.attempts,
|
||||
lease.max_attempts,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
Err(_panic) => {
|
||||
tracing::error!(
|
||||
worker = self.id,
|
||||
lease_id = %lease.id,
|
||||
"worker: dispatcher panicked — ack failed"
|
||||
);
|
||||
let _ = jobs::ack_failed(
|
||||
&self.pool,
|
||||
lease.id,
|
||||
"worker panicked",
|
||||
lease.attempts,
|
||||
lease.max_attempts,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cron timing primitives
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Compute the next UTC instant when `daily_at` (interpreted in `tz`) will
|
||||
/// fire, strictly after `now`. Handles DST gaps (spring-forward) by
|
||||
/// advancing past the gap; on DST overlap (fall-back) picks the later
|
||||
/// instant so the job runs once, not twice.
|
||||
pub fn next_fire(now: DateTime<Utc>, daily_at: NaiveTime, tz: Tz) -> DateTime<Utc> {
|
||||
let now_local = now.with_timezone(&tz);
|
||||
// Start with today's slot in the local TZ.
|
||||
let mut candidate = local_at(now_local.date_naive(), daily_at, tz);
|
||||
// If today's slot is in the past (or now), roll forward day-by-day.
|
||||
while candidate <= now {
|
||||
let next_day = candidate
|
||||
.with_timezone(&tz)
|
||||
.date_naive()
|
||||
.succ_opt()
|
||||
.unwrap_or_else(|| {
|
||||
// Defensive: succ_opt only fails at chrono's max date.
|
||||
chrono::NaiveDate::from_ymd_opt(
|
||||
candidate.year(),
|
||||
candidate.month(),
|
||||
candidate.day(),
|
||||
)
|
||||
.expect("valid date")
|
||||
});
|
||||
candidate = local_at(next_day, daily_at, tz);
|
||||
}
|
||||
candidate
|
||||
}
|
||||
|
||||
/// The most recent fire instant at or before `now`. Used to detect missed
|
||||
/// slots after a restart.
|
||||
pub fn previous_fire(now: DateTime<Utc>, daily_at: NaiveTime, tz: Tz) -> DateTime<Utc> {
|
||||
let now_local = now.with_timezone(&tz);
|
||||
let today = local_at(now_local.date_naive(), daily_at, tz);
|
||||
if today <= now {
|
||||
return today;
|
||||
}
|
||||
let yesterday = now_local
|
||||
.date_naive()
|
||||
.pred_opt()
|
||||
.expect("a day before now");
|
||||
local_at(yesterday, daily_at, tz)
|
||||
}
|
||||
|
||||
/// Resolve a local date+time to a UTC instant in `tz`, navigating DST
|
||||
/// edges deterministically:
|
||||
/// - `LocalResult::Single` → that instant.
|
||||
/// - `LocalResult::Ambiguous(_, latest)` → the later instant (fall-back
|
||||
/// hour). Picking latest means a daily job fires once across the
|
||||
/// repeated hour, not twice.
|
||||
/// - `LocalResult::None` → spring-forward gap. Advance the local time
|
||||
/// by 1 minute and try again, repeating up to 120 times (so the worst
|
||||
/// case is still well inside an hour-long gap).
|
||||
fn local_at(date: chrono::NaiveDate, time: NaiveTime, tz: Tz) -> DateTime<Utc> {
|
||||
use chrono::LocalResult;
|
||||
for offset_minutes in 0..120 {
|
||||
let mut t = time;
|
||||
if offset_minutes > 0 {
|
||||
let added = chrono::NaiveTime::from_num_seconds_from_midnight_opt(
|
||||
((time.num_seconds_from_midnight() as i64 + offset_minutes * 60) % 86_400) as u32,
|
||||
0,
|
||||
)
|
||||
.unwrap_or(time);
|
||||
t = added;
|
||||
}
|
||||
let naive = date.and_time(t);
|
||||
match tz.from_local_datetime(&naive) {
|
||||
LocalResult::Single(dt) => return dt.with_timezone(&Utc),
|
||||
LocalResult::Ambiguous(_, latest) => return latest.with_timezone(&Utc),
|
||||
LocalResult::None => continue,
|
||||
}
|
||||
}
|
||||
// Should be unreachable — DST gaps are always less than an hour.
|
||||
Utc.from_utc_datetime(&date.and_time(time))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// crawler_state I/O
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async fn read_last_tick(pool: &PgPool) -> sqlx::Result<Option<DateTime<Utc>>> {
|
||||
let row: Option<serde_json::Value> = sqlx::query_scalar(
|
||||
"SELECT value FROM crawler_state WHERE key = $1",
|
||||
)
|
||||
.bind(STATE_KEY_LAST_TICK)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row.and_then(|v| {
|
||||
v.get("at")
|
||||
.and_then(|s| s.as_str())
|
||||
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
}))
|
||||
}
|
||||
|
||||
async fn write_last_tick(pool: &PgPool, at: DateTime<Utc>) -> sqlx::Result<()> {
|
||||
sqlx::query(
|
||||
"INSERT INTO crawler_state (key, value, updated_at) \
|
||||
VALUES ($1, $2, now()) \
|
||||
ON CONFLICT (key) DO UPDATE \
|
||||
SET value = EXCLUDED.value, updated_at = now()",
|
||||
)
|
||||
.bind(STATE_KEY_LAST_TICK)
|
||||
.bind(json!({ "at": at.to_rfc3339() }))
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test helpers (not gated on cfg(test) — integration tests in tests/ dir
|
||||
// need them too).
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub mod test_support {
|
||||
//! Lightweight stubs the daemon tests use. Public because integration
|
||||
//! tests live outside this module.
|
||||
use super::*;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
|
||||
pub struct CountingMetadataPass {
|
||||
pub count: AtomicUsize,
|
||||
}
|
||||
|
||||
impl Default for CountingMetadataPass {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
count: AtomicUsize::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl MetadataPass for CountingMetadataPass {
|
||||
async fn run(&self) -> anyhow::Result<pipeline::MetadataStats> {
|
||||
self.count.fetch_add(1, Ordering::AcqRel);
|
||||
Ok(pipeline::MetadataStats::default())
|
||||
}
|
||||
}
|
||||
|
||||
pub type DispatchFn = Arc<
|
||||
dyn Fn(JobPayload) -> futures_util::future::BoxFuture<'static, anyhow::Result<SyncOutcome>>
|
||||
+ Send
|
||||
+ Sync,
|
||||
>;
|
||||
|
||||
pub struct StubDispatcher {
|
||||
pub handler: DispatchFn,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ChapterDispatcher for StubDispatcher {
|
||||
async fn dispatch(&self, payload: JobPayload) -> anyhow::Result<SyncOutcome> {
|
||||
(self.handler)(payload).await
|
||||
}
|
||||
}
|
||||
|
||||
pub fn always_done() -> Arc<StubDispatcher> {
|
||||
Arc::new(StubDispatcher {
|
||||
handler: Arc::new(|_| Box::pin(async { Ok(SyncOutcome::Fetched { pages: 1 }) })),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn panicking_dispatcher() -> Arc<StubDispatcher> {
|
||||
Arc::new(StubDispatcher {
|
||||
handler: Arc::new(|_| Box::pin(async { panic!("intentional dispatcher panic") })),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// UTC instant at the given date and hour:minute (seconds = 0).
    fn dt_utc(y: i32, mo: u32, d: u32, h: u32, mi: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(y, mo, d, h, mi, 0).unwrap()
    }

    #[test]
    fn next_fire_in_utc_at_midnight_advances_one_day() {
        let now = dt_utc(2026, 5, 25, 12, 0); // noon UTC
        let at = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
        let next = next_fire(now, at, Tz::UTC);
        // Next midnight is May 26 00:00 UTC.
        assert_eq!(next, dt_utc(2026, 5, 26, 0, 0));
    }

    #[test]
    fn next_fire_before_today_slot_returns_today() {
        let now = dt_utc(2026, 5, 25, 23, 0); // 23:00 UTC
        let at = NaiveTime::from_hms_opt(23, 30, 0).unwrap();
        let next = next_fire(now, at, Tz::UTC);
        assert_eq!(next, dt_utc(2026, 5, 25, 23, 30));
    }

    #[test]
    fn next_fire_skips_spring_forward_gap_in_europe_berlin() {
        // 2024-03-31: clocks jump 02:00 -> 03:00 in Berlin (CET -> CEST), so
        // local 02:30 does not exist that day. `now` is 00:30 UTC = 01:30 CET,
        // i.e. just before the gap.
        let now = dt_utc(2024, 3, 31, 0, 30);
        let at = NaiveTime::from_hms_opt(2, 30, 0).unwrap();
        let next = next_fire(now, at, Tz::Europe__Berlin);
        // The minute-by-minute search walks 02:30 -> 02:59 (all missing) and
        // lands on 03:00 local, the first valid wall-clock time after the
        // gap. 03:00 CEST (UTC+2) is 01:00 UTC.
        assert!(next > now);
        assert_eq!(next, dt_utc(2024, 3, 31, 1, 0));
    }

    #[test]
    fn next_fire_on_fall_back_picks_later_instant() {
        // 2024-10-27: clocks jump 03:00 -> 02:00 (CEST -> CET) in Berlin.
        // 02:30 happens twice on that day. We pick the later one.
        let now = dt_utc(2024, 10, 26, 12, 0); // day before, noon UTC
        let at = NaiveTime::from_hms_opt(2, 30, 0).unwrap();
        let next = next_fire(now, at, Tz::Europe__Berlin);
        // First 02:30 local is 00:30 UTC (CEST = UTC+2).
        // Second 02:30 local is 01:30 UTC (CET = UTC+1).
        // We expect the later instant: 01:30 UTC on 2024-10-27.
        assert_eq!(next, dt_utc(2024, 10, 27, 1, 30));
    }

    #[test]
    fn previous_fire_returns_today_when_now_is_after_slot() {
        let now = dt_utc(2026, 5, 25, 12, 0); // noon UTC
        let at = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
        let prev = previous_fire(now, at, Tz::UTC);
        assert_eq!(prev, dt_utc(2026, 5, 25, 0, 0));
    }

    #[test]
    fn previous_fire_returns_yesterday_when_now_is_before_today_slot() {
        let now = dt_utc(2026, 5, 25, 8, 0); // 08:00 UTC
        let at = NaiveTime::from_hms_opt(23, 30, 0).unwrap();
        let prev = previous_fire(now, at, Tz::UTC);
        assert_eq!(prev, dt_utc(2026, 5, 24, 23, 30));
    }
}
|
||||
250
backend/src/crawler/detect.rs
Normal file
250
backend/src/crawler/detect.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
//! Transient-page detection.
|
||||
//!
|
||||
//! The target site occasionally responds with a 403 + tiny "we're sorry,
|
||||
//! the request file are not found" body on pages that actually exist.
|
||||
//! Selectors on that body match nothing, which is indistinguishable from
|
||||
//! a genuinely empty page unless we look for the broken-page markers
|
||||
//! explicitly. The same shape covers full-site outages: 5xx pages,
|
||||
//! Cloudflare interstitials, and "site is down" placeholders all share
|
||||
//! the trait that the normal layout (`#logo` in the header) is absent.
|
||||
//!
|
||||
//! Helpers here are split into two signals so callers can compose them:
|
||||
//! - [`is_broken_page_body`]: pattern-match on the known broken-page
|
||||
//! string. Works for *any* page on the site, including the reader,
|
||||
//! which doesn't render `#logo`.
|
||||
//! - [`has_logo_sentinel`]: assert `#logo` is in the parsed DOM. Site-
|
||||
//! structural marker — present on the manga list, manga detail,
|
||||
//! chapter-list, and login probe pages. **Not** present on the reader,
|
||||
//! so callers in the reader path must rely on the body signature only.
|
||||
//!
|
||||
//! [`PageError::Transient`] is the typed signal returned by parser and
|
||||
//! navigate wrappers. Job handlers map it to "reschedule with backoff"
|
||||
//! rather than the per-page silent skip the parsers used to do.
|
||||
|
||||
use std::future::Future;
|
||||
use std::time::Duration;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Universal substring of the broken-page body. The site renders the
/// exact string verbatim in a single `<p>`, so a case-insensitive
/// substring match is enough — we deliberately do *not* anchor to the
/// kaomoji because that part is more likely to change than the prose.
///
/// Must stay all-lowercase ASCII: `is_broken_page_body` lowercases only the
/// haystack before searching for this marker, so an uppercase letter here
/// would never match.
const BROKEN_PAGE_MARKER: &str = "we're sorry, the request file are not found";
|
||||
|
||||
/// Outcome of a page fetch or parse when the caller wants to
|
||||
/// distinguish "site/page is transiently broken — retry later" from
|
||||
/// other errors. `Transient` is the only retry-friendly variant; every
|
||||
/// other failure mode stays as `anyhow::Error` and is treated as today.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum PageError {
|
||||
/// Page came back but the site signaled trouble — broken-page body
|
||||
/// signature, structural sentinel missing, etc. Caller should
|
||||
/// reschedule this fetch rather than treat it as data.
|
||||
#[error("transient page error: {reason}")]
|
||||
Transient { reason: String },
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
impl PageError {
|
||||
pub fn transient(reason: impl Into<String>) -> Self {
|
||||
Self::Transient { reason: reason.into() }
|
||||
}
|
||||
|
||||
pub fn is_transient(&self) -> bool {
|
||||
matches!(self, Self::Transient { .. })
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when the response body matches the known broken-page
|
||||
/// template. Case-insensitive substring match — small bodies (~150B)
|
||||
/// make the scan trivially fast, and the broken page is always tiny so
|
||||
/// false positives on a real catalog page are not a concern.
|
||||
pub fn is_broken_page_body(html: &str) -> bool {
|
||||
html.to_ascii_lowercase().contains(BROKEN_PAGE_MARKER)
|
||||
}
|
||||
|
||||
/// Returns true when the parsed document contains `#logo` — the site's
|
||||
/// header logo element, present on every full-layout page and absent on
|
||||
/// the broken-page response and on the reader.
|
||||
pub fn has_logo_sentinel(doc: &scraper::Html) -> bool {
|
||||
let sel = scraper::Selector::parse("#logo").expect("#logo is a valid selector");
|
||||
doc.select(&sel).next().is_some()
|
||||
}
|
||||
|
||||
/// Retry `op` up to `max_attempts` times whenever it returns
|
||||
/// [`PageError::Transient`], sleeping `delay` between attempts.
|
||||
/// Non-transient errors short-circuit immediately. Used by discover-loop
|
||||
/// callers so a single broken page doesn't drop the whole walk — the
|
||||
/// caller can fall back on the job system's retry/backoff once the
|
||||
/// inline budget is exhausted.
|
||||
pub async fn retry_on_transient<F, Fut, T>(
|
||||
mut op: F,
|
||||
max_attempts: u32,
|
||||
delay: Duration,
|
||||
) -> Result<T, PageError>
|
||||
where
|
||||
F: FnMut() -> Fut,
|
||||
Fut: Future<Output = Result<T, PageError>>,
|
||||
{
|
||||
debug_assert!(max_attempts >= 1, "max_attempts must be at least 1");
|
||||
let mut attempt = 0u32;
|
||||
loop {
|
||||
attempt += 1;
|
||||
match op().await {
|
||||
Ok(v) => return Ok(v),
|
||||
Err(e) if !e.is_transient() => return Err(e),
|
||||
Err(e) if attempt >= max_attempts => return Err(e),
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
attempt,
|
||||
max_attempts,
|
||||
error = %e,
|
||||
"transient error; sleeping before retry"
|
||||
);
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // --- is_broken_page_body -------------------------------------------

    #[test]
    fn broken_page_body_matches_exact_template() {
        let body = "<html><head></head><body>\
                    <p>we're sorry, the request file are not found. Σ(っ°Д °;)っ</p>\
                    </body></html>";
        assert!(is_broken_page_body(body));
    }

    #[test]
    fn broken_page_body_is_case_insensitive() {
        let shouted = "<p>WE'RE SORRY, THE REQUEST FILE ARE NOT FOUND.</p>";
        assert!(is_broken_page_body(shouted));
    }

    #[test]
    fn broken_page_body_does_not_match_normal_listing() {
        let listing = "<html><body><div id='logo'></div>\
                       <ul><li>Manga A</li><li>Manga B</li></ul></body></html>";
        assert!(!is_broken_page_body(listing));
    }

    #[test]
    fn broken_page_body_does_not_match_empty_string() {
        assert!(!is_broken_page_body(""));
    }

    // --- has_logo_sentinel ---------------------------------------------

    #[test]
    fn logo_sentinel_present_on_normal_page() {
        let markup = "<html><body><div id='logo'>Site</div><main>...</main></body></html>";
        let doc = scraper::Html::parse_document(markup);
        assert!(has_logo_sentinel(&doc));
    }

    #[test]
    fn logo_sentinel_absent_on_broken_page() {
        let markup = "<html><head></head><body>\
                      <p>we're sorry, the request file are not found.</p></body></html>";
        let doc = scraper::Html::parse_document(markup);
        assert!(!has_logo_sentinel(&doc));
    }

    #[test]
    fn logo_sentinel_absent_on_empty_document() {
        let doc = scraper::Html::parse_document("");
        assert!(!has_logo_sentinel(&doc));
    }

    // --- PageError -------------------------------------------------------

    #[test]
    fn page_error_transient_constructor_sets_reason() {
        let err = PageError::transient("logo missing");
        assert!(err.is_transient());
        assert_eq!(err.to_string(), "transient page error: logo missing");
    }

    #[test]
    fn page_error_other_is_not_transient() {
        let err: PageError = anyhow::anyhow!("something else").into();
        assert!(!err.is_transient());
    }

    // --- retry_on_transient --------------------------------------------

    #[tokio::test]
    async fn retry_returns_ok_after_a_transient_streak() {
        let mut calls = 0u32;
        let outcome: Result<i32, PageError> = retry_on_transient(
            || {
                calls += 1;
                let this_call = calls;
                async move {
                    if this_call < 3 {
                        Err(PageError::transient("not yet"))
                    } else {
                        Ok(42)
                    }
                }
            },
            5,
            Duration::from_millis(0),
        )
        .await;
        assert_eq!(outcome.unwrap(), 42);
        assert_eq!(calls, 3);
    }

    #[tokio::test]
    async fn retry_gives_up_after_max_attempts_on_persistent_transient() {
        let mut calls = 0u32;
        let outcome: Result<i32, PageError> = retry_on_transient(
            || {
                calls += 1;
                async { Err(PageError::transient("always")) }
            },
            3,
            Duration::from_millis(0),
        )
        .await;
        let err = outcome.expect_err("expected Transient");
        assert!(err.is_transient());
        assert_eq!(calls, 3, "retried max_attempts times, no more");
    }

    #[tokio::test]
    async fn retry_does_not_retry_non_transient_errors() {
        let mut calls = 0u32;
        let outcome: Result<i32, PageError> = retry_on_transient(
            || {
                calls += 1;
                async { Err(PageError::Other(anyhow::anyhow!("permanent"))) }
            },
            5,
            Duration::from_millis(0),
        )
        .await;
        assert!(outcome.is_err());
        assert!(!outcome.unwrap_err().is_transient());
        assert_eq!(calls, 1, "non-transient must fail immediately");
    }

    #[tokio::test]
    async fn retry_returns_ok_on_first_attempt_without_sleeping() {
        let mut calls = 0u32;
        // A 60s delay would make the test hang if the success path slept.
        let outcome: Result<i32, PageError> = retry_on_transient(
            || {
                calls += 1;
                async { Ok(7) }
            },
            5,
            Duration::from_secs(60),
        )
        .await;
        assert_eq!(outcome.unwrap(), 7);
        assert_eq!(calls, 1);
    }
}
|
||||
15
backend/src/crawler/diff.rs
Normal file
15
backend/src/crawler/diff.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
//! Change-detection rules between the source and our DB.
|
||||
//!
|
||||
//! | Event | Signal |
|
||||
//! |--------------------|----------------------------------------------------------------------------------------|
|
||||
//! | New manga | `(source_id, source_manga_key)` not in `manga_sources` |
|
||||
//! | Updated metadata | freshly computed `metadata_hash` differs from the stored one |
|
||||
//! | Dropped manga | `last_seen_at < discover_run_started_at` for N consecutive successful discover runs |
|
||||
//! | New chapter | `(source_id, source_chapter_key)` not in `chapter_sources` |
|
||||
//! | Dropped chapter | present in DB but absent from the latest `fetch_chapter_list` for the same manga |
|
||||
//!
|
||||
//! Dropped is always a soft flag (`dropped_at`), never a row delete —
|
||||
//! restoring is a matter of clearing the flag if the source brings the
|
||||
//! item back.
|
||||
//!
|
||||
//! Scaffold only — implementations land once `repo::crawler` exists.
|
||||
269
backend/src/crawler/jobs.rs
Normal file
269
backend/src/crawler/jobs.rs
Normal file
@@ -0,0 +1,269 @@
|
||||
//! Persistent job queue and the four job kinds.
|
||||
//!
|
||||
//! Backed by Postgres (the `crawler_jobs` table). Workers lease rows
|
||||
//! with `SELECT ... FOR UPDATE SKIP LOCKED`, heartbeat via
|
||||
//! `leased_until`, and ack by transitioning to `done` (or backoff /
|
||||
//! `dead`). Handlers are idempotent so a crash mid-run is recoverable
|
||||
//! by replay.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::source::DiscoverMode;
|
||||
|
||||
/// Work item stored as JSON in `crawler_jobs.payload`.
///
/// Serialized with an internal `"kind"` tag in snake_case (for example
/// `"sync_chapter_content"`), which is the value `lease()` filters on via
/// `payload->>'kind'`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum JobPayload {
    /// Walk the source index and enqueue `SyncManga` jobs.
    Discover {
        source_id: String,
        mode: DiscoverMode,
    },
    /// Fetch one manga's detail page, upsert metadata, enqueue
    /// `SyncChapterList`.
    SyncManga {
        source_id: String,
        source_manga_key: String,
    },
    /// Diff the chapter list, enqueue `SyncChapterContent` for new
    /// chapters, soft-drop vanished ones.
    SyncChapterList {
        source_id: String,
        manga_id: Uuid,
        source_manga_key: String,
    },
    /// Download a single chapter's page images into storage.
    SyncChapterContent {
        source_id: String,
        chapter_id: Uuid,
        source_chapter_key: String,
    },
}
|
||||
|
||||
/// Lifecycle state of a queue row, mapped to a snake_case `text` value for
/// both sqlx and serde.
///
/// The queue functions in this file write `pending`, `running`, `done` and
/// `dead` as SQL literals; none of them writes `failed`.
#[derive(Clone, Copy, Debug, sqlx::Type, Serialize, Deserialize)]
#[sqlx(type_name = "text", rename_all = "snake_case")]
#[serde(rename_all = "snake_case")]
pub enum JobState {
    Pending,
    Running,
    Done,
    Failed,
    Dead,
}
|
||||
|
||||
/// Kind discriminator stored in `payload->>'kind'`. Public so callers
/// (daemon worker, bookmark hook) can filter `lease()` to a single kind
/// without re-spelling the literal.
///
/// Must match the snake_case serde tag of `JobPayload::SyncChapterContent`.
pub const KIND_SYNC_CHAPTER_CONTENT: &str = "sync_chapter_content";
|
||||
|
||||
/// Result of [`enqueue`].
#[derive(Debug)]
pub enum EnqueueResult {
    /// A row was inserted; carries the `id` returned by the INSERT.
    Inserted(Uuid),
    /// The INSERT hit `ON CONFLICT DO NOTHING` and no row was written.
    Skipped,
}
|
||||
|
||||
/// A job row handed out by [`lease`].
#[derive(Debug, Clone)]
pub struct Lease {
    /// Primary key of the `crawler_jobs` row; passed to the ack/release calls.
    pub id: Uuid,
    /// Decoded job payload.
    pub payload: JobPayload,
    /// Attempt counter after `lease` incremented it (first lease → 1).
    pub attempts: i32,
    /// Value of the row's `max_attempts` column; `ack_failed` marks the job
    /// dead once `attempts` reaches it.
    pub max_attempts: i32,
}
|
||||
|
||||
/// Retry delay used by `ack_failed`: 60s for the first failure, doubling
/// with every further attempt and capped at one hour.
///
/// `attempts` is the post-increment counter reported by `lease()`, so the
/// first failure arrives as `1`. Zero and negative values are treated like
/// the first failure.
fn backoff_for(attempts: i32) -> Duration {
    const BASE_SECS: u64 = 60;
    const CAP_SECS: u64 = 3600;
    // Number of doublings; negative inputs collapse to 0, and 20 bounds the shift.
    let doublings = u32::try_from(attempts.saturating_sub(1))
        .unwrap_or(0)
        .min(20);
    let uncapped = BASE_SECS.saturating_mul(1u64 << doublings);
    Duration::from_secs(uncapped.min(CAP_SECS))
}
|
||||
|
||||
/// Insert a new pending job. For `SyncChapterContent` payloads the
|
||||
/// partial unique index `crawler_jobs_chapter_content_dedup_idx` blocks
|
||||
/// a second `(pending|running)` insert per chapter_id, returning
|
||||
/// `Skipped`. The slot frees again once the previous job leaves the
|
||||
/// in-flight states (done/failed/dead), so a re-enqueue after a force
|
||||
/// refetch succeeds.
|
||||
pub async fn enqueue(pool: &PgPool, payload: &JobPayload) -> sqlx::Result<EnqueueResult> {
|
||||
let json = serde_json::to_value(payload).expect("JobPayload is always serializable");
|
||||
let id: Option<Uuid> = sqlx::query_scalar(
|
||||
"INSERT INTO crawler_jobs (payload) VALUES ($1) \
|
||||
ON CONFLICT DO NOTHING RETURNING id",
|
||||
)
|
||||
.bind(json)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(match id {
|
||||
Some(id) => EnqueueResult::Inserted(id),
|
||||
None => EnqueueResult::Skipped,
|
||||
})
|
||||
}
|
||||
|
||||
/// Lease up to `max` rows whose `state` is `pending`, or `running` with
|
||||
/// an expired `leased_until` (the crashed-worker recovery path). The
|
||||
/// inner CTE uses `FOR UPDATE SKIP LOCKED` so concurrent leasers don't
|
||||
/// block each other and each row is handed to exactly one worker.
|
||||
///
|
||||
/// `kind_filter` matches against `payload->>'kind'`; `None` means
|
||||
/// any kind.
|
||||
pub async fn lease(
|
||||
pool: &PgPool,
|
||||
kind_filter: Option<&str>,
|
||||
max: i64,
|
||||
lease_duration: Duration,
|
||||
) -> sqlx::Result<Vec<Lease>> {
|
||||
let lease_ms: i64 = lease_duration.as_millis().min(i64::MAX as u128) as i64;
|
||||
let rows: Vec<(Uuid, serde_json::Value, i32, i32)> = sqlx::query_as(
|
||||
r#"
|
||||
WITH leased AS (
|
||||
SELECT id FROM crawler_jobs
|
||||
WHERE (state = 'pending' OR (state = 'running' AND leased_until < now()))
|
||||
AND scheduled_at <= now()
|
||||
AND ($1::text IS NULL OR payload->>'kind' = $1)
|
||||
ORDER BY scheduled_at
|
||||
LIMIT $2
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
UPDATE crawler_jobs j
|
||||
SET state = 'running',
|
||||
attempts = j.attempts + 1,
|
||||
leased_until = now() + ($3::bigint || ' milliseconds')::interval,
|
||||
updated_at = now()
|
||||
FROM leased l
|
||||
WHERE j.id = l.id
|
||||
RETURNING j.id, j.payload, j.attempts, j.max_attempts
|
||||
"#,
|
||||
)
|
||||
.bind(kind_filter)
|
||||
.bind(max)
|
||||
.bind(lease_ms)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut leases = Vec::with_capacity(rows.len());
|
||||
for (id, payload_json, attempts, max_attempts) in rows {
|
||||
let payload: JobPayload = serde_json::from_value(payload_json).map_err(|e| {
|
||||
sqlx::Error::Decode(format!("invalid JobPayload JSON for job {id}: {e}").into())
|
||||
})?;
|
||||
leases.push(Lease {
|
||||
id,
|
||||
payload,
|
||||
attempts,
|
||||
max_attempts,
|
||||
});
|
||||
}
|
||||
Ok(leases)
|
||||
}
|
||||
|
||||
/// Mark a leased job as successfully completed.
|
||||
pub async fn ack_done(pool: &PgPool, lease_id: Uuid) -> sqlx::Result<()> {
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET state = 'done', leased_until = NULL, updated_at = now() \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(lease_id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mark a leased job as failed. If the current attempt count has reached
|
||||
/// `max_attempts` the job is terminally dead and stops retrying;
|
||||
/// otherwise it goes back to `pending` with `scheduled_at` pushed into
|
||||
/// the future by the exponential backoff.
|
||||
pub async fn ack_failed(
|
||||
pool: &PgPool,
|
||||
lease_id: Uuid,
|
||||
error: &str,
|
||||
attempts: i32,
|
||||
max_attempts: i32,
|
||||
) -> sqlx::Result<()> {
|
||||
if attempts >= max_attempts {
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET state = 'dead', last_error = $2, leased_until = NULL, updated_at = now() \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(lease_id)
|
||||
.bind(error)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
} else {
|
||||
let backoff_ms: i64 = backoff_for(attempts).as_millis().min(i64::MAX as u128) as i64;
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET state = 'pending', last_error = $2, leased_until = NULL, \
|
||||
scheduled_at = now() + ($3::bigint || ' milliseconds')::interval, \
|
||||
updated_at = now() \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(lease_id)
|
||||
.bind(error)
|
||||
.bind(backoff_ms)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return a leased job to `pending` without burning a retry attempt.
|
||||
/// Used on graceful shutdown and on session-expired aborts where the
|
||||
/// failure isn't the job's fault.
|
||||
pub async fn release(pool: &PgPool, lease_id: Uuid) -> sqlx::Result<()> {
|
||||
sqlx::query(
|
||||
"UPDATE crawler_jobs \
|
||||
SET state = 'pending', leased_until = NULL, \
|
||||
attempts = GREATEST(0, attempts - 1), updated_at = now() \
|
||||
WHERE id = $1",
|
||||
)
|
||||
.bind(lease_id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete `done` jobs whose `updated_at` is older than `retention_days`
|
||||
/// days. `0` disables the reaper without touching the table. Returns the
|
||||
/// number of rows removed.
|
||||
pub async fn reap_done(pool: &PgPool, retention_days: u32) -> sqlx::Result<u64> {
|
||||
if retention_days == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
let result = sqlx::query(
|
||||
"DELETE FROM crawler_jobs \
|
||||
WHERE state = 'done' \
|
||||
AND updated_at < now() - ($1::bigint || ' days')::interval",
|
||||
)
|
||||
.bind(retention_days as i64)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(result.rows_affected())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn backoff_grows_exponentially_and_caps_at_one_hour() {
        // (attempts, expected seconds)
        let table: [(i32, u64); 10] = [
            // attempts == 1 → 60s, doubling each step.
            (1, 60),
            (2, 120),
            (3, 240),
            (4, 480),
            (5, 960),
            (6, 1920),
            // 7th: 60 * 64 = 3840 → capped to 3600.
            (7, 3600),
            (20, 3600),
            // Garbage / zero / negatives stay sane.
            (0, 60),
            (-5, 60),
        ];
        for (attempts, expected_secs) in table {
            assert_eq!(backoff_for(attempts), Duration::from_secs(expected_secs));
        }
    }
}
|
||||
26
backend/src/crawler/mod.rs
Normal file
26
backend/src/crawler/mod.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
//! Crawler subsystem.
|
||||
//!
|
||||
//! Runs as its own binary (`src/bin/crawler.rs`) and shares `domain`,
|
||||
//! `repo`, and `storage` with the API binary. Layering mirrors the
|
||||
//! `Storage` trait pattern: callers depend on the `source::Source`
|
||||
//! trait, not on a concrete site; new sites plug in as additional
|
||||
//! impls without touching the job runner.
|
||||
//!
|
||||
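//! Submodules (partial list — see the `pub mod` declarations below for
//! the full set):
//! - [`browser`]: launches and pools Chromium via `chromiumoxide`.
//!   First run downloads a known-good build via the `fetcher` feature.
//! - [`source`]: the `Source` trait. Per-site impls live alongside it.
//! - [`jobs`]: job kinds, queue wrapper, handler dispatch.
//! - [`diff`]: change detection — new / updated / dropped semantics.
//! - [`pipeline`]: the reusable metadata pass and the enqueue helpers
//!   that fan out chapter-content work.
//! - [`rate_limit`]: per-host request pacing.
//! - [`session`]: PHPSESSID injection and the login probe.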
|
||||
|
||||
pub mod browser;
|
||||
pub mod browser_manager;
|
||||
pub mod content;
|
||||
pub mod daemon;
|
||||
pub mod detect;
|
||||
pub mod diff;
|
||||
pub mod jobs;
|
||||
pub mod pipeline;
|
||||
pub mod rate_limit;
|
||||
pub mod session;
|
||||
pub mod source;
|
||||
467
backend/src/crawler/pipeline.rs
Normal file
467
backend/src/crawler/pipeline.rs
Normal file
@@ -0,0 +1,467 @@
|
||||
//! Crawler pipeline — the reusable metadata pass and the enqueue helpers
|
||||
//! that fan out chapter-content work. Shared between the daemon (cron tick)
|
||||
//! and the CLI (`bin/crawler.rs`) so behavior stays in lockstep.
|
||||
|
||||
use anyhow::Context;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::crawler::browser_manager::BrowserManager;
|
||||
use crate::crawler::jobs::{self, EnqueueResult, JobPayload};
|
||||
use crate::crawler::rate_limit::HostRateLimiters;
|
||||
use crate::crawler::source::target::TargetSource;
|
||||
use crate::crawler::source::{DiscoverMode, FetchContext, Source};
|
||||
use crate::repo;
|
||||
use crate::storage::Storage;
|
||||
|
||||
/// Tallies collected while a metadata pass runs; logged once the pass
/// finishes and handed back to the caller.
#[derive(Debug, Default, Clone, Copy)]
pub struct MetadataStats {
    /// Refs taken from the walker and attempted (counted before the fetch).
    pub discovered: usize,
    /// Refs whose metadata upsert succeeded.
    pub upserted: usize,
    /// Covers that were downloaded and stored successfully.
    pub covers_fetched: usize,
    /// Refs skipped because the metadata fetch or the upsert failed.
    pub mangas_failed: usize,
}
|
||||
|
||||
/// Decide whether the per-ref loop should stop based on the Incremental
|
||||
/// streak counter. Pulled out as a pure function so the rule is unit-
|
||||
/// testable without standing up the walker or DB.
|
||||
pub(crate) fn should_stop(mode: DiscoverMode, consecutive_unchanged: usize) -> bool {
|
||||
match mode {
|
||||
DiscoverMode::Backfill => false,
|
||||
DiscoverMode::Incremental { stop_after_unchanged } => {
|
||||
consecutive_unchanged >= stop_after_unchanged
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the discover → fetch → upsert → cover → chapter-list-diff pipeline
|
||||
/// for the target source. Pure metadata; chapter content is enqueued as
|
||||
/// separate `SyncChapterContent` jobs by the caller after this returns.
|
||||
///
|
||||
/// `limit == 0` means no cap (full sweep up to the source's own bound).
|
||||
/// `skip_chapters == true` is the "metadata-only" mode (parser doesn't
|
||||
/// extract chapters, and `sync_manga_chapters` is skipped — otherwise an
|
||||
/// empty chapter list would soft-drop existing rows).
|
||||
///
|
||||
/// `mode` controls the walk:
|
||||
/// - `Backfill` — oldest-first, no early exit. The only mode that runs
|
||||
/// the end-of-walk drop pass + writes `seed_completed_at`.
|
||||
/// - `Incremental { stop_after_unchanged }` — newest-first, breaks out
|
||||
/// after N consecutive Unchanged upserts. Drop pass is skipped (the
|
||||
/// tail of the index is never visited, so its `last_seen_at` is
|
||||
/// stale and using it to soft-drop would be unsafe).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn run_metadata_pass(
|
||||
browser_manager: &BrowserManager,
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
rate: &HostRateLimiters,
|
||||
start_url: &str,
|
||||
limit: usize,
|
||||
skip_chapters: bool,
|
||||
mode: DiscoverMode,
|
||||
) -> anyhow::Result<MetadataStats> {
|
||||
let lease = browser_manager
|
||||
.acquire()
|
||||
.await
|
||||
.context("acquire browser lease for metadata pass")?;
|
||||
let browser_ref: &chromiumoxide::Browser = &lease;
|
||||
|
||||
let source = {
|
||||
let s = TargetSource::new(start_url.to_string());
|
||||
if skip_chapters {
|
||||
s.without_chapter_parsing()
|
||||
} else {
|
||||
s
|
||||
}
|
||||
};
|
||||
let ctx = FetchContext {
|
||||
browser: browser_ref,
|
||||
rate,
|
||||
};
|
||||
|
||||
let source_id = source.id();
|
||||
repo::crawler::ensure_source(
|
||||
db,
|
||||
source_id,
|
||||
"Target Site",
|
||||
&origin_of(start_url).unwrap_or_else(|| start_url.to_string()),
|
||||
)
|
||||
.await
|
||||
.context("ensure_source")?;
|
||||
|
||||
let run_started_at = chrono::Utc::now();
|
||||
let max_refs = (limit > 0).then_some(limit);
|
||||
|
||||
tracing::info!(?mode, ?max_refs, "starting metadata pass");
|
||||
let mut walker = source
|
||||
.discover(&ctx, mode)
|
||||
.await
|
||||
.context("discover failed")?;
|
||||
|
||||
let mut stats = MetadataStats::default();
|
||||
let mut consecutive_unchanged: usize = 0;
|
||||
let mut walked_to_completion = false;
|
||||
let mut hit_limit = false;
|
||||
let mut hit_incremental_stop = false;
|
||||
|
||||
'outer: loop {
|
||||
let batch = match walker.next_batch(&ctx).await? {
|
||||
Some(b) => b,
|
||||
None => {
|
||||
walked_to_completion = true;
|
||||
break;
|
||||
}
|
||||
};
|
||||
for r in batch {
|
||||
if max_refs.map(|m| stats.discovered >= m).unwrap_or(false) {
|
||||
hit_limit = true;
|
||||
tracing::info!(cap = ?max_refs, "max_results reached; halting walk");
|
||||
break 'outer;
|
||||
}
|
||||
stats.discovered += 1;
|
||||
tracing::info!(
|
||||
idx = stats.discovered,
|
||||
key = %r.source_manga_key,
|
||||
"fetching metadata"
|
||||
);
|
||||
let manga = match source.fetch_manga(&ctx, &r).await {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
key = %r.source_manga_key,
|
||||
url = %r.url,
|
||||
error = ?e,
|
||||
"fetch_manga failed"
|
||||
);
|
||||
stats.mangas_failed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let upsert = match repo::crawler::upsert_manga_from_source(
|
||||
db, source_id, &r.url, &manga,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
key = %r.source_manga_key,
|
||||
error = ?e,
|
||||
"upsert_manga_from_source failed"
|
||||
);
|
||||
stats.mangas_failed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
stats.upserted += 1;
|
||||
tracing::info!(
|
||||
key = %manga.source_manga_key,
|
||||
manga_id = %upsert.manga_id,
|
||||
status = ?upsert.status,
|
||||
title = %manga.title,
|
||||
"manga upserted"
|
||||
);
|
||||
|
||||
// Cover image: download when missing in storage or when metadata
|
||||
// signaled an update (cover URL is part of metadata_hash, so
|
||||
// Updated implies the URL may have moved). Failures are non-fatal.
|
||||
let needs_cover = upsert.cover_image_path.is_none()
|
||||
|| matches!(upsert.status, repo::crawler::UpsertStatus::Updated);
|
||||
if needs_cover {
|
||||
if let Some(cover_url) = manga.cover_url.as_deref() {
|
||||
match download_and_store_cover(
|
||||
db,
|
||||
storage,
|
||||
http,
|
||||
rate,
|
||||
&r.url,
|
||||
upsert.manga_id,
|
||||
cover_url,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => stats.covers_fetched += 1,
|
||||
Err(e) => tracing::warn!(
|
||||
manga_id = %upsert.manga_id,
|
||||
error = ?e,
|
||||
"cover download failed"
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !skip_chapters {
|
||||
match repo::crawler::sync_manga_chapters(
|
||||
db,
|
||||
source_id,
|
||||
upsert.manga_id,
|
||||
&manga.chapters,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(diff) => tracing::info!(
|
||||
manga_id = %upsert.manga_id,
|
||||
new = diff.new,
|
||||
refreshed = diff.refreshed,
|
||||
dropped = diff.dropped,
|
||||
"chapters synced"
|
||||
),
|
||||
Err(e) => tracing::warn!(
|
||||
manga_id = %upsert.manga_id,
|
||||
error = ?e,
|
||||
"chapter sync failed"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// Incremental stop: count consecutive Unchanged upserts and
|
||||
// bail once the threshold is reached. New/Updated resets the
|
||||
// streak so a fresh entry mid-batch doesn't accidentally trip
|
||||
// the cutoff.
|
||||
match upsert.status {
|
||||
repo::crawler::UpsertStatus::Unchanged => {
|
||||
consecutive_unchanged += 1;
|
||||
}
|
||||
repo::crawler::UpsertStatus::New | repo::crawler::UpsertStatus::Updated => {
|
||||
consecutive_unchanged = 0;
|
||||
}
|
||||
}
|
||||
if should_stop(mode, consecutive_unchanged) {
|
||||
hit_incremental_stop = true;
|
||||
tracing::info!(
|
||||
consecutive_unchanged,
|
||||
"incremental stop threshold reached; halting walk"
|
||||
);
|
||||
break 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drop pass: only when the walk truly covered everything the source
|
||||
// surfaces. `last_seen_at` on un-visited rows is stale, so running
|
||||
// the drop on a partial walk would soft-drop the tail of the index.
|
||||
let full_walk = walked_to_completion && !hit_limit && !hit_incremental_stop;
|
||||
let backfill_complete = full_walk && matches!(mode, DiscoverMode::Backfill);
|
||||
if full_walk {
|
||||
match repo::crawler::mark_dropped_mangas(db, source_id, run_started_at).await {
|
||||
Ok(n) => tracing::info!(dropped = n, "marked unseen manga as dropped"),
|
||||
Err(e) => tracing::warn!(error = ?e, "drop-pass failed"),
|
||||
}
|
||||
} else {
|
||||
tracing::info!(
|
||||
?mode,
|
||||
hit_limit,
|
||||
hit_incremental_stop,
|
||||
"partial sync — skipping drop pass"
|
||||
);
|
||||
}
|
||||
if backfill_complete {
|
||||
if let Err(e) = repo::crawler::mark_seed_completed(db, source_id, run_started_at).await {
|
||||
tracing::warn!(error = ?e, "mark_seed_completed failed");
|
||||
} else {
|
||||
tracing::info!(source_id, "seed marked complete");
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
?mode,
|
||||
discovered = stats.discovered,
|
||||
upserted = stats.upserted,
|
||||
covers_fetched = stats.covers_fetched,
|
||||
mangas_failed = stats.mangas_failed,
|
||||
walked_to_completion,
|
||||
hit_limit,
|
||||
hit_incremental_stop,
|
||||
"metadata pass complete"
|
||||
);
|
||||
|
||||
drop(lease);
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Enqueue a `SyncChapterContent` job for every chapter of *any* bookmarked
|
||||
/// manga that still has `page_count = 0` and a non-dropped source row.
|
||||
/// Returns `(inserted, skipped)` counts. Dedup index handles repeats.
|
||||
pub async fn enqueue_bookmarked_pending(pool: &PgPool) -> anyhow::Result<EnqueueSummary> {
|
||||
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT cs.source_id, c.id AS chapter_id, cs.source_chapter_key
|
||||
FROM chapters c
|
||||
JOIN bookmarks b ON b.manga_id = c.manga_id
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id
|
||||
WHERE c.page_count = 0
|
||||
AND cs.dropped_at IS NULL
|
||||
GROUP BY cs.source_id, c.id, cs.source_chapter_key, c.manga_id, c.created_at
|
||||
ORDER BY c.manga_id, c.created_at ASC
|
||||
"#,
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("query bookmarked-pending chapters")?;
|
||||
|
||||
let mut summary = EnqueueSummary::default();
|
||||
for (source_id, chapter_id, source_chapter_key) in rows {
|
||||
let payload = JobPayload::SyncChapterContent {
|
||||
source_id,
|
||||
chapter_id,
|
||||
source_chapter_key,
|
||||
};
|
||||
match jobs::enqueue(pool, &payload).await {
|
||||
Ok(EnqueueResult::Inserted(_)) => summary.inserted += 1,
|
||||
Ok(EnqueueResult::Skipped) => summary.skipped += 1,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
%chapter_id,
|
||||
error = ?e,
|
||||
"enqueue chapter content failed"
|
||||
);
|
||||
summary.failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Enqueue chapter-content jobs for a *single* manga (the bookmark-create
|
||||
/// hook). Same dedup semantics as [`enqueue_bookmarked_pending`].
|
||||
pub async fn enqueue_pending_for_manga(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
) -> anyhow::Result<EnqueueSummary> {
|
||||
let rows: Vec<(String, Uuid, String)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT DISTINCT cs.source_id, c.id AS chapter_id, cs.source_chapter_key
|
||||
FROM chapters c
|
||||
JOIN chapter_sources cs ON cs.chapter_id = c.id
|
||||
WHERE c.manga_id = $1
|
||||
AND c.page_count = 0
|
||||
AND cs.dropped_at IS NULL
|
||||
ORDER BY cs.source_id, c.id
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("query pending chapters for manga")?;
|
||||
|
||||
let mut summary = EnqueueSummary::default();
|
||||
for (source_id, chapter_id, source_chapter_key) in rows {
|
||||
let payload = JobPayload::SyncChapterContent {
|
||||
source_id,
|
||||
chapter_id,
|
||||
source_chapter_key,
|
||||
};
|
||||
match jobs::enqueue(pool, &payload).await {
|
||||
Ok(EnqueueResult::Inserted(_)) => summary.inserted += 1,
|
||||
Ok(EnqueueResult::Skipped) => summary.skipped += 1,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
%chapter_id,
|
||||
error = ?e,
|
||||
"enqueue chapter content failed"
|
||||
);
|
||||
summary.failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Outcome tallies for one batch of chapter-content enqueue attempts.
#[derive(Debug, Default, Clone, Copy)]
pub struct EnqueueSummary {
    /// Jobs for which the queue reported `EnqueueResult::Inserted`.
    pub inserted: usize,
    /// Jobs for which the queue reported `EnqueueResult::Skipped`.
    pub skipped: usize,
    /// Enqueue calls that returned an error (logged, not propagated).
    pub failed: usize,
}
|
||||
|
||||
/// Download a cover image and persist its storage path. Local to the
|
||||
/// pipeline because the CLI still calls it from its inline chapter-content
|
||||
/// loop; once the worker pool fully replaces that path we can fold this
|
||||
/// into `pipeline` proper.
|
||||
async fn download_and_store_cover(
|
||||
db: &PgPool,
|
||||
storage: &dyn Storage,
|
||||
http: &reqwest::Client,
|
||||
rate: &HostRateLimiters,
|
||||
manga_url: &str,
|
||||
manga_id: Uuid,
|
||||
cover_url: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let absolute = reqwest::Url::parse(manga_url)
|
||||
.context("parse manga URL")?
|
||||
.join(cover_url)
|
||||
.context("join cover URL onto manga URL")?;
|
||||
|
||||
rate.wait_for(absolute.as_str()).await?;
|
||||
let resp = http
|
||||
.get(absolute.clone())
|
||||
.header(reqwest::header::REFERER, manga_url)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("GET {absolute}"))?
|
||||
.error_for_status()
|
||||
.with_context(|| format!("non-2xx for {absolute}"))?;
|
||||
let bytes = resp.bytes().await.context("read cover body")?;
|
||||
let kind = infer::get(&bytes);
|
||||
let ext = kind.map(|k| k.extension()).unwrap_or("bin");
|
||||
let key = format!("mangas/{manga_id}/cover.{ext}");
|
||||
|
||||
storage
|
||||
.put(&key, &bytes)
|
||||
.await
|
||||
.with_context(|| format!("store cover at {key}"))?;
|
||||
repo::manga::set_cover_image_path(db, manga_id, &key)
|
||||
.await
|
||||
.with_context(|| format!("update cover_image_path for {manga_id}"))?;
|
||||
tracing::info!(
|
||||
manga_id = %manga_id,
|
||||
key = %key,
|
||||
bytes = bytes.len(),
|
||||
%absolute,
|
||||
"cover stored"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reduce a URL to its origin, `scheme://host[:port]`.
///
/// The authority ends at the first `/`, `?` or `#`, so a URL with a
/// query or fragment but no path does not leak it into the origin. Any
/// `userinfo@` prefix is dropped so credentials never end up in the
/// stored value. Returns `None` when the input has no `://` separator
/// or no host.
fn origin_of(url: &str) -> Option<String> {
    let (scheme, rest) = url.split_once("://")?;
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;
    // Strip `user:password@` if present; the port (if any) stays.
    let host = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
    if scheme.is_empty() || host.is_empty() {
        return None;
    }
    Some(format!("{scheme}://{host}"))
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Backfill ignores the streak entirely, whatever its size.
    #[test]
    fn backfill_never_stops_regardless_of_streak() {
        for streak in [0, 100, usize::MAX] {
            assert!(!should_stop(DiscoverMode::Backfill, streak));
        }
    }

    /// Incremental stops at the threshold and stays stopped beyond it.
    #[test]
    fn incremental_stops_when_streak_meets_threshold() {
        let mode = DiscoverMode::Incremental {
            stop_after_unchanged: 3,
        };
        for below_threshold in [0, 2] {
            assert!(!should_stop(mode, below_threshold));
        }
        assert!(should_stop(mode, 3), "stops at exactly the threshold");
        assert!(should_stop(mode, 100), "stops at anything past threshold");
    }

    #[test]
    fn incremental_with_zero_threshold_stops_immediately() {
        // A threshold of zero is a nonsensical config, but it must not
        // panic — the rule simply fires before any Unchanged is seen.
        let mode = DiscoverMode::Incremental {
            stop_after_unchanged: 0,
        };
        assert!(should_stop(mode, 0));
    }
}
|
||||
184
backend/src/crawler/rate_limit.rs
Normal file
184
backend/src/crawler/rate_limit.rs
Normal file
@@ -0,0 +1,184 @@
|
||||
//! Per-host request pacing.
|
||||
//!
|
||||
//! `RateLimiter` is a single-token bucket: each `wait().await` returns
|
||||
//! immediately when at least `interval` has elapsed since the last call,
|
||||
//! otherwise sleeps just enough to satisfy it. Uses
|
||||
//! `tokio::time::Instant` so tests can run under `start_paused` virtual
|
||||
//! time without sleeping for real.
|
||||
//!
|
||||
//! `HostRateLimiters` is the multi-host wrapper actually used by the
|
||||
//! crawler — concurrent workers issuing requests to different origins
|
||||
//! (catalog vs. CDN) don't contend on a shared budget; each host gets
|
||||
//! its own bucket. `wait_for(url)` extracts the host, lazily creates a
|
||||
//! limiter for it, and serializes only against other callers hitting
|
||||
//! the same host.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::time::Instant;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RateLimiter {
|
||||
interval: Duration,
|
||||
last: Option<Instant>,
|
||||
}
|
||||
|
||||
impl RateLimiter {
|
||||
pub fn new(interval: Duration) -> Self {
|
||||
Self {
|
||||
interval,
|
||||
last: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn wait(&mut self) {
|
||||
if let Some(last) = self.last {
|
||||
let elapsed = last.elapsed();
|
||||
if elapsed < self.interval {
|
||||
tokio::time::sleep(self.interval - elapsed).await;
|
||||
}
|
||||
}
|
||||
self.last = Some(Instant::now());
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-host rate limiter map. The outer `Mutex<HashMap>` is held only
|
||||
/// during the entry-or-insert + Arc clone; the per-host `Mutex<RateLimiter>`
|
||||
/// is held during the actual `wait().await`. So N workers calling
|
||||
/// `wait_for(url)` on N different hosts contend nowhere except the brief
|
||||
/// HashMap lookup; workers hitting the same host serialize on that
|
||||
/// host's bucket.
|
||||
#[derive(Debug)]
|
||||
pub struct HostRateLimiters {
|
||||
default_interval: Duration,
|
||||
overrides: HashMap<String, Duration>,
|
||||
map: Mutex<HashMap<String, Arc<Mutex<RateLimiter>>>>,
|
||||
}
|
||||
|
||||
impl HostRateLimiters {
|
||||
pub fn new(default_interval: Duration) -> Self {
|
||||
Self {
|
||||
default_interval,
|
||||
overrides: HashMap::new(),
|
||||
map: Mutex::new(HashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set a per-host interval that overrides `default_interval`. Calls
|
||||
/// after a host's limiter has been instantiated do *not* re-create
|
||||
/// it — set all overrides before the first `wait_for` to that host.
|
||||
pub fn with_override(mut self, host: impl Into<String>, interval: Duration) -> Self {
|
||||
self.overrides.insert(host.into(), interval);
|
||||
self
|
||||
}
|
||||
|
||||
/// Block until the per-host budget allows the next request to
|
||||
/// `url`'s host. Returns an error only when the URL has no host
|
||||
/// (malformed input).
|
||||
pub async fn wait_for(&self, url: &str) -> anyhow::Result<()> {
|
||||
let host = host_of(url)
|
||||
.ok_or_else(|| anyhow::anyhow!("no host in url: {url}"))?;
|
||||
let limiter = {
|
||||
let mut map = self.map.lock().await;
|
||||
map.entry(host.clone())
|
||||
.or_insert_with(|| {
|
||||
let interval = self
|
||||
.overrides
|
||||
.get(&host)
|
||||
.copied()
|
||||
.unwrap_or(self.default_interval);
|
||||
Arc::new(Mutex::new(RateLimiter::new(interval)))
|
||||
})
|
||||
.clone()
|
||||
};
|
||||
limiter.lock().await.wait().await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the lowercased host (no port, no userinfo) from a URL string.
///
/// The authority ends at the first `/`, `?` or `#`, so
/// `https://example.com?x=1` yields `example.com` rather than a key
/// that still carries the query. A `user:pass@` prefix is ignored. A
/// bracketed IPv6 literal is returned with its brackets (`[::1]`),
/// with or without a port.
///
/// Returns `None` for inputs without a `scheme://host` shape — those
/// would never have reached the network layer anyway.
fn host_of(url: &str) -> Option<String> {
    let after_scheme = url.split_once("://")?.1;
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;
    let host_with_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
    let host = if host_with_port.starts_with('[') {
        // IPv6 literal: colons inside the brackets are not port
        // separators, so cut at the closing bracket instead.
        let end = host_with_port.find(']')?;
        &host_with_port[..=end]
    } else {
        host_with_port
            .rsplit_once(':')
            .map_or(host_with_port, |(h, _)| h)
    };
    (!host.is_empty()).then(|| host.to_ascii_lowercase())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh limiter has no reference point, so the first call is free.
    #[tokio::test(start_paused = true)]
    async fn first_call_does_not_sleep() {
        let mut limiter = RateLimiter::new(Duration::from_millis(100));
        let started = Instant::now();
        limiter.wait().await;
        assert_eq!(started.elapsed(), Duration::ZERO);
    }

    #[tokio::test(start_paused = true)]
    async fn second_call_sleeps_to_fill_interval() {
        let mut limiter = RateLimiter::new(Duration::from_millis(100));
        let started = Instant::now();
        limiter.wait().await;
        limiter.wait().await;
        // The first call is instant, so the second one has to sit out
        // the entire 100ms interval.
        assert_eq!(started.elapsed(), Duration::from_millis(100));
    }

    #[tokio::test(start_paused = true)]
    async fn no_sleep_if_interval_already_elapsed() {
        let mut limiter = RateLimiter::new(Duration::from_millis(100));
        limiter.wait().await;
        tokio::time::sleep(Duration::from_millis(250)).await;
        let started = Instant::now();
        limiter.wait().await;
        // 250ms have already gone by — nothing left to wait for.
        assert_eq!(started.elapsed(), Duration::ZERO);
    }

    #[test]
    fn host_of_parses_scheme_path_and_port() {
        let cases = [
            ("https://Example.com/path", "example.com"),
            ("http://cdn.foo.bar/img.jpg", "cdn.foo.bar"),
            ("http://localhost:8080/x", "localhost"),
        ];
        for (url, expected) in cases {
            assert_eq!(host_of(url).as_deref(), Some(expected));
        }
        assert!(host_of("not a url").is_none());
    }

    #[tokio::test(start_paused = true)]
    async fn host_rate_limiters_pace_per_host() {
        // Both hosts use the 100ms default. Back-to-back calls to
        // DIFFERENT hosts go through at once; a repeat call to the SAME
        // host has to wait out the interval.
        let limiters = HostRateLimiters::new(Duration::from_millis(100));

        let started = Instant::now();
        limiters.wait_for("https://a.example/x").await.unwrap();
        limiters.wait_for("https://b.example/y").await.unwrap();
        assert_eq!(started.elapsed(), Duration::ZERO, "different hosts don't contend");

        let restarted = Instant::now();
        limiters.wait_for("https://a.example/x").await.unwrap();
        assert_eq!(
            restarted.elapsed(),
            Duration::from_millis(100),
            "second call to same host waits a full interval"
        );
    }

    #[tokio::test(start_paused = true)]
    async fn host_rate_limiters_honor_overrides() {
        let limiters = HostRateLimiters::new(Duration::from_millis(1000))
            .with_override("fast.example", Duration::from_millis(100));

        limiters.wait_for("https://fast.example/a").await.unwrap();
        let started = Instant::now();
        limiters.wait_for("https://fast.example/b").await.unwrap();
        assert_eq!(started.elapsed(), Duration::from_millis(100));
    }
}
|
||||
288
backend/src/crawler/session.rs
Normal file
288
backend/src/crawler/session.rs
Normal file
@@ -0,0 +1,288 @@
|
||||
//! PHPSESSID injection + login probe.
|
||||
//!
|
||||
//! The catalog site we crawl renders chapter pages as a single multi-
|
||||
//! page list only for logged-in users. We don't try to bypass the
|
||||
//! login (CAPTCHA wall) — instead the operator pastes their browser's
|
||||
//! `PHPSESSID` cookie into `CRAWLER_PHPSESSID` and the crawler injects
|
||||
//! it into Chromium *and* reqwest before the first navigation.
|
||||
//!
|
||||
//! Two things the cookie alone doesn't give us:
|
||||
//! 1. The cookie value is only meaningful to the *server* — we have
|
||||
//! no way to predict from the value alone whether it's still valid.
|
||||
//! `verify_session` does a navigation and inspects the probe page
|
||||
//! for three outcomes: broken-page response (transient — retry the
|
||||
//! probe), `#logo` present but `#avatar_menu` absent (genuine logout
|
||||
//! — bail loudly), or both present (authenticated). The earlier
|
||||
//! avatar-only check conflated "site is hiccuping" with "session is
|
||||
//! dead" and refused to start the crawler when the site had a brief
|
||||
//! 503.
|
||||
//! 2. The reqwest client (used for cover and chapter-image downloads)
|
||||
//! has its own cookie store; we seed it for the catalog host only.
|
||||
//! CDN hosts are deliberately *not* given the cookie — they serve
|
||||
//! image bytes by signed URLs and don't need it.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use chromiumoxide::browser::Browser;
|
||||
use chromiumoxide::cdp::browser_protocol::network::CookieParam;
|
||||
|
||||
use crate::crawler::detect::{has_logo_sentinel, is_broken_page_body};
|
||||
|
||||
/// Result of classifying the HTML of a probe page.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SessionProbe {
    /// Both `#logo` and `#avatar_menu` are on the page — the session is
    /// valid.
    Ok,
    /// `#logo` is there but `#avatar_menu` is not: the site rendered its
    /// normal layout for a logged-out visitor, so PHPSESSID needs
    /// refreshing.
    Unauthenticated,
    /// The body matches the broken-page signature, or `#logo` is
    /// missing — the site is hiccuping. Callers retry the probe instead
    /// of blaming the session.
    Transient,
}
|
||||
|
||||
/// Compute the cookie domain (e.g. `.example.com`) from a start URL.
/// The leading dot makes the cookie cover every subdomain — the source
/// often redirects between `www.` and other prefixes mid-crawl, and a
/// host-only cookie would silently drop on the cross-subdomain hop.
///
/// Parsing rules:
/// - The authority ends at the first `/`, `?` or `#`; a `user:pass@`
///   prefix and a `:port` suffix are discarded; the host is lowercased.
/// - A bare hostname (`localhost`), an IPv4 literal (`192.168.1.10`) or
///   a bracketed IPv6 literal (`[::1]`) is returned as-is with no
///   leading dot — a dotted suffix of an IP address is not a domain.
/// - Otherwise the last two dot-labels are returned with a leading dot.
///
/// Returns `None` when the input has no `://` separator or no host.
///
/// Caveat: taking the last two dot-labels is wrong for multi-part TLDs —
/// `www.example.co.uk` resolves to `.co.uk` (and `www.example.com.br` to
/// `.com.br`), which would attach the cookie to every site under that
/// suffix. For those, the operator should override via
/// `CRAWLER_COOKIE_DOMAIN` rather than relying on this function —
/// pulling in the Public Suffix List for one knob isn't worth it yet.
pub fn registrable_domain(url: &str) -> Option<String> {
    let after_scheme = url.split_once("://")?.1;
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()?;
    let host_with_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);

    // IPv6 literal: the colons inside the brackets are not a port
    // separator, and there are no labels to trim.
    if host_with_port.starts_with('[') {
        let end = host_with_port.find(']')?;
        return Some(host_with_port[..=end].to_ascii_lowercase());
    }

    let host = host_with_port
        .rsplit_once(':')
        .map_or(host_with_port, |(h, _)| h)
        .to_ascii_lowercase();
    if host.is_empty() {
        return None;
    }
    // An IPv4 address must be used whole; `.1.10` is meaningless.
    if host.parse::<std::net::Ipv4Addr>().is_ok() {
        return Some(host);
    }
    let labels: Vec<&str> = host.split('.').filter(|l| !l.is_empty()).collect();
    if labels.len() < 2 {
        // Bare hostname (e.g. `localhost`) — return as-is, no leading
        // dot. Setting `.localhost` as cookie domain is invalid.
        return Some(host);
    }
    let registrable = &labels[labels.len() - 2..];
    Some(format!(".{}", registrable.join(".")))
}
|
||||
|
||||
/// Inject the PHPSESSID cookie into the browser's cookie store for the
|
||||
/// catalog domain. Must be called before any navigation that depends on
|
||||
/// authentication; subsequent navigations include the cookie
|
||||
/// automatically.
|
||||
pub async fn inject_phpsessid(
|
||||
browser: &Browser,
|
||||
sid: &str,
|
||||
cookie_domain: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
let cookie = CookieParam {
|
||||
name: "PHPSESSID".to_string(),
|
||||
value: sid.to_string(),
|
||||
url: None,
|
||||
domain: Some(cookie_domain.to_string()),
|
||||
path: Some("/".to_string()),
|
||||
secure: None,
|
||||
http_only: Some(true),
|
||||
same_site: None,
|
||||
expires: None,
|
||||
priority: None,
|
||||
same_party: None,
|
||||
source_scheme: None,
|
||||
source_port: None,
|
||||
partition_key: None,
|
||||
};
|
||||
browser
|
||||
.set_cookies(vec![cookie])
|
||||
.await
|
||||
.context("set PHPSESSID in chromium cookie store")?;
|
||||
tracing::info!(domain = cookie_domain, "injected PHPSESSID into browser");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Three-way classification of a probe-page response. Pure over HTML so
|
||||
/// it's unit-testable without a real browser. Order matters: a body
|
||||
/// matching the broken-page template is `Transient` even if the page
|
||||
/// happens to contain `#avatar_menu` HTML somewhere — trust the universal
|
||||
/// site signal over a stray selector match.
|
||||
pub fn classify_probe(html: &str) -> SessionProbe {
|
||||
if is_broken_page_body(html) {
|
||||
return SessionProbe::Transient;
|
||||
}
|
||||
let doc = scraper::Html::parse_document(html);
|
||||
if !has_logo_sentinel(&doc) {
|
||||
return SessionProbe::Transient;
|
||||
}
|
||||
let avatar_sel = scraper::Selector::parse("#avatar_menu").unwrap();
|
||||
if doc.select(&avatar_sel).next().is_some() {
|
||||
SessionProbe::Ok
|
||||
} else {
|
||||
SessionProbe::Unauthenticated
|
||||
}
|
||||
}
|
||||
|
||||
/// In-startup retry budget for the session probe. Small but non-zero —
|
||||
/// startup hitting a 5-second site hiccup shouldn't fail the operator
|
||||
/// with "PHPSESSID expired" when the session is actually fine.
|
||||
const PROBE_MAX_ATTEMPTS: u32 = 3;
|
||||
const PROBE_RETRY_DELAY: Duration = Duration::from_secs(2);
|
||||
|
||||
/// Navigate to `probe_url` and classify the response. Retries the probe
|
||||
/// on `Transient` outcomes (broken-page body, missing `#logo`); fails
|
||||
/// fast on `Unauthenticated`; returns `Ok(())` on success.
|
||||
///
|
||||
/// This burns one navigation per attempt against the catalog's rate
|
||||
/// limiter. The trade is worth it — failing here costs ~1s; failing 30
|
||||
/// minutes into a backfill costs 30 minutes.
|
||||
pub async fn verify_session(browser: &Browser, probe_url: &str) -> anyhow::Result<()> {
|
||||
let mut attempt = 0u32;
|
||||
loop {
|
||||
attempt += 1;
|
||||
let html = fetch_probe_html(browser, probe_url).await?;
|
||||
match classify_probe(&html) {
|
||||
SessionProbe::Ok => {
|
||||
tracing::info!(attempt, "session probe ok — #logo + #avatar_menu present");
|
||||
return Ok(());
|
||||
}
|
||||
SessionProbe::Unauthenticated => {
|
||||
return Err(anyhow!(
|
||||
"session probe failed — #avatar_menu not present at {probe_url} \
|
||||
(page rendered the normal layout); PHPSESSID is missing, expired, \
|
||||
or revoked. Refresh CRAWLER_PHPSESSID and re-run."
|
||||
));
|
||||
}
|
||||
SessionProbe::Transient if attempt < PROBE_MAX_ATTEMPTS => {
|
||||
tracing::warn!(
|
||||
attempt,
|
||||
max_attempts = PROBE_MAX_ATTEMPTS,
|
||||
"session probe got a transient page; retrying"
|
||||
);
|
||||
tokio::time::sleep(PROBE_RETRY_DELAY).await;
|
||||
}
|
||||
SessionProbe::Transient => {
|
||||
return Err(anyhow!(
|
||||
"session probe failed — probe page at {probe_url} returned a \
|
||||
broken-page response after {PROBE_MAX_ATTEMPTS} attempts. \
|
||||
The site appears to be down or rate-limiting us; try again \
|
||||
later before refreshing CRAWLER_PHPSESSID."
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Open `probe_url` in a new tab, wait for the navigation to finish, and
/// return the page's HTML.
///
/// The tab is closed on every path once it has been opened — including
/// when waiting for the navigation or reading the content fails — so a
/// failing probe does not leave a tab behind in the browser. A failure
/// to close the tab is ignored.
///
/// # Errors
/// Fails when the tab cannot be opened, the navigation wait fails, or
/// the content cannot be read.
async fn fetch_probe_html(browser: &Browser, probe_url: &str) -> anyhow::Result<String> {
    let page = browser
        .new_page(probe_url)
        .await
        .with_context(|| format!("open probe page {probe_url}"))?;

    // Collect the outcome first instead of returning early with `?`, so
    // the close below also runs on the error paths.
    let html: anyhow::Result<String> = async {
        page.wait_for_navigation()
            .await
            .context("wait for nav on probe")?;
        page.content().await.context("read probe html")
    }
    .await;

    page.close().await.ok();
    html
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- registrable_domain ------------------------------------------

    #[test]
    fn registrable_domain_strips_subdomain() {
        let cases = [
            ("https://www.target-site.com/manga/foo/", ".target-site.com"),
            ("https://m.example.org", ".example.org"),
        ];
        for (url, expected) in cases {
            assert_eq!(registrable_domain(url).as_deref(), Some(expected));
        }
    }

    #[test]
    fn registrable_domain_keeps_two_label_host() {
        let domain = registrable_domain("https://example.com/");
        assert_eq!(domain.as_deref(), Some(".example.com"));
    }

    #[test]
    fn registrable_domain_handles_port() {
        let domain = registrable_domain("http://www.foo.bar:8080/x");
        assert_eq!(domain.as_deref(), Some(".foo.bar"));
    }

    #[test]
    fn registrable_domain_bare_hostname_no_leading_dot() {
        // A single-label host must not gain a leading dot: `.localhost`
        // would be invalid as a cookie Domain.
        let domain = registrable_domain("http://localhost:5173");
        assert_eq!(domain.as_deref(), Some("localhost"));
    }

    #[test]
    fn registrable_domain_returns_none_for_garbage() {
        let domain = registrable_domain("not a url");
        assert!(domain.is_none());
    }

    // ---- classify_probe ----------------------------------------------

    #[test]
    fn classify_probe_ok_when_logo_and_avatar_present() {
        let html = r#"<html><body>
<header><div id="logo">Target</div><div id="avatar_menu"></div></header>
</body></html>"#;
        let verdict = classify_probe(html);
        assert_eq!(verdict, SessionProbe::Ok);
    }

    #[test]
    fn classify_probe_unauth_when_logo_present_but_avatar_absent() {
        // The genuine logged-out page: the site layout renders, only the
        // avatar widget is absent. No other state should put the blame
        // on the session cookie.
        let html = r#"<html><body>
<header><div id="logo">Target</div></header>
<main>Please log in.</main>
</body></html>"#;
        let verdict = classify_probe(html);
        assert_eq!(verdict, SessionProbe::Unauthenticated);
    }

    #[test]
    fn classify_probe_transient_on_broken_page_body() {
        let html = "<html><body>\
            <p>we're sorry, the request file are not found.</p>\
            </body></html>";
        let verdict = classify_probe(html);
        assert_eq!(verdict, SessionProbe::Transient);
    }

    #[test]
    fn classify_probe_transient_when_logo_missing() {
        // Neither the broken-body marker nor the site layout is present
        // (think Cloudflare interstitial or a 5xx page): that is a
        // transient condition, not evidence against the session.
        let html = "<html><body><h1>Service Unavailable</h1></body></html>";
        let verdict = classify_probe(html);
        assert_eq!(verdict, SessionProbe::Transient);
    }

    #[test]
    fn classify_probe_transient_on_empty_response() {
        let verdict = classify_probe("");
        assert_eq!(verdict, SessionProbe::Transient);
    }

    #[test]
    fn classify_probe_trusts_broken_body_over_stray_avatar_match() {
        // Defensive case: even when a broken-page body happens to carry
        // an #avatar_menu element (say, an unrelated debug page built on
        // the same template), the body signature takes precedence.
        let html = r#"<html><body>
<p>we're sorry, the request file are not found.</p>
<div id="logo"></div>
<div id="avatar_menu"></div>
</body></html>"#;
        let verdict = classify_probe(html);
        assert_eq!(verdict, SessionProbe::Transient);
    }
}
|
||||
139
backend/src/crawler/source.rs
Normal file
139
backend/src/crawler/source.rs
Normal file
@@ -0,0 +1,139 @@
|
||||
//! `Source` trait — the per-site abstraction.
|
||||
//!
|
||||
//! Job handlers depend on this trait, not on a concrete site. Adding a
|
||||
//! new site is: implement `Source`, register it in a `sources` table
|
||||
//! row, and the existing job pipeline picks it up unchanged.
|
||||
|
||||
pub mod target;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chromiumoxide::browser::Browser;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// How a `discover` job should walk the source's index.
|
||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
|
||||
pub enum DiscoverMode {
|
||||
/// Walk every index page from last back to first. Used for the
|
||||
/// initial seed of a source.
|
||||
Backfill,
|
||||
/// Walk index pages from page 1 forward, stopping after
|
||||
/// `stop_after_unchanged` consecutive mangas whose `metadata_hash`
|
||||
/// matches storage. Used for the recurring cron tick.
|
||||
Incremental { stop_after_unchanged: usize },
|
||||
}
|
||||
|
||||
/// Reference to a manga as listed in the source's index, i.e. before the
/// detail page has been fetched.
#[derive(Clone, Debug)]
pub struct SourceMangaRef {
    /// Stable identifier the source itself uses for the manga (a slug,
    /// a numeric id, ...).
    pub source_manga_key: String,
    /// Title as shown in the index listing.
    pub title: String,
    /// URL of the manga's detail page.
    pub url: String,
}
|
||||
|
||||
/// Full metadata returned by `fetch_manga`. The hash is computed by the
|
||||
/// source impl over the metadata-only field set (title through
|
||||
/// cover_url) — chapter changes are tracked separately via
|
||||
/// `chapter_sources`, so they intentionally do not affect
|
||||
/// `metadata_hash`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SourceManga {
|
||||
pub source_manga_key: String,
|
||||
pub title: String,
|
||||
pub alternative_titles: Vec<String>,
|
||||
pub authors: Vec<String>,
|
||||
pub genres: Vec<String>,
|
||||
pub tags: Vec<String>,
|
||||
pub status: Option<String>,
|
||||
pub summary: Option<String>,
|
||||
pub cover_url: Option<String>,
|
||||
/// Chapters surfaced on the same page as the metadata. Sources
|
||||
/// where the chapter list lives elsewhere can leave this empty
|
||||
/// and supply it via `fetch_chapter_list` instead.
|
||||
pub chapters: Vec<SourceChapterRef>,
|
||||
pub metadata_hash: String,
|
||||
}
|
||||
|
||||
/// Reference to a chapter as it appears in a chapter listing: enough to
/// identify it and to locate its page via `url`.
#[derive(Clone, Debug)]
pub struct SourceChapterRef {
    /// Identifier of the chapter on the source.
    pub source_chapter_key: String,
    /// Chapter number.
    pub number: i32,
    /// Chapter title, when one is available.
    pub title: Option<String>,
    /// URL the chapter link points at.
    pub url: String,
}
|
||||
|
||||
/// A chapter together with the URLs of its page images.
#[derive(Clone, Debug)]
pub struct SourceChapter {
    /// Identifier of the chapter on the source.
    pub source_chapter_key: String,
    /// Chapter number.
    pub number: i32,
    /// Chapter title, when one is available.
    pub title: Option<String>,
    /// Page image URLs in reading order, ready to be fetched and stored
    /// in `Storage`.
    pub page_urls: Vec<String>,
}
|
||||
|
||||
/// Context passed to every `Source` call. Carries the browser handle
|
||||
/// plus the per-host rate-limiter map so impls that issue multiple
|
||||
/// requests in one call (pagination walks, multi-page chapter image
|
||||
/// fetches) honor the right budget for each origin.
|
||||
pub struct FetchContext<'a> {
|
||||
pub browser: &'a Browser,
|
||||
pub rate: &'a crate::crawler::rate_limit::HostRateLimiters,
|
||||
}
|
||||
|
||||
/// Lazy iterator over discovered manga refs. The caller drives the
|
||||
/// walk one batch at a time, so it can break out as soon as a
|
||||
/// downstream stop condition is met (e.g. N consecutive Unchanged
|
||||
/// upserts in Incremental mode) without paying for pages it won't use.
|
||||
///
|
||||
/// Batches are typically one source-index page each. Within a batch
|
||||
/// refs are already in the right per-page order for the active mode
|
||||
/// (Backfill reverses each page to oldest-first; Incremental leaves
|
||||
/// the source's natural newest-first ordering).
|
||||
#[async_trait]
|
||||
pub trait DiscoverWalk: Send {
|
||||
/// Return the next batch of refs, or `Ok(None)` when the source has
|
||||
/// no more pages. The walker is single-use; calling `next_batch`
|
||||
/// after `None` is allowed and continues to return `None`.
|
||||
async fn next_batch(
|
||||
&mut self,
|
||||
ctx: &FetchContext<'_>,
|
||||
) -> anyhow::Result<Option<Vec<SourceMangaRef>>>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait Source: Send + Sync {
|
||||
/// Stable identifier — also the row key in the `sources` table.
|
||||
fn id(&self) -> &'static str;
|
||||
|
||||
/// Begin discovery in `mode`. Returns a walker the caller drives
|
||||
/// page-by-page via `next_batch`. The initial page-1 probe (used
|
||||
/// to determine `last_page` and warm the cache for sites that
|
||||
/// can't be paged without knowing the bound) happens inside this
|
||||
/// call, so a fresh walker is ready to yield its first batch
|
||||
/// without further setup.
|
||||
async fn discover(
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
mode: DiscoverMode,
|
||||
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>>;
|
||||
|
||||
async fn fetch_manga(
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
r: &SourceMangaRef,
|
||||
) -> anyhow::Result<SourceManga>;
|
||||
|
||||
async fn fetch_chapter_list(
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
manga: &SourceManga,
|
||||
) -> anyhow::Result<Vec<SourceChapterRef>>;
|
||||
|
||||
async fn fetch_chapter(
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
r: &SourceChapterRef,
|
||||
) -> anyhow::Result<SourceChapter>;
|
||||
}
|
||||
993
backend/src/crawler/source/target.rs
Normal file
993
backend/src/crawler/source/target.rs
Normal file
@@ -0,0 +1,993 @@
|
||||
//! First concrete [`Source`] impl, modeled on the selectors of the
|
||||
//! old Puppeteer crawler. The name "target" is a placeholder — rename
|
||||
//! once the site is officially identified.
|
||||
//!
|
||||
//! `scraper`'s selector parser does not support `:has()` or
|
||||
//! `:contains()`, so the labelled-`td` lookups from the old script
|
||||
//! (`td:has(label:contains("Author:"))`) are implemented by walking
|
||||
//! the parsed tree.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
use super::{
|
||||
DiscoverMode, DiscoverWalk, FetchContext, Source, SourceChapter, SourceChapterRef,
|
||||
SourceManga, SourceMangaRef,
|
||||
};
|
||||
use crate::crawler::detect::{
|
||||
has_logo_sentinel, is_broken_page_body, retry_on_transient, PageError,
|
||||
};
|
||||
|
||||
/// The `sources.id` value belonging to this Source impl.
///
/// It is a public const so the daemon can read per-source state (for
/// instance `seed_completed_at`) before it constructs the Source.
pub const SOURCE_ID: &str = "target";
|
||||
|
||||
/// Retry budget for a transient page hit inside a single `discover`
/// walk. Kept small on purpose: the job system already retries the whole
/// `Discover` job when it fails, so these inline retries only have to
/// ride out a short site hiccup mid-walk.
const PAGE_TRANSIENT_RETRY_ATTEMPTS: u32 = 3;

/// Delay passed alongside the attempt budget above to `retry_on_transient`.
const PAGE_TRANSIENT_RETRY_DELAY: Duration = Duration::from_secs(2);
|
||||
|
||||
/// `Source` implementation for the "target" site.
pub struct TargetSource {
    /// URL of the index's first page; the starting point for discovery.
    base_url: String,
    /// Whether detail-page parsing also extracts the chapter list.
    parse_chapters: bool,
}
|
||||
|
||||
impl TargetSource {
|
||||
pub fn new(base_url: impl Into<String>) -> Self {
|
||||
Self {
|
||||
base_url: base_url.into(),
|
||||
parse_chapters: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn base_url(&self) -> &str {
|
||||
&self.base_url
|
||||
}
|
||||
|
||||
/// Skip the chapter-list selector when parsing detail pages.
|
||||
/// The returned `SourceManga.chapters` will be empty even when the
|
||||
/// page has a chapter table. Caller must also avoid calling
|
||||
/// `repo::crawler::sync_manga_chapters` for these mangas — an
|
||||
/// empty list would otherwise soft-drop the manga's existing
|
||||
/// chapter rows.
|
||||
pub fn without_chapter_parsing(mut self) -> Self {
|
||||
self.parse_chapters = false;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Source for TargetSource {
|
||||
fn id(&self) -> &'static str {
|
||||
SOURCE_ID
|
||||
}
|
||||
|
||||
async fn discover(
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
mode: DiscoverMode,
|
||||
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>> {
|
||||
// Always visit page 1 first because that's the only way to
|
||||
// discover `last_page`. Retry it on transient — a broken first
|
||||
// page would otherwise abort the whole walk before we've even
|
||||
// started.
|
||||
let first_html = retry_on_transient(
|
||||
|| async { navigate(ctx, self.base_url.as_str()).await },
|
||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||
PAGE_TRANSIENT_RETRY_DELAY,
|
||||
)
|
||||
.await?;
|
||||
let last_page = {
|
||||
let doc = scraper::Html::parse_document(&first_html);
|
||||
parse_last_page(&doc)
|
||||
};
|
||||
|
||||
let backfill = matches!(mode, DiscoverMode::Backfill);
|
||||
let order = build_page_order(last_page, backfill);
|
||||
tracing::info!(
|
||||
?mode,
|
||||
last_page = ?last_page,
|
||||
page_count = order.len(),
|
||||
"walking pagination"
|
||||
);
|
||||
|
||||
Ok(Box::new(TargetSourceWalker {
|
||||
base_url: self.base_url.clone(),
|
||||
backfill,
|
||||
pages_remaining: order,
|
||||
first_page_html: Some(first_html),
|
||||
}))
|
||||
}
|
||||
|
||||
async fn fetch_manga(
|
||||
&self,
|
||||
ctx: &FetchContext<'_>,
|
||||
r: &SourceMangaRef,
|
||||
) -> anyhow::Result<SourceManga> {
|
||||
let html = navigate(ctx, r.url.as_str()).await?;
|
||||
// Convert PageError → anyhow::Error via `?`. PageError stays
|
||||
// downcastable from the wrapped anyhow::Error so the pipeline
|
||||
// can still recognize Transient via `error.downcast_ref::<PageError>()`.
|
||||
let manga = parse_manga_detail(&html, &r.source_manga_key, self.parse_chapters)
|
||||
.with_context(|| format!("parse manga detail at {}", r.url))?;
|
||||
Ok(manga)
|
||||
}
|
||||
|
||||
async fn fetch_chapter_list(
|
||||
&self,
|
||||
_ctx: &FetchContext<'_>,
|
||||
_manga: &SourceManga,
|
||||
) -> anyhow::Result<Vec<SourceChapterRef>> {
|
||||
anyhow::bail!("fetch_chapter_list not implemented yet")
|
||||
}
|
||||
|
||||
async fn fetch_chapter(
|
||||
&self,
|
||||
_ctx: &FetchContext<'_>,
|
||||
_r: &SourceChapterRef,
|
||||
) -> anyhow::Result<SourceChapter> {
|
||||
anyhow::bail!("fetch_chapter not implemented yet")
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the queue of page numbers `TargetSource::discover` will walk.
///
/// * Backfill (`backfill == true`) is oldest-first: pages `last..=1`.
///   Within each page the walker reverses the entries, since the source
///   orders by update_date DESC.
/// * Incremental is newest-first: pages `1..=last` in natural order.
///
/// When `last_page` is unknown (the source surfaces no pagination) only
/// page 1 is visited. A reported bound below 1 is treated the same way:
/// page 1 has already been fetched by the time this runs, so the queue
/// must never come out empty and silently discard it.
fn build_page_order(last_page: Option<i32>, backfill: bool) -> VecDeque<i32> {
    // Clamp so that `None`, `Some(0)` and negative bounds all yield [1].
    let last = last_page.unwrap_or(1).max(1);
    if backfill {
        (1..=last).rev().collect()
    } else {
        (1..=last).collect()
    }
}
|
||||
|
||||
/// The walker [`TargetSource::discover`] hands back. Each `next_batch`
/// call consumes one source-index page. The HTML of page 1 is stored at
/// construction time — `discover` had to fetch it to read `last_page` —
/// so the batch for page 1 needs no second fetch.
struct TargetSourceWalker {
    /// URL of the first index page.
    base_url: String,
    /// When set, each page's refs are reversed before being returned.
    backfill: bool,
    /// Page numbers still to be walked, in visiting order.
    pages_remaining: VecDeque<i32>,
    /// Cached page-1 HTML; taken (and thus cleared) on first use.
    first_page_html: Option<String>,
}
|
||||
|
||||
#[async_trait]
|
||||
impl DiscoverWalk for TargetSourceWalker {
|
||||
async fn next_batch(
|
||||
&mut self,
|
||||
ctx: &FetchContext<'_>,
|
||||
) -> anyhow::Result<Option<Vec<SourceMangaRef>>> {
|
||||
let Some(page_num) = self.pages_remaining.pop_front() else {
|
||||
return Ok(None);
|
||||
};
|
||||
let mut page_refs = if page_num == 1 {
|
||||
// Reuse the cached page-1 HTML from the initial probe. Take
|
||||
// it (rather than clone) so a malformed page-order queue
|
||||
// that re-visits page 1 still falls back to a real fetch.
|
||||
match self.first_page_html.take() {
|
||||
Some(html) => {
|
||||
let doc = scraper::Html::parse_document(&html);
|
||||
parse_manga_list_from(&doc)?
|
||||
}
|
||||
None => {
|
||||
retry_on_transient(
|
||||
|| async {
|
||||
let html = navigate(ctx, self.base_url.as_str()).await?;
|
||||
let doc = scraper::Html::parse_document(&html);
|
||||
parse_manga_list_from(&doc)
|
||||
},
|
||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||
PAGE_TRANSIENT_RETRY_DELAY,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
retry_on_transient(
|
||||
|| async {
|
||||
let url = page_url(&self.base_url, page_num);
|
||||
let html = navigate(ctx, &url).await?;
|
||||
let doc = scraper::Html::parse_document(&html);
|
||||
parse_manga_list_from(&doc)
|
||||
},
|
||||
PAGE_TRANSIENT_RETRY_ATTEMPTS,
|
||||
PAGE_TRANSIENT_RETRY_DELAY,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
if self.backfill {
|
||||
page_refs.reverse();
|
||||
}
|
||||
tracing::info!(page_num, count = page_refs.len(), "page walked");
|
||||
Ok(Some(page_refs))
|
||||
}
|
||||
}
|
||||
|
||||
/// Single point of rate-limited navigation. Every Source request goes
|
||||
/// through here, so the per-host limiter map is the only knob that
|
||||
/// controls per-origin RPS. Also the choke point for transient-page
|
||||
/// detection — every fetched body is screened by
|
||||
/// [`classify_navigate_html`] before being handed to a selector.
|
||||
async fn navigate(ctx: &FetchContext<'_>, url: &str) -> Result<String, PageError> {
|
||||
ctx.rate.wait_for(url).await?;
|
||||
let page = ctx
|
||||
.browser
|
||||
.new_page(url)
|
||||
.await
|
||||
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
|
||||
page.wait_for_navigation()
|
||||
.await
|
||||
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
|
||||
// Stopgap until we wait on a specific selector per page type —
|
||||
// gives any post-load JS a beat to finish injecting content.
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
let html = page
|
||||
.content()
|
||||
.await
|
||||
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
|
||||
page.close().await.ok();
|
||||
classify_navigate_html(html)
|
||||
}
|
||||
|
||||
/// Classify a fetched body. The broken-page template is universal across
|
||||
/// the site — every page type (list, detail, chapter list, reader) gets
|
||||
/// the same `we're sorry, the request file are not found` body when the
|
||||
/// server is hiccuping. Catching it here means individual parsers
|
||||
/// downstream don't have to repeat the check.
|
||||
fn classify_navigate_html(html: String) -> Result<String, PageError> {
|
||||
if is_broken_page_body(&html) {
|
||||
return Err(PageError::transient("broken-page body signature"));
|
||||
}
|
||||
Ok(html)
|
||||
}
|
||||
|
||||
fn parse_last_page(doc: &scraper::Html) -> Option<i32> {
|
||||
// Pagination links carry their page number as text. Take the
|
||||
// numeric maximum so we don't depend on a specific layout (Prev,
|
||||
// Next, ellipses, etc. all get filtered out by .parse).
|
||||
let sel = scraper::Selector::parse("#left_side .pagination a").unwrap();
|
||||
doc.select(&sel)
|
||||
.filter_map(|a| {
|
||||
collapse_whitespace(&a.text().collect::<String>())
|
||||
.parse::<i32>()
|
||||
.ok()
|
||||
})
|
||||
.max()
|
||||
}
|
||||
|
||||
/// Replace the first all-digit path segment (`/N/`, slash on both sides)
/// in `template_url` with `page`. Source impls that paginate through a
/// different URL shape can override this — on the modeled site the
/// segment is always there.
///
/// If no such segment exists the URL is returned unchanged.
fn page_url(template_url: &str, page: i32) -> String {
    for (slash, _) in template_url.match_indices('/') {
        let tail = &template_url[slash + 1..];
        let digits = tail.bytes().take_while(u8::is_ascii_digit).count();
        // A match needs at least one digit and a closing slash right
        // after the digit run.
        if digits > 0 && tail.as_bytes().get(digits) == Some(&b'/') {
            let rest = &tail[digits + 1..];
            return format!("{}/{page}/{rest}", &template_url[..slash]);
        }
    }
    template_url.to_string()
}
|
||||
|
||||
/// Test-only convenience: parse `html` into a document and delegate to
/// [`parse_manga_list_from`].
#[cfg(test)]
fn parse_manga_list(html: &str) -> Result<Vec<SourceMangaRef>, PageError> {
    parse_manga_list_from(&scraper::Html::parse_document(html))
}
|
||||
|
||||
/// Parse a manga listing page. `#logo` is present on every well-formed
|
||||
/// listing page on the source; its absence means the response is a
|
||||
/// broken-page placeholder (transient) rather than a genuinely empty
|
||||
/// listing. Empty listings (last-page tail, search with no hits) remain
|
||||
/// `Ok(vec![])`.
|
||||
fn parse_manga_list_from(doc: &scraper::Html) -> Result<Vec<SourceMangaRef>, PageError> {
|
||||
if !has_logo_sentinel(doc) {
|
||||
return Err(PageError::transient("manga list: #logo sentinel missing"));
|
||||
}
|
||||
let sel = scraper::Selector::parse("#left_side .pic_list .updatesli span a").unwrap();
|
||||
Ok(doc
|
||||
.select(&sel)
|
||||
.filter_map(|a| {
|
||||
let url = a.value().attr("href")?.trim().to_string();
|
||||
if url.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let title = collapse_whitespace(&a.text().collect::<String>());
|
||||
if title.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(SourceMangaRef {
|
||||
source_manga_key: derive_key_from_url(&url),
|
||||
title,
|
||||
url,
|
||||
})
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn parse_manga_detail(
|
||||
html: &str,
|
||||
key: &str,
|
||||
include_chapters: bool,
|
||||
) -> Result<SourceManga, PageError> {
|
||||
let doc = scraper::Html::parse_document(html);
|
||||
|
||||
// Sentinel first: a broken-page response will trip this before any
|
||||
// anyhow context is added for missing required fields.
|
||||
if !has_logo_sentinel(&doc) {
|
||||
return Err(PageError::transient("manga detail: #logo sentinel missing"));
|
||||
}
|
||||
|
||||
let title = first_text(&doc, ".w-title h1").context("missing .w-title h1")?;
|
||||
let summary = first_text(&doc, ".manga_summary");
|
||||
let cover_url = first_attr(&doc, ".cover > img:nth-child(1)", "src");
|
||||
|
||||
let authors = links_in_labelled_td(&doc, "Author");
|
||||
let genres = links_in_labelled_td(&doc, "Genre");
|
||||
let raw_status = labelled_td_child_text(&doc, "Status", "span");
|
||||
let status = normalize_status(raw_status.as_deref(), key);
|
||||
|
||||
let alternative_titles = labelled_td_value_after_label(&doc, "Alternative")
|
||||
.map(|s| {
|
||||
s.split([';', ',', '|'])
|
||||
.map(str::trim)
|
||||
.filter(|p| !p.is_empty())
|
||||
.map(String::from)
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let tag_sel = scraper::Selector::parse(".aside-body a.tag").unwrap();
|
||||
let tags: Vec<String> = doc
|
||||
.select(&tag_sel)
|
||||
.map(|a| collapse_whitespace(&a.text().collect::<String>()))
|
||||
.map(|s| strip_tag_count(&s))
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
|
||||
let chapters = if include_chapters {
|
||||
parse_chapter_list(&doc)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let mut manga = SourceManga {
|
||||
source_manga_key: key.to_string(),
|
||||
title,
|
||||
alternative_titles,
|
||||
authors,
|
||||
genres,
|
||||
tags,
|
||||
status,
|
||||
summary,
|
||||
cover_url,
|
||||
chapters,
|
||||
metadata_hash: String::new(),
|
||||
};
|
||||
manga.metadata_hash = compute_metadata_hash(&manga);
|
||||
Ok(manga)
|
||||
}
|
||||
|
||||
/// Source advertises status as "Ongoing" or "Completed"; we normalize
|
||||
/// to the lowercase form the `mangas.status` CHECK constraint accepts.
|
||||
/// Anything else is a parse miss (selector drift, new value, etc.) and
|
||||
/// returns `None` after logging — the manga sync continues regardless.
|
||||
fn normalize_status(raw: Option<&str>, key: &str) -> Option<String> {
|
||||
let trimmed = raw.map(str::trim).filter(|s| !s.is_empty())?;
|
||||
if trimmed.eq_ignore_ascii_case("ongoing") {
|
||||
Some("ongoing".to_string())
|
||||
} else if trimmed.eq_ignore_ascii_case("completed") {
|
||||
Some("completed".to_string())
|
||||
} else {
|
||||
tracing::error!(
|
||||
key,
|
||||
raw_status = trimmed,
|
||||
"unknown manga status (expected 'Ongoing' or 'Completed'); continuing with status=None"
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove a trailing `(NN)` suffix — digits only — from a tag name; the
/// source appends tag counts in that form. Parentheses whose content is
/// empty or not purely numeric are left in place. The result is trimmed
/// either way.
fn strip_tag_count(s: &str) -> String {
    let tag = s.trim();
    let without_count = tag
        .strip_suffix(')')
        .and_then(|body| body.rsplit_once('('))
        .filter(|(_, count)| !count.is_empty() && count.bytes().all(|b| b.is_ascii_digit()))
        .map(|(name, _)| name.trim());
    without_count.unwrap_or(tag).to_string()
}
|
||||
|
||||
fn parse_chapter_list(doc: &scraper::Html) -> Vec<SourceChapterRef> {
|
||||
let sel = scraper::Selector::parse("#chapter_table td h4 a.chico").unwrap();
|
||||
doc.select(&sel)
|
||||
.filter_map(|a| {
|
||||
let url = a.value().attr("href")?.trim().to_string();
|
||||
if url.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let title_text = collapse_whitespace(&a.text().collect::<String>());
|
||||
let number = parse_chapter_number(&title_text).unwrap_or(0);
|
||||
Some(SourceChapterRef {
|
||||
source_chapter_key: derive_chapter_key_from_url(&url),
|
||||
number,
|
||||
title: (!title_text.is_empty()).then_some(title_text),
|
||||
url,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Parse the first contiguous run of ASCII digits in `text` as an `i32`.
///
/// Returns `None` when `text` has no digits or the run does not fit in
/// an `i32`. Only the first run counts, so `"Chapter 10.5"` gives 10.
fn parse_chapter_number(text: &str) -> Option<i32> {
    let digits: String = text
        .chars()
        .skip_while(|c| !c.is_ascii_digit())
        .take_while(char::is_ascii_digit)
        .collect();
    digits.parse().ok()
}
|
||||
|
||||
/// Derive a manga key from its URL: the last non-empty path segment,
/// with any `?query` and trailing slashes ignored. If no non-empty
/// segment is found, the URL itself is returned.
fn derive_key_from_url(url: &str) -> String {
    let path = match url.split_once('?') {
        Some((before_query, _)) => before_query,
        None => url,
    };
    let key = path
        .trim_end_matches('/')
        .rsplit('/')
        .find(|segment| !segment.is_empty())
        .unwrap_or(url);
    key.to_owned()
}
|
||||
|
||||
/// Chapter URLs on this source point at the reader's page 1, e.g.
|
||||
/// `.../uu/br_chapter-379272/pg-1/`. The chapter identity is the
|
||||
/// `br_chapter-N` (or `to_chapter-N`) segment — the `pg-\d+` segment
|
||||
/// identifies a page *within* a chapter, so naively taking the last
|
||||
/// path component returns `"pg-1"` for every chapter and collapses
|
||||
/// them all under one source_chapter_key downstream.
|
||||
fn derive_chapter_key_from_url(url: &str) -> String {
|
||||
let trimmed = url.split('?').next().unwrap_or(url).trim_end_matches('/');
|
||||
let without_reader_page = match trimmed.rsplit_once('/') {
|
||||
Some((prefix, last)) if is_reader_page_segment(last) => prefix,
|
||||
_ => trimmed,
|
||||
};
|
||||
without_reader_page
|
||||
.rsplit('/')
|
||||
.find(|s| !s.is_empty())
|
||||
.unwrap_or(url)
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// True when `s` is `pg-` followed by at least one ASCII digit and
/// nothing else.
fn is_reader_page_segment(s: &str) -> bool {
    match s.strip_prefix("pg-") {
        Some(page_no) => !page_no.is_empty() && page_no.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}
|
||||
|
||||
fn first_text(doc: &scraper::Html, sel: &str) -> Option<String> {
|
||||
let s = scraper::Selector::parse(sel).ok()?;
|
||||
let el = doc.select(&s).next()?;
|
||||
let text = collapse_whitespace(&el.text().collect::<String>());
|
||||
(!text.is_empty()).then_some(text)
|
||||
}
|
||||
|
||||
/// Value of attribute `attr` on the first element matching `sel`.
///
/// `None` when the selector does not parse, matches nothing, or the
/// element lacks the attribute. The value is returned as-is, untrimmed.
fn first_attr(doc: &scraper::Html, sel: &str, attr: &str) -> Option<String> {
    let selector = scraper::Selector::parse(sel).ok()?;
    let value = match doc.select(&selector).next() {
        Some(el) => el.value().attr(attr).map(String::from),
        None => None,
    };
    value
}
|
||||
|
||||
/// `td` whose contained `label` text begins with `label_prefix` — the
|
||||
/// `scraper`-friendly equivalent of `td:has(label:contains("Foo"))`.
|
||||
fn td_with_label<'a>(
|
||||
doc: &'a scraper::Html,
|
||||
label_prefix: &str,
|
||||
) -> Option<scraper::ElementRef<'a>> {
|
||||
let td_sel = scraper::Selector::parse("td").unwrap();
|
||||
let label_sel = scraper::Selector::parse("label").unwrap();
|
||||
for td in doc.select(&td_sel) {
|
||||
for label in td.select(&label_sel) {
|
||||
let text: String = label.text().collect();
|
||||
if text.trim().starts_with(label_prefix) {
|
||||
return Some(td);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn links_in_labelled_td(doc: &scraper::Html, label_prefix: &str) -> Vec<String> {
|
||||
let Some(td) = td_with_label(doc, label_prefix) else {
|
||||
return Vec::new();
|
||||
};
|
||||
let a_sel = scraper::Selector::parse("a").unwrap();
|
||||
td.select(&a_sel)
|
||||
.map(|a| collapse_whitespace(&a.text().collect::<String>()))
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn labelled_td_child_text(
|
||||
doc: &scraper::Html,
|
||||
label_prefix: &str,
|
||||
child_sel: &str,
|
||||
) -> Option<String> {
|
||||
let td = td_with_label(doc, label_prefix)?;
|
||||
let child = scraper::Selector::parse(child_sel).ok()?;
|
||||
let el = td.select(&child).next()?;
|
||||
let text = collapse_whitespace(&el.text().collect::<String>());
|
||||
(!text.is_empty()).then_some(text)
|
||||
}
|
||||
|
||||
/// Returns the text content of the labelled `td` with the leading
|
||||
/// "Label:" portion stripped — used for "Alternative:" which puts the
|
||||
/// value directly in the cell rather than in a child element.
|
||||
fn labelled_td_value_after_label(
|
||||
doc: &scraper::Html,
|
||||
label_prefix: &str,
|
||||
) -> Option<String> {
|
||||
let td = td_with_label(doc, label_prefix)?;
|
||||
let full: String = td.text().collect();
|
||||
let after = full.split_once(':').map(|(_, r)| r).unwrap_or(&full);
|
||||
let trimmed = collapse_whitespace(after);
|
||||
(!trimmed.is_empty()).then_some(trimmed)
|
||||
}
|
||||
|
||||
/// Trim `s` and squeeze every internal run of whitespace into a single
/// space.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty `out` means "first word".
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
|
||||
|
||||
/// Lowercase-hex SHA-256 over the metadata-only fields of `m`: title,
/// alternative titles, authors, genres, tags, status, summary and cover
/// URL, in that order. `chapters` is not part of the input, and an
/// absent optional field is fed as the empty string.
fn compute_metadata_hash(m: &SourceManga) -> String {
    // ASCII unit separator after every value, record separator after
    // every list. That way a value containing a delimiter character
    // cannot be confused with two shorter values.
    const UNIT_SEP: u8 = 0x1F;
    const RECORD_SEP: u8 = 0x1E;

    fn put(buf: &mut Vec<u8>, value: &str) {
        buf.extend_from_slice(value.as_bytes());
        buf.push(UNIT_SEP);
    }
    fn put_all(buf: &mut Vec<u8>, values: &[String]) {
        for value in values {
            put(buf, value);
        }
        buf.push(RECORD_SEP);
    }

    // Assemble the exact byte stream first, then hash it in one go.
    let mut bytes: Vec<u8> = Vec::new();
    put(&mut bytes, &m.title);
    for list in [&m.alternative_titles, &m.authors, &m.genres, &m.tags] {
        put_all(&mut bytes, list);
    }
    for optional in [&m.status, &m.summary, &m.cover_url] {
        put(&mut bytes, optional.as_deref().unwrap_or(""));
    }

    let mut hasher = Sha256::new();
    hasher.update(&bytes);
    format!("{:x}", hasher.finalize())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Unit tests for the listing/detail HTML parsers and for the URL, key,
    // status, tag-count and page-order helpers defined in the parent module.
    //
    // Fixture note: raw strings (`r#"…"#`) do not process escapes, so a
    // trailing `\` is NOT a line continuation there — it would end up as a
    // literal backslash in the HTML. Raw-string fixtures below therefore use
    // plain newlines; only ordinary `"…"` literals use `\` continuations.
    use super::*;

    // Listing page: `#logo` header plus three `.updatesli` rows, the third
    // of which has an empty href.
    const LISTING_HTML: &str = r#"
        <html><body>
        <header><div id="logo">Target</div></header>
        <div id="left_side">
        <div class="pic_list">
        <div class="updatesli">
        <span><a href="https://target.example/manga/foo">Foo Manga</a></span>
        </div>
        <div class="updatesli">
        <span><a href="https://target.example/manga/bar-baz"> Bar Baz </a></span>
        </div>
        <div class="updatesli">
        <span><a href="">Empty href ignored</a></span>
        </div>
        </div>
        </div>
        </body></html>
    "#;

    // Detail page carrying every field the detail parser is asserted on:
    // title, cover, summary, author/genre/status/alternative rows, tags
    // (some with "(N)" counts) and a three-row chapter table.
    const DETAIL_HTML: &str = r#"
        <html><body>
        <header><div id="logo">Target</div></header>
        <div class="w-title"><h1>Test Manga Title</h1></div>
        <div class="cover"><img src="/cover.jpg"><img src="/extra-not-cover.jpg"></div>
        <div class="manga_summary">A summary of the manga.</div>
        <table>
        <tr><td><label>Author:</label><a href="/a/1">Author One</a><a href="/a/2">Author Two</a></td></tr>
        <tr><td><label>Genre(s):</label><a href="/g/1">Action</a><a href="/g/2">Drama</a></td></tr>
        <tr><td><label>Status:</label><span>Ongoing</span></td></tr>
        <tr><td><label>Alternative:</label> Alt Title 1; Alt Title 2 </td></tr>
        </table>
        <aside><div class="aside-body">
        <a class="tag">Fantasy (21)</a>
        <a class="tag">Romance</a>
        <a class="tag"> Action (5)</a>
        <a class="not-a-tag">should-be-ignored</a>
        </div></aside>
        <table id="chapter_table">
        <tr><td><h4><a class="chico" href="/manga/foo/chapter/1">Ch.1</a></h4></td></tr>
        <tr><td><h4><a class="chico" href="/manga/foo/chapter/2">Ch.2 - The Beginning</a></h4></td></tr>
        <tr><td><h4><a class="chico" href="/manga/foo/chapter/3">Chapter 3: Onward</a></h4></td></tr>
        </table>
        </body></html>
    "#;

    #[test]
    fn parse_manga_list_extracts_title_url_and_derives_key() {
        let refs = parse_manga_list(LISTING_HTML).expect("parse");
        assert_eq!(refs.len(), 2, "third entry has empty href and is skipped");
        assert_eq!(refs[0].title, "Foo Manga");
        assert_eq!(refs[0].url, "https://target.example/manga/foo");
        assert_eq!(refs[0].source_manga_key, "foo");
        assert_eq!(refs[1].title, "Bar Baz");
        assert_eq!(refs[1].source_manga_key, "bar-baz");
    }

    #[test]
    fn parse_manga_list_returns_transient_when_logo_missing() {
        // Broken-page response: no #logo, no listing. Empty Vec would
        // hide this as "page has no mangas"; Transient is the signal
        // upstream code retries on.
        let html = r#"<html><body>
            <p>we're sorry, the request file are not found.</p>
            </body></html>"#;
        let err = parse_manga_list(html).expect_err("expected Transient");
        assert!(err.is_transient(), "got non-transient: {err}");
    }

    #[test]
    fn parse_manga_list_ok_empty_when_logo_present_but_no_items() {
        // Last page of pagination, "no results" search, etc. Legitimately
        // empty must stay distinguishable from "page is broken".
        let html = r#"<html><body>
            <header><div id="logo">Target</div></header>
            <div id="left_side"><div class="pic_list"></div></div>
            </body></html>"#;
        let refs = parse_manga_list(html).expect("logo present == not transient");
        assert!(refs.is_empty());
    }

    #[test]
    fn parse_manga_detail_pulls_all_fields() {
        let m = parse_manga_detail(DETAIL_HTML, "test-key", true).expect("parse");
        assert_eq!(m.source_manga_key, "test-key");
        assert_eq!(m.title, "Test Manga Title");
        assert_eq!(m.summary.as_deref(), Some("A summary of the manga."));
        assert_eq!(m.authors, vec!["Author One", "Author Two"]);
        assert_eq!(m.genres, vec!["Action", "Drama"]);
        assert_eq!(m.status.as_deref(), Some("ongoing"));
        assert_eq!(m.alternative_titles, vec!["Alt Title 1", "Alt Title 2"]);
        // Counts in parentheses are stripped — "Fantasy (21)" → "Fantasy".
        assert_eq!(m.tags, vec!["Fantasy", "Romance", "Action"]);
        assert_eq!(m.cover_url.as_deref(), Some("/cover.jpg"));
        assert!(!m.metadata_hash.is_empty());

        assert_eq!(m.chapters.len(), 3);
        assert_eq!(m.chapters[0].number, 1);
        assert_eq!(m.chapters[0].title.as_deref(), Some("Ch.1"));
        assert_eq!(m.chapters[0].url, "/manga/foo/chapter/1");
        assert_eq!(m.chapters[0].source_chapter_key, "1");
        assert_eq!(m.chapters[1].number, 2);
        assert_eq!(m.chapters[1].title.as_deref(), Some("Ch.2 - The Beginning"));
        assert_eq!(m.chapters[2].number, 3);
        assert_eq!(m.chapters[2].title.as_deref(), Some("Chapter 3: Onward"));
    }

    #[test]
    fn status_normalized_case_insensitively() {
        assert_eq!(normalize_status(Some("Ongoing"), "k").as_deref(), Some("ongoing"));
        assert_eq!(normalize_status(Some("ONGOING"), "k").as_deref(), Some("ongoing"));
        assert_eq!(normalize_status(Some(" completed "), "k").as_deref(), Some("completed"));
    }

    #[test]
    fn unknown_status_logs_and_returns_none() {
        // Logging is observable in test output via tracing-test, but
        // here we just assert the contract: unknown becomes None
        // (and the manga is therefore still synced by the caller).
        assert!(normalize_status(Some("Hiatus"), "k").is_none());
        assert!(normalize_status(Some(""), "k").is_none());
        assert!(normalize_status(None, "k").is_none());
    }

    #[test]
    fn strip_tag_count_drops_trailing_digit_parens_only() {
        assert_eq!(strip_tag_count("Fantasy (21)"), "Fantasy");
        assert_eq!(strip_tag_count(" Action (5) "), "Action");
        assert_eq!(strip_tag_count("Romance"), "Romance");
        // Non-numeric parens stay put.
        assert_eq!(strip_tag_count("Slice of Life (sub)"), "Slice of Life (sub)");
        // Only the trailing paren is considered.
        assert_eq!(strip_tag_count("Tag (a) (12)"), "Tag (a)");
    }

    #[test]
    fn parse_chapter_list_keeps_all_chapters_with_unique_keys() {
        // Real listing fixture from the target site. 15 rows: chapters
        // with various Ch.N markup, one hiatus row, three "notice." rows,
        // and duplicates of Ch.1 and Ch.52 from different uploaders.
        // Every row must survive parsing and every chapter must have a
        // distinct source_chapter_key — chapter URLs all end in `/pg-1/`
        // (the reader's page-1 entry point), and a naive
        // last-segment-of-URL derivation returns "pg-1" for every row,
        // collapsing the whole list into one downstream chapter row.
        let html = include_str!(
            "../../../tests/fixtures/target/chapter_list_uu.html"
        );
        let doc = scraper::Html::parse_document(html);
        let chapters = parse_chapter_list(&doc);

        assert_eq!(chapters.len(), 15, "every row kept (notices/hiatus included)");

        // After sorting, any duplicate key shows up as an adjacent pair.
        let mut keys: Vec<&str> =
            chapters.iter().map(|c| c.source_chapter_key.as_str()).collect();
        keys.sort();
        let dupe = keys.windows(2).find(|w| w[0] == w[1]).map(|w| w[0]);
        assert!(dupe.is_none(), "duplicate chapter key: {dupe:?}");
        for c in &chapters {
            assert_ne!(
                c.source_chapter_key, "pg-1",
                "key must not be the reader-page segment: {:?}", c
            );
        }

        // Latest chapter is first (source orders newest → oldest).
        assert_eq!(chapters[0].number, 67);
        assert_eq!(chapters[0].title.as_deref(), Some("Ch.67 : Official"));
        assert_eq!(chapters[0].source_chapter_key, "br_chapter-379272");

        // Duplicate-number chapters (different uploaders) survive as
        // two rows. The (manga_id, number) UNIQUE collapse is a
        // downstream schema concern handled separately.
        assert_eq!(
            chapters.iter().filter(|c| c.number == 52).count(),
            2,
            "two Ch.52 uploads must both survive parsing"
        );
        assert_eq!(
            chapters.iter().filter(|c| c.number == 1).count(),
            2,
            "Ch.1 Official and Ch.1 Team Hazama are both kept"
        );

        // Notices / hiatus rows have no leading digit so they parse to
        // number=0. They are not filtered out.
        let zero = chapters.iter().filter(|c| c.number == 0).count();
        assert!(zero >= 4, "hiatus + 3 notices kept; got {zero}");
    }

    #[test]
    fn parse_chapter_number_grabs_first_integer_run() {
        assert_eq!(parse_chapter_number("Ch.1"), Some(1));
        assert_eq!(parse_chapter_number("Chapter 12"), Some(12));
        assert_eq!(parse_chapter_number("Ch.2 - The Beginning"), Some(2));
        // Decimal chapters keep the integer part (i32 storage).
        assert_eq!(parse_chapter_number("Ch.12.5"), Some(12));
        assert_eq!(parse_chapter_number("Special"), None);
    }

    #[test]
    fn parse_last_page_picks_highest_pagination_link() {
        let html = r#"
            <div id="left_side"><div class="pagination">
            <a href="/list/1/">Prev</a>
            <ol>
            <li><a href="/list/1/">1</a></li>
            <li><a href="/list/2/">2</a></li>
            <li><a href="/list/47/">47</a></li>
            <li><a href="/list/2/">Next</a></li>
            </ol>
            </div></div>
        "#;
        let doc = scraper::Html::parse_document(html);
        assert_eq!(parse_last_page(&doc), Some(47));
    }

    #[test]
    fn parse_last_page_none_when_no_pagination() {
        let doc = scraper::Html::parse_document("<html></html>");
        assert!(parse_last_page(&doc).is_none());
    }

    #[test]
    fn page_url_substitutes_numeric_path_segment() {
        assert_eq!(
            page_url("https://site.example/list/1/?f=1&o=1&sortby=update_date&e=", 5),
            "https://site.example/list/5/?f=1&o=1&sortby=update_date&e="
        );
        // No numeric segment → URL returned unchanged.
        assert_eq!(
            page_url("https://site.example/list/?f=1", 5),
            "https://site.example/list/?f=1"
        );
    }

    #[test]
    fn derive_key_strips_trailing_slash_and_query() {
        assert_eq!(derive_key_from_url("https://x.example/manga/foo/"), "foo");
        assert_eq!(derive_key_from_url("https://x.example/manga/foo?p=1"), "foo");
        assert_eq!(derive_key_from_url("/manga/bar"), "bar");
    }

    #[test]
    fn derive_chapter_key_strips_trailing_reader_page_segment() {
        // Listing links go to page 1 of the reader; strip /pg-\d+/.
        assert_eq!(
            derive_chapter_key_from_url(".../uu/br_chapter-379272/pg-1/"),
            "br_chapter-379272"
        );
        assert_eq!(
            derive_chapter_key_from_url(".../uu/to_chapter-13/pg-1/"),
            "to_chapter-13"
        );
        // Defensive: deep-link to a non-first page should still resolve
        // to the same chapter identity.
        assert_eq!(
            derive_chapter_key_from_url(".../uu/br_chapter-379272/pg-25/"),
            "br_chapter-379272"
        );
        // No reader-page suffix → behaves like derive_key_from_url.
        assert_eq!(
            derive_chapter_key_from_url(".../uu/br_chapter-379272/"),
            "br_chapter-379272"
        );
        // Query strings are stripped.
        assert_eq!(
            derive_chapter_key_from_url(".../uu/br_chapter-379272/pg-1/?ref=x"),
            "br_chapter-379272"
        );
        // `pg-foo` is not a valid reader-page segment; treated as identity.
        assert_eq!(
            derive_chapter_key_from_url(".../uu/something/pg-foo/"),
            "pg-foo"
        );
        // Bare `pg-` (no digits) likewise not stripped.
        assert_eq!(
            derive_chapter_key_from_url(".../uu/something/pg-/"),
            "pg-"
        );
    }

    #[test]
    fn metadata_hash_is_stable_and_field_sensitive() {
        let base = parse_manga_detail(DETAIL_HTML, "k", true).unwrap();
        let again = parse_manga_detail(DETAIL_HTML, "k", true).unwrap();
        assert_eq!(base.metadata_hash, again.metadata_hash);

        // Same fields except status flipped — hash must change.
        let altered_html = DETAIL_HTML.replace("Ongoing", "Completed");
        let altered = parse_manga_detail(&altered_html, "k", true).unwrap();
        assert_ne!(base.metadata_hash, altered.metadata_hash);
    }

    #[test]
    fn missing_optional_fields_parse_to_none() {
        let html = r#"<html><body>
            <header><div id="logo">Target</div></header>
            <div class="w-title"><h1>Minimal</h1></div></body></html>"#;
        let m = parse_manga_detail(html, "min", true).unwrap();
        assert_eq!(m.title, "Minimal");
        assert!(m.summary.is_none());
        assert!(m.status.is_none());
        assert!(m.authors.is_empty());
        assert!(m.genres.is_empty());
        assert!(m.tags.is_empty());
        assert!(m.alternative_titles.is_empty());
        assert!(m.chapters.is_empty());
    }

    #[test]
    fn parse_manga_detail_skips_chapters_when_disabled() {
        // Same fixture that yields 3 chapters above; with include_chapters=false
        // the chapter table is ignored and the rest of the metadata still parses.
        let m = parse_manga_detail(DETAIL_HTML, "k", false).unwrap();
        assert!(m.chapters.is_empty(), "chapters should be empty when disabled");
        assert_eq!(m.title, "Test Manga Title", "other fields still parse");
        assert_eq!(m.authors, vec!["Author One", "Author Two"]);
    }

    #[test]
    fn parse_manga_detail_errors_on_missing_title() {
        // Logo present (page is alive) — failure here is a real parse
        // miss (Other), not Transient.
        let html = r#"<html><body>
            <header><div id="logo">Target</div></header>
            <p>nothing</p></body></html>"#;
        let err = parse_manga_detail(html, "x", true).unwrap_err();
        assert!(!err.is_transient(), "expected Other, got Transient: {err}");
        assert!(err.to_string().contains("missing .w-title h1"));
    }

    #[test]
    fn classify_navigate_html_passes_normal_body_through() {
        let body = "<html><body><header><div id='logo'>Target</div></header>\
                    <p>content</p></body></html>"
            .to_string();
        let out = classify_navigate_html(body.clone()).expect("ok");
        assert_eq!(out, body);
    }

    #[test]
    fn classify_navigate_html_returns_transient_for_broken_template() {
        let body = "<html><head></head><body>\
                    <p>we're sorry, the request file are not found.</p>\
                    </body></html>"
            .to_string();
        let err = classify_navigate_html(body).expect_err("expected Transient");
        assert!(err.is_transient(), "got non-transient: {err}");
    }

    #[test]
    fn parse_manga_detail_returns_transient_when_logo_missing() {
        // Broken-page response on a detail URL — must be reported as
        // Transient so the job is retried rather than logging "missing
        // .w-title h1" against a permanently-skipped manga.
        let html = "<html><body>\
                    <p>we're sorry, the request file are not found.</p>\
                    </body></html>";
        let err = parse_manga_detail(html, "x", true).expect_err("expected Transient");
        assert!(err.is_transient(), "got non-transient: {err}");
    }

    #[test]
    fn build_page_order_backfill_is_last_to_one() {
        // Backfill walks pages oldest-first: queue is [last, last-1, ..., 1]
        // so popping from the front yields the last page first.
        let order = build_page_order(Some(3), true);
        assert_eq!(Vec::from(order), vec![3, 2, 1]);
    }

    #[test]
    fn build_page_order_incremental_is_one_to_last() {
        // Incremental walks newest-first in natural source order.
        let order = build_page_order(Some(3), false);
        assert_eq!(Vec::from(order), vec![1, 2, 3]);
    }

    #[test]
    fn build_page_order_falls_back_to_page_one_only_without_pagination() {
        let backfill = build_page_order(None, true);
        assert_eq!(Vec::from(backfill), vec![1]);
        let incremental = build_page_order(None, false);
        assert_eq!(Vec::from(incremental), vec![1]);
    }

    #[test]
    fn build_page_order_single_page_index_yields_one_entry() {
        // Sources with exactly one page should not yield duplicates
        // regardless of mode.
        let backfill = build_page_order(Some(1), true);
        assert_eq!(Vec::from(backfill), vec![1]);
        let incremental = build_page_order(Some(1), false);
        assert_eq!(Vec::from(incremental), vec![1]);
    }
}
|
||||
50
backend/src/domain/collection.rs
Normal file
50
backend/src/domain/collection.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::FromRow;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::patch::Patch;
|
||||
|
||||
/// A user-owned collection as stored in the `collections` table.
///
/// Derives `FromRow`, so it can be populated directly from a query
/// that selects these six columns by name.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct Collection {
|
||||
pub id: Uuid,
|
||||
/// Owner of the collection.
pub user_id: Uuid,
|
||||
pub name: String,
|
||||
/// Optional free-text description; `None` when the column is NULL.
pub description: Option<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Shape returned by `GET /me/collections`. Enriched with the manga
|
||||
/// count and up to three sample cover paths so a collection card can
|
||||
/// render without extra round-trips.
|
||||
///
/// Serialize-only (no `Deserialize` derive): this is a response type.
#[derive(Debug, Clone, Serialize, FromRow)]
|
||||
pub struct CollectionSummary {
|
||||
pub id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub name: String,
|
||||
pub description: Option<String>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
/// Number of mangas in the collection.
pub manga_count: i64,
|
||||
/// Cover image keys of up to three sample mangas (newest-added
|
||||
/// first). `Vec<String>` rather than `Option<...>` so an empty
|
||||
/// collection renders as `[]` rather than `null`.
|
||||
pub sample_covers: Vec<String>,
|
||||
}
|
||||
|
||||
/// Deserialize-only payload describing a collection to create.
/// NOTE(review): presumably the request body of the create-collection
/// endpoint — confirm against the handler.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct NewCollection {
|
||||
/// Required: deserialization fails when the key is missing.
pub name: String,
|
||||
/// Optional: a missing key deserializes to `None`.
#[serde(default)]
|
||||
pub description: Option<String>,
|
||||
}
|
||||
|
||||
/// Deserialize-only payload for a partial update of a collection.
/// `Default` yields a patch with `name: None` and
/// `description: Patch::Unchanged`.
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct CollectionPatch {
|
||||
/// New name, if any. A missing key and an explicit `null` both
/// deserialize to `None` (presumably "leave the name alone" —
/// confirm in the handler).
pub name: Option<String>,
|
||||
/// Three-state: missing key leaves description alone; explicit
|
||||
/// `null` clears it; a string sets it. See `Patch`.
|
||||
#[serde(default)]
|
||||
pub description: Patch<String>,
|
||||
}
|
||||
@@ -5,6 +5,7 @@ use uuid::Uuid;
|
||||
|
||||
use super::author::AuthorRef;
|
||||
use super::genre::GenreRef;
|
||||
use super::patch::Patch;
|
||||
use super::tag::TagRef;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
@@ -73,82 +74,6 @@ pub struct MangaPatch {
|
||||
pub genre_ids: Option<Vec<Uuid>>,
|
||||
}
|
||||
|
||||
/// Three-state container for nullable PATCH fields.
|
||||
///
|
||||
/// `serde`'s default behaviour collapses both "field missing" and
|
||||
/// "field is `null`" to `Option::None`, which means an `Option<T>`
|
||||
/// patch field can't distinguish "leave alone" from "set to NULL".
|
||||
/// `Patch<T>` carries that distinction by deserializing JSON `null`
|
||||
/// into `Clear` and any value into `Set`; with `#[serde(default)]` on
|
||||
/// the field, a missing key falls through to `Unchanged`.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
pub enum Patch<T> {
|
||||
/// Field absent from the request — leave the column untouched.
|
||||
#[default]
|
||||
Unchanged,
|
||||
/// Field present and explicitly `null` — set the column to NULL.
|
||||
Clear,
|
||||
/// Field present with a value — set the column to that value.
|
||||
Set(T),
|
||||
}
|
||||
|
||||
impl<T> Patch<T> {
|
||||
/// Whether the request indicated this field should be written
|
||||
/// (either to a new value or to NULL).
|
||||
pub fn is_provided(&self) -> bool {
|
||||
!matches!(self, Patch::Unchanged)
|
||||
}
|
||||
|
||||
/// The value to bind when writing, or `None` for `Unchanged`/`Clear`.
|
||||
pub fn set_value(&self) -> Option<&T> {
|
||||
match self {
|
||||
Patch::Set(v) => Some(v),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de, T> serde::Deserialize<'de> for Patch<T>
|
||||
where
|
||||
T: serde::Deserialize<'de>,
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
Option::<T>::deserialize(deserializer).map(|opt| match opt {
|
||||
Some(v) => Patch::Set(v),
|
||||
None => Patch::Clear,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Holder {
|
||||
#[serde(default)]
|
||||
desc: Patch<String>,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_key_is_unchanged() {
|
||||
let h: Holder = serde_json::from_value(json!({})).unwrap();
|
||||
assert_eq!(h.desc, Patch::Unchanged);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn explicit_null_is_clear() {
|
||||
let h: Holder = serde_json::from_value(json!({ "desc": null })).unwrap();
|
||||
assert_eq!(h.desc, Patch::Clear);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn value_is_set() {
|
||||
let h: Holder = serde_json::from_value(json!({ "desc": "x" })).unwrap();
|
||||
assert_eq!(h.desc, Patch::Set("x".into()));
|
||||
}
|
||||
}
|
||||
// `Patch<T>` lives in `super::patch` so other resources (collections,
|
||||
// future PATCH endpoints) can reuse the same three-state semantics
|
||||
// without re-importing through `manga::`.
|
||||
|
||||
@@ -2,11 +2,15 @@ pub mod api_token;
|
||||
pub mod author;
|
||||
pub mod bookmark;
|
||||
pub mod chapter;
|
||||
pub mod collection;
|
||||
pub mod genre;
|
||||
pub mod manga;
|
||||
pub mod page;
|
||||
pub mod patch;
|
||||
pub mod read_progress;
|
||||
pub mod session;
|
||||
pub mod tag;
|
||||
pub mod upload_entry;
|
||||
pub mod user;
|
||||
pub mod user_preferences;
|
||||
|
||||
@@ -14,10 +18,14 @@ pub use api_token::ApiToken;
|
||||
pub use author::{Author, AuthorRef, AuthorWithCount};
|
||||
pub use bookmark::{Bookmark, BookmarkSummary};
|
||||
pub use chapter::Chapter;
|
||||
pub use collection::{Collection, CollectionSummary};
|
||||
pub use genre::{Genre, GenreRef};
|
||||
pub use manga::{Manga, MangaCard, MangaDetail};
|
||||
pub use page::Page;
|
||||
pub use patch::Patch;
|
||||
pub use read_progress::{ReadProgress, ReadProgressForManga, ReadProgressSummary};
|
||||
pub use session::Session;
|
||||
pub use tag::{Tag, TagRef};
|
||||
pub use upload_entry::UploadEntry;
|
||||
pub use user::User;
|
||||
pub use user_preferences::UserPreferences;
|
||||
|
||||
81
backend/src/domain/patch.rs
Normal file
81
backend/src/domain/patch.rs
Normal file
@@ -0,0 +1,81 @@
|
||||
//! Three-state container for PATCH fields.
|
||||
//!
|
||||
//! `serde`'s default behaviour collapses both "field missing" and
|
||||
//! "field is `null`" to `Option::None`, which means an `Option<T>`
|
||||
//! patch field can't distinguish "leave alone" from "set to NULL".
|
||||
//! `Patch<T>` carries that distinction by deserializing JSON `null`
|
||||
//! into `Clear` and any value into `Set`; with `#[serde(default)]`
|
||||
//! on the field, a missing key falls through to `Unchanged`.
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum Patch<T> {
    /// The key was not part of the request; the column keeps its
    /// current value. This is the `Default`.
    #[default]
    Unchanged,
    /// The key was sent as an explicit `null`; the column becomes NULL.
    Clear,
    /// The key was sent with a value; the column is set to it.
    Set(T),
}

impl<T> Patch<T> {
    /// `true` when the request asked for a write — either a new value
    /// (`Set`) or NULL (`Clear`); `false` only for `Unchanged`.
    pub fn is_provided(&self) -> bool {
        match self {
            Patch::Unchanged => false,
            Patch::Clear | Patch::Set(_) => true,
        }
    }

    /// Borrows the value to bind when writing. Only `Set` carries one;
    /// `Unchanged` and `Clear` both yield `None`.
    pub fn set_value(&self) -> Option<&T> {
        if let Patch::Set(value) = self {
            Some(value)
        } else {
            None
        }
    }
}
|
||||
|
||||
impl<'de, T> serde::Deserialize<'de> for Patch<T>
|
||||
where
|
||||
T: serde::Deserialize<'de>,
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
Option::<T>::deserialize(deserializer).map(|opt| match opt {
|
||||
Some(v) => Patch::Set(v),
|
||||
None => Patch::Clear,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Covers the three states of `Patch` as seen through a struct field
    // annotated with `#[serde(default)]`.
    use super::*;
    use serde::Deserialize;
    use serde_json::json;

    #[derive(Deserialize)]
    struct Holder {
        #[serde(default)]
        desc: Patch<String>,
    }

    /// Decodes `body` into a `Holder` and returns its `desc` field.
    fn desc_of(body: serde_json::Value) -> Patch<String> {
        let holder: Holder = serde_json::from_value(body).unwrap();
        holder.desc
    }

    #[test]
    fn missing_key_is_unchanged() {
        assert_eq!(desc_of(json!({})), Patch::Unchanged);
    }

    #[test]
    fn explicit_null_is_clear() {
        assert_eq!(desc_of(json!({ "desc": null })), Patch::Clear);
    }

    #[test]
    fn value_is_set() {
        assert_eq!(desc_of(json!({ "desc": "x" })), Patch::Set(String::from("x")));
    }
}
|
||||
50
backend/src/domain/read_progress.rs
Normal file
50
backend/src/domain/read_progress.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::FromRow;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// A user's reading position within a manga: the chapter (if any) and
/// the page. Derives `FromRow`, so it maps from a query selecting
/// these columns by name.
/// NOTE(review): presumably one row per (user, manga) — confirm
/// against the table's key.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct ReadProgress {
|
||||
pub user_id: Uuid,
|
||||
pub manga_id: Uuid,
|
||||
/// Nullable chapter reference.
pub chapter_id: Option<Uuid>,
|
||||
pub page: i32,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Enriched row for the history view — joins in the manga's title and
|
||||
/// cover plus the chapter number (when the chapter still exists) so a
|
||||
/// card can render without extra round-trips.
|
||||
///
/// Serialize-only (no `Deserialize` derive): this is a response type.
#[derive(Debug, Clone, Serialize, FromRow)]
|
||||
pub struct ReadProgressSummary {
|
||||
pub manga_id: Uuid,
|
||||
pub manga_title: String,
|
||||
/// `None` when the manga has no cover image path.
pub manga_cover_image_path: Option<String>,
|
||||
pub chapter_id: Option<Uuid>,
|
||||
/// `None` when the chapter was deleted after this row was written
|
||||
/// (FK ON DELETE SET NULL on `chapter_id`).
|
||||
pub chapter_number: Option<i32>,
|
||||
pub page: i32,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Returned by `GET /me/read-progress/:manga_id`. Same shape as
|
||||
/// `ReadProgressSummary` minus the manga title/cover (the caller
|
||||
/// already knows them — they're on the manga detail page). Crucially
|
||||
/// includes `chapter_number` so the "Continue reading" CTA can render
|
||||
/// without resolving the chapter id against a paged chapters list.
|
||||
#[derive(Debug, Clone, Serialize, FromRow)]
|
||||
pub struct ReadProgressForManga {
|
||||
pub manga_id: Uuid,
|
||||
pub chapter_id: Option<Uuid>,
|
||||
/// Number of the referenced chapter; optional like `chapter_id`.
pub chapter_number: Option<i32>,
|
||||
pub page: i32,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Deserialize-only payload for recording read progress on a manga.
/// `manga_id` is required; `chapter_id` and `page` may be omitted or
/// `null`, both of which deserialize to `None`.
/// NOTE(review): the default applied when `page` is `None` is decided
/// by the caller and is not visible here — confirm in the handler/repo.
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct UpsertReadProgress {
|
||||
pub manga_id: Uuid,
|
||||
pub chapter_id: Option<Uuid>,
|
||||
pub page: Option<i32>,
|
||||
}
|
||||
40
backend/src/domain/upload_entry.rs
Normal file
40
backend/src/domain/upload_entry.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::Serialize;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::chapter::Chapter;
|
||||
use super::manga::Manga;
|
||||
|
||||
/// Tagged union used by `GET /me/uploads` to interleave manga + chapter
|
||||
/// rows chronologically. Serialised as `{ "kind": "...", ... }` so a
|
||||
/// TypeScript discriminated union can pattern-match on `kind`.
|
||||
///
/// With `tag = "kind"` and `rename_all = "snake_case"` the tag values
/// are `"manga"` and `"chapter"`, and each variant's fields sit next to
/// `kind` in the same JSON object.
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||
pub enum UploadEntry {
|
||||
/// An uploaded manga.
Manga {
|
||||
manga: Manga,
|
||||
/// Mirrored from `manga.created_at` for ordering convenience;
|
||||
/// the frontend reads this to display the timestamp in a
|
||||
/// kind-agnostic column.
|
||||
created_at: DateTime<Utc>,
|
||||
},
|
||||
/// An uploaded chapter, carrying the parent manga's id, title and
/// cover path alongside the chapter itself.
Chapter {
|
||||
manga_id: Uuid,
|
||||
manga_title: String,
|
||||
manga_cover_image_path: Option<String>,
|
||||
chapter: Chapter,
|
||||
created_at: DateTime<Utc>,
|
||||
},
|
||||
}
|
||||
|
||||
impl UploadEntry {
|
||||
/// Timestamp used for chronological ordering. The repo sorts on
|
||||
/// the underlying column server-side; this is here for callers
|
||||
/// that need to merge or page in Rust.
|
||||
pub fn created_at(&self) -> DateTime<Utc> {
|
||||
match self {
|
||||
UploadEntry::Manga { created_at, .. } => *created_at,
|
||||
UploadEntry::Chapter { created_at, .. } => *created_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ pub mod api;
|
||||
pub mod app;
|
||||
pub mod auth;
|
||||
pub mod config;
|
||||
pub mod crawler;
|
||||
pub mod domain;
|
||||
pub mod error;
|
||||
pub mod repo;
|
||||
|
||||
@@ -12,10 +12,21 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let config = mangalord::config::Config::from_env()?;
|
||||
let addr: SocketAddr = config.bind_address.parse()?;
|
||||
let app = mangalord::app::build(config).await?;
|
||||
let mangalord::app::AppHandle { router, daemon } = mangalord::app::build(config).await?;
|
||||
|
||||
tracing::info!(%addr, "mangalord listening");
|
||||
let listener = tokio::net::TcpListener::bind(addr).await?;
|
||||
axum::serve(listener, app).await?;
|
||||
axum::serve(listener, router)
|
||||
.with_graceful_shutdown(async {
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
tracing::info!("ctrl-c received; shutting down");
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Drain background tasks (crawler daemon) before exiting so Chromium
|
||||
// gets a clean shutdown rather than relying on kill-on-drop.
|
||||
if let Some(d) = daemon {
|
||||
d.shutdown().await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ pub async fn list_for_user(
|
||||
user_id: Uuid,
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> AppResult<Vec<BookmarkSummary>> {
|
||||
) -> AppResult<(Vec<BookmarkSummary>, i64)> {
|
||||
let rows = sqlx::query_as::<_, BookmarkSummary>(
|
||||
r#"
|
||||
SELECT
|
||||
@@ -72,7 +72,12 @@ pub async fn list_for_user(
|
||||
.bind(offset)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
Ok(rows)
|
||||
let (total,): (i64,) =
|
||||
sqlx::query_as("SELECT count(*) FROM bookmarks WHERE user_id = $1")
|
||||
.bind(user_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok((rows, total))
|
||||
}
|
||||
|
||||
pub async fn find_owner(pool: &PgPool, id: Uuid) -> AppResult<Option<Uuid>> {
|
||||
|
||||
@@ -12,12 +12,15 @@ pub async fn list_for_manga(
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> AppResult<Vec<Chapter>> {
|
||||
// Secondary sort by created_at gives duplicate-numbered chapters
|
||||
// (multiple uploaders/translations of the same number) a stable
|
||||
// order in lists and prev/next reader navigation.
|
||||
let rows = sqlx::query_as::<_, Chapter>(
|
||||
r#"
|
||||
SELECT id, manga_id, number, title, page_count, created_at
|
||||
FROM chapters
|
||||
WHERE manga_id = $1
|
||||
ORDER BY number ASC
|
||||
ORDER BY number ASC, created_at ASC
|
||||
LIMIT $2 OFFSET $3
|
||||
"#,
|
||||
)
|
||||
@@ -29,52 +32,65 @@ pub async fn list_for_manga(
|
||||
Ok(rows)
|
||||
}
|
||||
|
||||
pub async fn find_by_manga_and_number(
|
||||
/// Look up a chapter by its UUID, scoped to its manga so a UUID guessed
|
||||
/// from a different manga's URL doesn't accidentally resolve.
|
||||
pub async fn find_by_id_in_manga(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
number: i32,
|
||||
chapter_id: Uuid,
|
||||
) -> AppResult<Option<Chapter>> {
|
||||
let row = sqlx::query_as::<_, Chapter>(
|
||||
r#"
|
||||
SELECT id, manga_id, number, title, page_count, created_at
|
||||
FROM chapters
|
||||
WHERE manga_id = $1 AND number = $2
|
||||
WHERE manga_id = $1 AND id = $2
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(number)
|
||||
.bind(chapter_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
/// Accepts any `PgExecutor` so the upload handler can run this inside a
|
||||
/// transaction with the per-page inserts. Returns `AppError::Conflict`
|
||||
/// on the (manga_id, number) unique violation so handlers can surface a
|
||||
/// clean 409.
|
||||
/// transaction with the per-page inserts.
|
||||
///
|
||||
/// `uploaded_by` records who uploaded the chapter and feeds the
|
||||
/// per-user upload history. `None` means "historical / API token with
|
||||
/// no associated user" — kept nullable to support that case.
|
||||
///
|
||||
/// Chapter identity is the row UUID; the same (manga_id, number)
|
||||
/// combination can repeat (multiple translations, re-uploads). The
|
||||
/// `is_unique_violation` branch below is a defensive holdover from
|
||||
/// 0001's (manga_id, number) UNIQUE — it can no longer fire under
|
||||
/// normal operation, but we surface a clean 409 if a future migration
|
||||
/// re-adds any chapter uniqueness.
|
||||
pub async fn create<'e, E: PgExecutor<'e>>(
|
||||
executor: E,
|
||||
manga_id: Uuid,
|
||||
number: i32,
|
||||
title: Option<&str>,
|
||||
uploaded_by: Option<Uuid>,
|
||||
) -> AppResult<Chapter> {
|
||||
let result = sqlx::query_as::<_, Chapter>(
|
||||
r#"
|
||||
INSERT INTO chapters (manga_id, number, title)
|
||||
VALUES ($1, $2, $3)
|
||||
INSERT INTO chapters (manga_id, number, title, uploaded_by)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
RETURNING id, manga_id, number, title, page_count, created_at
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(number)
|
||||
.bind(title)
|
||||
.bind(uploaded_by)
|
||||
.fetch_one(executor)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(c) => Ok(c),
|
||||
Err(e) if is_unique_violation(&e) => Err(AppError::Conflict(format!(
|
||||
"chapter {number} already exists for this manga"
|
||||
"chapter {number} conflicts with an existing chapter for this manga"
|
||||
))),
|
||||
Err(e) => Err(AppError::Database(e)),
|
||||
}
|
||||
|
||||
280
backend/src/repo/collection.rs
Normal file
280
backend/src/repo/collection.rs
Normal file
@@ -0,0 +1,280 @@
|
||||
//! Collection persistence.
|
||||
//!
|
||||
//! Same plain-function pattern as `repo::bookmark`. Ownership is
|
||||
//! tracked via `collections.user_id`; handlers call `find_owner`
|
||||
//! before mutations to keep 403/404 honest.
|
||||
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::domain::collection::{Collection, CollectionSummary};
|
||||
use crate::domain::manga::Manga;
|
||||
use crate::error::{AppError, AppResult};
|
||||
|
||||
pub async fn create(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
name: &str,
|
||||
description: Option<&str>,
|
||||
) -> AppResult<Collection> {
|
||||
let row = sqlx::query_as::<_, Collection>(
|
||||
r#"
|
||||
INSERT INTO collections (user_id, name, description)
|
||||
VALUES ($1, $2, $3)
|
||||
RETURNING id, user_id, name, description, created_at, updated_at
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(name.trim())
|
||||
.bind(description)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
sqlx::Error::Database(ref db_err) if db_err.is_unique_violation() => {
|
||||
AppError::Conflict("a collection with this name already exists".into())
|
||||
}
|
||||
other => AppError::Database(other),
|
||||
})?;
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
pub async fn get(pool: &PgPool, id: Uuid) -> AppResult<Collection> {
|
||||
sqlx::query_as::<_, Collection>(
|
||||
r#"
|
||||
SELECT id, user_id, name, description, created_at, updated_at
|
||||
FROM collections
|
||||
WHERE id = $1
|
||||
"#,
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
.ok_or(AppError::NotFound)
|
||||
}
|
||||
|
||||
pub async fn find_owner(pool: &PgPool, id: Uuid) -> AppResult<Option<Uuid>> {
|
||||
let row: Option<(Uuid,)> =
|
||||
sqlx::query_as("SELECT user_id FROM collections WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row.map(|(u,)| u))
|
||||
}
|
||||
|
||||
/// Paged list of one user's collections. Includes `manga_count` and up
|
||||
/// to three sample cover image keys (newest-added first) so a card can
|
||||
/// render without a follow-up fetch.
|
||||
pub async fn list_for_user(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> AppResult<(Vec<CollectionSummary>, i64)> {
|
||||
let rows = sqlx::query_as::<_, CollectionSummary>(
|
||||
r#"
|
||||
SELECT
|
||||
c.id, c.user_id, c.name, c.description, c.created_at, c.updated_at,
|
||||
(SELECT count(*) FROM collection_mangas cm WHERE cm.collection_id = c.id)
|
||||
AS manga_count,
|
||||
COALESCE(
|
||||
(
|
||||
-- `array_agg(... ORDER BY ...)` is the only
|
||||
-- spec-guaranteed way to preserve element order;
|
||||
-- a subquery's ORDER BY isn't a contract the
|
||||
-- outer aggregate has to honour. Adding manga_id
|
||||
-- as a tiebreaker keeps the order stable when
|
||||
-- multiple rows share `added_at` (bulk imports).
|
||||
SELECT array_agg(cover_image_path ORDER BY added_at DESC, manga_id)
|
||||
FROM (
|
||||
SELECT m.cover_image_path, cm2.added_at, cm2.manga_id
|
||||
FROM collection_mangas cm2
|
||||
JOIN mangas m ON m.id = cm2.manga_id
|
||||
WHERE cm2.collection_id = c.id
|
||||
AND m.cover_image_path IS NOT NULL
|
||||
ORDER BY cm2.added_at DESC, cm2.manga_id
|
||||
LIMIT 3
|
||||
) p
|
||||
),
|
||||
ARRAY[]::text[]
|
||||
) AS sample_covers
|
||||
FROM collections c
|
||||
WHERE c.user_id = $1
|
||||
ORDER BY c.updated_at DESC, c.id
|
||||
LIMIT $2 OFFSET $3
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(limit)
|
||||
.bind(offset)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let (total,): (i64,) =
|
||||
sqlx::query_as("SELECT count(*) FROM collections WHERE user_id = $1")
|
||||
.bind(user_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok((rows, total))
|
||||
}
|
||||
|
||||
pub async fn update(
|
||||
pool: &PgPool,
|
||||
id: Uuid,
|
||||
name: Option<&str>,
|
||||
description_provided: bool,
|
||||
description: Option<&str>,
|
||||
) -> AppResult<Collection> {
|
||||
let row = sqlx::query_as::<_, Collection>(
|
||||
r#"
|
||||
UPDATE collections
|
||||
SET name = COALESCE($2, name),
|
||||
description = CASE WHEN $3::boolean THEN $4 ELSE description END,
|
||||
updated_at = now()
|
||||
WHERE id = $1
|
||||
RETURNING id, user_id, name, description, created_at, updated_at
|
||||
"#,
|
||||
)
|
||||
.bind(id)
|
||||
.bind(name.map(str::trim))
|
||||
.bind(description_provided)
|
||||
.bind(description)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
sqlx::Error::Database(ref db_err) if db_err.is_unique_violation() => {
|
||||
AppError::Conflict("a collection with this name already exists".into())
|
||||
}
|
||||
other => AppError::Database(other),
|
||||
})?
|
||||
.ok_or(AppError::NotFound)?;
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
pub async fn delete(pool: &PgPool, id: Uuid) -> AppResult<()> {
|
||||
sqlx::query("DELETE FROM collections WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a manga to a collection. Returns `true` if a new attachment was
|
||||
/// created (handler picks 201), `false` if the manga was already in
|
||||
/// the collection (handler picks 200). Touches `updated_at` so the
|
||||
/// "recent collections" sort reflects activity.
|
||||
///
|
||||
/// FK violations (manga deleted between the handler's `exists` check
|
||||
/// and this insert — a race the API can't fully close from the
|
||||
/// outside) are remapped to `NotFound` so the handler returns 404
|
||||
/// rather than 500.
|
||||
pub async fn add_manga(
|
||||
pool: &PgPool,
|
||||
collection_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
) -> AppResult<bool> {
|
||||
let mut tx = pool.begin().await?;
|
||||
let inserted = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO collection_mangas (collection_id, manga_id)
|
||||
VALUES ($1, $2)
|
||||
ON CONFLICT DO NOTHING
|
||||
"#,
|
||||
)
|
||||
.bind(collection_id)
|
||||
.bind(manga_id)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
sqlx::Error::Database(ref db_err) if db_err.is_foreign_key_violation() => {
|
||||
AppError::NotFound
|
||||
}
|
||||
other => AppError::Database(other),
|
||||
})?;
|
||||
let rows_affected = inserted.rows_affected();
|
||||
if rows_affected > 0 {
|
||||
sqlx::query("UPDATE collections SET updated_at = now() WHERE id = $1")
|
||||
.bind(collection_id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
}
|
||||
tx.commit().await?;
|
||||
Ok(rows_affected > 0)
|
||||
}
|
||||
|
||||
pub async fn remove_manga(
|
||||
pool: &PgPool,
|
||||
collection_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
) -> AppResult<()> {
|
||||
let mut tx = pool.begin().await?;
|
||||
let rows_affected = sqlx::query(
|
||||
"DELETE FROM collection_mangas WHERE collection_id = $1 AND manga_id = $2",
|
||||
)
|
||||
.bind(collection_id)
|
||||
.bind(manga_id)
|
||||
.execute(&mut *tx)
|
||||
.await?
|
||||
.rows_affected();
|
||||
if rows_affected > 0 {
|
||||
sqlx::query("UPDATE collections SET updated_at = now() WHERE id = $1")
|
||||
.bind(collection_id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
}
|
||||
tx.commit().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn list_mangas(
|
||||
pool: &PgPool,
|
||||
collection_id: Uuid,
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> AppResult<(Vec<Manga>, i64)> {
|
||||
let rows = sqlx::query_as::<_, Manga>(
|
||||
r#"
|
||||
SELECT m.id, m.title, m.status, m.alt_titles, m.description,
|
||||
m.cover_image_path, m.created_at, m.updated_at
|
||||
FROM collection_mangas cm
|
||||
JOIN mangas m ON m.id = cm.manga_id
|
||||
WHERE cm.collection_id = $1
|
||||
ORDER BY cm.added_at DESC, m.id
|
||||
LIMIT $2 OFFSET $3
|
||||
"#,
|
||||
)
|
||||
.bind(collection_id)
|
||||
.bind(limit)
|
||||
.bind(offset)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
let (total,): (i64,) =
|
||||
sqlx::query_as("SELECT count(*) FROM collection_mangas WHERE collection_id = $1")
|
||||
.bind(collection_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok((rows, total))
|
||||
}
|
||||
|
||||
/// Which of `user_id`'s collections currently contain `manga_id`?
|
||||
/// Used by the "Add to collection" modal to pre-check the boxes.
|
||||
pub async fn list_collections_containing(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
) -> AppResult<Vec<Uuid>> {
|
||||
let rows: Vec<(Uuid,)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT c.id
|
||||
FROM collections c
|
||||
JOIN collection_mangas cm ON cm.collection_id = c.id
|
||||
WHERE c.user_id = $1
|
||||
AND cm.manga_id = $2
|
||||
ORDER BY c.updated_at DESC
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
Ok(rows.into_iter().map(|(id,)| id).collect())
|
||||
}
|
||||
481
backend/src/repo/crawler.rs
Normal file
481
backend/src/repo/crawler.rs
Normal file
@@ -0,0 +1,481 @@
|
||||
//! Persistence for crawled mangas.
|
||||
//!
|
||||
//! High-level operations:
|
||||
//! - [`ensure_source`]: idempotent registration of a source row.
|
||||
//! - [`upsert_manga_from_source`]: end-to-end "I saw this manga" —
|
||||
//! creates or updates the `mangas` row, threads `manga_sources`, and
|
||||
//! refreshes authors/genres/tags. Returns whether the manga is new,
|
||||
//! updated (metadata_hash changed), or unchanged.
|
||||
//! - [`sync_manga_chapters`]: per-manga chapter reconciliation. Adds
|
||||
//! new ones, refreshes URLs on existing ones, soft-drops vanished.
|
||||
//! - [`mark_dropped_mangas`]: end-of-run pass. Any manga from this
|
||||
//! source whose `last_seen_at` is older than the run start is
|
||||
//! soft-dropped.
|
||||
//!
|
||||
//! Each public function is a transaction boundary so a partial failure
|
||||
//! mid-call leaves the DB in its pre-call state.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use sqlx::{PgPool, Postgres, Transaction};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::crawler::source::{SourceChapterRef, SourceManga};
|
||||
|
||||
/// Outcome of [`upsert_manga_from_source`] for a single manga.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpsertStatus {
    /// No `manga_sources` row existed yet; a fresh `mangas` row was inserted.
    New,
    /// A row existed and its stored `metadata_hash` differed from the
    /// incoming one, so the manga's metadata was rewritten.
    Updated,
    /// A row existed and its stored `metadata_hash` matched the incoming one.
    Unchanged,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UpsertedManga {
|
||||
pub manga_id: Uuid,
|
||||
pub status: UpsertStatus,
|
||||
/// Current value of `mangas.cover_image_path` after the upsert.
|
||||
/// `None` means the cover hasn't been downloaded yet — the caller
|
||||
/// uses this to backfill covers for mangas that were synced before
|
||||
/// cover-download support existed.
|
||||
pub cover_image_path: Option<String>,
|
||||
}
|
||||
|
||||
/// Per-manga tally produced by [`sync_manga_chapters`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct ChapterDiff {
    /// Chapters that had no `chapter_sources` row and were inserted.
    pub new: usize,
    /// Chapters that already had a `chapter_sources` row and were refreshed.
    pub refreshed: usize,
    /// Previously seen chapters that were soft-dropped because they were
    /// absent from the current list.
    pub dropped: usize,
}
|
||||
|
||||
pub async fn ensure_source(
|
||||
pool: &PgPool,
|
||||
id: &str,
|
||||
name: &str,
|
||||
base_url: &str,
|
||||
) -> sqlx::Result<()> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO sources (id, name, base_url, enabled)
|
||||
VALUES ($1, $2, $3, true)
|
||||
ON CONFLICT (id) DO UPDATE
|
||||
SET name = EXCLUDED.name,
|
||||
base_url = EXCLUDED.base_url
|
||||
"#,
|
||||
)
|
||||
.bind(id)
|
||||
.bind(name)
|
||||
.bind(base_url)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_manga_from_source(
|
||||
pool: &PgPool,
|
||||
source_id: &str,
|
||||
source_url: &str,
|
||||
sm: &SourceManga,
|
||||
) -> sqlx::Result<UpsertedManga> {
|
||||
let mut tx = pool.begin().await?;
|
||||
|
||||
let existing: Option<(Uuid, Option<String>)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT manga_id, metadata_hash
|
||||
FROM manga_sources
|
||||
WHERE source_id = $1 AND source_manga_key = $2
|
||||
"#,
|
||||
)
|
||||
.bind(source_id)
|
||||
.bind(&sm.source_manga_key)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
|
||||
let status_db = sm.status.as_deref().unwrap_or("ongoing");
|
||||
|
||||
// Note: `cover_image_path` is intentionally not written here.
|
||||
// The repo layer doesn't know about the storage backend, so the
|
||||
// caller (crawler binary) downloads the cover via the `Storage`
|
||||
// trait and sets the path with `repo::manga::set_cover_image_path`
|
||||
// once the bytes have landed.
|
||||
let (manga_id, status) = match existing {
|
||||
None => {
|
||||
let (id,): (Uuid,) = sqlx::query_as(
|
||||
r#"
|
||||
INSERT INTO mangas (title, description, status, alt_titles)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(&sm.title)
|
||||
.bind(sm.summary.as_deref())
|
||||
.bind(status_db)
|
||||
.bind(&sm.alternative_titles)
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
(id, UpsertStatus::New)
|
||||
}
|
||||
Some((id, prev_hash)) if prev_hash.as_deref() == Some(&sm.metadata_hash) => {
|
||||
(id, UpsertStatus::Unchanged)
|
||||
}
|
||||
Some((id, _)) => {
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE mangas
|
||||
SET title = $1,
|
||||
description = $2,
|
||||
status = $3,
|
||||
alt_titles = $4,
|
||||
updated_at = NOW()
|
||||
WHERE id = $5
|
||||
"#,
|
||||
)
|
||||
.bind(&sm.title)
|
||||
.bind(sm.summary.as_deref())
|
||||
.bind(status_db)
|
||||
.bind(&sm.alternative_titles)
|
||||
.bind(id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
(id, UpsertStatus::Updated)
|
||||
}
|
||||
};
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO manga_sources
|
||||
(source_id, source_manga_key, manga_id, source_url, metadata_hash, last_seen_at, dropped_at)
|
||||
VALUES ($1, $2, $3, $4, $5, NOW(), NULL)
|
||||
ON CONFLICT (source_id, source_manga_key) DO UPDATE
|
||||
SET source_url = EXCLUDED.source_url,
|
||||
metadata_hash = EXCLUDED.metadata_hash,
|
||||
last_seen_at = NOW(),
|
||||
dropped_at = NULL
|
||||
"#,
|
||||
)
|
||||
.bind(source_id)
|
||||
.bind(&sm.source_manga_key)
|
||||
.bind(manga_id)
|
||||
.bind(source_url)
|
||||
.bind(&sm.metadata_hash)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
sync_authors(&mut tx, manga_id, &sm.authors).await?;
|
||||
sync_genres(&mut tx, manga_id, &sm.genres).await?;
|
||||
sync_tags(&mut tx, manga_id, &sm.tags).await?;
|
||||
|
||||
let cover_image_path: Option<String> =
|
||||
sqlx::query_scalar("SELECT cover_image_path FROM mangas WHERE id = $1")
|
||||
.bind(manga_id)
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
|
||||
tx.commit().await?;
|
||||
Ok(UpsertedManga {
|
||||
manga_id,
|
||||
status,
|
||||
cover_image_path,
|
||||
})
|
||||
}
|
||||
|
||||
async fn sync_authors(
|
||||
tx: &mut Transaction<'_, Postgres>,
|
||||
manga_id: Uuid,
|
||||
authors: &[String],
|
||||
) -> sqlx::Result<()> {
|
||||
sqlx::query("DELETE FROM manga_authors WHERE manga_id = $1")
|
||||
.bind(manga_id)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
for (i, name) in authors.iter().enumerate() {
|
||||
let trimmed = name.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Self-update on conflict so the row id is always returned —
|
||||
// can't use DO NOTHING because that suppresses RETURNING.
|
||||
let (author_id,): (Uuid,) = sqlx::query_as(
|
||||
r#"
|
||||
INSERT INTO authors (name) VALUES ($1)
|
||||
ON CONFLICT (lower(name)) DO UPDATE SET name = authors.name
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(trimmed)
|
||||
.fetch_one(&mut **tx)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO manga_authors (manga_id, author_id, position)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT DO NOTHING
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(author_id)
|
||||
.bind(i as i32)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn sync_genres(
|
||||
tx: &mut Transaction<'_, Postgres>,
|
||||
manga_id: Uuid,
|
||||
genres: &[String],
|
||||
) -> sqlx::Result<()> {
|
||||
sqlx::query("DELETE FROM manga_genres WHERE manga_id = $1")
|
||||
.bind(manga_id)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
for name in genres {
|
||||
let trimmed = name.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Case-insensitive lookup so a source-supplied "action"
|
||||
// attaches to the seeded "Action" rather than creating a
|
||||
// second row.
|
||||
let existing: Option<(Uuid,)> =
|
||||
sqlx::query_as("SELECT id FROM genres WHERE lower(name) = lower($1)")
|
||||
.bind(trimmed)
|
||||
.fetch_optional(&mut **tx)
|
||||
.await?;
|
||||
let genre_id = match existing {
|
||||
Some((id,)) => id,
|
||||
None => {
|
||||
let (id,): (Uuid,) = sqlx::query_as(
|
||||
r#"
|
||||
INSERT INTO genres (name) VALUES ($1)
|
||||
ON CONFLICT (name) DO UPDATE SET name = genres.name
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(trimmed)
|
||||
.fetch_one(&mut **tx)
|
||||
.await?;
|
||||
tracing::info!(genre = trimmed, "added new genre from source");
|
||||
id
|
||||
}
|
||||
};
|
||||
sqlx::query(
|
||||
"INSERT INTO manga_genres (manga_id, genre_id) VALUES ($1, $2) ON CONFLICT DO NOTHING",
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(genre_id)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn sync_tags(
|
||||
tx: &mut Transaction<'_, Postgres>,
|
||||
manga_id: Uuid,
|
||||
tags: &[String],
|
||||
) -> sqlx::Result<()> {
|
||||
sqlx::query("DELETE FROM manga_tags WHERE manga_id = $1")
|
||||
.bind(manga_id)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
for name in tags {
|
||||
let trimmed = name.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let (tag_id,): (Uuid,) = sqlx::query_as(
|
||||
r#"
|
||||
INSERT INTO tags (name) VALUES ($1)
|
||||
ON CONFLICT (lower(name)) DO UPDATE SET name = tags.name
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(trimmed)
|
||||
.fetch_one(&mut **tx)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO manga_tags (manga_id, tag_id, added_by)
|
||||
VALUES ($1, $2, NULL)
|
||||
ON CONFLICT DO NOTHING
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(tag_id)
|
||||
.execute(&mut **tx)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn sync_manga_chapters(
|
||||
pool: &PgPool,
|
||||
source_id: &str,
|
||||
manga_id: Uuid,
|
||||
chapters: &[SourceChapterRef],
|
||||
) -> sqlx::Result<ChapterDiff> {
|
||||
let mut tx = pool.begin().await?;
|
||||
let mut diff = ChapterDiff::default();
|
||||
let seen_keys: Vec<String> = chapters
|
||||
.iter()
|
||||
.map(|c| c.source_chapter_key.clone())
|
||||
.collect();
|
||||
|
||||
for c in chapters {
|
||||
let existing: Option<(Uuid,)> = sqlx::query_as(
|
||||
"SELECT chapter_id FROM chapter_sources WHERE source_id = $1 AND source_chapter_key = $2",
|
||||
)
|
||||
.bind(source_id)
|
||||
.bind(&c.source_chapter_key)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
|
||||
match existing {
|
||||
None => {
|
||||
// New chapter row. As of 0013 there's no (manga_id,
|
||||
// number) UNIQUE, so duplicate-numbered chapters from
|
||||
// the source (different uploaders, notices, alt
|
||||
// translations) each get their own row — chapter
|
||||
// identity is the UUID, not the number.
|
||||
let (chapter_id,): (Uuid,) = sqlx::query_as(
|
||||
r#"
|
||||
INSERT INTO chapters (manga_id, number, title, page_count)
|
||||
VALUES ($1, $2, $3, 0)
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(c.number)
|
||||
.bind(c.title.as_deref())
|
||||
.fetch_one(&mut *tx)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO chapter_sources
|
||||
(source_id, source_chapter_key, chapter_id, source_url, last_seen_at, dropped_at)
|
||||
VALUES ($1, $2, $3, $4, NOW(), NULL)
|
||||
"#,
|
||||
)
|
||||
.bind(source_id)
|
||||
.bind(&c.source_chapter_key)
|
||||
.bind(chapter_id)
|
||||
.bind(&c.url)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
diff.new += 1;
|
||||
}
|
||||
Some((chapter_id,)) => {
|
||||
sqlx::query("UPDATE chapters SET title = $1 WHERE id = $2")
|
||||
.bind(c.title.as_deref())
|
||||
.bind(chapter_id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE chapter_sources
|
||||
SET source_url = $1, last_seen_at = NOW(), dropped_at = NULL
|
||||
WHERE source_id = $2 AND source_chapter_key = $3
|
||||
"#,
|
||||
)
|
||||
.bind(&c.url)
|
||||
.bind(source_id)
|
||||
.bind(&c.source_chapter_key)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
diff.refreshed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Soft-drop any chapter previously seen from this source for this
|
||||
// manga that's not in the current list.
|
||||
let result = sqlx::query(
|
||||
r#"
|
||||
UPDATE chapter_sources cs
|
||||
SET dropped_at = NOW()
|
||||
FROM chapters ch
|
||||
WHERE cs.chapter_id = ch.id
|
||||
AND ch.manga_id = $1
|
||||
AND cs.source_id = $2
|
||||
AND cs.dropped_at IS NULL
|
||||
AND NOT (cs.source_chapter_key = ANY($3))
|
||||
"#,
|
||||
)
|
||||
.bind(manga_id)
|
||||
.bind(source_id)
|
||||
.bind(&seen_keys)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
diff.dropped = result.rows_affected() as usize;
|
||||
|
||||
tx.commit().await?;
|
||||
Ok(diff)
|
||||
}
|
||||
|
||||
/// Record that a complete Backfill walk has finished for `source_id`.
|
||||
/// The presence of this row is what the daemon's mode auto-detection
|
||||
/// uses to flip from Backfill to Incremental on subsequent ticks.
|
||||
///
|
||||
/// Keyed `seed_completed:<source_id>` in `crawler_state`. JSON payload
|
||||
/// stores the timestamp so we can surface "last fully reseeded at" in
|
||||
/// future ops tooling without another migration.
|
||||
pub async fn mark_seed_completed(
|
||||
pool: &PgPool,
|
||||
source_id: &str,
|
||||
at: DateTime<Utc>,
|
||||
) -> sqlx::Result<()> {
|
||||
let key = format!("seed_completed:{source_id}");
|
||||
sqlx::query(
|
||||
"INSERT INTO crawler_state (key, value, updated_at) \
|
||||
VALUES ($1, $2, now()) \
|
||||
ON CONFLICT (key) DO UPDATE \
|
||||
SET value = EXCLUDED.value, updated_at = now()",
|
||||
)
|
||||
.bind(&key)
|
||||
.bind(serde_json::json!({ "at": at.to_rfc3339() }))
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read the timestamp written by [`mark_seed_completed`], if any.
|
||||
/// `None` means no complete Backfill has ever finished for this
|
||||
/// source — the daemon should run Backfill on the next tick.
|
||||
pub async fn seed_completed_at(
|
||||
pool: &PgPool,
|
||||
source_id: &str,
|
||||
) -> sqlx::Result<Option<DateTime<Utc>>> {
|
||||
let key = format!("seed_completed:{source_id}");
|
||||
let row: Option<serde_json::Value> =
|
||||
sqlx::query_scalar("SELECT value FROM crawler_state WHERE key = $1")
|
||||
.bind(&key)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row.and_then(|v| {
|
||||
v.get("at")
|
||||
.and_then(|s| s.as_str())
|
||||
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn mark_dropped_mangas(
|
||||
pool: &PgPool,
|
||||
source_id: &str,
|
||||
run_started_at: DateTime<Utc>,
|
||||
) -> sqlx::Result<u64> {
|
||||
let res = sqlx::query(
|
||||
r#"
|
||||
UPDATE manga_sources
|
||||
SET dropped_at = NOW()
|
||||
WHERE source_id = $1
|
||||
AND last_seen_at < $2
|
||||
AND dropped_at IS NULL
|
||||
"#,
|
||||
)
|
||||
.bind(source_id)
|
||||
.bind(run_started_at)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(res.rows_affected())
|
||||
}
|
||||
@@ -181,17 +181,23 @@ pub async fn get_detail(pool: &PgPool, id: Uuid) -> AppResult<MangaDetail> {
|
||||
/// by the caller via `repo::author::set_for_manga` etc. in the same
|
||||
/// transaction. `status` is taken as a validated string — the handler
|
||||
/// is responsible for defaulting/validating it.
|
||||
///
|
||||
/// `uploaded_by` records who created the manga and feeds the per-user
|
||||
/// upload history. `None` means "historical / no associated user" —
|
||||
/// historic rows from before the uploader columns were added carry
|
||||
/// NULL.
|
||||
pub async fn create<'e, E: PgExecutor<'e>>(
|
||||
executor: E,
|
||||
title: &str,
|
||||
status: &str,
|
||||
description: Option<&str>,
|
||||
alt_titles: &[String],
|
||||
uploaded_by: Option<Uuid>,
|
||||
) -> AppResult<Manga> {
|
||||
let row = sqlx::query_as::<_, Manga>(&format!(
|
||||
r#"
|
||||
INSERT INTO mangas (title, status, description, alt_titles)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
INSERT INTO mangas (title, status, description, alt_titles, uploaded_by)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
RETURNING {SELECT_COLS}
|
||||
"#
|
||||
))
|
||||
@@ -199,6 +205,7 @@ pub async fn create<'e, E: PgExecutor<'e>>(
|
||||
.bind(status)
|
||||
.bind(description)
|
||||
.bind(alt_titles)
|
||||
.bind(uploaded_by)
|
||||
.fetch_one(executor)
|
||||
.await?;
|
||||
Ok(row)
|
||||
@@ -255,6 +262,17 @@ pub async fn set_cover_image_path<'e, E: PgExecutor<'e>>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn clear_cover_image_path<'e, E: PgExecutor<'e>>(
|
||||
executor: E,
|
||||
id: Uuid,
|
||||
) -> AppResult<()> {
|
||||
sqlx::query("UPDATE mangas SET cover_image_path = NULL, updated_at = now() WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(executor)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn exists(pool: &PgPool, id: Uuid) -> AppResult<bool> {
|
||||
let (exists,): (bool,) =
|
||||
sqlx::query_as("SELECT EXISTS(SELECT 1 FROM mangas WHERE id = $1)")
|
||||
|
||||
@@ -2,10 +2,14 @@ pub mod api_token;
|
||||
pub mod author;
|
||||
pub mod bookmark;
|
||||
pub mod chapter;
|
||||
pub mod collection;
|
||||
pub mod crawler;
|
||||
pub mod genre;
|
||||
pub mod manga;
|
||||
pub mod page;
|
||||
pub mod read_progress;
|
||||
pub mod session;
|
||||
pub mod tag;
|
||||
pub mod upload_history;
|
||||
pub mod user;
|
||||
pub mod user_preferences;
|
||||
|
||||
164
backend/src/repo/read_progress.rs
Normal file
164
backend/src/repo/read_progress.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
//! Per-user reading-progress persistence.
|
||||
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::domain::read_progress::{
|
||||
ReadProgress, ReadProgressForManga, ReadProgressSummary,
|
||||
};
|
||||
use crate::error::{AppError, AppResult};
|
||||
|
||||
/// Insert-or-overwrite the user's progress row for this manga.
|
||||
/// Progress can move backwards (re-reading) — we accept the
|
||||
/// simplification that the last write wins.
|
||||
///
|
||||
/// FK violations (manga or chapter deleted between the handler's
|
||||
/// existence check and this write) are mapped to `NotFound` so the
|
||||
/// API returns 404 rather than 500.
|
||||
pub async fn upsert(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
chapter_id: Option<Uuid>,
|
||||
page: i32,
|
||||
) -> AppResult<ReadProgress> {
|
||||
sqlx::query_as::<_, ReadProgress>(
|
||||
r#"
|
||||
INSERT INTO read_progress (user_id, manga_id, chapter_id, page, updated_at)
|
||||
VALUES ($1, $2, $3, $4, now())
|
||||
ON CONFLICT (user_id, manga_id) DO UPDATE
|
||||
SET chapter_id = EXCLUDED.chapter_id,
|
||||
page = EXCLUDED.page,
|
||||
updated_at = now()
|
||||
RETURNING user_id, manga_id, chapter_id, page, updated_at
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.bind(chapter_id)
|
||||
.bind(page)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
sqlx::Error::Database(ref db_err) if db_err.is_foreign_key_violation() => {
|
||||
AppError::NotFound
|
||||
}
|
||||
other => AppError::Database(other),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
) -> AppResult<ReadProgress> {
|
||||
sqlx::query_as::<_, ReadProgress>(
|
||||
r#"
|
||||
SELECT user_id, manga_id, chapter_id, page, updated_at
|
||||
FROM read_progress
|
||||
WHERE user_id = $1 AND manga_id = $2
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
.ok_or(AppError::NotFound)
|
||||
}
|
||||
|
||||
/// Same lookup as `get`, but resolves `chapter_number` in one round-
|
||||
/// trip so the manga detail page's "Continue reading" CTA can render
|
||||
/// without having to find the chapter in the paged chapters list.
|
||||
pub async fn get_for_manga(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
manga_id: Uuid,
|
||||
) -> AppResult<ReadProgressForManga> {
|
||||
sqlx::query_as::<_, ReadProgressForManga>(
|
||||
r#"
|
||||
SELECT rp.manga_id,
|
||||
rp.chapter_id,
|
||||
c.number AS chapter_number,
|
||||
rp.page,
|
||||
rp.updated_at
|
||||
FROM read_progress rp
|
||||
LEFT JOIN chapters c ON c.id = rp.chapter_id
|
||||
WHERE rp.user_id = $1 AND rp.manga_id = $2
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
.ok_or(AppError::NotFound)
|
||||
}
|
||||
|
||||
/// Cross-link guard. Returns true when `chapter_id` belongs to
|
||||
/// `manga_id`. The upsert handler calls this before writing to refuse
|
||||
/// PUT bodies that pair a chapter from one manga with another manga
|
||||
/// — the FK alone can't catch that because both ids resolve
|
||||
/// individually.
|
||||
pub async fn chapter_belongs_to_manga(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
chapter_id: Uuid,
|
||||
) -> AppResult<bool> {
|
||||
let (matches,): (bool,) = sqlx::query_as(
|
||||
r#"
|
||||
SELECT EXISTS(
|
||||
SELECT 1 FROM chapters
|
||||
WHERE id = $1 AND manga_id = $2
|
||||
)
|
||||
"#,
|
||||
)
|
||||
.bind(chapter_id)
|
||||
.bind(manga_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
pub async fn list_for_user(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
limit: i64,
|
||||
offset: i64,
|
||||
) -> AppResult<(Vec<ReadProgressSummary>, i64)> {
|
||||
let rows = sqlx::query_as::<_, ReadProgressSummary>(
|
||||
r#"
|
||||
SELECT rp.manga_id,
|
||||
m.title AS manga_title,
|
||||
m.cover_image_path AS manga_cover_image_path,
|
||||
rp.chapter_id,
|
||||
c.number AS chapter_number,
|
||||
rp.page,
|
||||
rp.updated_at
|
||||
FROM read_progress rp
|
||||
JOIN mangas m ON m.id = rp.manga_id
|
||||
LEFT JOIN chapters c ON c.id = rp.chapter_id
|
||||
WHERE rp.user_id = $1
|
||||
ORDER BY rp.updated_at DESC, rp.manga_id
|
||||
LIMIT $2 OFFSET $3
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(limit)
|
||||
.bind(offset)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
let (total,): (i64,) =
|
||||
sqlx::query_as("SELECT count(*) FROM read_progress WHERE user_id = $1")
|
||||
.bind(user_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok((rows, total))
|
||||
}
|
||||
|
||||
pub async fn delete(pool: &PgPool, user_id: Uuid, manga_id: Uuid) -> AppResult<()> {
|
||||
sqlx::query("DELETE FROM read_progress WHERE user_id = $1 AND manga_id = $2")
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
119
backend/src/repo/upload_history.rs
Normal file
119
backend/src/repo/upload_history.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
//! Cross-table upload history.
|
||||
//!
|
||||
//! Mangas and chapters are uploaded by users separately, but the
|
||||
//! profile UI wants a single chronological feed. Rather than open a
|
||||
//! UNION-ALL over two tables with mismatched columns we fetch each
|
||||
//! side, then merge in Rust by `created_at`. Cheap for the volumes a
|
||||
//! single user produces.
|
||||
//!
|
||||
//! Pagination uses limit-only for now; offsets across two unrelated
|
||||
//! tables aren't trivially stable, and the realistic per-user upload
|
||||
//! count is small. Switch to keyset pagination if real users blow
|
||||
//! past a few hundred uploads.
|
||||
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::domain::chapter::Chapter;
|
||||
use crate::domain::manga::Manga;
|
||||
use crate::domain::upload_entry::UploadEntry;
|
||||
use crate::error::AppResult;
|
||||
|
||||
#[derive(sqlx::FromRow)]
|
||||
struct ChapterUploadRow {
|
||||
manga_id: Uuid,
|
||||
manga_title: String,
|
||||
manga_cover_image_path: Option<String>,
|
||||
chapter_id: Uuid,
|
||||
number: i32,
|
||||
title: Option<String>,
|
||||
page_count: i32,
|
||||
created_at: chrono::DateTime<chrono::Utc>,
|
||||
}
|
||||
|
||||
/// Returns up to `limit` of the user's most recent uploads (mangas and
|
||||
/// chapters interleaved by `created_at DESC`) plus the unfiltered
|
||||
/// total count (mangas + chapters owned by the user). The caller is
|
||||
/// responsible for clamping `limit` to a sane value.
|
||||
pub async fn list_for_user(
|
||||
pool: &PgPool,
|
||||
user_id: Uuid,
|
||||
limit: i64,
|
||||
) -> AppResult<(Vec<UploadEntry>, i64)> {
|
||||
let mangas: Vec<Manga> = sqlx::query_as::<_, Manga>(
|
||||
r#"
|
||||
SELECT id, title, status, alt_titles, description,
|
||||
cover_image_path, created_at, updated_at
|
||||
FROM mangas
|
||||
WHERE uploaded_by = $1
|
||||
ORDER BY created_at DESC, id
|
||||
LIMIT $2
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let chapters: Vec<ChapterUploadRow> = sqlx::query_as::<_, ChapterUploadRow>(
|
||||
r#"
|
||||
SELECT c.manga_id,
|
||||
m.title AS manga_title,
|
||||
m.cover_image_path AS manga_cover_image_path,
|
||||
c.id AS chapter_id,
|
||||
c.number,
|
||||
c.title,
|
||||
c.page_count,
|
||||
c.created_at
|
||||
FROM chapters c
|
||||
JOIN mangas m ON m.id = c.manga_id
|
||||
WHERE c.uploaded_by = $1
|
||||
ORDER BY c.created_at DESC, c.id
|
||||
LIMIT $2
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut entries: Vec<UploadEntry> = Vec::with_capacity(mangas.len() + chapters.len());
|
||||
for m in mangas {
|
||||
entries.push(UploadEntry::Manga {
|
||||
created_at: m.created_at,
|
||||
manga: m,
|
||||
});
|
||||
}
|
||||
for c in chapters {
|
||||
let created_at = c.created_at;
|
||||
entries.push(UploadEntry::Chapter {
|
||||
manga_id: c.manga_id,
|
||||
manga_title: c.manga_title,
|
||||
manga_cover_image_path: c.manga_cover_image_path,
|
||||
chapter: Chapter {
|
||||
id: c.chapter_id,
|
||||
manga_id: c.manga_id,
|
||||
number: c.number,
|
||||
title: c.title,
|
||||
page_count: c.page_count,
|
||||
created_at: c.created_at,
|
||||
},
|
||||
created_at,
|
||||
});
|
||||
}
|
||||
// Newest first; trim to limit after the merge.
|
||||
entries.sort_by(|a, b| b.created_at().cmp(&a.created_at()));
|
||||
entries.truncate(limit as usize);
|
||||
|
||||
let (manga_total, chapter_total): (i64, i64) = sqlx::query_as(
|
||||
r#"
|
||||
SELECT
|
||||
(SELECT count(*) FROM mangas WHERE uploaded_by = $1),
|
||||
(SELECT count(*) FROM chapters WHERE uploaded_by = $1)
|
||||
"#,
|
||||
)
|
||||
.bind(user_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok((entries, manga_total + chapter_total))
|
||||
}
|
||||
@@ -16,6 +16,13 @@ impl LocalStorage {
|
||||
}
|
||||
|
||||
fn resolve(&self, key: &str) -> Result<PathBuf, StorageError> {
|
||||
// NUL bytes are rejected by the Linux syscall layer, but the
|
||||
// error surfaces as an opaque IO failure rather than the
|
||||
// explicit `BadKey` the rest of the contract uses. Catch it
|
||||
// here so the error path is consistent.
|
||||
if key.contains('\0') {
|
||||
return Err(StorageError::BadKey);
|
||||
}
|
||||
let key = key.trim_start_matches('/');
|
||||
if key.is_empty() {
|
||||
return Err(StorageError::BadKey);
|
||||
@@ -114,6 +121,9 @@ mod tests {
|
||||
assert!(matches!(s.get(".").await, Err(StorageError::BadKey)));
|
||||
// Empty segment via doubled slash.
|
||||
assert!(matches!(s.get("a//b").await, Err(StorageError::BadKey)));
|
||||
// NUL byte (rejected explicitly so callers see BadKey rather
|
||||
// than an opaque IO error from the kernel).
|
||||
assert!(matches!(s.put("a\0b", b"x").await, Err(StorageError::BadKey)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -581,3 +581,27 @@ async fn delete_unknown_token_is_404(pool: PgPool) {
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
/// Bot token names are user-supplied free-form strings; a 10 MB name
|
||||
/// was accepted before. Cap at 64 chars to match the other free-form
|
||||
/// identifier caps (tags, collection names). The response uses
|
||||
/// `ValidationFailed` (422 with per-field details) so clients can
|
||||
/// render the same shape they already handle for `attach_tag`.
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_token_rejects_name_over_64_chars(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/auth/tokens",
|
||||
json!({ "name": "x".repeat(65) }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "validation_failed");
|
||||
assert!(body["error"]["details"]["name"].is_string());
|
||||
}
|
||||
|
||||
@@ -344,7 +344,7 @@ async fn list_me_enriches_chapter_bookmarks_with_chapter_number(pool: PgPool) {
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
// Seed a chapter directly so we know its number without uploading pages.
|
||||
mangalord::repo::chapter::create(&pool, manga_id, 7, Some("The Brand"))
|
||||
mangalord::repo::chapter::create(&pool, manga_id, 7, Some("The Brand"), None)
|
||||
.await
|
||||
.unwrap();
|
||||
// Look up its id so we can bookmark it.
|
||||
@@ -433,5 +433,201 @@ async fn list_me_returns_paged_envelope(pool: PgPool) {
|
||||
assert!(body["items"].is_array());
|
||||
assert_eq!(body["page"]["limit"], 50);
|
||||
assert_eq!(body["page"]["offset"], 0);
|
||||
assert!(body["page"]["total"].is_null());
|
||||
// `total` is the unfiltered row count, returned so callers (e.g.
|
||||
// the profile overview's bookmark counter) can show a number
|
||||
// without paging through.
|
||||
assert_eq!(body["page"]["total"], 0);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Bookmark create -> SyncChapterContent job enqueue (background task)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
async fn seed_chapter_with_source(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
number: i32,
|
||||
source_id: &str,
|
||||
source_chapter_key: &str,
|
||||
source_url: &str,
|
||||
dropped: bool,
|
||||
) -> Uuid {
|
||||
let chapter_id: Uuid =
|
||||
mangalord::repo::chapter::create(pool, manga_id, number, None, None)
|
||||
.await
|
||||
.unwrap()
|
||||
.id;
|
||||
sqlx::query("INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
|
||||
.bind(source_id)
|
||||
.bind(source_id)
|
||||
.bind("https://example.com")
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let dropped_at = if dropped { "now()" } else { "NULL" };
|
||||
sqlx::query(&format!(
|
||||
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url, dropped_at) \
|
||||
VALUES ($1, $2, $3, $4, {dropped_at})"
|
||||
))
|
||||
.bind(source_id)
|
||||
.bind(source_chapter_key)
|
||||
.bind(chapter_id)
|
||||
.bind(source_url)
|
||||
.execute(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
chapter_id
|
||||
}
|
||||
|
||||
/// Poll `crawler_jobs` for the expected pending count, up to ~1.5s, so the
|
||||
/// detached `tokio::spawn` from the bookmark create handler has time to
|
||||
/// land regardless of CI scheduling jitter.
|
||||
async fn wait_for_pending_count(pool: &PgPool, expected: i64) -> i64 {
|
||||
for _ in 0..30 {
|
||||
let count: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM crawler_jobs \
|
||||
WHERE state = 'pending' \
|
||||
AND payload->>'kind' = 'sync_chapter_content'",
|
||||
)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap();
|
||||
if count >= expected {
|
||||
return count;
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||
}
|
||||
sqlx::query_scalar::<_, i64>(
|
||||
"SELECT COUNT(*) FROM crawler_jobs \
|
||||
WHERE state = 'pending' \
|
||||
AND payload->>'kind' = 'sync_chapter_content'",
|
||||
)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_enqueues_sync_chapter_content_jobs_for_pending_chapters(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
|
||||
// Two zero-page chapters with non-dropped sources.
|
||||
let c1 = seed_chapter_with_source(&pool, manga_id, 1, "target", "ch1", "https://example.com/c1", false).await;
|
||||
let c2 = seed_chapter_with_source(&pool, manga_id, 2, "target", "ch2", "https://example.com/c2", false).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/bookmarks",
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||
|
||||
let count = wait_for_pending_count(&pool, 2).await;
|
||||
assert_eq!(count, 2, "both pending chapters should be enqueued");
|
||||
|
||||
let chapter_ids: Vec<String> = sqlx::query_scalar(
|
||||
"SELECT payload->>'chapter_id' FROM crawler_jobs \
|
||||
WHERE payload->>'kind' = 'sync_chapter_content' \
|
||||
ORDER BY payload->>'chapter_id'",
|
||||
)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let mut expected = vec![c1.to_string(), c2.to_string()];
|
||||
expected.sort();
|
||||
assert_eq!(chapter_ids, expected);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn re_bookmark_after_delete_does_not_re_enqueue_pending_jobs(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let _ = seed_chapter_with_source(&pool, manga_id, 1, "target", "ch1", "https://example.com/c1", false).await;
|
||||
|
||||
// First bookmark — should enqueue 1.
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/bookmarks",
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let bookmark_id = common::body_json(resp).await["id"].as_str().unwrap().to_string();
|
||||
assert_eq!(wait_for_pending_count(&pool, 1).await, 1);
|
||||
|
||||
// Delete the bookmark, then re-bookmark — the existing pending job
|
||||
// is still there so the dedup index suppresses the second enqueue.
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::delete_with_cookie(
|
||||
&format!("/api/v1/bookmarks/{bookmark_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/bookmarks",
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||
|
||||
// Give the background task time to attempt re-enqueue (it should be a no-op).
|
||||
tokio::time::sleep(std::time::Duration::from_millis(300)).await;
|
||||
let final_count: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM crawler_jobs \
|
||||
WHERE state IN ('pending', 'running') \
|
||||
AND payload->>'kind' = 'sync_chapter_content'",
|
||||
)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(final_count, 1, "dedup index keeps the queue at a single in-flight row");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_skips_chapters_with_dropped_sources(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
|
||||
let _alive = seed_chapter_with_source(&pool, manga_id, 1, "target", "ch1", "https://example.com/c1", false).await;
|
||||
let _dropped = seed_chapter_with_source(&pool, manga_id, 2, "target", "ch2", "https://example.com/c2", true).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/bookmarks",
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||
|
||||
assert_eq!(
|
||||
wait_for_pending_count(&pool, 1).await,
|
||||
1,
|
||||
"only the chapter with a non-dropped source row gets enqueued"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -12,10 +12,18 @@ async fn seed_manga(h: &common::Harness, cookie: &str, title: &str) -> Uuid {
|
||||
common::seed_manga_via_api(&h.app, cookie, title).await
|
||||
}
|
||||
|
||||
async fn seed_chapter(pool: &PgPool, manga_id: Uuid, number: i32, title: Option<&str>) {
|
||||
mangalord::repo::chapter::create(pool, manga_id, number, title)
|
||||
async fn seed_chapter(
|
||||
pool: &PgPool,
|
||||
manga_id: Uuid,
|
||||
number: i32,
|
||||
title: Option<&str>,
|
||||
) -> Uuid {
|
||||
// Historical seed — uploaded_by remains NULL, mirroring the
|
||||
// pre-Phase-5 rows in the production DB.
|
||||
mangalord::repo::chapter::create(pool, manga_id, number, title, None)
|
||||
.await
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
.id
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
@@ -79,16 +87,16 @@ async fn list_chapters_returns_404_for_unknown_manga(pool: PgPool) {
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_chapter_by_number(pool: PgPool) {
|
||||
async fn get_chapter_by_id(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = seed_manga(&h, &cookie, "Berserk").await;
|
||||
seed_chapter(&pool, manga_id, 1, Some("The Brand")).await;
|
||||
let chapter_id = seed_chapter(&pool, manga_id, 1, Some("The Brand")).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{manga_id}/chapters/1"
|
||||
"/api/v1/mangas/{manga_id}/chapters/{chapter_id}"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -97,18 +105,20 @@ async fn get_chapter_by_number(pool: PgPool) {
|
||||
assert_eq!(body["number"], 1);
|
||||
assert_eq!(body["title"], "The Brand");
|
||||
assert_eq!(body["page_count"], 0);
|
||||
assert_eq!(body["id"], chapter_id.to_string());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_chapter_unknown_number_is_404(pool: PgPool) {
|
||||
async fn get_chapter_unknown_id_is_404(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = seed_manga(&h, &cookie, "Berserk").await;
|
||||
let unknown_chapter = Uuid::new_v4();
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{manga_id}/chapters/99"
|
||||
"/api/v1/mangas/{manga_id}/chapters/{unknown_chapter}"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -120,10 +130,34 @@ async fn get_chapter_unknown_number_is_404(pool: PgPool) {
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_chapter_unknown_manga_is_404(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let unknown = Uuid::nil();
|
||||
let unknown_manga = Uuid::nil();
|
||||
let unknown_chapter = Uuid::new_v4();
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!("/api/v1/mangas/{unknown}/chapters/1")))
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{unknown_manga}/chapters/{unknown_chapter}"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
/// Cross-manga isolation: a chapter id belonging to manga A must not
|
||||
/// resolve when accessed via manga B's URL. The (manga_id, id) scoping
|
||||
/// in `find_by_id_in_manga` enforces this.
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_chapter_from_wrong_manga_is_404(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_a = seed_manga(&h, &cookie, "Berserk").await;
|
||||
let manga_b = seed_manga(&h, &cookie, "Vagabond").await;
|
||||
let chapter_id = seed_chapter(&pool, manga_a, 1, Some("Episode 1")).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{manga_b}/chapters/{chapter_id}"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
@@ -134,12 +168,12 @@ async fn list_pages_empty_for_chapter_without_upload(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = seed_manga(&h, &cookie, "Berserk").await;
|
||||
seed_chapter(&pool, manga_id, 1, None).await;
|
||||
let chapter_id = seed_chapter(&pool, manga_id, 1, None).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{manga_id}/chapters/1/pages"
|
||||
"/api/v1/mangas/{manga_id}/chapters/{chapter_id}/pages"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -153,11 +187,12 @@ async fn list_pages_returns_404_for_unknown_chapter(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = seed_manga(&h, &cookie, "Berserk").await;
|
||||
let unknown_chapter = Uuid::new_v4();
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{manga_id}/chapters/99/pages"
|
||||
"/api/v1/mangas/{manga_id}/chapters/{unknown_chapter}/pages"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
605
backend/tests/api_collections.rs
Normal file
605
backend/tests/api_collections.rs
Normal file
@@ -0,0 +1,605 @@
|
||||
mod common;
|
||||
|
||||
use axum::http::StatusCode;
|
||||
use serde_json::{json, Value};
|
||||
use sqlx::PgPool;
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
async fn create_collection(
|
||||
app: &axum::Router,
|
||||
cookie: &str,
|
||||
name: &str,
|
||||
) -> Value {
|
||||
let resp = app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/collections",
|
||||
json!({ "name": name }),
|
||||
cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED, "create_collection failed");
|
||||
common::body_json(resp).await
|
||||
}
|
||||
|
||||
fn id_of(v: &Value) -> String {
|
||||
v["id"].as_str().unwrap().to_string()
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_then_list_returns_only_own(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie_a) = common::register_user(&h.app).await;
|
||||
let (_, cookie_b) = common::register_user(&h.app).await;
|
||||
|
||||
let _favs = create_collection(&h.app, &cookie_a, "Favorites").await;
|
||||
let _read = create_collection(&h.app, &cookie_a, "Reading List").await;
|
||||
|
||||
// User B sees an empty list.
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/collections", &cookie_b))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["items"], json!([]));
|
||||
assert_eq!(body["page"]["total"], 0);
|
||||
|
||||
// User A sees both.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/collections", &cookie_a))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let names: Vec<&str> = body["items"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|c| c["name"].as_str().unwrap())
|
||||
.collect();
|
||||
// Newest-updated first; both rows have the same updated_at on
|
||||
// create so we just sanity-check membership.
|
||||
assert_eq!(names.len(), 2);
|
||||
assert!(names.contains(&"Favorites"));
|
||||
assert!(names.contains(&"Reading List"));
|
||||
// Empty collections render with manga_count 0 and an empty
|
||||
// sample_covers array, not `null`.
|
||||
for item in body["items"].as_array().unwrap() {
|
||||
assert_eq!(item["manga_count"], 0);
|
||||
assert_eq!(item["sample_covers"], json!([]));
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn duplicate_name_for_same_user_is_409(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let _ = create_collection(&h.app, &cookie, "Favorites").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/collections",
|
||||
json!({ "name": "favorites" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CONFLICT);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "conflict");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn two_users_can_share_a_collection_name(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let _ = create_collection(&h.app, &a, "Favorites").await;
|
||||
// No conflict — uniqueness is per-(user_id, lower(name)).
|
||||
let _ = create_collection(&h.app, &b, "Favorites").await;
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_requires_authentication(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json(
|
||||
"/api/v1/collections",
|
||||
json!({ "name": "Anon" }),
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_rejects_blank_name_with_422(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
"/api/v1/collections",
|
||||
json!({ "name": " " }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_one_returns_404_for_non_owner_no_existence_leak(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &a, "Favorites").await;
|
||||
let id = id_of(&coll);
|
||||
|
||||
// Owner-mismatch is collapsed to 404 so the API doesn't disclose
|
||||
// collection existence to non-owners. Otherwise an attacker could
|
||||
// distinguish "exists, not yours" from "doesn't exist" by status.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
&b,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn add_manga_is_idempotent_and_picks_201_then_200(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let coll = create_collection(&h.app, &cookie, "Favorites").await;
|
||||
let coll_id = id_of(&coll);
|
||||
|
||||
let req = || {
|
||||
common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
)
|
||||
};
|
||||
|
||||
let first = h.app.clone().oneshot(req()).await.unwrap();
|
||||
assert_eq!(first.status(), StatusCode::CREATED);
|
||||
let second = h.app.oneshot(req()).await.unwrap();
|
||||
assert_eq!(second.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn add_manga_returns_404_when_manga_missing(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &cookie, "Favorites").await;
|
||||
let coll_id = id_of(&coll);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
json!({ "manga_id": Uuid::new_v4().to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn add_manga_to_someone_elses_collection_is_404(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let coll_a = create_collection(&h.app, &a, "Mine").await;
|
||||
let coll_a_id = id_of(&coll_a);
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &b, "Anything").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_a_id}/mangas"),
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&b,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// 404 not 403 — same non-existence-leak rationale as `get_one`.
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn patch_on_other_users_collection_is_404(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &a, "Mine").await;
|
||||
let id = id_of(&coll);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "name": "Hijacked" }),
|
||||
&b,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn patch_description_null_clears_existing_value(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &cookie, "C").await;
|
||||
let id = id_of(&coll);
|
||||
// Seed a description first via PATCH.
|
||||
let _ = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "description": "starting desc" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// Now PATCH with description=null and expect the column cleared.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "description": null }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
assert!(body["description"].is_null(), "expected description cleared");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn patch_description_empty_string_sets_empty_not_null(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &cookie, "C").await;
|
||||
let id = id_of(&coll);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "description": "" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
// Empty string is a valid distinct value; only `null` clears.
|
||||
assert_eq!(body["description"], "");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn patch_description_omitted_leaves_value_intact(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &cookie, "C").await;
|
||||
let id = id_of(&coll);
|
||||
let _ = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "description": "Keep me" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// PATCH that doesn't mention description must not touch it.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "name": "Renamed" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["name"], "Renamed");
|
||||
assert_eq!(body["description"], "Keep me");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn patch_with_empty_body_leaves_row_unchanged(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &cookie, "Stable").await;
|
||||
let id = id_of(&coll);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["name"], "Stable");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn my_collections_for_unknown_manga_returns_empty_list(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie(
|
||||
&format!("/api/v1/mangas/{}/my-collections", Uuid::new_v4()),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// Non-existent manga is treated the same as a manga the user
|
||||
// hasn't collected — empty list. The handler comment documents
|
||||
// this; the test pins it.
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["collection_ids"], json!([]));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_mangas_returns_collection_contents(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let m1 = common::seed_manga_via_api(&h.app, &cookie, "First").await;
|
||||
let m2 = common::seed_manga_via_api(&h.app, &cookie, "Second").await;
|
||||
let _untagged = common::seed_manga_via_api(&h.app, &cookie, "NotInIt").await;
|
||||
let coll = create_collection(&h.app, &cookie, "Mix").await;
|
||||
let coll_id = id_of(&coll);
|
||||
|
||||
for m in [m1, m2] {
|
||||
let r = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
json!({ "manga_id": m.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(r.status(), StatusCode::CREATED);
|
||||
}
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let titles: Vec<&str> = body["items"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|m| m["title"].as_str().unwrap())
|
||||
.collect();
|
||||
// Newest-added first.
|
||||
assert_eq!(titles, vec!["Second", "First"]);
|
||||
assert_eq!(body["page"]["total"], 2);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn remove_manga_is_idempotent(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "M").await;
|
||||
let coll = create_collection(&h.app, &cookie, "C").await;
|
||||
let coll_id = id_of(&coll);
|
||||
|
||||
let _ = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let first = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::delete_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas/{manga_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(first.status(), StatusCode::NO_CONTENT);
|
||||
// Removing again is still a 204 — DELETE is idempotent.
|
||||
let second = h
|
||||
.app
|
||||
.oneshot(common::delete_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas/{manga_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(second.status(), StatusCode::NO_CONTENT);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn my_collections_for_manga_lists_only_owned_containing(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &a, "X").await;
|
||||
|
||||
let a_coll = create_collection(&h.app, &a, "A's").await;
|
||||
let b_coll = create_collection(&h.app, &b, "B's").await;
|
||||
let a_coll_id = id_of(&a_coll);
|
||||
let b_coll_id = id_of(&b_coll);
|
||||
|
||||
for (coll, cookie) in [(&a_coll_id, &a), (&b_coll_id, &b)] {
|
||||
let _ = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll}/mangas"),
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie(
|
||||
&format!("/api/v1/mangas/{manga_id}/my-collections"),
|
||||
&a,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let ids: Vec<&str> = body["collection_ids"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|v| v.as_str().unwrap())
|
||||
.collect();
|
||||
assert_eq!(ids, vec![a_coll_id.as_str()]);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn patch_collection_updates_name_and_description(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let coll = create_collection(&h.app, &cookie, "Old name").await;
|
||||
let id = id_of(&coll);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::patch_json_with_cookie(
|
||||
&format!("/api/v1/collections/{id}"),
|
||||
json!({ "name": "New name", "description": "Some notes" }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["name"], "New name");
|
||||
assert_eq!(body["description"], "Some notes");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_collection_cascades_attachments(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "M").await;
|
||||
let coll = create_collection(&h.app, &cookie, "C").await;
|
||||
let coll_id = id_of(&coll);
|
||||
|
||||
let _ = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
json!({ "manga_id": manga_id.to_string() }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::delete_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
||||
|
||||
let (count,): (i64,) =
|
||||
sqlx::query_as("SELECT count(*) FROM collection_mangas WHERE collection_id = $1")
|
||||
.bind(Uuid::parse_str(&coll_id).unwrap())
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count, 0, "collection_mangas should cascade-delete with the collection");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_summary_carries_sample_covers_when_mangas_attached(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
|
||||
// Seed a manga with a cover via the upload endpoint so the
|
||||
// cover_image_path column gets populated.
|
||||
let make_metadata = |title: &str| {
|
||||
common::MultipartBuilder::new()
|
||||
.add_json("metadata", json!({ "title": title }))
|
||||
.add_file("cover", "cover.png", "image/png", &common::fake_png_bytes())
|
||||
};
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_multipart_with_cookie(
|
||||
"/api/v1/mangas",
|
||||
make_metadata("With cover"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let manga_id = body["id"].as_str().unwrap().to_string();
|
||||
|
||||
let coll = create_collection(&h.app, &cookie, "Visual").await;
|
||||
let coll_id = id_of(&coll);
|
||||
let r = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/collections/{coll_id}/mangas"),
|
||||
json!({ "manga_id": manga_id }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(r.status(), StatusCode::CREATED);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/collections", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let item = &body["items"][0];
|
||||
assert_eq!(item["manga_count"], 1);
|
||||
let covers = item["sample_covers"].as_array().unwrap();
|
||||
assert_eq!(covers.len(), 1);
|
||||
assert!(covers[0]
|
||||
.as_str()
|
||||
.unwrap()
|
||||
.starts_with(&format!("mangas/{manga_id}/cover")));
|
||||
}
|
||||
405
backend/tests/api_history.rs
Normal file
405
backend/tests/api_history.rs
Normal file
@@ -0,0 +1,405 @@
|
||||
mod common;
|
||||
|
||||
use axum::http::StatusCode;
|
||||
use serde_json::{json, Value};
|
||||
use sqlx::PgPool;
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
use common::MultipartBuilder;
|
||||
|
||||
async fn seed_chapter(app: &axum::Router, cookie: &str, manga_id: Uuid, number: i32) -> String {
|
||||
let resp = app
|
||||
.clone()
|
||||
.oneshot(common::post_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{manga_id}/chapters"),
|
||||
MultipartBuilder::new()
|
||||
.add_json("metadata", json!({ "number": number }))
|
||||
.add_file("page", "1.png", "image/png", &common::fake_png_bytes()),
|
||||
cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||
let body = common::body_json(resp).await;
|
||||
body["id"].as_str().unwrap().to_string()
|
||||
}
|
||||
|
||||
async fn upsert_progress(
|
||||
app: &axum::Router,
|
||||
cookie: &str,
|
||||
body: Value,
|
||||
) -> Value {
|
||||
let resp = app
|
||||
.clone()
|
||||
.oneshot(common::put_json_with_cookie(
|
||||
"/api/v1/me/read-progress",
|
||||
body,
|
||||
cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK, "upsert failed: {:?}", resp.status());
|
||||
common::body_json(resp).await
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn upsert_creates_then_overwrites(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let chapter_id = seed_chapter(&h.app, &cookie, manga_id, 1).await;
|
||||
|
||||
let first = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({ "manga_id": manga_id.to_string(), "chapter_id": chapter_id, "page": 5 }),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(first["manga_id"], manga_id.to_string());
|
||||
assert_eq!(first["page"], 5);
|
||||
|
||||
// A second upsert overwrites the page even when it moves backwards
|
||||
// — re-reading scenarios just take the latest write.
|
||||
let second = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({ "manga_id": manga_id.to_string(), "chapter_id": chapter_id, "page": 1 }),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(second["page"], 1);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn upsert_with_unknown_manga_is_404(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::put_json_with_cookie(
|
||||
"/api/v1/me/read-progress",
|
||||
json!({ "manga_id": Uuid::new_v4().to_string(), "page": 1 }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// The FK violation in repo::upsert is mapped to NotFound.
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn upsert_with_page_zero_is_422(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::put_json_with_cookie(
|
||||
"/api/v1/me/read-progress",
|
||||
json!({ "manga_id": manga_id.to_string(), "page": 0 }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_orders_most_recent_first(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let m1 = common::seed_manga_via_api(&h.app, &cookie, "First").await;
|
||||
let m2 = common::seed_manga_via_api(&h.app, &cookie, "Second").await;
|
||||
|
||||
let _ = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({ "manga_id": m1.to_string(), "page": 1 }),
|
||||
)
|
||||
.await;
|
||||
let _ = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({ "manga_id": m2.to_string(), "page": 1 }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/read-progress", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let titles: Vec<&str> = body["items"]
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|r| r["manga_title"].as_str().unwrap())
|
||||
.collect();
|
||||
// Second was upserted last → it surfaces first.
|
||||
assert_eq!(titles, vec!["Second", "First"]);
|
||||
assert_eq!(body["page"]["total"], 2);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn list_is_per_user_only(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &a, "Berserk").await;
|
||||
let _ = upsert_progress(
|
||||
&h.app,
|
||||
&a,
|
||||
json!({ "manga_id": manga_id.to_string(), "page": 7 }),
|
||||
)
|
||||
.await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/read-progress", &b))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["items"], json!([]));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_single_manga_returns_404_when_unread(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie(
|
||||
&format!("/api/v1/me/read-progress/{manga_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn get_single_manga_returns_progress_after_upsert(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let chapter_id = seed_chapter(&h.app, &cookie, manga_id, 7).await;
|
||||
let _ = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({
|
||||
"manga_id": manga_id.to_string(),
|
||||
"chapter_id": chapter_id,
|
||||
"page": 12
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie(
|
||||
&format!("/api/v1/me/read-progress/{manga_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["page"], 12);
|
||||
// chapter_number is resolved in the same round-trip so the
|
||||
// Continue CTA can render without listing chapters.
|
||||
assert_eq!(body["chapter_number"], 7);
|
||||
assert_eq!(body["chapter_id"], chapter_id);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn upsert_rejects_chapter_from_a_different_manga(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_a = common::seed_manga_via_api(&h.app, &cookie, "A").await;
|
||||
let manga_b = common::seed_manga_via_api(&h.app, &cookie, "B").await;
|
||||
let chapter_of_b = seed_chapter(&h.app, &cookie, manga_b, 1).await;
|
||||
|
||||
// Pair manga A with a chapter from manga B — must be rejected.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::put_json_with_cookie(
|
||||
"/api/v1/me/read-progress",
|
||||
json!({
|
||||
"manga_id": manga_a.to_string(),
|
||||
"chapter_id": chapter_of_b,
|
||||
"page": 1
|
||||
}),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "validation_failed");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_progress_on_never_read_manga_is_204(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Untouched").await;
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::delete_with_cookie(
|
||||
&format!("/api/v1/me/read-progress/{manga_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
// DELETE is idempotent — clearing nothing is still success.
|
||||
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_progress_is_idempotent(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let _ = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({ "manga_id": manga_id.to_string(), "page": 1 }),
|
||||
)
|
||||
.await;
|
||||
for _ in 0..2 {
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::delete_with_cookie(
|
||||
&format!("/api/v1/me/read-progress/{manga_id}"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NO_CONTENT);
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn deleted_chapter_leaves_progress_row_with_null_chapter(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let chapter_id_str = seed_chapter(&h.app, &cookie, manga_id, 1).await;
|
||||
let chapter_id = Uuid::parse_str(&chapter_id_str).unwrap();
|
||||
let _ = upsert_progress(
|
||||
&h.app,
|
||||
&cookie,
|
||||
json!({ "manga_id": manga_id.to_string(), "chapter_id": chapter_id_str, "page": 3 }),
|
||||
)
|
||||
.await;
|
||||
// Delete the chapter directly — the FK ON DELETE SET NULL keeps
|
||||
// the progress row but clears chapter_id.
|
||||
sqlx::query("DELETE FROM chapters WHERE id = $1")
|
||||
.bind(chapter_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/read-progress", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
let item = &body["items"][0];
|
||||
assert!(item["chapter_id"].is_null(), "chapter_id should be null after cascade");
|
||||
assert!(item["chapter_number"].is_null());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn uploads_lists_manga_and_chapter_uploads_interleaved(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
|
||||
// Two manga uploads with covers, then a chapter on one of them.
|
||||
let m1 = common::seed_manga_via_api(&h.app, &cookie, "Alpha").await;
|
||||
let _m2 = common::seed_manga_via_api(&h.app, &cookie, "Beta").await;
|
||||
let _ = seed_chapter(&h.app, &cookie, m1, 1).await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/uploads", &cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
let items = body["items"].as_array().unwrap();
|
||||
assert_eq!(items.len(), 3);
|
||||
// Most recent first; the chapter upload happened after both mangas.
|
||||
assert_eq!(items[0]["kind"], "chapter");
|
||||
assert_eq!(items[1]["kind"], "manga");
|
||||
assert_eq!(items[2]["kind"], "manga");
|
||||
assert_eq!(body["page"]["total"], 3);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn uploads_is_per_user_only(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, a) = common::register_user(&h.app).await;
|
||||
let (_, b) = common::register_user(&h.app).await;
|
||||
let _ = common::seed_manga_via_api(&h.app, &a, "A's manga").await;
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get_with_cookie("/api/v1/me/uploads", &b))
|
||||
.await
|
||||
.unwrap();
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["items"], json!([]));
|
||||
assert_eq!(body["page"]["total"], 0);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn manga_create_stamps_uploaded_by_with_current_user(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Stamped").await;
|
||||
|
||||
let (uploaded_by,): (Option<Uuid>,) =
|
||||
sqlx::query_as("SELECT uploaded_by FROM mangas WHERE id = $1")
|
||||
.bind(manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(uploaded_by.is_some(), "manga.uploaded_by should be set");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn chapter_create_stamps_uploaded_by_with_current_user(pool: PgPool) {
|
||||
let h = common::harness(pool.clone());
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
let chapter_id_str = seed_chapter(&h.app, &cookie, manga_id, 1).await;
|
||||
|
||||
let (uploaded_by,): (Option<Uuid>,) =
|
||||
sqlx::query_as("SELECT uploaded_by FROM chapters WHERE id = $1")
|
||||
.bind(Uuid::parse_str(&chapter_id_str).unwrap())
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(uploaded_by.is_some(), "chapter.uploaded_by should be set");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn read_progress_requires_authentication(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
for path in [
|
||||
"/api/v1/me/read-progress",
|
||||
"/api/v1/me/uploads",
|
||||
] {
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::get(path))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED, "{path} should require auth");
|
||||
}
|
||||
}
|
||||
412
backend/tests/api_mangas_cover.rs
Normal file
412
backend/tests/api_mangas_cover.rs
Normal file
@@ -0,0 +1,412 @@
|
||||
mod common;
|
||||
|
||||
use axum::http::StatusCode;
|
||||
use serde_json::{json, Value};
|
||||
use sqlx::PgPool;
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
use common::{
|
||||
body_json, delete_with_cookie, fake_jpeg_bytes, fake_png_bytes, get, harness,
|
||||
post_multipart_with_cookie, put_multipart, put_multipart_with_cookie, register_user,
|
||||
MultipartBuilder,
|
||||
};
|
||||
|
||||
async fn create_manga_with_cover(
|
||||
app: &axum::Router,
|
||||
cookie: &str,
|
||||
title: &str,
|
||||
cover: Option<(&str, &[u8])>,
|
||||
) -> Value {
|
||||
let mut form =
|
||||
MultipartBuilder::new().add_json("metadata", json!({ "title": title }));
|
||||
if let Some((ct, bytes)) = cover {
|
||||
form = form.add_file("cover", "cover.bin", ct, bytes);
|
||||
}
|
||||
let resp = app
|
||||
.clone()
|
||||
.oneshot(post_multipart_with_cookie("/api/v1/mangas", form, cookie))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
resp.status(),
|
||||
StatusCode::CREATED,
|
||||
"seed create_manga failed: {:?}",
|
||||
resp.status()
|
||||
);
|
||||
body_json(resp).await
|
||||
}
|
||||
|
||||
fn id_of(body: &Value) -> Uuid {
|
||||
Uuid::parse_str(body["id"].as_str().unwrap()).unwrap()
|
||||
}
|
||||
|
||||
fn cover_form(bytes: &[u8]) -> MultipartBuilder {
|
||||
MultipartBuilder::new().add_file("cover", "cover.bin", "application/octet-stream", bytes)
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_sets_path_when_none_existed(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Cover Me", None).await;
|
||||
let id = id_of(&manga);
|
||||
assert!(manga["cover_image_path"].is_null());
|
||||
|
||||
let bytes = fake_png_bytes();
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&bytes),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
let expected_key = format!("mangas/{id}/cover.png");
|
||||
assert_eq!(body["cover_image_path"], expected_key);
|
||||
assert_eq!(body["title"], "Cover Me");
|
||||
|
||||
let file_resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get(&format!("/api/v1/files/{expected_key}")))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(file_resp.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_replaces_existing_same_extension(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let original = fake_png_bytes();
|
||||
let manga = create_manga_with_cover(
|
||||
&h.app,
|
||||
&cookie,
|
||||
"Replace Me",
|
||||
Some(("image/png", &original)),
|
||||
)
|
||||
.await;
|
||||
let id = id_of(&manga);
|
||||
let original_key = format!("mangas/{id}/cover.png");
|
||||
assert_eq!(manga["cover_image_path"], original_key);
|
||||
|
||||
let mut replacement = fake_png_bytes();
|
||||
replacement.extend_from_slice(b"-replacement-marker");
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&replacement),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
assert_eq!(body["cover_image_path"], original_key);
|
||||
|
||||
let file_resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get(&format!("/api/v1/files/{original_key}")))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(file_resp.status(), StatusCode::OK);
|
||||
let body_bytes = http_body_util::BodyExt::collect(file_resp.into_body())
|
||||
.await
|
||||
.unwrap()
|
||||
.to_bytes();
|
||||
assert_eq!(body_bytes.as_ref(), replacement.as_slice());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_replaces_existing_different_extension_and_deletes_old_blob(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let png = fake_png_bytes();
|
||||
let manga = create_manga_with_cover(
|
||||
&h.app,
|
||||
&cookie,
|
||||
"Switch Ext",
|
||||
Some(("image/png", &png)),
|
||||
)
|
||||
.await;
|
||||
let id = id_of(&manga);
|
||||
let old_key = format!("mangas/{id}/cover.png");
|
||||
assert_eq!(manga["cover_image_path"], old_key);
|
||||
|
||||
let jpeg = fake_jpeg_bytes();
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&jpeg),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
let new_key = format!("mangas/{id}/cover.jpg");
|
||||
assert_eq!(body["cover_image_path"], new_key);
|
||||
|
||||
let new_file = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get(&format!("/api/v1/files/{new_key}")))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(new_file.status(), StatusCode::OK);
|
||||
|
||||
let old_file = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get(&format!("/api/v1/files/{old_key}")))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(old_file.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_rejects_unauthenticated(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Public Read", None).await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&fake_png_bytes()),
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_404_on_unknown_id(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let id = Uuid::new_v4();
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&fake_png_bytes()),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_rejects_non_image_with_unsupported_media_type(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Not Image", None).await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
let pdf = b"%PDF-1.4\n%\xc4\xe5".to_vec();
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&pdf),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNSUPPORTED_MEDIA_TYPE);
|
||||
let body = body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "unsupported_media_type");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_rejects_oversized(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Too Big", None).await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
// Harness max_file_bytes is 256 KiB; 300 KiB trips the cap.
|
||||
let mut bytes = fake_png_bytes();
|
||||
bytes.resize(300 * 1024, 0);
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&bytes),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::PAYLOAD_TOO_LARGE);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_rejects_missing_cover_part(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Empty Form", None).await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
MultipartBuilder::new(),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
let body = body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "validation_failed");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn put_cover_preserves_other_metadata(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(
|
||||
&h.app,
|
||||
&cookie,
|
||||
"Keep My Fields",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(put_multipart_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
cover_form(&fake_png_bytes()),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
assert_eq!(body["title"], "Keep My Fields");
|
||||
assert_eq!(body["status"], "ongoing");
|
||||
assert_eq!(body["authors"], json!([]));
|
||||
assert_eq!(body["genres"], json!([]));
|
||||
assert_eq!(body["tags"], json!([]));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_cover_clears_path_and_removes_blob(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let png = fake_png_bytes();
|
||||
let manga = create_manga_with_cover(
|
||||
&h.app,
|
||||
&cookie,
|
||||
"Bye Cover",
|
||||
Some(("image/png", &png)),
|
||||
)
|
||||
.await;
|
||||
let id = id_of(&manga);
|
||||
let key = format!("mangas/{id}/cover.png");
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(delete_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
assert!(body["cover_image_path"].is_null());
|
||||
assert_eq!(body["title"], "Bye Cover");
|
||||
|
||||
let file_resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(get(&format!("/api/v1/files/{key}")))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(file_resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_cover_is_idempotent_when_no_cover_present(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Never Had One", None).await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
for _ in 0..2 {
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(delete_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = body_json(resp).await;
|
||||
assert!(body["cover_image_path"].is_null());
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_cover_rejects_unauthenticated(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let manga = create_manga_with_cover(&h.app, &cookie, "Locked", None).await;
|
||||
let id = id_of(&manga);
|
||||
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(
|
||||
axum::http::Request::builder()
|
||||
.method("DELETE")
|
||||
.uri(format!("/api/v1/mangas/{id}/cover"))
|
||||
.body(axum::body::Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNAUTHORIZED);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn delete_cover_404_on_unknown_id(pool: PgPool) {
|
||||
let h = harness(pool);
|
||||
let (_, cookie) = register_user(&h.app).await;
|
||||
let id = Uuid::new_v4();
|
||||
let resp = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(delete_with_cookie(
|
||||
&format!("/api/v1/mangas/{id}/cover"),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
}
|
||||
@@ -59,6 +59,31 @@ async fn reattach_same_tag_is_idempotent_and_returns_200(pool: PgPool) {
|
||||
assert_eq!(second.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
/// Tag names over 64 chars are rejected at the handler boundary. The
|
||||
/// repo enforces the same cap, but doing it at the handler keeps the
|
||||
/// envelope consistent with the other validation paths
|
||||
/// (username, collection name, etc.).
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn attach_rejects_tag_name_over_64_chars(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
|
||||
let long_name: String = "x".repeat(65);
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::post_json_with_cookie(
|
||||
&format!("/api/v1/mangas/{manga_id}/tags"),
|
||||
json!({ "name": long_name }),
|
||||
&cookie,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::UNPROCESSABLE_ENTITY);
|
||||
let body = common::body_json(resp).await;
|
||||
assert_eq!(body["error"]["code"], "validation_failed");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn tag_names_dedup_case_insensitively(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
|
||||
@@ -139,13 +139,17 @@ async fn files_endpoint_streams_in_multiple_frames(pool: PgPool) {
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::CREATED);
|
||||
let chapter_id = common::body_json(resp).await["id"]
|
||||
.as_str()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
|
||||
// Fetch the page back via the streaming files endpoint.
|
||||
let pages = h
|
||||
.app
|
||||
.clone()
|
||||
.oneshot(common::get(&format!(
|
||||
"/api/v1/mangas/{manga_id}/chapters/1/pages"
|
||||
"/api/v1/mangas/{manga_id}/chapters/{chapter_id}/pages"
|
||||
)))
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -317,8 +321,12 @@ async fn create_chapter_rejects_renamed_non_image_page(pool: PgPool) {
|
||||
assert_eq!(body["error"]["code"], "unsupported_media_type");
|
||||
}
|
||||
|
||||
/// Multiple chapters can share the same number — different
|
||||
/// scanlations, re-uploads, translator notes. As of migration 0013,
|
||||
/// (manga_id, number) is not unique and each upload gets its own
|
||||
/// chapter id.
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn create_chapter_returns_409_on_duplicate_number(pool: PgPool) {
|
||||
async fn create_chapter_allows_duplicate_numbers_as_separate_chapters(pool: PgPool) {
|
||||
let h = common::harness(pool);
|
||||
let (_, cookie) = common::register_user(&h.app).await;
|
||||
let manga_id = common::seed_manga_via_api(&h.app, &cookie, "Berserk").await;
|
||||
@@ -334,10 +342,27 @@ async fn create_chapter_returns_409_on_duplicate_number(pool: PgPool) {
|
||||
};
|
||||
let first = h.app.clone().oneshot(make()).await.unwrap();
|
||||
assert_eq!(first.status(), StatusCode::CREATED);
|
||||
let second = h.app.oneshot(make()).await.unwrap();
|
||||
assert_eq!(second.status(), StatusCode::CONFLICT);
|
||||
let body = common::body_json(second).await;
|
||||
assert_eq!(body["error"]["code"], "conflict");
|
||||
let first_id = common::body_json(first).await["id"].as_str().unwrap().to_string();
|
||||
|
||||
let second = h.app.clone().oneshot(make()).await.unwrap();
|
||||
assert_eq!(second.status(), StatusCode::CREATED);
|
||||
let second_id = common::body_json(second).await["id"].as_str().unwrap().to_string();
|
||||
|
||||
assert_ne!(first_id, second_id, "each upload gets a distinct chapter id");
|
||||
|
||||
// List endpoint surfaces both rows.
|
||||
let resp = h
|
||||
.app
|
||||
.oneshot(common::get(&format!("/api/v1/mangas/{manga_id}/chapters")))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), StatusCode::OK);
|
||||
let body = common::body_json(resp).await;
|
||||
let items = body["items"].as_array().unwrap();
|
||||
assert_eq!(items.len(), 2, "both Ch.1 uploads listed separately");
|
||||
for item in items {
|
||||
assert_eq!(item["number"], 1);
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
|
||||
@@ -192,6 +192,20 @@ pub fn patch_json_with_cookie(
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn put_json_with_cookie(
|
||||
uri: &str,
|
||||
body: serde_json::Value,
|
||||
cookie: &str,
|
||||
) -> Request<Body> {
|
||||
Request::builder()
|
||||
.method("PUT")
|
||||
.uri(uri)
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.header(header::COOKIE, cookie)
|
||||
.body(Body::from(body.to_string()))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn delete_with_cookie(uri: &str, cookie: &str) -> Request<Body> {
|
||||
Request::builder()
|
||||
.method("DELETE")
|
||||
@@ -322,6 +336,37 @@ pub fn post_multipart_with_cookie(
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn put_multipart_with_cookie(
|
||||
uri: &str,
|
||||
builder: MultipartBuilder,
|
||||
cookie: &str,
|
||||
) -> Request<Body> {
|
||||
let (boundary, body) = builder.finalize();
|
||||
Request::builder()
|
||||
.method("PUT")
|
||||
.uri(uri)
|
||||
.header(
|
||||
header::CONTENT_TYPE,
|
||||
format!("multipart/form-data; boundary={boundary}"),
|
||||
)
|
||||
.header(header::COOKIE, cookie)
|
||||
.body(Body::from(body))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn put_multipart(uri: &str, builder: MultipartBuilder) -> Request<Body> {
|
||||
let (boundary, body) = builder.finalize();
|
||||
Request::builder()
|
||||
.method("PUT")
|
||||
.uri(uri)
|
||||
.header(
|
||||
header::CONTENT_TYPE,
|
||||
format!("multipart/form-data; boundary={boundary}"),
|
||||
)
|
||||
.body(Body::from(body))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Realistic PNG file header bytes — enough for `infer` to identify.
|
||||
pub fn fake_png_bytes() -> Vec<u8> {
|
||||
vec![0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0, 0, 0, 0]
|
||||
|
||||
157
backend/tests/crawler_browser_smoke.rs
Normal file
157
backend/tests/crawler_browser_smoke.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
//! Smoke test for the Chromium launcher.
|
||||
//!
|
||||
//! Marked `#[ignore]` because it (a) downloads ~150 MB of Chromium on
|
||||
//! first run via the `fetcher` feature and (b) requires a real `$DISPLAY`
|
||||
//! for the headed path. Run it explicitly:
|
||||
//!
|
||||
//! ```sh
|
||||
//! cargo test --test crawler_browser_smoke -- --ignored --nocapture
|
||||
//! ```
|
||||
//!
|
||||
//! Override the cache location with `CRAWLER_CHROMIUM_DIR=/some/path` if
|
||||
//! `$HOME/.cache/mangalord/chromium` isn't writable.
|
||||
|
||||
use mangalord::crawler::browser::{self, LaunchOptions};
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "downloads Chromium and needs a display; run with --ignored"]
|
||||
async fn headed_browser_can_navigate_and_read_title() {
|
||||
// A data URL avoids any network dependency — we're testing the
|
||||
// browser launcher, not connectivity.
|
||||
const PAGE: &str = "data:text/html,<html><head><title>Mangalord%20Smoke</title></head><body>OK</body></html>";
|
||||
|
||||
let handle = browser::launch(LaunchOptions::headed())
|
||||
.await
|
||||
.expect("launch headed chromium");
|
||||
|
||||
let page = handle
|
||||
.browser()
|
||||
.new_page(PAGE)
|
||||
.await
|
||||
.expect("open new page");
|
||||
page.wait_for_navigation()
|
||||
.await
|
||||
.expect("wait for navigation");
|
||||
|
||||
let title = page.get_title().await.expect("get title");
|
||||
assert_eq!(title.as_deref(), Some("Mangalord Smoke"));
|
||||
|
||||
handle.close().await.expect("close cleanly");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore = "downloads Chromium; run with --ignored"]
|
||||
async fn headless_browser_can_navigate_and_read_title() {
|
||||
const PAGE: &str = "data:text/html,<html><head><title>Headless%20OK</title></head><body></body></html>";
|
||||
|
||||
let handle = browser::launch(LaunchOptions::headless())
|
||||
.await
|
||||
.expect("launch headless chromium");
|
||||
|
||||
let page = handle.browser().new_page(PAGE).await.expect("open new page");
|
||||
page.wait_for_navigation().await.expect("wait for navigation");
|
||||
|
||||
let title = page.get_title().await.expect("get title");
|
||||
assert_eq!(title.as_deref(), Some("Headless OK"));
|
||||
|
||||
handle.close().await.expect("close cleanly");
|
||||
}
|
||||
|
||||
/// Live end-to-end: navigate to a real page, get the rendered HTML, and
|
||||
/// parse it with `scraper`. ipify.org renders the visitor's public IP
|
||||
/// into the page DOM, so a successful run proves browser → render →
|
||||
/// `Html::parse_document` → selector → text extraction all work
|
||||
/// against a real site. This is the same path each future `Source`
|
||||
/// impl will take.
|
||||
#[tokio::test]
|
||||
#[ignore = "needs network; run with --ignored"]
|
||||
async fn fetches_public_ip_from_ipify() {
|
||||
use std::time::Duration;
|
||||
|
||||
let handle = browser::launch(LaunchOptions::headless())
|
||||
.await
|
||||
.expect("launch headless chromium");
|
||||
|
||||
let page = handle
|
||||
.browser()
|
||||
.new_page("https://www.ipify.org")
|
||||
.await
|
||||
.expect("open ipify");
|
||||
page.wait_for_navigation().await.expect("wait for navigation");
|
||||
// ipify injects the IP via JS after load, so the navigation event
|
||||
// alone isn't enough — give the script a beat to run.
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
|
||||
let html = page.content().await.expect("get rendered html");
|
||||
let doc = scraper::Html::parse_document(&html);
|
||||
let body_sel = scraper::Selector::parse("body").unwrap();
|
||||
let body_text: String = doc
|
||||
.select(&body_sel)
|
||||
.next()
|
||||
.map(|n| n.text().collect::<Vec<_>>().join(" "))
|
||||
.unwrap_or_default();
|
||||
|
||||
let ip = extract_ipv4(&body_text)
|
||||
.unwrap_or_else(|| panic!("no IPv4 found in ipify body: {body_text}"));
|
||||
eprintln!("ipify says our public IP is: {ip}");
|
||||
|
||||
handle.close().await.expect("close cleanly");
|
||||
}
|
||||
|
||||
/// Proves that `LaunchOptions::extra_args` actually reach Chromium and
|
||||
/// influence its runtime. `--user-agent=...` overrides `navigator.userAgent`,
|
||||
/// observable from JS — read it back via `page.evaluate`.
|
||||
#[tokio::test]
|
||||
#[ignore = "downloads Chromium; run with --ignored"]
|
||||
async fn extra_args_reach_chromium() {
|
||||
const UA: &str = "MangalordCrawlerTest/1.0";
|
||||
let options = LaunchOptions {
|
||||
mode: browser::BrowserMode::Headless,
|
||||
extra_args: vec![format!("--user-agent={UA}")],
|
||||
};
|
||||
let handle = browser::launch(options).await.expect("launch with extra args");
|
||||
|
||||
let page = handle
|
||||
.browser()
|
||||
.new_page("about:blank")
|
||||
.await
|
||||
.expect("open page");
|
||||
page.wait_for_navigation().await.expect("wait");
|
||||
|
||||
let ua: String = page
|
||||
.evaluate("navigator.userAgent")
|
||||
.await
|
||||
.expect("evaluate navigator.userAgent")
|
||||
.into_value()
|
||||
.expect("string value");
|
||||
assert_eq!(
|
||||
ua, UA,
|
||||
"extra --user-agent flag should override navigator.userAgent"
|
||||
);
|
||||
|
||||
handle.close().await.expect("close cleanly");
|
||||
}
|
||||
|
||||
/// Tiny dotted-quad finder — avoids pulling `regex` in just for one
/// test. Scans for the first valid IPv4 substring (four 0..=255 octets
/// separated by dots) and returns it, or `None` when there is none.
///
/// Every maximal run of ASCII digits and dots in `s` is one candidate.
/// Dots at either end of a run are trimmed before validation, so an
/// address that closes a sentence ("… is 203.0.113.7.") is still found.
/// A run with more or fewer than four parts, an empty part, or a part
/// above 255 is rejected.
fn extract_ipv4(s: &str) -> Option<String> {
    s.split(|c: char| !(c.is_ascii_digit() || c == '.'))
        // Trailing punctuation is not part of the address.
        .map(|run| run.trim_matches('.'))
        .find(|candidate| {
            let parts: Vec<&str> = candidate.split('.').collect();
            // `u8` parsing enforces both "digits only" and the 0..=255 range.
            parts.len() == 4 && parts.iter().all(|p| p.parse::<u8>().is_ok())
        })
        .map(str::to_string)
}
|
||||
372
backend/tests/crawler_daemon.rs
Normal file
372
backend/tests/crawler_daemon.rs
Normal file
@@ -0,0 +1,372 @@
|
||||
//! Integration tests for the crawler daemon's cron + worker pool. The
|
||||
//! daemon's full real path requires Chromium and a live source; here we
|
||||
//! test the seam (MetadataPass / ChapterDispatcher traits) and the
|
||||
//! cron/worker control-flow.
|
||||
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::NaiveTime;
|
||||
use chrono_tz::Tz;
|
||||
use mangalord::crawler::content::SyncOutcome;
|
||||
use mangalord::crawler::daemon::{
|
||||
self, test_support::CountingMetadataPass, ChapterDispatcher, DaemonConfig, MetadataPass,
|
||||
CRON_LOCK_KEY,
|
||||
};
|
||||
use mangalord::crawler::jobs::{self, JobPayload};
|
||||
use mangalord::crawler::pipeline;
|
||||
use serde_json::json;
|
||||
use sqlx::PgPool;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn far_future_daily_at() -> NaiveTime {
|
||||
// Some time hours from "now" so the scheduler sleeps for the whole test.
|
||||
NaiveTime::from_hms_opt(23, 59, 0).unwrap()
|
||||
}
|
||||
|
||||
fn make_cfg(
|
||||
metadata_pass: Option<Arc<dyn MetadataPass>>,
|
||||
dispatcher: Arc<dyn ChapterDispatcher>,
|
||||
session_expired: Arc<std::sync::atomic::AtomicBool>,
|
||||
workers: usize,
|
||||
) -> DaemonConfig {
|
||||
DaemonConfig {
|
||||
metadata_pass,
|
||||
dispatcher,
|
||||
chapter_workers: workers,
|
||||
daily_at: far_future_daily_at(),
|
||||
tz: Tz::UTC,
|
||||
retention_days: 7,
|
||||
session_expired,
|
||||
extra_tasks: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn enqueue_chapter_job(pool: &PgPool) -> Uuid {
|
||||
let chapter_id = Uuid::new_v4();
|
||||
let payload = JobPayload::SyncChapterContent {
|
||||
source_id: "target".into(),
|
||||
chapter_id,
|
||||
source_chapter_key: format!("ch-{chapter_id}"),
|
||||
};
|
||||
let res = jobs::enqueue(pool, &payload).await.unwrap();
|
||||
match res {
|
||||
jobs::EnqueueResult::Inserted(_) => chapter_id,
|
||||
jobs::EnqueueResult::Skipped => unreachable!("fresh chapter_id"),
|
||||
}
|
||||
}
|
||||
|
||||
async fn count_state(pool: &PgPool, state: &str) -> i64 {
|
||||
sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM crawler_jobs WHERE state = $1")
|
||||
.bind(state)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
struct AlwaysDoneDispatcher {
|
||||
seen: AtomicUsize,
|
||||
}
|
||||
#[async_trait::async_trait]
|
||||
impl ChapterDispatcher for AlwaysDoneDispatcher {
|
||||
async fn dispatch(&self, _payload: JobPayload) -> anyhow::Result<SyncOutcome> {
|
||||
self.seen.fetch_add(1, Ordering::AcqRel);
|
||||
Ok(SyncOutcome::Fetched { pages: 1 })
|
||||
}
|
||||
}
|
||||
|
||||
struct PanickingDispatcher {
|
||||
seen: AtomicUsize,
|
||||
}
|
||||
#[async_trait::async_trait]
|
||||
impl ChapterDispatcher for PanickingDispatcher {
|
||||
async fn dispatch(&self, _payload: JobPayload) -> anyhow::Result<SyncOutcome> {
|
||||
self.seen.fetch_add(1, Ordering::AcqRel);
|
||||
panic!("intentional dispatcher panic");
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn workers_drain_jobs_through_dispatcher(pool: PgPool) {
|
||||
enqueue_chapter_job(&pool).await;
|
||||
enqueue_chapter_job(&pool).await;
|
||||
enqueue_chapter_job(&pool).await;
|
||||
|
||||
let dispatcher = Arc::new(AlwaysDoneDispatcher {
|
||||
seen: AtomicUsize::new(0),
|
||||
});
|
||||
let session_expired = Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let cancel = CancellationToken::new();
|
||||
let handle = daemon::spawn(
|
||||
pool.clone(),
|
||||
cancel.clone(),
|
||||
make_cfg(None, dispatcher.clone(), session_expired, 2),
|
||||
);
|
||||
|
||||
// Wait for the workers to drain all three jobs.
|
||||
let dispatcher_seen = || dispatcher.seen.load(Ordering::Acquire);
|
||||
for _ in 0..40 {
|
||||
if dispatcher_seen() >= 3 {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
assert!(
|
||||
dispatcher_seen() >= 3,
|
||||
"expected at least 3 dispatches, got {}",
|
||||
dispatcher_seen()
|
||||
);
|
||||
|
||||
handle.shutdown().await;
|
||||
assert_eq!(count_state(&pool, "done").await, 3);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn workers_idle_while_session_expired(pool: PgPool) {
|
||||
let id = enqueue_chapter_job(&pool).await;
|
||||
let dispatcher = Arc::new(AlwaysDoneDispatcher {
|
||||
seen: AtomicUsize::new(0),
|
||||
});
|
||||
let session_expired = Arc::new(std::sync::atomic::AtomicBool::new(true));
|
||||
let cancel = CancellationToken::new();
|
||||
let handle = daemon::spawn(
|
||||
pool.clone(),
|
||||
cancel.clone(),
|
||||
make_cfg(None, dispatcher.clone(), Arc::clone(&session_expired), 1),
|
||||
);
|
||||
|
||||
// Wait long enough that a non-idled worker would have leased and ack'd.
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
assert_eq!(
|
||||
dispatcher.seen.load(Ordering::Acquire),
|
||||
0,
|
||||
"dispatcher must not be invoked while session_expired flag is set"
|
||||
);
|
||||
assert_eq!(count_state(&pool, "pending").await, 1);
|
||||
let _ = id;
|
||||
|
||||
handle.shutdown().await;
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn dispatcher_panic_is_contained_and_job_is_acked_failed(pool: PgPool) {
|
||||
enqueue_chapter_job(&pool).await;
|
||||
enqueue_chapter_job(&pool).await;
|
||||
|
||||
let dispatcher = Arc::new(PanickingDispatcher {
|
||||
seen: AtomicUsize::new(0),
|
||||
});
|
||||
let session_expired = Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let cancel = CancellationToken::new();
|
||||
let handle = daemon::spawn(
|
||||
pool.clone(),
|
||||
cancel.clone(),
|
||||
make_cfg(None, dispatcher.clone(), session_expired, 1),
|
||||
);
|
||||
|
||||
// Wait for the worker to handle both panicking jobs.
|
||||
for _ in 0..40 {
|
||||
if dispatcher.seen.load(Ordering::Acquire) >= 2 {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
assert!(
|
||||
dispatcher.seen.load(Ordering::Acquire) >= 2,
|
||||
"worker must keep going after a panic — handled at least 2 jobs"
|
||||
);
|
||||
|
||||
handle.shutdown().await;
|
||||
|
||||
// attempts=1 below max=5, so the panicking jobs go back to pending with
|
||||
// backoff and `last_error = "worker panicked"`.
|
||||
let last_errors: Vec<String> = sqlx::query_scalar(
|
||||
"SELECT last_error FROM crawler_jobs WHERE last_error IS NOT NULL",
|
||||
)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(last_errors.len(), 2);
|
||||
assert!(last_errors.iter().all(|e| e == "worker panicked"));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn cron_skips_tick_when_advisory_lock_held(pool: PgPool) {
|
||||
// With no last_metadata_tick_at row, the daemon does a catch-up tick
|
||||
// immediately on spawn. We hold the advisory lock on a separate
|
||||
// connection beforehand so the catch-up's pg_try_advisory_lock returns
|
||||
// false and the tick must skip without invoking the metadata pass.
|
||||
let mut lock_conn = pool.acquire().await.unwrap();
|
||||
sqlx::query("SELECT pg_advisory_lock($1)")
|
||||
.bind(CRON_LOCK_KEY)
|
||||
.execute(&mut *lock_conn)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let counter = Arc::new(CountingMetadataPass::default());
|
||||
let dispatcher = Arc::new(AlwaysDoneDispatcher {
|
||||
seen: AtomicUsize::new(0),
|
||||
});
|
||||
let session_expired = Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let cancel = CancellationToken::new();
|
||||
// daily_at far in the future so after the (skipped) catch-up the
|
||||
// cron sleeps for the rest of the test rather than racing for the lock.
|
||||
let cfg = make_cfg(
|
||||
Some(counter.clone() as Arc<dyn MetadataPass>),
|
||||
dispatcher,
|
||||
session_expired,
|
||||
1,
|
||||
);
|
||||
let handle = daemon::spawn(pool.clone(), cancel.clone(), cfg);
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
assert_eq!(
|
||||
counter.count.load(Ordering::Acquire),
|
||||
0,
|
||||
"cron must skip the catch-up tick while the advisory lock is held"
|
||||
);
|
||||
|
||||
sqlx::query("SELECT pg_advisory_unlock($1)")
|
||||
.bind(CRON_LOCK_KEY)
|
||||
.execute(&mut *lock_conn)
|
||||
.await
|
||||
.unwrap();
|
||||
drop(lock_conn);
|
||||
|
||||
handle.shutdown().await;
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn cron_catches_up_when_last_tick_is_stale(pool: PgPool) {
|
||||
// Pre-seed last_metadata_tick_at well in the past so previous_fire(now)
|
||||
// > last_tick is trivially true and the daemon catches up immediately.
|
||||
sqlx::query(
|
||||
"INSERT INTO crawler_state (key, value) VALUES ($1, $2)
|
||||
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value",
|
||||
)
|
||||
.bind("last_metadata_tick_at")
|
||||
.bind(json!({"at": "2020-01-01T00:00:00Z"}))
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let counter = Arc::new(CountingMetadataPass::default());
|
||||
let dispatcher = Arc::new(AlwaysDoneDispatcher {
|
||||
seen: AtomicUsize::new(0),
|
||||
});
|
||||
let session_expired = Arc::new(std::sync::atomic::AtomicBool::new(false));
|
||||
let cancel = CancellationToken::new();
|
||||
let handle = daemon::spawn(
|
||||
pool.clone(),
|
||||
cancel.clone(),
|
||||
make_cfg(
|
||||
Some(counter.clone() as Arc<dyn MetadataPass>),
|
||||
dispatcher,
|
||||
session_expired,
|
||||
1,
|
||||
),
|
||||
);
|
||||
|
||||
for _ in 0..40 {
|
||||
if counter.count.load(Ordering::Acquire) >= 1 {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
assert!(
|
||||
counter.count.load(Ordering::Acquire) >= 1,
|
||||
"catch-up tick should have fired immediately"
|
||||
);
|
||||
|
||||
handle.shutdown().await;
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn enqueue_bookmarked_pending_skips_dropped_sources(pool: PgPool) {
|
||||
// Setup: one manga with two chapters (page_count = 0). One has a
|
||||
// non-dropped source; the other's source is dropped. A user bookmarks
|
||||
// the manga. Expectation: only the non-dropped chapter is enqueued.
|
||||
let user_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO users (username, password_hash) VALUES ($1, $2) RETURNING id",
|
||||
)
|
||||
.bind("alice")
|
||||
.bind("not-a-real-hash")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let manga_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO mangas (title) VALUES ($1) RETURNING id",
|
||||
)
|
||||
.bind("Berserk")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO sources (id, name, base_url) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING")
|
||||
.bind("target")
|
||||
.bind("Target")
|
||||
.bind("https://example.com")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let c1: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, 1, 0) RETURNING id",
|
||||
)
|
||||
.bind(manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let c2: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO chapters (manga_id, number, page_count) VALUES ($1, 2, 0) RETURNING id",
|
||||
)
|
||||
.bind(manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
// c1: alive source. c2: dropped source.
|
||||
sqlx::query(
|
||||
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url) \
|
||||
VALUES ($1, $2, $3, $4)",
|
||||
)
|
||||
.bind("target")
|
||||
.bind("ch1")
|
||||
.bind(c1)
|
||||
.bind("https://example.com/ch1")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query(
|
||||
"INSERT INTO chapter_sources (source_id, source_chapter_key, chapter_id, source_url, dropped_at) \
|
||||
VALUES ($1, $2, $3, $4, now())",
|
||||
)
|
||||
.bind("target")
|
||||
.bind("ch2")
|
||||
.bind(c2)
|
||||
.bind("https://example.com/ch2")
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("INSERT INTO bookmarks (user_id, manga_id) VALUES ($1, $2)")
|
||||
.bind(user_id)
|
||||
.bind(manga_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let summary = pipeline::enqueue_bookmarked_pending(&pool).await.unwrap();
|
||||
assert_eq!(summary.inserted, 1, "only the non-dropped chapter enqueued");
|
||||
assert_eq!(summary.skipped, 0);
|
||||
let payloads: Vec<serde_json::Value> = sqlx::query_scalar(
|
||||
"SELECT payload FROM crawler_jobs WHERE payload->>'kind' = 'sync_chapter_content'",
|
||||
)
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(payloads.len(), 1);
|
||||
assert_eq!(
|
||||
payloads[0]["chapter_id"].as_str().unwrap(),
|
||||
c1.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
85
backend/tests/crawler_incremental.rs
Normal file
85
backend/tests/crawler_incremental.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
//! Integration tests for the incremental-mode coordination state:
|
||||
//! `mark_seed_completed` / `seed_completed_at` round-trip via the
|
||||
//! `crawler_state` table.
|
||||
//!
|
||||
//! End-to-end pipeline behavior (walker + stop-on-Unchanged) requires
|
||||
//! a real `chromiumoxide::Browser` to construct a `FetchContext`, so
|
||||
//! the live integration of that path is covered by
|
||||
//! `crawler_browser_smoke.rs` instead. The pure stop logic itself is
|
||||
//! unit-tested in `crawler::pipeline::tests`.
|
||||
|
||||
use chrono::Utc;
|
||||
use mangalord::repo::crawler;
|
||||
use sqlx::PgPool;
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn seed_completed_at_none_before_any_run(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let res = crawler::seed_completed_at(&pool, "target").await.unwrap();
|
||||
assert!(res.is_none(), "fresh source has no seed marker");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn mark_seed_completed_then_read_round_trips_timestamp(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let at = Utc::now();
|
||||
crawler::mark_seed_completed(&pool, "target", at)
|
||||
.await
|
||||
.unwrap();
|
||||
let read = crawler::seed_completed_at(&pool, "target")
|
||||
.await
|
||||
.unwrap()
|
||||
.expect("marker present after mark");
|
||||
// RFC3339 round-trip is millisecond-precise on chrono::Utc; allow a
|
||||
// 1ms tolerance to absorb postgres jsonb whitespace canonicalization.
|
||||
let drift = (read - at).num_milliseconds().abs();
|
||||
assert!(drift <= 1, "round-trip drift: {drift}ms (at={at}, read={read})");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn mark_seed_completed_overwrites_previous_value(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let first = Utc::now() - chrono::Duration::hours(1);
|
||||
let second = Utc::now();
|
||||
crawler::mark_seed_completed(&pool, "target", first)
|
||||
.await
|
||||
.unwrap();
|
||||
crawler::mark_seed_completed(&pool, "target", second)
|
||||
.await
|
||||
.unwrap();
|
||||
let read = crawler::seed_completed_at(&pool, "target")
|
||||
.await
|
||||
.unwrap()
|
||||
.expect("marker present");
|
||||
let drift = (read - second).num_milliseconds().abs();
|
||||
assert!(drift <= 1, "should reflect the latest mark, not the first");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn seed_completed_is_per_source(pool: PgPool) {
|
||||
// Two sources, only one is marked complete. The other must still
|
||||
// report None — the key is namespaced by source_id.
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
crawler::ensure_source(&pool, "other", "O", "https://y.example")
|
||||
.await
|
||||
.unwrap();
|
||||
crawler::mark_seed_completed(&pool, "target", Utc::now())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(crawler::seed_completed_at(&pool, "target")
|
||||
.await
|
||||
.unwrap()
|
||||
.is_some());
|
||||
assert!(crawler::seed_completed_at(&pool, "other")
|
||||
.await
|
||||
.unwrap()
|
||||
.is_none());
|
||||
}
|
||||
441
backend/tests/crawler_jobs.rs
Normal file
441
backend/tests/crawler_jobs.rs
Normal file
@@ -0,0 +1,441 @@
|
||||
//! Integration tests for `crawler::jobs` queue operations.
|
||||
//!
|
||||
//! Uses `#[sqlx::test(migrations = "./migrations")]` which provisions a fresh
|
||||
//! migrated DB per test. No browser, no axum router — these exercise the SQL
|
||||
//! shape and dedup-index semantics directly against Postgres.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use mangalord::crawler::jobs::{
|
||||
self, EnqueueResult, JobPayload, KIND_SYNC_CHAPTER_CONTENT,
|
||||
};
|
||||
use mangalord::crawler::source::DiscoverMode;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn chapter_content_payload(chapter_id: Uuid) -> JobPayload {
|
||||
JobPayload::SyncChapterContent {
|
||||
source_id: "target".into(),
|
||||
chapter_id,
|
||||
source_chapter_key: format!("ch-{chapter_id}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn discover_payload() -> JobPayload {
|
||||
JobPayload::Discover {
|
||||
source_id: "target".into(),
|
||||
mode: DiscoverMode::Backfill,
|
||||
}
|
||||
}
|
||||
|
||||
async fn job_state(pool: &PgPool, id: Uuid) -> String {
|
||||
sqlx::query_scalar::<_, String>("SELECT state FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
async fn job_attempts(pool: &PgPool, id: Uuid) -> i32 {
|
||||
sqlx::query_scalar::<_, i32>("SELECT attempts FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
async fn job_count(pool: &PgPool) -> i64 {
|
||||
sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM crawler_jobs")
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn enqueue_inserts_pending_row_with_round_trip_payload(pool: PgPool) {
|
||||
let chapter_id = Uuid::new_v4();
|
||||
let payload = chapter_content_payload(chapter_id);
|
||||
|
||||
let result = jobs::enqueue(&pool, &payload).await.unwrap();
|
||||
let id = match result {
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
EnqueueResult::Skipped => panic!("expected Inserted on first enqueue"),
|
||||
};
|
||||
|
||||
assert_eq!(job_state(&pool, id).await, "pending");
|
||||
assert_eq!(job_attempts(&pool, id).await, 0);
|
||||
|
||||
let raw_payload: serde_json::Value =
|
||||
sqlx::query_scalar("SELECT payload FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let decoded: JobPayload = serde_json::from_value(raw_payload).unwrap();
|
||||
match decoded {
|
||||
JobPayload::SyncChapterContent {
|
||||
source_id,
|
||||
chapter_id: c,
|
||||
source_chapter_key,
|
||||
} => {
|
||||
assert_eq!(source_id, "target");
|
||||
assert_eq!(c, chapter_id);
|
||||
assert_eq!(source_chapter_key, format!("ch-{chapter_id}"));
|
||||
}
|
||||
_ => panic!("payload variant mismatch"),
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn duplicate_chapter_content_while_pending_is_skipped(pool: PgPool) {
|
||||
let chapter_id = Uuid::new_v4();
|
||||
let p = chapter_content_payload(chapter_id);
|
||||
|
||||
let first = jobs::enqueue(&pool, &p).await.unwrap();
|
||||
assert!(matches!(first, EnqueueResult::Inserted(_)));
|
||||
|
||||
let second = jobs::enqueue(&pool, &p).await.unwrap();
|
||||
assert!(matches!(second, EnqueueResult::Skipped));
|
||||
|
||||
assert_eq!(job_count(&pool).await, 1);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn duplicate_after_done_releases_dedup_slot(pool: PgPool) {
|
||||
let chapter_id = Uuid::new_v4();
|
||||
let p = chapter_content_payload(chapter_id);
|
||||
|
||||
let first_id = match jobs::enqueue(&pool, &p).await.unwrap() {
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
EnqueueResult::Skipped => panic!("first enqueue should insert"),
|
||||
};
|
||||
// Move the first job out of (pending|running) so the partial index drops it.
|
||||
sqlx::query("UPDATE crawler_jobs SET state = 'done' WHERE id = $1")
|
||||
.bind(first_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let second = jobs::enqueue(&pool, &p).await.unwrap();
|
||||
assert!(
|
||||
matches!(second, EnqueueResult::Inserted(_)),
|
||||
"after done the chapter_id slot is free again"
|
||||
);
|
||||
assert_eq!(job_count(&pool).await, 2);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn different_chapter_ids_can_coexist(pool: PgPool) {
|
||||
let p1 = chapter_content_payload(Uuid::new_v4());
|
||||
let p2 = chapter_content_payload(Uuid::new_v4());
|
||||
assert!(matches!(
|
||||
jobs::enqueue(&pool, &p1).await.unwrap(),
|
||||
EnqueueResult::Inserted(_)
|
||||
));
|
||||
assert!(matches!(
|
||||
jobs::enqueue(&pool, &p2).await.unwrap(),
|
||||
EnqueueResult::Inserted(_)
|
||||
));
|
||||
assert_eq!(job_count(&pool).await, 2);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn non_chapter_content_payloads_are_never_deduped(pool: PgPool) {
|
||||
let p = discover_payload();
|
||||
assert!(matches!(
|
||||
jobs::enqueue(&pool, &p).await.unwrap(),
|
||||
EnqueueResult::Inserted(_)
|
||||
));
|
||||
assert!(matches!(
|
||||
jobs::enqueue(&pool, &p).await.unwrap(),
|
||||
EnqueueResult::Inserted(_)
|
||||
));
|
||||
assert_eq!(job_count(&pool).await, 2);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn lease_marks_running_and_bumps_attempts_and_sets_leased_until(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
EnqueueResult::Skipped => unreachable!(),
|
||||
};
|
||||
|
||||
let leases = jobs::lease(&pool, None, 10, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(leases.len(), 1);
|
||||
let lease = &leases[0];
|
||||
assert_eq!(lease.id, id);
|
||||
assert_eq!(lease.attempts, 1);
|
||||
|
||||
assert_eq!(job_state(&pool, id).await, "running");
|
||||
|
||||
let leased_until: Option<chrono::DateTime<chrono::Utc>> =
|
||||
sqlx::query_scalar("SELECT leased_until FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let leased_until = leased_until.expect("leased_until set");
|
||||
assert!(leased_until > chrono::Utc::now());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn lease_with_kind_filter_only_matches_that_kind(pool: PgPool) {
|
||||
let discover_id = match jobs::enqueue(&pool, &discover_payload()).await.unwrap() {
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let chapter_id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let leases = jobs::lease(
|
||||
&pool,
|
||||
Some(KIND_SYNC_CHAPTER_CONTENT),
|
||||
10,
|
||||
Duration::from_secs(60),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(leases.len(), 1, "only chapter content payload leases");
|
||||
assert_eq!(leases[0].id, chapter_id);
|
||||
// discover is still pending
|
||||
assert_eq!(job_state(&pool, discover_id).await, "pending");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn concurrent_leases_under_skip_locked_return_disjoint_ids(pool: PgPool) {
|
||||
// 4 pending jobs, two concurrent calls each asking for up to 2.
|
||||
let mut ids = Vec::new();
|
||||
for _ in 0..4 {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ids.push(id);
|
||||
}
|
||||
|
||||
let (a, b) = tokio::join!(
|
||||
jobs::lease(&pool, None, 2, Duration::from_secs(60)),
|
||||
jobs::lease(&pool, None, 2, Duration::from_secs(60)),
|
||||
);
|
||||
let a = a.unwrap();
|
||||
let b = b.unwrap();
|
||||
let mut seen: Vec<Uuid> = a.iter().chain(b.iter()).map(|l| l.id).collect();
|
||||
seen.sort();
|
||||
seen.dedup();
|
||||
let count = a.len() + b.len();
|
||||
assert_eq!(
|
||||
seen.len(),
|
||||
count,
|
||||
"no id appears in both lease results (SKIP LOCKED)"
|
||||
);
|
||||
assert!(count >= 2, "at least one lease saw work");
|
||||
assert!(count <= 4);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn stale_running_lease_can_be_reclaimed(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let first = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(first.len(), 1);
|
||||
// Pretend the worker crashed: rewind leased_until into the past.
|
||||
sqlx::query("UPDATE crawler_jobs SET leased_until = now() - interval '1 minute' WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let second = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(second.len(), 1, "stale running row was re-leased");
|
||||
assert_eq!(second[0].id, id);
|
||||
assert_eq!(second[0].attempts, 2, "attempts bumped again");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn ack_done_transitions_state_and_clears_lease(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
jobs::ack_done(&pool, leases[0].id).await.unwrap();
|
||||
|
||||
assert_eq!(job_state(&pool, id).await, "done");
|
||||
let leased_until: Option<chrono::DateTime<chrono::Utc>> =
|
||||
sqlx::query_scalar("SELECT leased_until FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(leased_until.is_none());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn ack_failed_under_max_returns_to_pending_with_future_schedule(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
let lease = &leases[0];
|
||||
jobs::ack_failed(&pool, lease.id, "boom", lease.attempts, lease.max_attempts)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(job_state(&pool, id).await, "pending");
|
||||
|
||||
let (scheduled_at, last_error): (chrono::DateTime<chrono::Utc>, Option<String>) =
|
||||
sqlx::query_as("SELECT scheduled_at, last_error FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(scheduled_at > chrono::Utc::now());
|
||||
assert_eq!(last_error.as_deref(), Some("boom"));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn ack_failed_at_max_marks_dead(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
// Force a single lease then mark "this was attempt N where N == max_attempts".
|
||||
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
let lease = &leases[0];
|
||||
jobs::ack_failed(&pool, lease.id, "final boom", lease.max_attempts, lease.max_attempts)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(job_state(&pool, id).await, "dead");
|
||||
let last_error: Option<String> =
|
||||
sqlx::query_scalar("SELECT last_error FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(last_error.as_deref(), Some("final boom"));
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn release_returns_to_pending_and_undoes_attempt_increment(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let leases = jobs::lease(&pool, None, 1, Duration::from_secs(60))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(leases[0].attempts, 1);
|
||||
jobs::release(&pool, leases[0].id).await.unwrap();
|
||||
|
||||
assert_eq!(job_state(&pool, id).await, "pending");
|
||||
assert_eq!(job_attempts(&pool, id).await, 0);
|
||||
let leased_until: Option<chrono::DateTime<chrono::Utc>> =
|
||||
sqlx::query_scalar("SELECT leased_until FROM crawler_jobs WHERE id = $1")
|
||||
.bind(id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(leased_until.is_none());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn reap_done_deletes_old_rows_keeps_fresh(pool: PgPool) {
|
||||
// Two done rows: one old (updated_at 10 days ago), one fresh.
|
||||
let old_id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let fresh_id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
sqlx::query("UPDATE crawler_jobs SET state='done', updated_at = now() - interval '10 days' WHERE id = $1")
|
||||
.bind(old_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
sqlx::query("UPDATE crawler_jobs SET state='done' WHERE id = $1")
|
||||
.bind(fresh_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let deleted = jobs::reap_done(&pool, 7).await.unwrap();
|
||||
assert_eq!(deleted, 1);
|
||||
|
||||
let remaining: Vec<Uuid> = sqlx::query_scalar("SELECT id FROM crawler_jobs ORDER BY id")
|
||||
.fetch_all(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(remaining, vec![fresh_id], "only fresh row remains");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn reap_done_zero_is_a_no_op(pool: PgPool) {
|
||||
let id = match jobs::enqueue(&pool, &chapter_content_payload(Uuid::new_v4()))
|
||||
.await
|
||||
.unwrap()
|
||||
{
|
||||
EnqueueResult::Inserted(id) => id,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
sqlx::query("UPDATE crawler_jobs SET state='done', updated_at = now() - interval '999 days' WHERE id = $1")
|
||||
.bind(id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let deleted = jobs::reap_done(&pool, 0).await.unwrap();
|
||||
assert_eq!(deleted, 0);
|
||||
assert_eq!(job_count(&pool).await, 1);
|
||||
}
|
||||
473
backend/tests/crawler_sync.rs
Normal file
473
backend/tests/crawler_sync.rs
Normal file
@@ -0,0 +1,473 @@
|
||||
//! Integration tests for `repo::crawler`.
|
||||
//!
|
||||
//! Each test runs against a fresh, migrated DB via `#[sqlx::test]`.
|
||||
//! `DATABASE_URL` must point to a Postgres where the test user can
|
||||
//! `CREATEDB`.
|
||||
|
||||
use mangalord::crawler::source::{SourceChapterRef, SourceManga};
|
||||
use mangalord::repo::crawler::{self, ChapterDiff, UpsertStatus};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Helper to spin up a `SourceManga` fixture with a stable shape so
|
||||
/// each test can tweak just the fields it cares about.
|
||||
fn sample_manga(key: &str, title: &str, hash: &str) -> SourceManga {
|
||||
SourceManga {
|
||||
source_manga_key: key.to_string(),
|
||||
title: title.to_string(),
|
||||
alternative_titles: vec!["Alt 1".into()],
|
||||
authors: vec!["Author One".into()],
|
||||
// Action is in the seeded `genres` table; Fantasy is too.
|
||||
genres: vec!["Action".into(), "Fantasy".into()],
|
||||
tags: vec!["popular".into()],
|
||||
status: Some("ongoing".into()),
|
||||
summary: Some("Sample summary.".into()),
|
||||
cover_url: Some("/cover.jpg".into()),
|
||||
chapters: vec![],
|
||||
metadata_hash: hash.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn ensure_source_is_idempotent(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "Target Site", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
crawler::ensure_source(&pool, "target", "Target Site v2", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM sources WHERE id = 'target'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(count.0, 1);
|
||||
let name: (String,) = sqlx::query_as("SELECT name FROM sources WHERE id = 'target'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(name.0, "Target Site v2", "name updates on re-call");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn first_upsert_inserts_manga_and_links_metadata(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
|
||||
let res = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(res.status, UpsertStatus::New);
|
||||
|
||||
// mangas row created
|
||||
let row: (String, String, Vec<String>) =
|
||||
sqlx::query_as("SELECT title, status, alt_titles FROM mangas WHERE id = $1")
|
||||
.bind(res.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(row.0, "Foo Manga");
|
||||
assert_eq!(row.1, "ongoing");
|
||||
assert_eq!(row.2, vec!["Alt 1"]);
|
||||
|
||||
// manga_sources row links the two
|
||||
let link: (String, Uuid, Option<String>) = sqlx::query_as(
|
||||
"SELECT source_id, manga_id, metadata_hash FROM manga_sources WHERE source_manga_key = $1",
|
||||
)
|
||||
.bind("foo")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(link.0, "target");
|
||||
assert_eq!(link.1, res.manga_id);
|
||||
assert_eq!(link.2.as_deref(), Some("hash-1"));
|
||||
|
||||
// Authors, genres, tags M2M populated
|
||||
let n_authors: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM manga_authors WHERE manga_id = $1")
|
||||
.bind(res.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n_authors.0, 1);
|
||||
let n_genres: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM manga_genres WHERE manga_id = $1")
|
||||
.bind(res.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n_genres.0, 2, "Action + Fantasy");
|
||||
let n_tags: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM manga_tags WHERE manga_id = $1")
|
||||
.bind(res.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n_tags.0, 1);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn second_upsert_with_same_hash_reports_unchanged(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(second.status, UpsertStatus::Unchanged);
|
||||
assert_eq!(second.manga_id, first.manga_id);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn upsert_with_changed_hash_updates_fields(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let mut m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
m.title = "Foo Manga (Revised)".into();
|
||||
m.status = Some("completed".into());
|
||||
m.metadata_hash = "hash-2".into();
|
||||
let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(second.status, UpsertStatus::Updated);
|
||||
assert_eq!(second.manga_id, first.manga_id);
|
||||
|
||||
let row: (String, String) =
|
||||
sqlx::query_as("SELECT title, status FROM mangas WHERE id = $1")
|
||||
.bind(first.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(row.0, "Foo Manga (Revised)");
|
||||
assert_eq!(row.1, "completed");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn sync_chapters_adds_new_refreshes_existing_and_drops_vanished(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let initial = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "1".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1".into()),
|
||||
url: "https://x.example/foo/1".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "2".into(),
|
||||
number: 2,
|
||||
title: Some("Ch.2".into()),
|
||||
url: "https://x.example/foo/2".into(),
|
||||
},
|
||||
];
|
||||
let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &initial)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
diff,
|
||||
ChapterDiff {
|
||||
new: 2,
|
||||
refreshed: 0,
|
||||
dropped: 0
|
||||
}
|
||||
);
|
||||
|
||||
// Second run: keep ch1, replace ch2 with ch3 — ch2 should be dropped.
|
||||
let second = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "1".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1 (renamed)".into()),
|
||||
url: "https://x.example/foo/1".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "3".into(),
|
||||
number: 3,
|
||||
title: Some("Ch.3".into()),
|
||||
url: "https://x.example/foo/3".into(),
|
||||
},
|
||||
];
|
||||
let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &second)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
diff,
|
||||
ChapterDiff {
|
||||
new: 1,
|
||||
refreshed: 1,
|
||||
dropped: 1
|
||||
}
|
||||
);
|
||||
|
||||
// Renamed title propagated to chapters.title
|
||||
let title: (Option<String>,) =
|
||||
sqlx::query_as("SELECT c.title FROM chapters c JOIN chapter_sources cs ON cs.chapter_id = c.id WHERE cs.source_chapter_key = '1'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(title.0.as_deref(), Some("Ch.1 (renamed)"));
|
||||
|
||||
// Vanished chapter is soft-dropped (row still exists, dropped_at set).
|
||||
let dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
||||
sqlx::query_as("SELECT dropped_at FROM chapter_sources WHERE source_chapter_key = '2'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(dropped.0.is_some(), "ch2 should be soft-dropped");
|
||||
}
|
||||
|
||||
/// Real-world sources publish multiple chapters at the same number
|
||||
/// (different uploaders, translator notes, re-releases). After the
|
||||
/// (manga_id, number) UNIQUE drop in 0013, each `SourceChapterRef`
|
||||
/// becomes its own `chapters` row even when the parsed number matches
|
||||
/// — chapter identity is now the chapter id, not the number.
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn sync_chapters_keeps_duplicate_numbered_chapters_as_separate_rows(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo Manga", "hash-1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Two distinct uploads of Ch.52 (different uploaders → different
|
||||
// URLs/keys, same parsed number) plus a notice/hiatus row that
|
||||
// parses to number=0 alongside a real chapter at number 1.
|
||||
let chapters = vec![
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "br_chapter-A".into(),
|
||||
number: 52,
|
||||
title: Some("Ch.52 : Official".into()),
|
||||
url: "https://x.example/foo/A/pg-1/".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "br_chapter-B".into(),
|
||||
number: 52,
|
||||
title: Some("Ch.52 : Official (alt)".into()),
|
||||
url: "https://x.example/foo/B/pg-1/".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "br_chapter-NOTICE".into(),
|
||||
number: 0,
|
||||
title: Some("hitaus.".into()),
|
||||
url: "https://x.example/foo/notice/pg-1/".into(),
|
||||
},
|
||||
SourceChapterRef {
|
||||
source_chapter_key: "br_chapter-1".into(),
|
||||
number: 1,
|
||||
title: Some("Ch.1 : Official".into()),
|
||||
url: "https://x.example/foo/1/pg-1/".into(),
|
||||
},
|
||||
];
|
||||
|
||||
let diff = crawler::sync_manga_chapters(&pool, "target", up.manga_id, &chapters)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
diff,
|
||||
ChapterDiff {
|
||||
new: 4,
|
||||
refreshed: 0,
|
||||
dropped: 0
|
||||
},
|
||||
"every source ref yields a new chapter row"
|
||||
);
|
||||
|
||||
let rows: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM chapters WHERE manga_id = $1")
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(rows.0, 4, "4 distinct chapter rows even with duplicate numbers");
|
||||
|
||||
let ch52_count: (i64,) = sqlx::query_as(
|
||||
"SELECT COUNT(*) FROM chapters WHERE manga_id = $1 AND number = 52",
|
||||
)
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(ch52_count.0, 2, "both Ch.52 uploads survive as separate rows");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn mark_dropped_mangas_only_drops_unseen(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
// Seed two mangas before "now" so a later run_started_at sees them as stale.
|
||||
let _ = crawler::upsert_manga_from_source(
|
||||
&pool,
|
||||
"target",
|
||||
"https://x.example/foo",
|
||||
&sample_manga("foo", "Foo", "hf"),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let _ = crawler::upsert_manga_from_source(
|
||||
&pool,
|
||||
"target",
|
||||
"https://x.example/bar",
|
||||
&sample_manga("bar", "Bar", "hb"),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Now mark a new "run" beginning. Re-upsert only `foo` — `bar`
|
||||
// should be the one flagged dropped.
|
||||
let run_started = chrono::Utc::now();
|
||||
// Sleep briefly so the second upsert's NOW() > run_started_at.
|
||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||
let _ = crawler::upsert_manga_from_source(
|
||||
&pool,
|
||||
"target",
|
||||
"https://x.example/foo",
|
||||
&sample_manga("foo", "Foo", "hf"),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let n = crawler::mark_dropped_mangas(&pool, "target", run_started)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n, 1, "only bar should have been dropped");
|
||||
|
||||
let foo_dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
||||
sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'foo'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(foo_dropped.0.is_none(), "foo seen this run, must not be dropped");
|
||||
let bar_dropped: (Option<chrono::DateTime<chrono::Utc>>,) =
|
||||
sqlx::query_as("SELECT dropped_at FROM manga_sources WHERE source_manga_key = 'bar'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(bar_dropped.0.is_some());
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn upsert_surfaces_cover_image_path_for_backfill_decisions(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo", "h1");
|
||||
|
||||
// First upsert: row is brand new, no cover stored yet.
|
||||
let first = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(first.cover_image_path.is_none(), "new manga has no cover yet");
|
||||
|
||||
// Simulate cover landing in storage post-upsert.
|
||||
sqlx::query("UPDATE mangas SET cover_image_path = $1 WHERE id = $2")
|
||||
.bind("mangas/foo/cover.jpg")
|
||||
.bind(first.manga_id)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Second upsert with same hash → Unchanged, but cover path is now
|
||||
// surfaced so the caller knows the backfill is done.
|
||||
let second = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(second.status, UpsertStatus::Unchanged);
|
||||
assert_eq!(
|
||||
second.cover_image_path.as_deref(),
|
||||
Some("mangas/foo/cover.jpg")
|
||||
);
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn arbitrary_genres_from_source_get_inserted(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let mut m = sample_manga("foo", "Foo", "h");
|
||||
// "Action" is seeded by migration 0009. "Webtoons" is not.
|
||||
m.genres = vec!["Action".into(), "Webtoons".into()];
|
||||
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let n_genre_links: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM manga_genres WHERE manga_id = $1")
|
||||
.bind(up.manga_id)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(n_genre_links.0, 2, "both seeded and source-added genres attach");
|
||||
|
||||
let webtoons: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM genres WHERE name = 'Webtoons'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(webtoons.0, 1, "non-seeded genre was inserted");
|
||||
|
||||
// Case-insensitive de-dup: a second sync with the genre re-cased
|
||||
// attaches the existing row, not a new one.
|
||||
let mut m2 = sample_manga("bar", "Bar", "h2");
|
||||
m2.genres = vec!["webtoons".into()];
|
||||
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/bar", &m2)
|
||||
.await
|
||||
.unwrap();
|
||||
let webtoons_count: (i64,) =
|
||||
sqlx::query_as("SELECT COUNT(*) FROM genres WHERE lower(name) = 'webtoons'")
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(webtoons_count.0, 1, "case-insensitive lookup reuses the existing row");
|
||||
}
|
||||
|
||||
#[sqlx::test(migrations = "./migrations")]
|
||||
async fn re_appearing_manga_clears_dropped_at(pool: PgPool) {
|
||||
crawler::ensure_source(&pool, "target", "T", "https://x.example")
|
||||
.await
|
||||
.unwrap();
|
||||
let m = sample_manga("foo", "Foo", "h1");
|
||||
let up = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Drop it manually.
|
||||
sqlx::query(
|
||||
"UPDATE manga_sources SET dropped_at = NOW() WHERE source_manga_key = 'foo'",
|
||||
)
|
||||
.execute(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Re-upsert: the link should un-drop.
|
||||
let _ = crawler::upsert_manga_from_source(&pool, "target", "https://x.example/foo", &m)
|
||||
.await
|
||||
.unwrap();
|
||||
let dropped: (Option<chrono::DateTime<chrono::Utc>>, Uuid) = sqlx::query_as(
|
||||
"SELECT dropped_at, manga_id FROM manga_sources WHERE source_manga_key = 'foo'",
|
||||
)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(dropped.0.is_none());
|
||||
assert_eq!(dropped.1, up.manga_id);
|
||||
}
|
||||
194
backend/tests/fixtures/target/chapter_list_uu.html
vendored
Normal file
194
backend/tests/fixtures/target/chapter_list_uu.html
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
<table class="listing" id="chapter_table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-379272/pg-1/"><b>Ch.67</b>
|
||||
: Official </a>
|
||||
<b style="color:#FEFD7F;width;30px;display:inline-block;margin-left:5px">new</b>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">May 20, 2026</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-328248/pg-1/"><b>hitaus.</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Jan 15, 2026</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-326351/pg-1/"><b>Ch.66</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Jan 10, 2026</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-295078/pg-1/"><b>Ch.52</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Aug 28, 2025</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-294815/pg-1/"><b>Ch.52</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../4300634/upload/">mina</a>
|
||||
</td>
|
||||
<td class="no">Aug 27, 2025</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-249964/pg-1/"><b>Ch.10</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Jan 5, 2025</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/to_chapter-13/pg-1/"><b>Ch.13</b>
|
||||
: Thank you, we'll see you in the next one! </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no"></td>
|
||||
<td class="no">Dec 30, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-249095/pg-1/"><b>Ch.9</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Dec 28, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-248930/pg-1/"><b>Ch.1</b>
|
||||
: Official </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Dec 26, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/to_chapter-12/pg-1/"><b>Ch.12</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no"></td>
|
||||
<td class="no">Dec 1, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-244844/pg-1/"><b>notice.</b>
|
||||
: Officials </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Nov 26, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/to_chapter-11/pg-1/"><b>Ch.11</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no"></td>
|
||||
<td class="no">Nov 18, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-221180/pg-1/"><b>notice.</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../3781074/upload/">Izanami</a>
|
||||
</td>
|
||||
<td class="no">Jun 21, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-234803/pg-1/"><b>notice.</b>
|
||||
</a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../2843005/upload/">bloomingdale</a>
|
||||
</td>
|
||||
<td class="no">Sep 13, 2024</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<h4>
|
||||
<a class="chico"
|
||||
href=".../uu/br_chapter-220299/pg-1/"><b>Ch.1</b>
|
||||
: Team Hazama </a>
|
||||
</h4>
|
||||
</td>
|
||||
<td class="no">
|
||||
<a href=".../1457681/upload/">purplepandabear</a>
|
||||
</td>
|
||||
<td class="no">Jun 16, 2024</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
22
docker-compose.prod.yml
Normal file
22
docker-compose.prod.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
# Production overlay: layer on top of docker-compose.yml on the deploy
|
||||
# host so the backend and frontend run from pre-built registry images
|
||||
# instead of building locally.
|
||||
#
|
||||
# docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
#
|
||||
# REGISTRY_URL and IMAGE_TAG are injected by .gitea/workflows/deploy.yml
|
||||
# at deploy time. IMAGE_TAG defaults to `latest`, so a manual
|
||||
# `docker compose ... up -d` on the host still works as long as
|
||||
# REGISTRY_URL is set (shell environment or `.env`); it has no default.
|
||||
|
||||
services:
|
||||
backend:
|
||||
build: !reset null
|
||||
image: ${REGISTRY_URL}/mangalord-backend:${IMAGE_TAG:-latest}
|
||||
pull_policy: always
|
||||
restart: unless-stopped
|
||||
|
||||
frontend:
|
||||
build: !reset null
|
||||
image: ${REGISTRY_URL}/mangalord-frontend:${IMAGE_TAG:-latest}
|
||||
pull_policy: always
|
||||
restart: unless-stopped
|
||||
147
frontend/e2e/manga-edit.spec.ts
Normal file
147
frontend/e2e/manga-edit.spec.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
import { test, expect, type Page } from '@playwright/test';
|
||||
|
||||
/** Signed-in user served by the stubbed `/api/v1/auth/me` route. */
const userFixture = {
	id: 'u1',
	username: 'alice',
	created_at: '2026-01-01T00:00:00Z'
};

/**
 * Manga detail payload served for `m1` before any edit is applied.
 * Tests copy it (`{ ...baseManga }`) rather than mutating it.
 */
const baseManga = {
	id: 'm1',
	title: 'Berserk',
	status: 'ongoing',
	alt_titles: ['Old Alt'],
	description: 'Original description',
	cover_image_path: null,
	created_at: '2026-01-01T00:00:00Z',
	updated_at: '2026-01-01T00:00:00Z',
	authors: [{ id: 'a1', name: 'Kentaro Miura' }],
	genres: [],
	tags: []
};
|
||||
|
||||
/**
 * Registers the two routes every authenticated edit test needs:
 * `/auth/me` answers with `userFixture`, and `/genres` answers with a fixed
 * two-entry genre list.
 *
 * @param page Playwright page to install the route stubs on.
 */
async function stubAuthenticatedAndGenres(page: Page) {
	// Shared shape of a successful JSON response.
	const okJson = (payload: unknown) => ({
		status: 200,
		contentType: 'application/json',
		body: JSON.stringify(payload)
	});

	const genreList = [
		{ id: 'g-action', name: 'Action' },
		{ id: 'g-fantasy', name: 'Fantasy' }
	];

	await page.route('**/api/v1/auth/me', (route) => route.fulfill(okJson({ user: userFixture })));
	await page.route('**/api/v1/genres', (route) => route.fulfill(okJson(genreList)));
}
|
||||
|
||||
// With `/auth/me` answering 401, the edit route is expected to render the
// sign-in prompt (`edit-signin`).
test('anonymous user sees sign-in prompt on /manga/[id]/edit', async ({ page }) => {
	const unauthenticatedEnvelope = {
		error: { code: 'unauthenticated', message: 'unauthenticated' }
	};

	// The session lookup is rejected.
	await page.route('**/api/v1/auth/me', (route) =>
		route.fulfill({
			status: 401,
			contentType: 'application/json',
			body: JSON.stringify(unauthenticatedEnvelope)
		})
	);
	// The genre list and the manga itself still answer successfully.
	await page.route('**/api/v1/genres', (route) =>
		route.fulfill({ status: 200, contentType: 'application/json', body: '[]' })
	);
	await page.route('**/api/v1/mangas/m1', (route) =>
		route.fulfill({
			status: 200,
			contentType: 'application/json',
			body: JSON.stringify(baseManga)
		})
	);

	await page.goto('/manga/m1/edit');

	const signInPrompt = page.getByTestId('edit-signin');
	await expect(signInPrompt).toBeVisible();
});
|
||||
|
||||
// Full edit round-trip against mocked APIs: open the manga page, follow the
// edit link, change the title, submit, and confirm both the PATCH payload
// and the title shown after navigating back.
test('/manga/[id]/edit PATCHes the changed metadata and lands on the manga page', async ({
	page
}) => {
	await stubAuthenticatedAndGenres(page);

	// Builders for the response shapes repeated by the stubs below.
	const jsonResponse = (status: number, payload: unknown) => ({
		status,
		contentType: 'application/json',
		body: JSON.stringify(payload)
	});
	const emptyListing = {
		items: [],
		page: { limit: 50, offset: 0, total: 0 }
	};

	let patchBody: Record<string, unknown> | null = null;
	// In-memory manga record: PATCH mutates it, later GETs serve it.
	let mangaAfter = { ...baseManga };
	await page.route('**/api/v1/mangas/m1', async (route) => {
		const request = route.request();
		switch (request.method()) {
			case 'GET':
				await route.fulfill(jsonResponse(200, mangaAfter));
				break;
			case 'PATCH': {
				const received: Record<string, unknown> = JSON.parse(request.postData() ?? '{}');
				patchBody = received;
				// A key that is present (even as null) overwrites the
				// description; an absent key keeps the previous value.
				const nextDescription =
					'description' in received
						? ((received.description as string | null) ?? null)
						: mangaAfter.description;
				mangaAfter = {
					...mangaAfter,
					title: (received.title as string) ?? mangaAfter.title,
					description: nextDescription
				};
				await route.fulfill(jsonResponse(200, mangaAfter));
				break;
			}
			default:
				await route.fallback();
		}
	});
	// Other endpoints stubbed for the manga overview page.
	await page.route('**/api/v1/mangas/m1/chapters*', (route) =>
		route.fulfill(jsonResponse(200, emptyListing))
	);
	await page.route('**/api/v1/me/bookmarks*', (route) =>
		route.fulfill(jsonResponse(200, emptyListing))
	);
	await page.route('**/api/v1/me/read-progress/m1', (route) =>
		route.fulfill(
			jsonResponse(404, {
				error: { code: 'not_found', message: 'no progress' }
			})
		)
	);

	await page.goto('/manga/m1');
	// Edit link is gated on session.user — it should be visible to the
	// stubbed authenticated user.
	await page.getByTestId('edit-manga-link').click();
	await expect(page).toHaveURL(/\/manga\/m1\/edit$/);

	const titleInput = page.getByTestId('manga-title');
	await expect(titleInput).toHaveValue('Berserk');
	await titleInput.fill('Berserk (Deluxe)');
	await page.getByTestId('manga-edit-submit').click();

	// Back on the overview: the new title is rendered and was what got sent.
	await expect(page).toHaveURL(/\/manga\/m1$/);
	await expect(page.getByTestId('manga-title')).toHaveText('Berserk (Deluxe)');
	expect(patchBody).not.toBeNull();
	expect((patchBody as Record<string, unknown>).title).toBe('Berserk (Deluxe)');
});
|
||||
@@ -14,9 +14,25 @@ async function stubAuthenticated(page: Page) {
|
||||
body: JSON.stringify({ user: userFixture })
|
||||
})
|
||||
);
|
||||
// Profile overview hits these for the count cards — return zeros
|
||||
// unless a test overrides.
|
||||
await page.route('**/api/v1/me/bookmarks?*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ items: [], page: { limit: 1, offset: 0, total: 0 } })
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/me/collections?*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ items: [], page: { limit: 1, offset: 0, total: 0 } })
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
test('settings link shows for authed users and reaches the password form', async ({ page }) => {
|
||||
test('Profile link in nav for authed users; landing shows counts', async ({ page }) => {
|
||||
await stubAuthenticated(page);
|
||||
await page.route('**/api/v1/mangas?*', (route) =>
|
||||
route.fulfill({
|
||||
@@ -27,9 +43,18 @@ test('settings link shows for authed users and reaches the password form', async
|
||||
);
|
||||
|
||||
await page.goto('/');
|
||||
await expect(page.getByTestId('nav-settings')).toBeVisible();
|
||||
await page.getByTestId('nav-settings').click();
|
||||
await expect(page).toHaveURL(/\/settings$/);
|
||||
await expect(page.getByTestId('nav-profile')).toBeVisible();
|
||||
await page.getByTestId('nav-profile').click();
|
||||
await expect(page).toHaveURL(/\/profile$/);
|
||||
await expect(page.getByTestId('overview-bookmarks')).toBeVisible();
|
||||
await expect(page.getByTestId('overview-collections')).toBeVisible();
|
||||
});
|
||||
|
||||
test('Account tab reaches the password form', async ({ page }) => {
|
||||
await stubAuthenticated(page);
|
||||
await page.goto('/profile');
|
||||
await page.getByTestId('tab-account').click();
|
||||
await expect(page).toHaveURL(/\/profile\/account$/);
|
||||
await expect(page.getByTestId('password-form')).toBeVisible();
|
||||
});
|
||||
|
||||
@@ -43,7 +68,7 @@ test('changing password shows success and clears the form', async ({ page }) =>
|
||||
await route.fulfill({ status: 204 });
|
||||
});
|
||||
|
||||
await page.goto('/settings');
|
||||
await page.goto('/profile/account');
|
||||
await page.getByTestId('current-password').fill('hunter2hunter2');
|
||||
await page.getByTestId('new-password').fill('freshpassfreshpass');
|
||||
await page.getByTestId('confirm-password').fill('freshpassfreshpass');
|
||||
@@ -55,7 +80,6 @@ test('changing password shows success and clears the form', async ({ page }) =>
|
||||
current_password: 'hunter2hunter2',
|
||||
new_password: 'freshpassfreshpass'
|
||||
});
|
||||
// Form should clear after success.
|
||||
await expect(page.getByTestId('current-password')).toHaveValue('');
|
||||
});
|
||||
|
||||
@@ -71,7 +95,7 @@ test('wrong current password surfaces the 401 envelope inline', async ({ page })
|
||||
})
|
||||
);
|
||||
|
||||
await page.goto('/settings');
|
||||
await page.goto('/profile/account');
|
||||
await page.getByTestId('current-password').fill('definitelyNotIt');
|
||||
await page.getByTestId('new-password').fill('freshpassfreshpass');
|
||||
await page.getByTestId('confirm-password').fill('freshpassfreshpass');
|
||||
@@ -83,7 +107,7 @@ test('wrong current password surfaces the 401 envelope inline', async ({ page })
|
||||
test('mismatched new + confirm disables the submit button', async ({ page }) => {
|
||||
await stubAuthenticated(page);
|
||||
|
||||
await page.goto('/settings');
|
||||
await page.goto('/profile/account');
|
||||
await page.getByTestId('current-password').fill('hunter2hunter2');
|
||||
await page.getByTestId('new-password').fill('freshpassfreshpass');
|
||||
await page.getByTestId('confirm-password').fill('different');
|
||||
@@ -92,7 +116,7 @@ test('mismatched new + confirm disables the submit button', async ({ page }) =>
|
||||
await expect(page.getByTestId('password-submit')).toBeDisabled();
|
||||
});
|
||||
|
||||
test('anonymous user sees a sign-in prompt on /settings', async ({ page }) => {
|
||||
test('anonymous user sees a profile sign-in prompt', async ({ page }) => {
|
||||
await page.route('**/api/v1/auth/me', (route) =>
|
||||
route.fulfill({
|
||||
status: 401,
|
||||
@@ -103,7 +127,16 @@ test('anonymous user sees a sign-in prompt on /settings', async ({ page }) => {
|
||||
})
|
||||
);
|
||||
|
||||
await page.goto('/settings');
|
||||
await expect(page.getByTestId('settings-signin')).toBeVisible();
|
||||
await page.goto('/profile');
|
||||
await expect(page.getByTestId('profile-signin')).toBeVisible();
|
||||
await expect(page.getByTestId('password-form')).toHaveCount(0);
|
||||
});
|
||||
|
||||
test('/settings 308-redirects to /profile/preferences', async ({ page }) => {
|
||||
await stubAuthenticated(page);
|
||||
await page.goto('/settings');
|
||||
await expect(page).toHaveURL(/\/profile\/preferences$/);
|
||||
// The theme radio is visually hidden (decorated label wraps it), so
|
||||
// assert presence rather than CSS visibility.
|
||||
await expect(page.getByTestId('theme-radio-system')).toBeAttached();
|
||||
});
|
||||
@@ -1,6 +1,7 @@
|
||||
import { test, expect, type Page } from '@playwright/test';
|
||||
|
||||
const mangaId = '22222222-2222-2222-2222-222222222222';
|
||||
const chapterId = 'c2222222-2222-2222-2222-222222222222';
|
||||
const mangaFixture = {
|
||||
id: mangaId,
|
||||
title: 'Vagabond',
|
||||
@@ -11,7 +12,7 @@ const mangaFixture = {
|
||||
updated_at: '2026-01-01T00:00:00Z'
|
||||
};
|
||||
const chapterFixture = {
|
||||
id: 'c1',
|
||||
id: chapterId,
|
||||
manga_id: mangaId,
|
||||
number: 1,
|
||||
title: null,
|
||||
@@ -20,24 +21,24 @@ const chapterFixture = {
|
||||
};
|
||||
const pagesFixture = [
|
||||
{
|
||||
id: 'p1',
|
||||
chapter_id: 'c1',
|
||||
id: 'p1111111-2222-2222-2222-222222222222',
|
||||
chapter_id: chapterId,
|
||||
page_number: 1,
|
||||
storage_key: 'mangas/m2/chapters/c1/pages/0001.png',
|
||||
storage_key: `mangas/${mangaId}/chapters/${chapterId}/pages/0001.png`,
|
||||
content_type: 'image/png'
|
||||
},
|
||||
{
|
||||
id: 'p2',
|
||||
chapter_id: 'c1',
|
||||
id: 'p2222222-2222-2222-2222-222222222222',
|
||||
chapter_id: chapterId,
|
||||
page_number: 2,
|
||||
storage_key: 'mangas/m2/chapters/c1/pages/0002.png',
|
||||
storage_key: `mangas/${mangaId}/chapters/${chapterId}/pages/0002.png`,
|
||||
content_type: 'image/png'
|
||||
},
|
||||
{
|
||||
id: 'p3',
|
||||
chapter_id: 'c1',
|
||||
id: 'p3333333-2222-2222-2222-222222222222',
|
||||
chapter_id: chapterId,
|
||||
page_number: 3,
|
||||
storage_key: 'mangas/m2/chapters/c1/pages/0003.png',
|
||||
storage_key: `mangas/${mangaId}/chapters/${chapterId}/pages/0003.png`,
|
||||
content_type: 'image/png'
|
||||
}
|
||||
];
|
||||
@@ -92,14 +93,16 @@ async function mockReaderApis(page: Page) {
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route(`**/api/v1/mangas/${mangaId}/chapters/1`, (route) =>
|
||||
await page.route(`**/api/v1/mangas/${mangaId}/chapters/${chapterId}`, (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(chapterFixture)
|
||||
})
|
||||
);
|
||||
await page.route(`**/api/v1/mangas/${mangaId}/chapters/1/pages`, (route) =>
|
||||
await page.route(
|
||||
`**/api/v1/mangas/${mangaId}/chapters/${chapterId}/pages`,
|
||||
(route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
@@ -131,7 +134,7 @@ test.beforeEach(async ({ context }) => {
|
||||
|
||||
test('switching to continuous mode stacks all pages and hides chevrons', async ({ page }) => {
|
||||
await mockReaderApis(page);
|
||||
await page.goto(`/manga/${mangaId}/chapter/1`);
|
||||
await page.goto(`/manga/${mangaId}/chapter/${chapterId}`);
|
||||
|
||||
// Default single-page mode is active.
|
||||
await expect(page.getByTestId('reader-page')).toBeVisible();
|
||||
@@ -149,7 +152,7 @@ test('switching to continuous mode stacks all pages and hides chevrons', async (
|
||||
|
||||
test('arrow keys do not paginate while in continuous mode', async ({ page }) => {
|
||||
await mockReaderApis(page);
|
||||
await page.goto(`/manga/${mangaId}/chapter/1`);
|
||||
await page.goto(`/manga/${mangaId}/chapter/${chapterId}`);
|
||||
await page.getByTestId('reader-mode-continuous').click();
|
||||
await expect(page.getByTestId('reader-continuous')).toBeVisible();
|
||||
|
||||
@@ -164,7 +167,7 @@ test('arrow keys do not paginate while in continuous mode', async ({ page }) =>
|
||||
|
||||
test('gap select updates the inline gap on the continuous container', async ({ page }) => {
|
||||
await mockReaderApis(page);
|
||||
await page.goto(`/manga/${mangaId}/chapter/1`);
|
||||
await page.goto(`/manga/${mangaId}/chapter/${chapterId}`);
|
||||
await page.getByTestId('reader-mode-continuous').click();
|
||||
|
||||
const container = page.getByTestId('reader-continuous');
|
||||
@@ -192,7 +195,7 @@ test('reader-mode preference set on one page is honored when the reader opens',
|
||||
});
|
||||
await mockReaderApis(page);
|
||||
|
||||
await page.goto(`/manga/${mangaId}/chapter/1`);
|
||||
await page.goto(`/manga/${mangaId}/chapter/${chapterId}`);
|
||||
await expect(page.getByTestId('reader-continuous')).toBeVisible();
|
||||
await expect(page.getByTestId('page-indicator')).toHaveText('3 pages');
|
||||
await expect(page.getByTestId('reader-continuous')).toHaveAttribute(
|
||||
@@ -201,13 +204,13 @@ test('reader-mode preference set on one page is honored when the reader opens',
|
||||
);
|
||||
});
|
||||
|
||||
test('settings page hides the gap picker while in single-page mode', async ({ page }) => {
|
||||
test('preferences page hides the gap picker while in single-page mode', async ({ page }) => {
|
||||
// Visually verifies the conditional render. The radio-click semantics
|
||||
// are exercised in src/lib/preferences.svelte.test.ts; the visible
|
||||
// mode toggle in the reader top bar covers the cross-route propagation
|
||||
// path in the test above.
|
||||
await mockReaderApis(page);
|
||||
await page.goto('/settings');
|
||||
await page.goto('/profile/preferences');
|
||||
|
||||
await expect(page.getByTestId('reader-mode-radio-single')).toBeAttached();
|
||||
await expect(page.getByTestId('reader-mode-radio-continuous')).toBeAttached();
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { test, expect, type Page } from '@playwright/test';
|
||||
|
||||
const mangaId = '11111111-1111-1111-1111-111111111111';
|
||||
const chapterId = 'c1111111-1111-1111-1111-111111111111';
|
||||
const mangaFixture = {
|
||||
id: mangaId,
|
||||
title: 'Berserk',
|
||||
@@ -12,7 +13,7 @@ const mangaFixture = {
|
||||
};
|
||||
const chaptersFixture = [
|
||||
{
|
||||
id: 'c1',
|
||||
id: chapterId,
|
||||
manga_id: mangaId,
|
||||
number: 1,
|
||||
title: 'The Brand',
|
||||
@@ -22,24 +23,24 @@ const chaptersFixture = [
|
||||
];
|
||||
const pagesFixture = [
|
||||
{
|
||||
id: 'p1',
|
||||
chapter_id: 'c1',
|
||||
id: 'p1111111-1111-1111-1111-111111111111',
|
||||
chapter_id: chapterId,
|
||||
page_number: 1,
|
||||
storage_key: 'mangas/m1/chapters/c1/pages/0001.png',
|
||||
storage_key: `mangas/${mangaId}/chapters/${chapterId}/pages/0001.png`,
|
||||
content_type: 'image/png'
|
||||
},
|
||||
{
|
||||
id: 'p2',
|
||||
chapter_id: 'c1',
|
||||
id: 'p2222222-1111-1111-1111-111111111111',
|
||||
chapter_id: chapterId,
|
||||
page_number: 2,
|
||||
storage_key: 'mangas/m1/chapters/c1/pages/0002.png',
|
||||
storage_key: `mangas/${mangaId}/chapters/${chapterId}/pages/0002.png`,
|
||||
content_type: 'image/png'
|
||||
},
|
||||
{
|
||||
id: 'p3',
|
||||
chapter_id: 'c1',
|
||||
id: 'p3333333-1111-1111-1111-111111111111',
|
||||
chapter_id: chapterId,
|
||||
page_number: 3,
|
||||
storage_key: 'mangas/m1/chapters/c1/pages/0003.png',
|
||||
storage_key: `mangas/${mangaId}/chapters/${chapterId}/pages/0003.png`,
|
||||
content_type: 'image/png'
|
||||
}
|
||||
];
|
||||
@@ -86,14 +87,16 @@ async function mockReaderApis(page: Page) {
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route(`**/api/v1/mangas/${mangaId}/chapters/1`, (route) =>
|
||||
await page.route(`**/api/v1/mangas/${mangaId}/chapters/${chapterId}`, (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(chaptersFixture[0])
|
||||
})
|
||||
);
|
||||
await page.route(`**/api/v1/mangas/${mangaId}/chapters/1/pages`, (route) =>
|
||||
await page.route(
|
||||
`**/api/v1/mangas/${mangaId}/chapters/${chapterId}/pages`,
|
||||
(route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
@@ -123,7 +126,7 @@ test('manga overview shows title, cover, and a chapter list', async ({ page }) =
|
||||
|
||||
test('reader paginates with arrow keys and j/k, and preloads the next page', async ({ page }) => {
|
||||
await mockReaderApis(page);
|
||||
await page.goto(`/manga/${mangaId}/chapter/1`);
|
||||
await page.goto(`/manga/${mangaId}/chapter/${chapterId}`);
|
||||
|
||||
// Page 1 shown, preload for page 2 in the DOM.
|
||||
await expect(page.getByTestId('page-indicator')).toHaveText('Page 1 / 3');
|
||||
|
||||
@@ -8,14 +8,18 @@ const userFixture = {
|
||||
const mangaFixture = {
|
||||
id: 'm1',
|
||||
title: 'Berserk',
|
||||
author: 'Kentaro Miura',
|
||||
status: 'ongoing',
|
||||
alt_titles: [],
|
||||
description: null,
|
||||
cover_image_path: null,
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
updated_at: '2026-01-01T00:00:00Z'
|
||||
updated_at: '2026-01-01T00:00:00Z',
|
||||
authors: [{ id: 'a1', name: 'Kentaro Miura' }],
|
||||
genres: [],
|
||||
tags: []
|
||||
};
|
||||
|
||||
async function mockBaseUploadApis(page: Page) {
|
||||
async function stubAuthenticatedAndGenres(page: Page) {
|
||||
await page.route('**/api/v1/auth/me', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
@@ -23,14 +27,14 @@ async function mockBaseUploadApis(page: Page) {
|
||||
body: JSON.stringify({ user: userFixture })
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/mangas?*', (route) =>
|
||||
await page.route('**/api/v1/genres', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items: [mangaFixture],
|
||||
page: { limit: 200, offset: 0, total: 1 }
|
||||
})
|
||||
body: JSON.stringify([
|
||||
{ id: 'g-action', name: 'Action' },
|
||||
{ id: 'g-fantasy', name: 'Fantasy' }
|
||||
])
|
||||
})
|
||||
);
|
||||
}
|
||||
@@ -45,61 +49,20 @@ test('anonymous user sees sign-in prompt on /upload', async ({ page }) => {
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/mangas?*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ items: [], page: { limit: 200, offset: 0, total: 0 } })
|
||||
})
|
||||
await page.route('**/api/v1/genres', (route) =>
|
||||
route.fulfill({ status: 200, contentType: 'application/json', body: '[]' })
|
||||
);
|
||||
|
||||
await page.goto('/upload');
|
||||
await expect(page.getByTestId('upload-signin')).toBeVisible();
|
||||
});
|
||||
|
||||
test('uploading a non-image page surfaces the backend 415 message', async ({ page }) => {
|
||||
await mockBaseUploadApis(page);
|
||||
|
||||
// Backend rejects with 415 unsupported_media_type — we want to see
|
||||
// the human message rendered as the chapter error.
|
||||
await page.route('**/api/v1/mangas/m1/chapters', (route) =>
|
||||
route.fulfill({
|
||||
status: 415,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
error: {
|
||||
code: 'unsupported_media_type',
|
||||
message: 'page[0]: unsupported image type application/pdf'
|
||||
}
|
||||
})
|
||||
})
|
||||
);
|
||||
|
||||
await page.goto('/upload');
|
||||
await page.getByTestId('chapter-manga').selectOption('m1');
|
||||
await page.getByTestId('chapter-number').fill('1');
|
||||
|
||||
// Client validator allows image/png; we lie about the file type so
|
||||
// the request actually reaches the (mocked) backend, exercising the
|
||||
// 415 envelope path.
|
||||
await page.getByTestId('chapter-pages-input').setInputFiles({
|
||||
name: 'fake.png',
|
||||
mimeType: 'image/png',
|
||||
buffer: Buffer.from('%PDF-1.4', 'utf-8')
|
||||
});
|
||||
|
||||
await page.getByTestId('chapter-submit').click();
|
||||
await expect(page.getByTestId('chapter-error')).toContainText(
|
||||
'unsupported image type'
|
||||
);
|
||||
});
|
||||
|
||||
test('happy path: create manga + upload chapter (mocked)', async ({ page }) => {
|
||||
await mockBaseUploadApis(page);
|
||||
test('/upload creates a manga with no staged chapters and lands on the manga page', async ({
|
||||
page
|
||||
}) => {
|
||||
await stubAuthenticatedAndGenres(page);
|
||||
|
||||
let createdManga: typeof mangaFixture | null = null;
|
||||
let createdChapter: { id: string; number: number } | null = null;
|
||||
|
||||
await page.route('**/api/v1/mangas', (route) => {
|
||||
if (route.request().method() === 'POST') {
|
||||
createdManga = { ...mangaFixture, id: 'm2', title: 'Naruto' };
|
||||
@@ -112,15 +75,88 @@ test('happy path: create manga + upload chapter (mocked)', async ({ page }) => {
|
||||
route.fallback();
|
||||
}
|
||||
});
|
||||
await page.route('**/api/v1/mangas/m1/chapters', (route) => {
|
||||
await page.route('**/api/v1/mangas/m2', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ ...mangaFixture, id: 'm2', title: 'Naruto' })
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/mangas/m2/chapters*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items: [],
|
||||
page: { limit: 50, offset: 0, total: 0 }
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/me/bookmarks*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items: [],
|
||||
page: { limit: 50, offset: 0, total: 0 }
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/me/read-progress/m2', (route) =>
|
||||
route.fulfill({
|
||||
status: 404,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
error: { code: 'not_found', message: 'no progress' }
|
||||
})
|
||||
})
|
||||
);
|
||||
|
||||
await page.goto('/upload');
|
||||
await page.getByTestId('manga-title').fill('Naruto');
|
||||
await page.getByTestId('manga-submit').click();
|
||||
// After create, success → navigate to /manga/{id}.
|
||||
await expect(page).toHaveURL(/\/manga\/m2$/);
|
||||
expect(createdManga).not.toBeNull();
|
||||
});
|
||||
|
||||
test('/upload stages a chapter with renamed page files (page-NNN.<ext>)', async ({
|
||||
page
|
||||
}) => {
|
||||
await stubAuthenticatedAndGenres(page);
|
||||
|
||||
let createdManga: typeof mangaFixture | null = null;
|
||||
let submittedPageNames: string[] = [];
|
||||
|
||||
await page.route('**/api/v1/mangas', (route) => {
|
||||
if (route.request().method() === 'POST') {
|
||||
createdChapter = { id: 'c1', number: 1 };
|
||||
createdManga = { ...mangaFixture, id: 'm3', title: 'Vinland Saga' };
|
||||
route.fulfill({
|
||||
status: 201,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(createdManga)
|
||||
});
|
||||
} else {
|
||||
route.fallback();
|
||||
}
|
||||
});
|
||||
await page.route('**/api/v1/mangas/m3/chapters', (route) => {
|
||||
if (route.request().method() === 'POST') {
|
||||
const post = route.request().postDataBuffer()?.toString('binary') ?? '';
|
||||
// Pull every Content-Disposition filename out of the
|
||||
// multipart body — that's what the server (and proxies,
|
||||
// logs) would see. We expect only renamed `page-NNN.*`
|
||||
// entries, never the original filenames.
|
||||
const matches = [
|
||||
...post.matchAll(/filename="([^"]+)"/g)
|
||||
].map((m) => m[1]);
|
||||
submittedPageNames = matches.filter((n) => n.startsWith('page-'));
|
||||
route.fulfill({
|
||||
status: 201,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
id: 'c1',
|
||||
manga_id: 'm1',
|
||||
manga_id: 'm3',
|
||||
number: 1,
|
||||
title: null,
|
||||
page_count: 2,
|
||||
@@ -131,62 +167,188 @@ test('happy path: create manga + upload chapter (mocked)', async ({ page }) => {
|
||||
route.fallback();
|
||||
}
|
||||
});
|
||||
await page.route('**/api/v1/mangas/m3', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ ...mangaFixture, id: 'm3', title: 'Vinland Saga' })
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/mangas/m3/chapters?*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items: [],
|
||||
page: { limit: 50, offset: 0, total: 0 }
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/me/bookmarks*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items: [],
|
||||
page: { limit: 50, offset: 0, total: 0 }
|
||||
})
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/me/read-progress/m3', (route) =>
|
||||
route.fulfill({
|
||||
status: 404,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
error: { code: 'not_found', message: 'no progress' }
|
||||
})
|
||||
})
|
||||
);
|
||||
|
||||
await page.goto('/upload');
|
||||
|
||||
// Create manga.
|
||||
await page.getByTestId('manga-title').fill('Naruto');
|
||||
await page.getByTestId('manga-submit').click();
|
||||
await expect(page.getByTestId('manga-success')).toContainText('Created');
|
||||
expect(createdManga).not.toBeNull();
|
||||
|
||||
// Upload chapter with two pages.
|
||||
await page.getByTestId('chapter-manga').selectOption('m1');
|
||||
await page.getByTestId('chapter-number').fill('1');
|
||||
await page.getByTestId('chapter-pages-input').setInputFiles([
|
||||
{
|
||||
name: '1.png',
|
||||
mimeType: 'image/png',
|
||||
buffer: Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
|
||||
},
|
||||
{
|
||||
name: '2.png',
|
||||
mimeType: 'image/png',
|
||||
buffer: Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])
|
||||
}
|
||||
await page.getByTestId('manga-title').fill('Vinland Saga');
|
||||
await page.getByTestId('add-chapter').click();
|
||||
const pngBytes = Buffer.from([
|
||||
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a
|
||||
]);
|
||||
await expect(page.getByTestId('chapter-pages-list')).toContainText('1.png');
|
||||
await expect(page.getByTestId('chapter-pages-list')).toContainText('2.png');
|
||||
await page
|
||||
.getByTestId('staged-chapter-pages-input')
|
||||
.setInputFiles([
|
||||
{ name: 'IMG_2837.png', mimeType: 'image/png', buffer: pngBytes },
|
||||
{ name: 'random_file.png', mimeType: 'image/png', buffer: pngBytes }
|
||||
]);
|
||||
// The list renders "Page 001" / "Page 002" not the original filenames.
|
||||
const list = page.getByTestId('staged-chapter-pages-list');
|
||||
await expect(list).toContainText('Page 001');
|
||||
await expect(list).toContainText('Page 002');
|
||||
// Original filenames are visible as a dimmed caption (uploader-
|
||||
// reference; dropped after the row).
|
||||
await expect(list).toContainText('IMG_2837.png');
|
||||
|
||||
await page.getByTestId('chapter-submit').click();
|
||||
await expect(page.getByTestId('chapter-success')).toContainText(
|
||||
'2 pages'
|
||||
);
|
||||
expect(createdChapter).not.toBeNull();
|
||||
await page.getByTestId('manga-submit').click();
|
||||
await expect(page).toHaveURL(/\/manga\/m3$/);
|
||||
expect(submittedPageNames).toEqual(['page-001.png', 'page-002.png']);
|
||||
});
|
||||
|
||||
test('client preflight blocks oversized files without hitting the network', async ({ page }) => {
|
||||
await mockBaseUploadApis(page);
|
||||
// Dedicated chapter-upload page: a single staged file must reach the API
// under its renamed multipart filename (`page-001.png`).
test('/manga/[id]/upload-chapter happy path uploads renamed pages', async ({
	page
}) => {
	await stubAuthenticatedAndGenres(page);

	// Payloads served by the stubs registered below.
	const existingChapters = {
		items: [{ id: 'c0', manga_id: 'm1', number: 1, title: null, page_count: 3, created_at: '2026-01-01T00:00:00Z' }],
		page: { limit: 200, offset: 0, total: 1 }
	};
	const createdChapter = {
		id: 'c-new',
		manga_id: 'm1',
		number: 2,
		title: null,
		page_count: 1,
		created_at: '2026-01-01T00:00:00Z'
	};
	const noBookmarks = {
		items: [],
		page: { limit: 50, offset: 0, total: 0 }
	};
	const noProgress = {
		error: { code: 'not_found', message: 'no progress' }
	};

	// Every multipart filename seen by the chapter POST stub.
	let submitted: string[] = [];

	await page.route('**/api/v1/mangas/m1', (route) =>
		route.fulfill({
			status: 200,
			contentType: 'application/json',
			body: JSON.stringify(mangaFixture)
		})
	);
	await page.route('**/api/v1/mangas/m1/chapters?*', (route) =>
		route.fulfill({
			status: 200,
			contentType: 'application/json',
			body: JSON.stringify(existingChapters)
		})
	);
	await page.route('**/api/v1/mangas/m1/chapters', (route) => {
		const req = route.request();
		// Only the create call is stubbed here; anything else falls through
		// to the handlers registered earlier.
		if (req.method() !== 'POST') {
			route.fallback();
			return;
		}
		const raw = req.postDataBuffer()?.toString('binary') ?? '';
		submitted = Array.from(raw.matchAll(/filename="([^"]+)"/g), (hit) => hit[1]);
		route.fulfill({
			status: 201,
			contentType: 'application/json',
			body: JSON.stringify(createdChapter)
		});
	});
	await page.route('**/api/v1/me/bookmarks*', (route) =>
		route.fulfill({
			status: 200,
			contentType: 'application/json',
			body: JSON.stringify(noBookmarks)
		})
	);
	await page.route('**/api/v1/me/read-progress/m1', (route) =>
		route.fulfill({
			status: 404,
			contentType: 'application/json',
			body: JSON.stringify(noProgress)
		})
	);

	await page.goto('/manga/m1/upload-chapter');
	// Default chapter number is the next free one (existing max 1 → 2).
	await expect(page.getByTestId('chapter-number')).toHaveValue('2');

	// Just the 8-byte PNG signature.
	const pngBytes = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
	await page.getByTestId('pages-input').setInputFiles({
		name: 'whatever.png',
		mimeType: 'image/png',
		buffer: pngBytes
	});
	await expect(page.getByTestId('pages-list')).toContainText('Page 001');

	await page.getByTestId('chapter-submit').click();
	await expect(page).toHaveURL(/\/manga\/m1$/);

	const renamedOnly = submitted.filter((n) => n.startsWith('page-'));
	expect(renamedOnly).toEqual(['page-001.png']);
});
|
||||
|
||||
test('chapter upload client preflight blocks oversized files', async ({
|
||||
page
|
||||
}) => {
|
||||
await stubAuthenticatedAndGenres(page);
|
||||
await page.route('**/api/v1/mangas/m1', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(mangaFixture)
|
||||
})
|
||||
);
|
||||
await page.route('**/api/v1/mangas/m1/chapters?*', (route) =>
|
||||
route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({
|
||||
items: [],
|
||||
page: { limit: 200, offset: 0, total: 0 }
|
||||
})
|
||||
})
|
||||
);
|
||||
let chapterPostCalls = 0;
|
||||
await page.route('**/api/v1/mangas/m1/chapters', (route) => {
|
||||
if (route.request().method() === 'POST') chapterPostCalls += 1;
|
||||
route.fallback();
|
||||
});
|
||||
|
||||
await page.goto('/upload');
|
||||
await page.getByTestId('chapter-manga').selectOption('m1');
|
||||
await page.getByTestId('chapter-number').fill('1');
|
||||
|
||||
// A ~21 MiB buffer — exceeds the 20 MiB client cap.
|
||||
await page.goto('/manga/m1/upload-chapter');
|
||||
const big = Buffer.alloc(21 * 1024 * 1024, 0xff);
|
||||
await page.getByTestId('chapter-pages-input').setInputFiles({
|
||||
await page.getByTestId('pages-input').setInputFiles({
|
||||
name: 'huge.png',
|
||||
mimeType: 'image/png',
|
||||
buffer: big
|
||||
});
|
||||
|
||||
await expect(page.getByTestId('chapter-pages-list')).toContainText('too large');
|
||||
await expect(page.getByTestId('pages-list')).toContainText('too large');
|
||||
await expect(page.getByTestId('chapter-submit')).toBeDisabled();
|
||||
expect(chapterPostCalls).toBe(0);
|
||||
});
|
||||
|
||||
16
frontend/package-lock.json
generated
16
frontend/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "mangalord-frontend",
|
||||
"version": "0.12.0",
|
||||
"version": "0.23.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "mangalord-frontend",
|
||||
"version": "0.12.0",
|
||||
"version": "0.23.0",
|
||||
"devDependencies": {
|
||||
"@lucide/svelte": "^1.16.0",
|
||||
"@playwright/test": "^1.48.0",
|
||||
@@ -169,7 +169,6 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
@@ -193,7 +192,6 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
@@ -1157,7 +1155,6 @@
|
||||
"integrity": "sha512-mQjlkNo+rJvpln7V2IGY2j99BqhcFbS4UN0AQNKNYfhBAFZTuCDAdW3a1sgf330mvtNvsBXn3HpAhcmvdJTcIQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@standard-schema/spec": "^1.0.0",
|
||||
"@sveltejs/acorn-typescript": "^1.0.5",
|
||||
@@ -1200,7 +1197,6 @@
|
||||
"integrity": "sha512-0ba1RQ/PHen5FGpdSrW7Y3fAMQjrXantECALeOiOdBdzR5+5vPP6HVZRLmZaQL+W8m++o+haIAKq5qT+MiZ7VA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@sveltejs/vite-plugin-svelte-inspector": "^3.0.0-next.0||^3.0.0",
|
||||
"debug": "^4.3.7",
|
||||
@@ -1359,7 +1355,6 @@
|
||||
"integrity": "sha512-dyh/xO2Fh5bYrfWaaqGrRQQGkNdmYw6AmaAUvYeUMNTWQtvb796ikLdmTchRmOlOiIJ1TDXfWgVx1QkUlQ6Hew==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
@@ -1507,7 +1502,6 @@
|
||||
"integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -2249,7 +2243,6 @@
|
||||
"integrity": "sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"cssstyle": "^4.1.0",
|
||||
"data-urls": "^5.0.0",
|
||||
@@ -2638,7 +2631,6 @@
|
||||
"integrity": "sha512-WHeFSbZYsPu3+bLoNRUuAO+wavNlocOPf3wSHTP7hcFKVnJeWsYlCDbr3mTS14FCizf9ccIxXA8sGL8zKeQN3g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@types/estree": "1.0.8"
|
||||
},
|
||||
@@ -2810,7 +2802,6 @@
|
||||
"integrity": "sha512-ymI5ykLPwIHW839E053FQbI1G+jnRFJEw3Kv5Y4njixVWywQBx+NUFpkkKyk5LIb36Fg9DVXSYpqiGekLD0hyw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jridgewell/remapping": "^2.3.4",
|
||||
"@jridgewell/sourcemap-codec": "^1.5.0",
|
||||
@@ -2997,7 +2988,6 @@
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
@@ -3019,7 +3009,6 @@
|
||||
"integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.21.3",
|
||||
"postcss": "^8.4.43",
|
||||
@@ -3138,7 +3127,6 @@
|
||||
"integrity": "sha512-MSmPM9REYqDGBI8439mA4mWhV5sKmDlBKWIYbA3lRb2PTHACE0mgKwA8yQ2xq9vxDTuk4iPrECBAEW2aoFXY0Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@vitest/expect": "2.1.9",
|
||||
"@vitest/mocker": "2.1.9",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "mangalord-frontend",
|
||||
"version": "0.16.0",
|
||||
"version": "0.34.1",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
|
||||
@@ -94,6 +94,11 @@ describe('auth api client', () => {
|
||||
expect(url).toMatch(/\/v1\/auth\/logout$/);
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('POST');
|
||||
// Consistent content-type for all mutation requests, matching
|
||||
// the rest of the module — axum doesn't require it but the
|
||||
// header keeps the request style uniform.
|
||||
const headers = new Headers(init.headers);
|
||||
expect(headers.get('content-type')).toBe('application/json');
|
||||
});
|
||||
|
||||
it('me returns the user on 200', async () => {
|
||||
|
||||
@@ -32,7 +32,14 @@ export async function login(creds: Credentials): Promise<User> {
|
||||
}
|
||||
|
||||
/**
 * End the current session via `POST /v1/auth/logout`.
 *
 * The request has no body; the JSON content-type header is sent anyway so
 * every mutation request issued by this module looks the same.
 */
export async function logout(): Promise<void> {
	await request<void>('/v1/auth/logout', {
		method: 'POST',
		// Consistent with the other POST/PATCH helpers in this module.
		// axum doesn't require it (no body), but keeping the header
		// on every mutation request avoids the false-flag in logs and
		// matches the project's style.
		headers: { 'content-type': 'application/json' }
	});
}
|
||||
|
||||
export type ChangePassword = {
|
||||
|
||||
@@ -7,7 +7,12 @@ import {
|
||||
afterEach,
|
||||
type MockInstance
|
||||
} from 'vitest';
|
||||
import { listChapters, getChapter, getChapterPages } from './chapters';
|
||||
import {
|
||||
listChapters,
|
||||
getChapter,
|
||||
getChapterPages,
|
||||
createChapter
|
||||
} from './chapters';
|
||||
|
||||
function ok(body: unknown): Response {
|
||||
return new Response(JSON.stringify(body), {
|
||||
@@ -71,22 +76,59 @@ describe('chapters api client', () => {
|
||||
expect(result.page.total).toBeNull();
|
||||
});
|
||||
|
||||
it('getChapter hits /v1/mangas/{id}/chapters/{n}', async () => {
|
||||
it('getChapter hits /v1/mangas/{id}/chapters/{chapter_id}', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(ok(chapterFixture));
|
||||
const c = await getChapter('m1', 1);
|
||||
const c = await getChapter('m1', 'ch-uuid-1');
|
||||
expect(c).toEqual(chapterFixture);
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/mangas\/m1\/chapters\/1$/);
|
||||
expect(url).toMatch(/\/v1\/mangas\/m1\/chapters\/ch-uuid-1$/);
|
||||
});
|
||||
|
||||
it('getChapter surfaces 404 via ApiError.code', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(envelope(404, 'not_found', 'not found'));
|
||||
await expect(getChapter('m1', 99)).rejects.toMatchObject({
|
||||
await expect(getChapter('m1', 'unknown-uuid')).rejects.toMatchObject({
|
||||
status: 404,
|
||||
code: 'not_found'
|
||||
});
|
||||
});
|
||||
|
||||
it('createChapter POSTs multipart and renames page files to page-NNN.<ext>', async () => {
	fetchSpy.mockResolvedValueOnce(ok({ ...chapterFixture, page_count: 3 }));

	const heicNamedJpeg = new File([new Uint8Array([1, 2])], 'IMG_2837.HEIC', { type: 'image/jpeg' });
	const plainPng = new File([new Uint8Array([3, 4])], 'random.png', { type: 'image/png' });
	// No extension; MIME-derived fallback should kick in.
	const extensionless = new File([new Uint8Array([5])], 'scan_42', { type: 'image/webp' });

	const result = await createChapter('m1', { number: 1, title: null }, [
		heicNamedJpeg,
		plainPng,
		extensionless
	]);
	expect(result.page_count).toBe(3);

	const [calledUrl, calledInit] = fetchSpy.mock.calls[0];
	expect(calledUrl as string).toMatch(/\/v1\/mangas\/m1\/chapters$/);
	const init = calledInit as RequestInit;
	expect(init.method).toBe('POST');

	const form = init.body as FormData;
	// Metadata part is JSON.
	expect((form.get('metadata') as Blob).type).toBe('application/json');

	// Three pages, all renamed; original filenames discarded.
	// An allowed original extension wins over the MIME-derived one. `.HEIC`
	// is lower-cased first and is still not in the allowed list, so the
	// MIME-derived `.jpg` is used for the first page.
	const submittedNames = (form.getAll('page') as File[]).map((part) => part.name);
	expect(submittedNames).toEqual(['page-001.jpg', 'page-002.png', 'page-003.webp']);

	// No original filenames leak through.
	submittedNames.forEach((name) => {
		expect(name).not.toMatch(/IMG_2837|random|scan_42/);
	});
});
|
||||
|
||||
it('getChapterPages unwraps the {pages} envelope into the array', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok({
|
||||
@@ -101,10 +143,10 @@ describe('chapters api client', () => {
|
||||
]
|
||||
})
|
||||
);
|
||||
const pages = await getChapterPages('m1', 1);
|
||||
const pages = await getChapterPages('m1', 'ch-uuid-1');
|
||||
expect(pages).toHaveLength(1);
|
||||
expect(pages[0].storage_key).toContain('0001.png');
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/mangas\/m1\/chapters\/1\/pages$/);
|
||||
expect(url).toMatch(/\/v1\/mangas\/m1\/chapters\/ch-uuid-1\/pages$/);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -32,9 +32,9 @@ export async function listChapters(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Fetch one chapter of a manga via
 * `GET /v1/mangas/{mangaId}/chapters/{chapterId}`.
 *
 * Both ids are percent-encoded before being placed in the path.
 *
 * @param mangaId - Id of the manga that owns the chapter.
 * @param chapterId - Id of the chapter (a string id, not the chapter number).
 * @returns The chapter as returned by `request`.
 */
export async function getChapter(mangaId: string, chapterId: string): Promise<Chapter> {
	return request<Chapter>(
		`/v1/mangas/${encodeURIComponent(mangaId)}/chapters/${encodeURIComponent(chapterId)}`
	);
}
|
||||
|
||||
@@ -48,10 +48,73 @@ export type ChapterPage = {
|
||||
|
||||
/**
 * List the pages of a chapter via
 * `GET /v1/mangas/{mangaId}/chapters/{chapterId}/pages`.
 *
 * The endpoint wraps the list in a `{ pages }` envelope; only the array is
 * returned to the caller. Both ids are percent-encoded.
 *
 * @param mangaId - Id of the manga that owns the chapter.
 * @param chapterId - Id of the chapter (a string id, not the chapter number).
 * @returns The `pages` array from the response envelope.
 */
export async function getChapterPages(
	mangaId: string,
	chapterId: string
): Promise<ChapterPage[]> {
	const r = await request<{ pages: ChapterPage[] }>(
		`/v1/mangas/${encodeURIComponent(mangaId)}/chapters/${encodeURIComponent(chapterId)}/pages`
	);
	return r.pages;
}
|
||||
|
||||
/** Chapter metadata that `createChapter` serialises into the `metadata` part. */
export type NewChapter = {
	/** Chapter number. */
	number: number;
	/** Optional chapter title; may be omitted or explicitly `null`. */
	title?: string | null;
};
|
||||
|
||||
/**
 * Create a chapter with `POST /api/v1/mangas/:id/chapters`.
 *
 * The request is multipart: one JSON `metadata` part followed by the
 * `page` parts in the order given. Before submission every page file is
 * wrapped in a new `File` named `page-NNN.<ext>` (1-based, zero-padded to
 * three digits) so the uploader's original filenames — often
 * personally-identifying or just messy (`IMG_2837.HEIC`,
 * `~/scans/full chapter pack/`) — don't end up in request bodies or
 * server logs. The bytes are unchanged — the backend still sniffs the
 * MIME from magic bytes and stores under its own `{nnnn}.{ext}` scheme.
 *
 * @param mangaId - Id of the manga receiving the chapter (percent-encoded into the path).
 * @param metadata - Chapter metadata, sent as the JSON `metadata` part.
 * @param pages - Page files in reading order.
 * @returns The created chapter as returned by `request`.
 */
export async function createChapter(
	mangaId: string,
	metadata: NewChapter,
	pages: File[]
): Promise<Chapter> {
	// Same bytes and MIME type, new positional name.
	const toRenamedPart = (original: File, index: number): File => {
		const ordinal = String(index + 1).padStart(3, '0');
		return new File([original], `page-${ordinal}${extensionFor(original)}`, {
			type: original.type
		});
	};

	const body = new FormData();
	const metadataPart = new Blob([JSON.stringify(metadata)], { type: 'application/json' });
	body.append('metadata', metadataPart);
	pages.map(toRenamedPart).forEach((part) => {
		body.append('page', part);
	});

	const path = `/v1/mangas/${encodeURIComponent(mangaId)}/chapters`;
	return request<Chapter>(path, { method: 'POST', body });
}
|
||||
|
||||
/** Lower-cased extensions (leading dot included) that may be kept from the original filename. */
const ALLOWED_PAGE_EXTENSION = /^\.(jpe?g|png|webp|gif|avif)$/;

/**
 * Extension used when the filename offers no usable one. Held in a `Map`
 * rather than an object literal so that a `file.type` which happens to
 * match an `Object.prototype` member (`constructor`, `toString`, …) misses
 * cleanly instead of returning an inherited non-string value.
 */
const PAGE_EXTENSION_BY_MIME: ReadonlyMap<string, string> = new Map([
	['image/jpeg', '.jpg'],
	['image/png', '.png'],
	['image/webp', '.webp'],
	['image/gif', '.gif'],
	['image/avif', '.avif']
]);

/**
 * Pick a sensible extension for the renamed multipart part. Prefer
 * the original filename's extension when present (jpg/jpeg/png/webp/
 * gif/avif, compared case-insensitively and returned lower-cased),
 * otherwise derive from the MIME type. Falls back to an empty string
 * so the renamed file is just `page-001` — the server sniffs bytes
 * anyway.
 *
 * @param file - The page file as chosen by the uploader; only `name` and `type` are read.
 * @returns The extension including its leading dot, or `''` when neither source yields one.
 */
function extensionFor(file: File): string {
	const dot = file.name.lastIndexOf('.');
	// `dot > 0` skips names with no dot and names that start with their
	// only dot (e.g. `.png`); both fall through to the MIME lookup.
	if (dot > 0) {
		const ext = file.name.slice(dot).toLowerCase();
		if (ALLOWED_PAGE_EXTENSION.test(ext)) return ext;
	}
	return PAGE_EXTENSION_BY_MIME.get(file.type) ?? '';
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
|
||||
import { ApiError } from './client';
|
||||
import { ApiError, request } from './client';
|
||||
import { getManga } from './mangas';
|
||||
|
||||
describe('request error envelope parsing', () => {
|
||||
@@ -48,6 +48,20 @@ describe('request error envelope parsing', () => {
|
||||
expect(err.code).toBe('http_error');
|
||||
});
|
||||
|
||||
it('treats empty 200/201 bodies as undefined (no JSON.parse crash)', async () => {
	// Regression: addMangaToCollection is typed `void` and the
	// backend returns 201 (created) / 200 (already there) with
	// no body. Without the empty-body short-circuit, `res.json()`
	// would throw `JSON.parse: unexpected end of data`.
	for (const status of [201, 200]) {
		fetchSpy.mockResolvedValueOnce(new Response(null, { status }));
		const parsed = await request<void>('/v1/whatever', { method: 'POST' });
		expect(parsed).toBeUndefined();
	}
});
|
||||
|
||||
it('falls back to http_error code when JSON has no error envelope', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
new Response(JSON.stringify({ message: 'oops' }), {
|
||||
|
||||
@@ -56,10 +56,18 @@ export async function request<T>(path: string, init?: RequestInit): Promise<T> {
|
||||
}
|
||||
throw new ApiError(res.status, code, message);
|
||||
}
|
||||
// Any empty body (not just 204) returns undefined — the manga-add
|
||||
// endpoint, for instance, signals create-vs-already-present via
|
||||
// 201/200 with no body, and callers typed `request<void>` would
|
||||
// otherwise blow up on `res.json()` parsing an empty string.
|
||||
if (res.status === 204) {
|
||||
return undefined as T;
|
||||
}
|
||||
return (await res.json()) as T;
|
||||
const text = await res.text();
|
||||
if (!text) {
|
||||
return undefined as T;
|
||||
}
|
||||
return JSON.parse(text) as T;
|
||||
}
|
||||
|
||||
export type Manga = {
|
||||
|
||||
158
frontend/src/lib/api/collections.test.ts
Normal file
158
frontend/src/lib/api/collections.test.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
|
||||
import {
|
||||
listMyCollections,
|
||||
listMyCollectionsOrEmpty,
|
||||
createCollection,
|
||||
getCollection,
|
||||
updateCollection,
|
||||
deleteCollection,
|
||||
listCollectionMangas,
|
||||
addMangaToCollection,
|
||||
removeMangaFromCollection,
|
||||
getMyCollectionsContaining
|
||||
} from './collections';
|
||||
|
||||
/** Build a JSON `Response` carrying `body`, with status 200 unless overridden. */
function ok(body: unknown, status = 200): Response {
	const init: ResponseInit = {
		status,
		headers: { 'content-type': 'application/json' }
	};
	const payload = JSON.stringify(body);
	return new Response(payload, init);
}
|
||||
|
||||
/** Build a body-less `204` response. */
function noContent(): Response {
	const empty = new Response(null, { status: 204 });
	return empty;
}
|
||||
|
||||
/** Build a JSON error response shaped as `{ error: { code, message } }`. */
function envelope(status: number, code: string, message: string): Response {
	const payload = JSON.stringify({ error: { code, message } });
	const init: ResponseInit = {
		status,
		headers: { 'content-type': 'application/json' }
	};
	return new Response(payload, init);
}
|
||||
|
||||
/**
 * Build a collection-summary-shaped fixture.
 *
 * @param extra - Properties layered over the defaults; on a key clash `extra` wins.
 */
function collectionFixture(extra: Record<string, unknown> = {}) {
	const defaults = {
		id: 'c1',
		user_id: 'u1',
		name: 'Favorites',
		description: null,
		created_at: '2026-01-01T00:00:00Z',
		updated_at: '2026-01-01T00:00:00Z',
		manga_count: 0,
		sample_covers: []
	};
	return { ...defaults, ...extra };
}
|
||||
|
||||
describe('collections api client', () => {
	let fetchSpy: MockInstance<typeof globalThis.fetch>;

	// Accessors for the first recorded `fetch` call of the current test.
	const firstUrl = (): string => fetchSpy.mock.calls[0][0] as string;
	const firstInit = (): RequestInit => fetchSpy.mock.calls[0][1] as RequestInit;
	const firstJsonBody = (): unknown => JSON.parse(firstInit().body as string);

	beforeEach(() => {
		fetchSpy = vi.spyOn(globalThis, 'fetch');
	});
	afterEach(() => {
		vi.restoreAllMocks();
	});

	it('listMyCollections returns the paged envelope', async () => {
		const payload = {
			items: [collectionFixture()],
			page: { limit: 50, offset: 0, total: 1 }
		};
		fetchSpy.mockResolvedValueOnce(ok(payload));

		const result = await listMyCollections();

		expect(result.items[0].name).toBe('Favorites');
		expect(firstUrl()).toMatch(/\/v1\/me\/collections$/);
	});

	it('listMyCollectionsOrEmpty returns empty page on 401', async () => {
		fetchSpy.mockResolvedValueOnce(envelope(401, 'unauthenticated', 'login required'));

		const result = await listMyCollectionsOrEmpty();

		expect(result.items).toEqual([]);
		expect(result.page.total).toBeNull();
	});

	it('listMyCollectionsOrEmpty re-throws non-401 errors', async () => {
		fetchSpy.mockResolvedValueOnce(envelope(500, 'internal_error', 'oops'));

		await expect(listMyCollectionsOrEmpty()).rejects.toMatchObject({ status: 500 });
	});

	it('createCollection POSTs JSON to /v1/collections', async () => {
		fetchSpy.mockResolvedValueOnce(ok(collectionFixture(), 201));

		const c = await createCollection({ name: 'Favorites' });

		expect(c.name).toBe('Favorites');
		expect(firstInit().method).toBe('POST');
		expect(firstJsonBody()).toEqual({ name: 'Favorites' });
	});

	it('getCollection encodes the id', async () => {
		fetchSpy.mockResolvedValueOnce(ok(collectionFixture()));

		await getCollection('id with space');

		expect(firstUrl()).toContain('/v1/collections/id%20with%20space');
	});

	it('updateCollection PATCHes with the patch body', async () => {
		fetchSpy.mockResolvedValueOnce(ok(collectionFixture({ name: 'Read later' })));

		const updated = await updateCollection('c1', { name: 'Read later' });

		expect(updated.name).toBe('Read later');
		expect(firstInit().method).toBe('PATCH');
		expect(firstJsonBody()).toEqual({ name: 'Read later' });
	});

	it('deleteCollection issues DELETE', async () => {
		fetchSpy.mockResolvedValueOnce(noContent());

		await deleteCollection('c1');

		expect(firstInit().method).toBe('DELETE');
	});

	it('listCollectionMangas returns the paged envelope of mangas', async () => {
		const berserk = {
			id: 'm1',
			title: 'Berserk',
			status: 'ongoing',
			alt_titles: [],
			description: null,
			cover_image_path: null,
			created_at: '2026-01-01T00:00:00Z',
			updated_at: '2026-01-01T00:00:00Z'
		};
		fetchSpy.mockResolvedValueOnce(
			ok({
				items: [berserk],
				page: { limit: 50, offset: 0, total: 1 }
			})
		);

		const r = await listCollectionMangas('c1');

		expect(r.items[0].title).toBe('Berserk');
	});

	it('addMangaToCollection POSTs the manga_id', async () => {
		fetchSpy.mockResolvedValueOnce(ok({}, 201));

		await addMangaToCollection('c1', 'm9');

		expect(firstInit().method).toBe('POST');
		expect(firstJsonBody()).toEqual({ manga_id: 'm9' });
	});

	it('removeMangaFromCollection DELETEs the nested resource', async () => {
		fetchSpy.mockResolvedValueOnce(noContent());

		await removeMangaFromCollection('c1', 'm9');

		expect(firstUrl()).toMatch(/\/v1\/collections\/c1\/mangas\/m9$/);
	});

	it('getMyCollectionsContaining returns the id list', async () => {
		fetchSpy.mockResolvedValueOnce(ok({ collection_ids: ['c1', 'c3'] }));

		const ids = await getMyCollectionsContaining('m1');

		expect(ids).toEqual(['c1', 'c3']);
		expect(firstUrl()).toMatch(/\/v1\/mangas\/m1\/my-collections$/);
	});
});
|
||||
139
frontend/src/lib/api/collections.ts
Normal file
139
frontend/src/lib/api/collections.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
import { ApiError, request, type Manga, type Page } from './client';
|
||||
|
||||
/** A collection as returned by the create / get / update helpers in this module. */
export type Collection = {
	id: string;
	/** Id of the owning user. */
	user_id: string;
	name: string;
	/** Free-text description; `null` when none is set. */
	description: string | null;
	/** Creation timestamp, carried as a string. */
	created_at: string;
	/** Last-update timestamp, carried as a string. */
	updated_at: string;
};
|
||||
|
||||
/**
 * Returned by `GET /v1/me/collections` — a `Collection` enriched with the
 * extra fields needed for card rendering.
 */
export type CollectionSummary = Collection & {
	/** Number of mangas in the collection. */
	manga_count: number;
	/** Up to 3 cover image keys, newest-added first. */
	sample_covers: string[];
};
|
||||
|
||||
/** One page of collection summaries plus its paging metadata. */
export type CollectionsPage = {
	items: CollectionSummary[];
	page: Page;
};
|
||||
|
||||
/** One page of the mangas inside a collection plus its paging metadata. */
export type CollectionMangasPage = {
	items: Manga[];
	page: Page;
};
|
||||
|
||||
/** Request body accepted by `createCollection`. */
export type NewCollection = {
	name: string;
	/** Optional description; may be omitted or explicitly `null`. */
	description?: string | null;
};
|
||||
|
||||
/** Request body accepted by `updateCollection`; every field is optional. */
export type CollectionPatch = {
	name?: string;
	/** May be omitted or explicitly `null`. */
	description?: string | null;
};
|
||||
|
||||
/** Optional paging controls; each one that is set becomes a query parameter. */
export type ListMyOptions = {
	limit?: number;
	offset?: number;
};
|
||||
|
||||
/**
 * Fetch a page of the current user's collections from `GET /v1/me/collections`.
 *
 * @param opts - Optional `limit` / `offset`; each is sent as a query
 *   parameter only when it is neither `null` nor `undefined`.
 * @returns The paged envelope of collection summaries.
 */
export async function listMyCollections(
	opts: ListMyOptions = {}
): Promise<CollectionsPage> {
	const query = new URLSearchParams();
	const candidates: Array<[string, number | undefined]> = [
		['limit', opts.limit],
		['offset', opts.offset]
	];
	for (const [key, value] of candidates) {
		if (value != null) query.set(key, String(value));
	}

	const encoded = query.toString();
	const suffix = encoded ? `?${encoded}` : '';
	return request<CollectionsPage>(`/v1/me/collections${suffix}`);
}
|
||||
|
||||
/**
 * Like `listMyCollections()` with default paging, except that a 401
 * `ApiError` yields an empty page so guest-rendering pages don't have to
 * special-case it. Every other error is re-thrown unchanged.
 */
export async function listMyCollectionsOrEmpty(): Promise<CollectionsPage> {
	try {
		return await listMyCollections();
	} catch (err) {
		const isUnauthenticated = err instanceof ApiError && err.status === 401;
		if (!isUnauthenticated) {
			throw err;
		}
		return { items: [], page: { limit: 50, offset: 0, total: null } };
	}
}
|
||||
|
||||
/**
 * Create a collection with `POST /v1/collections`.
 *
 * @param input - Name and optional description, sent as the JSON body.
 * @returns The created collection as returned by `request`.
 */
export async function createCollection(
	input: NewCollection
): Promise<Collection> {
	const init: RequestInit = {
		method: 'POST',
		headers: { 'content-type': 'application/json' },
		body: JSON.stringify(input)
	};
	return request<Collection>('/v1/collections', init);
}
|
||||
|
||||
/**
 * Fetch one collection via `GET /v1/collections/{id}`.
 *
 * @param id - Collection id; percent-encoded before being placed in the path.
 */
export async function getCollection(id: string): Promise<Collection> {
	const path = `/v1/collections/${encodeURIComponent(id)}`;
	return request<Collection>(path);
}
|
||||
|
||||
/**
 * PATCHes `/v1/collections/:id` with `patch` serialised as JSON.
 *
 * @param id Collection id; URI-encoded before being placed in the path.
 * @param patch Fields to send in the request body.
 * @returns The `Collection` produced by `request`.
 */
export async function updateCollection(
	id: string,
	patch: CollectionPatch
): Promise<Collection> {
	const path = '/v1/collections/' + encodeURIComponent(id);
	const payload = JSON.stringify(patch);
	return request<Collection>(path, {
		method: 'PATCH',
		headers: { 'content-type': 'application/json' },
		body: payload
	});
}
|
||||
|
||||
/**
 * Sends DELETE to `/v1/collections/:id` and resolves with no value once the
 * request settles successfully.
 *
 * @param id Collection id; URI-encoded before being placed in the path.
 */
export async function deleteCollection(id: string): Promise<void> {
	const path = '/v1/collections/' + encodeURIComponent(id);
	await request<void>(path, { method: 'DELETE' });
}
|
||||
|
||||
/**
 * Fetches a page of mangas from `/v1/collections/:id/mangas`.
 *
 * @param id Collection id; URI-encoded before being placed in the path.
 * @param opts Optional `limit` / `offset`; each is added to the query
 *   string only when it is non-null.
 */
export async function listCollectionMangas(
	id: string,
	opts: ListMyOptions = {}
): Promise<CollectionMangasPage> {
	const query = new URLSearchParams();
	for (const key of ['limit', 'offset'] as const) {
		const value = opts[key];
		if (value != null) query.set(key, String(value));
	}
	const encoded = query.toString();
	const base = `/v1/collections/${encodeURIComponent(id)}/mangas`;
	return request<CollectionMangasPage>(encoded ? `${base}?${encoded}` : base);
}
|
||||
|
||||
/**
 * POSTs `{ manga_id }` as JSON to `/v1/collections/:collectionId/mangas`.
 *
 * @param collectionId Target collection; URI-encoded in the path.
 * @param mangaId Manga id sent in the request body.
 */
export async function addMangaToCollection(
	collectionId: string,
	mangaId: string
): Promise<void> {
	const path = `/v1/collections/${encodeURIComponent(collectionId)}/mangas`;
	const payload = JSON.stringify({ manga_id: mangaId });
	await request<void>(path, {
		method: 'POST',
		headers: { 'content-type': 'application/json' },
		body: payload
	});
}
|
||||
|
||||
/**
 * Sends DELETE to `/v1/collections/:collectionId/mangas/:mangaId`.
 * Both ids are URI-encoded before being placed in the path.
 */
export async function removeMangaFromCollection(
	collectionId: string,
	mangaId: string
): Promise<void> {
	const collectionPart = encodeURIComponent(collectionId);
	const mangaPart = encodeURIComponent(mangaId);
	const path = `/v1/collections/${collectionPart}/mangas/${mangaPart}`;
	await request<void>(path, { method: 'DELETE' });
}
|
||||
|
||||
/**
 * Which of the user's collections currently contain this manga.
 *
 * Requests `/v1/mangas/:mangaId/my-collections` and unwraps the
 * `collection_ids` array from the response object.
 */
export async function getMyCollectionsContaining(
	mangaId: string
): Promise<string[]> {
	const path = `/v1/mangas/${encodeURIComponent(mangaId)}/my-collections`;
	const { collection_ids } = await request<{ collection_ids: string[] }>(path);
	return collection_ids;
}
|
||||
@@ -4,6 +4,8 @@ import {
|
||||
createManga,
|
||||
getManga,
|
||||
updateManga,
|
||||
updateMangaCover,
|
||||
deleteMangaCover,
|
||||
attachTag,
|
||||
detachTag
|
||||
} from './mangas';
|
||||
@@ -184,6 +186,49 @@ describe('mangas api client', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('updateMangaCover PUTs multipart with the cover blob', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok(detailFixture({ cover_image_path: 'mangas/b1/cover.png' }))
|
||||
);
|
||||
const cover = new Blob([new Uint8Array([0x89, 0x50, 0x4e, 0x47])], { type: 'image/png' });
|
||||
const updated = await updateMangaCover('b1', cover);
|
||||
expect(updated.cover_image_path).toBe('mangas/b1/cover.png');
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/mangas\/b1\/cover$/);
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('PUT');
|
||||
expect(init.body).toBeInstanceOf(FormData);
|
||||
const form = init.body as FormData;
|
||||
expect(form.get('cover')).toBeInstanceOf(Blob);
|
||||
// Boundary is filled in by the browser when body is FormData.
|
||||
expect(init.headers).toBeUndefined();
|
||||
});
|
||||
|
||||
it('updateMangaCover throws ApiError on payload_too_large', async () => {
|
||||
fetchSpy.mockResolvedValue(
|
||||
envelope(413, 'payload_too_large', 'cover exceeds size cap')
|
||||
);
|
||||
const cover = new Blob([new Uint8Array(1)]);
|
||||
await expect(updateMangaCover('b1', cover)).rejects.toMatchObject({
|
||||
name: 'ApiError',
|
||||
status: 413,
|
||||
code: 'payload_too_large'
|
||||
});
|
||||
});
|
||||
|
||||
it('deleteMangaCover DELETEs and returns the refreshed detail with null path', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok(detailFixture({ cover_image_path: null }))
|
||||
);
|
||||
const updated = await deleteMangaCover('b1');
|
||||
expect(updated.cover_image_path).toBeNull();
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/mangas\/b1\/cover$/);
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('DELETE');
|
||||
expect(init.body).toBeUndefined();
|
||||
});
|
||||
|
||||
it('attachTag POSTs the name and returns the TagRef', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok({ id: 't9', name: 'Dark Fantasy', added_by: 'u1' }, 201)
|
||||
|
||||
@@ -109,6 +109,31 @@ export async function updateManga(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * PUT /api/v1/mangas/:id/cover (multipart). Uploads `cover` under the
 * `cover` form field and returns the refreshed detail.
 *
 * No Content-Type header is set here on purpose: with a `FormData` body
 * the browser supplies the multipart boundary itself.
 *
 * @param id Manga id; URI-encoded before being placed in the path.
 * @param cover Image data to upload.
 */
export async function updateMangaCover(
	id: string,
	cover: Blob
): Promise<MangaDetail> {
	const path = `/v1/mangas/${encodeURIComponent(id)}/cover`;
	const body = new FormData();
	body.append('cover', cover);
	return request<MangaDetail>(path, { method: 'PUT', body });
}
|
||||
|
||||
/**
 * DELETE /api/v1/mangas/:id/cover. Returns the refreshed detail.
 *
 * @param id Manga id; URI-encoded before being placed in the path.
 */
export async function deleteMangaCover(id: string): Promise<MangaDetail> {
	const path = `/v1/mangas/${encodeURIComponent(id)}/cover`;
	return request<MangaDetail>(path, { method: 'DELETE' });
}
|
||||
|
||||
export async function attachTag(
|
||||
mangaId: string,
|
||||
name: string
|
||||
|
||||
114
frontend/src/lib/api/read_progress.test.ts
Normal file
114
frontend/src/lib/api/read_progress.test.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
|
||||
import {
|
||||
updateReadProgress,
|
||||
listMyReadProgress,
|
||||
listMyReadProgressOrEmpty,
|
||||
getMyReadProgressForManga,
|
||||
clearReadProgress
|
||||
} from './read_progress';
|
||||
|
||||
/** Builds a JSON `Response` carrying `body`, with the given status (200 by default). */
function ok(body: unknown, status = 200): Response {
	const headers = { 'content-type': 'application/json' };
	const payload = JSON.stringify(body);
	return new Response(payload, { status, headers });
}
|
||||
|
||||
/** Builds a body-less `Response` with status 204. */
function noContent(): Response {
	const init = { status: 204 };
	return new Response(null, init);
}
|
||||
|
||||
/** Builds a JSON error `Response` shaped as `{ error: { code, message } }` with the given status. */
function envelope(status: number, code: string, message: string): Response {
	const error = { code, message };
	const headers = { 'content-type': 'application/json' };
	return new Response(JSON.stringify({ error }), { status, headers });
}
|
||||
|
||||
describe('read_progress api client', () => {
|
||||
let fetchSpy: MockInstance<typeof globalThis.fetch>;
|
||||
|
||||
beforeEach(() => {
|
||||
fetchSpy = vi.spyOn(globalThis, 'fetch');
|
||||
});
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it('updateReadProgress PUTs to /v1/me/read-progress', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok({
|
||||
user_id: 'u1',
|
||||
manga_id: 'm1',
|
||||
chapter_id: 'c1',
|
||||
page: 5,
|
||||
updated_at: '2026-05-17T12:00:00Z'
|
||||
})
|
||||
);
|
||||
const r = await updateReadProgress({ manga_id: 'm1', chapter_id: 'c1', page: 5 });
|
||||
expect(r.page).toBe(5);
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('PUT');
|
||||
expect(JSON.parse(init.body as string)).toEqual({
|
||||
manga_id: 'm1',
|
||||
chapter_id: 'c1',
|
||||
page: 5
|
||||
});
|
||||
});
|
||||
|
||||
it('listMyReadProgress returns the paged envelope', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok({
|
||||
items: [],
|
||||
page: { limit: 50, offset: 0, total: 0 }
|
||||
})
|
||||
);
|
||||
const r = await listMyReadProgress();
|
||||
expect(r.items).toEqual([]);
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/me\/read-progress$/);
|
||||
});
|
||||
|
||||
it('listMyReadProgressOrEmpty returns empty page on 401', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(envelope(401, 'unauthenticated', 'login required'));
|
||||
const r = await listMyReadProgressOrEmpty();
|
||||
expect(r.items).toEqual([]);
|
||||
});
|
||||
|
||||
it('getMyReadProgressForManga returns null on 404 (not yet read)', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(envelope(404, 'not_found', 'no progress'));
|
||||
const r = await getMyReadProgressForManga('m1');
|
||||
expect(r).toBeNull();
|
||||
});
|
||||
|
||||
it('getMyReadProgressForManga returns null on 401 (guest)', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(envelope(401, 'unauthenticated', 'login'));
|
||||
const r = await getMyReadProgressForManga('m1');
|
||||
expect(r).toBeNull();
|
||||
});
|
||||
|
||||
it('getMyReadProgressForManga returns the row with chapter_number when present', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok({
|
||||
manga_id: 'm1',
|
||||
chapter_id: 'c1',
|
||||
chapter_number: 7,
|
||||
page: 3,
|
||||
updated_at: '2026-05-17T12:00:00Z'
|
||||
})
|
||||
);
|
||||
const r = await getMyReadProgressForManga('m1');
|
||||
expect(r?.chapter_id).toBe('c1');
|
||||
expect(r?.chapter_number).toBe(7);
|
||||
expect(r?.page).toBe(3);
|
||||
});
|
||||
|
||||
it('clearReadProgress DELETEs the resource', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(noContent());
|
||||
await clearReadProgress('m1');
|
||||
const init = fetchSpy.mock.calls[0][1] as RequestInit;
|
||||
expect(init.method).toBe('DELETE');
|
||||
const url = fetchSpy.mock.calls[0][0] as string;
|
||||
expect(url).toMatch(/\/v1\/me\/read-progress\/m1$/);
|
||||
});
|
||||
});
|
||||
106
frontend/src/lib/api/read_progress.ts
Normal file
106
frontend/src/lib/api/read_progress.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { ApiError, request, type Page } from './client';
|
||||
|
||||
/** Read-progress record as returned by `updateReadProgress`. */
export type ReadProgress = {
	user_id: string;
	manga_id: string;
	chapter_id: string | null;
	page: number;
	updated_at: string;
};

/** One entry of the read-progress list, carrying manga display fields alongside the position. */
export type ReadProgressSummary = {
	manga_id: string;
	manga_title: string;
	manga_cover_image_path: string | null;
	chapter_id: string | null;
	/** `null` if the chapter was deleted after the progress was written. */
	chapter_number: number | null;
	page: number;
	updated_at: string;
};

/** One page of read-progress summaries plus its paging metadata. */
export type ReadProgressPage = {
	page: Page;
	items: Array<ReadProgressSummary>;
};

/** Request body accepted by `updateReadProgress`; only `manga_id` is required. */
export type UpsertReadProgress = {
	manga_id: string;
	chapter_id?: string | null;
	page?: number | null;
};
|
||||
|
||||
/**
 * PUTs `input` as JSON to `/v1/me/read-progress`.
 *
 * @param input Manga id plus optional chapter id and page.
 * @returns The `ReadProgress` produced by `request`.
 */
export async function updateReadProgress(
	input: UpsertReadProgress
): Promise<ReadProgress> {
	const payload = JSON.stringify(input);
	return request<ReadProgress>('/v1/me/read-progress', {
		method: 'PUT',
		headers: { 'content-type': 'application/json' },
		body: payload
	});
}
|
||||
|
||||
/**
 * Fetches a page of read-progress entries from `/v1/me/read-progress`.
 *
 * @param opts Optional `limit` / `offset`; each is added to the query
 *   string only when it is non-null.
 */
export async function listMyReadProgress(
	opts: { limit?: number; offset?: number } = {}
): Promise<ReadProgressPage> {
	const query = new URLSearchParams();
	for (const key of ['limit', 'offset'] as const) {
		const value = opts[key];
		if (value != null) query.set(key, String(value));
	}
	const encoded = query.toString();
	const path = encoded ? `/v1/me/read-progress?${encoded}` : '/v1/me/read-progress';
	return request<ReadProgressPage>(path);
}
|
||||
|
||||
/**
 * Same as `listMyReadProgress()` with default options, except that an
 * `ApiError` with status 401 resolves to an empty page. Any other failure
 * is rethrown unchanged.
 */
export async function listMyReadProgressOrEmpty(): Promise<ReadProgressPage> {
	return listMyReadProgress().catch((err: unknown) => {
		const unauthenticated = err instanceof ApiError && err.status === 401;
		if (!unauthenticated) throw err;
		return { items: [], page: { limit: 50, offset: 0, total: null } };
	});
}
|
||||
|
||||
/**
 * Response shape for a single manga, returned by GET /me/read-progress/:id.
 * `chapter_number` is part of the payload so the "Continue reading" CTA can
 * render without looking the chapter id up in a paged chapters list.
 */
export type ReadProgressForManga = {
	manga_id: string;
	chapter_id: string | null;
	/** `null` if the chapter was deleted after the progress was written. */
	chapter_number: number | null;
	page: number;
	updated_at: string;
};
|
||||
|
||||
/**
 * Looks up the user's progress for one manga.
 *
 * Resolves to `null` when the request fails with an `ApiError` of status
 * 404 (never opened) or 401 (not signed in); every other error is rethrown.
 * Consumers are the manga detail page's "Continue from Ch. N" CTA and the
 * reader, which seeds its session-local high-water mark from the result.
 *
 * @param mangaId Manga id; URI-encoded before being placed in the path.
 */
export async function getMyReadProgressForManga(
	mangaId: string
): Promise<ReadProgressForManga | null> {
	const path = `/v1/me/read-progress/${encodeURIComponent(mangaId)}`;
	try {
		return await request<ReadProgressForManga>(path);
	} catch (err) {
		if (!(err instanceof ApiError)) throw err;
		if (err.status !== 404 && err.status !== 401) throw err;
		return null;
	}
}
|
||||
|
||||
/**
 * Sends DELETE to `/v1/me/read-progress/:mangaId`.
 *
 * @param mangaId Manga id; URI-encoded before being placed in the path.
 */
export async function clearReadProgress(mangaId: string): Promise<void> {
	const path = `/v1/me/read-progress/${encodeURIComponent(mangaId)}`;
	await request<void>(path, { method: 'DELETE' });
}
|
||||
79
frontend/src/lib/api/uploads.test.ts
Normal file
79
frontend/src/lib/api/uploads.test.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
|
||||
import { listMyUploads, listMyUploadsOrEmpty } from './uploads';
|
||||
|
||||
/** Builds a 200 JSON `Response` carrying `body`. */
function ok(body: unknown): Response {
	const headers = { 'content-type': 'application/json' };
	const payload = JSON.stringify(body);
	return new Response(payload, { status: 200, headers });
}
|
||||
|
||||
/** Builds a JSON error `Response` shaped as `{ error: { code, message } }` with the given status. */
function envelope(status: number, code: string, message: string): Response {
	const error = { code, message };
	const headers = { 'content-type': 'application/json' };
	return new Response(JSON.stringify({ error }), { status, headers });
}
|
||||
|
||||
describe('uploads api client', () => {
|
||||
let fetchSpy: MockInstance<typeof globalThis.fetch>;
|
||||
|
||||
beforeEach(() => {
|
||||
fetchSpy = vi.spyOn(globalThis, 'fetch');
|
||||
});
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it('listMyUploads returns the discriminated union of entries', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(
|
||||
ok({
|
||||
items: [
|
||||
{
|
||||
kind: 'manga',
|
||||
manga: {
|
||||
id: 'm1',
|
||||
title: 'A',
|
||||
status: 'ongoing',
|
||||
alt_titles: [],
|
||||
description: null,
|
||||
cover_image_path: null,
|
||||
created_at: '2026-05-17T12:00:00Z',
|
||||
updated_at: '2026-05-17T12:00:00Z'
|
||||
},
|
||||
created_at: '2026-05-17T12:00:00Z'
|
||||
},
|
||||
{
|
||||
kind: 'chapter',
|
||||
manga_id: 'm1',
|
||||
manga_title: 'A',
|
||||
manga_cover_image_path: null,
|
||||
chapter: {
|
||||
id: 'c1',
|
||||
manga_id: 'm1',
|
||||
number: 1,
|
||||
title: null,
|
||||
page_count: 3,
|
||||
created_at: '2026-05-17T13:00:00Z'
|
||||
},
|
||||
created_at: '2026-05-17T13:00:00Z'
|
||||
}
|
||||
],
|
||||
page: { limit: 50, offset: 0, total: 2 }
|
||||
})
|
||||
);
|
||||
const r = await listMyUploads();
|
||||
expect(r.items[0].kind).toBe('manga');
|
||||
expect(r.items[1].kind).toBe('chapter');
|
||||
// Discriminant pattern-match (compile-time check via the union).
|
||||
if (r.items[1].kind === 'chapter') {
|
||||
expect(r.items[1].chapter.number).toBe(1);
|
||||
}
|
||||
});
|
||||
|
||||
it('listMyUploadsOrEmpty returns empty page on 401', async () => {
|
||||
fetchSpy.mockResolvedValueOnce(envelope(401, 'unauthenticated', 'login required'));
|
||||
const r = await listMyUploadsOrEmpty();
|
||||
expect(r.items).toEqual([]);
|
||||
});
|
||||
});
|
||||
42
frontend/src/lib/api/uploads.ts
Normal file
42
frontend/src/lib/api/uploads.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import { ApiError, request, type Manga, type Page } from './client';
|
||||
import type { Chapter } from './chapters';
|
||||
|
||||
/** Upload entry describing a manga. */
type MangaUploadEntry = {
	kind: 'manga';
	manga: Manga;
	created_at: string;
};

/** Upload entry describing a chapter, with display fields of the manga it belongs to. */
type ChapterUploadEntry = {
	kind: 'chapter';
	manga_id: string;
	manga_title: string;
	manga_cover_image_path: string | null;
	chapter: Chapter;
	created_at: string;
};

/**
 * Tagged union returned by `GET /v1/me/uploads`. `kind` is the
 * discriminant; narrow on it before touching the variant-specific fields.
 */
export type UploadEntry = MangaUploadEntry | ChapterUploadEntry;

/** One page of upload entries plus its paging metadata. */
export type UploadsPage = {
	page: Page;
	items: Array<UploadEntry>;
};
|
||||
|
||||
/**
 * Fetches upload entries from `/v1/me/uploads`.
 *
 * @param opts Optional `limit`, added to the query string only when non-null.
 */
export async function listMyUploads(
	opts: { limit?: number } = {}
): Promise<UploadsPage> {
	const { limit } = opts;
	const query = new URLSearchParams();
	if (limit != null) query.set('limit', String(limit));
	const encoded = query.toString();
	const path = encoded ? `/v1/me/uploads?${encoded}` : '/v1/me/uploads';
	return request<UploadsPage>(path);
}
|
||||
|
||||
/**
 * Same as `listMyUploads()` with default options, except that an
 * `ApiError` with status 401 resolves to an empty page. Any other failure
 * is rethrown unchanged.
 */
export async function listMyUploadsOrEmpty(): Promise<UploadsPage> {
	return listMyUploads().catch((err: unknown) => {
		const unauthenticated = err instanceof ApiError && err.status === 401;
		if (!unauthenticated) throw err;
		return { items: [], page: { limit: 50, offset: 0, total: null } };
	});
}
|
||||
279
frontend/src/lib/components/AddToCollectionModal.svelte
Normal file
279
frontend/src/lib/components/AddToCollectionModal.svelte
Normal file
@@ -0,0 +1,279 @@
|
||||
<script lang="ts">
|
||||
import Modal from './Modal.svelte';
|
||||
import {
|
||||
addMangaToCollection,
|
||||
createCollection,
|
||||
listMyCollections,
|
||||
getMyCollectionsContaining,
|
||||
removeMangaFromCollection,
|
||||
type CollectionSummary
|
||||
} from '$lib/api/collections';
|
||||
import Plus from '@lucide/svelte/icons/plus';
|
||||
|
||||
let {
|
||||
open,
|
||||
mangaId,
|
||||
onClose
|
||||
}: {
|
||||
open: boolean;
|
||||
mangaId: string;
|
||||
onClose: () => void;
|
||||
} = $props();
|
||||
|
||||
let collections = $state<CollectionSummary[]>([]);
|
||||
let containingIds = $state<Set<string>>(new Set());
|
||||
let busyIds = $state<Set<string>>(new Set());
|
||||
let newName = $state('');
|
||||
let creating = $state(false);
|
||||
let loading = $state(false);
|
||||
let error: string | null = $state(null);
|
||||
|
||||
// Reload whenever the modal is open. `load()` reads `mangaId`
// synchronously, so a change of manga while open triggers a refetch
// too (unlikely but cheap). The data is per-user and per-manga, so
// fetching again is the simplest way to pick up changes made
// elsewhere (e.g., a collection deleted on another page).
$effect(() => {
	if (!open) return;
	void load();
});
|
||||
|
||||
/**
 * Fetches the user's collections (up to 200) and the ids of those that
 * already contain `mangaId`, in parallel, and stores both in component
 * state. `loading` is true for the duration; a failure is surfaced
 * through `error` instead of being thrown.
 */
async function load() {
	loading = true;
	error = null;
	try {
		const [page, ids] = await Promise.all([
			listMyCollections({ limit: 200 }),
			getMyCollectionsContaining(mangaId)
		]);
		collections = page.items;
		containingIds = new Set(ids);
	} catch (e) {
		// A rejection is not guaranteed to be an Error; reading `.message`
		// off anything else would leave `error` undefined and the failure
		// would never be shown.
		error = e instanceof Error ? e.message : String(e);
	} finally {
		loading = false;
	}
}
|
||||
|
||||
// Functional set updates: each helper returns a fresh Set built from
// the set passed in at call time, so concurrent toggles on different
// rows don't clobber each other by starting from a stale snapshot.

/** Returns a copy of `s` that also contains `v`; `s` itself is left untouched. */
function withAdd<T>(s: Set<T>, v: T): Set<T> {
	return new Set<T>([...s, v]);
}
|
||||
/** Returns a copy of `s` without `v`; `s` itself is left untouched. */
function withDelete<T>(s: Set<T>, v: T): Set<T> {
	const remaining = new Set<T>(s.values());
	remaining.delete(v);
	return remaining;
}
|
||||
|
||||
/**
 * Adds the manga to `collection`, or removes it if it is already there.
 *
 * The checkbox state is flipped optimistically before the request and
 * reverted if the request fails. While the request is in flight the
 * collection id sits in `busyIds`, and a second call for the same row
 * returns immediately. `manga_count` is adjusted only after success.
 */
async function toggle(collection: CollectionSummary) {
	if (busyIds.has(collection.id)) return;
	const wasIn = containingIds.has(collection.id);
	// Optimistic toggle — local set first; revert on failure.
	containingIds = wasIn
		? withDelete(containingIds, collection.id)
		: withAdd(containingIds, collection.id);
	busyIds = withAdd(busyIds, collection.id);
	try {
		if (wasIn) {
			await removeMangaFromCollection(collection.id, mangaId);
			collection.manga_count = Math.max(0, collection.manga_count - 1);
		} else {
			await addMangaToCollection(collection.id, mangaId);
			collection.manga_count += 1;
		}
	} catch (e) {
		// Revert (read latest containingIds, not the pre-toggle snapshot).
		containingIds = wasIn
			? withAdd(containingIds, collection.id)
			: withDelete(containingIds, collection.id);
		// A rejection is not guaranteed to be an Error; fall back to its
		// string form so the failure is always reported.
		error = e instanceof Error ? e.message : String(e);
	} finally {
		busyIds = withDelete(busyIds, collection.id);
	}
}
|
||||
|
||||
/**
 * Creates a collection named after the trimmed `newName` and then adds
 * the current manga to it.
 *
 * The two requests can fail independently. Once the collection exists it
 * is always put at the top of the local list and the name input is
 * cleared, even if adding the manga fails afterwards. Otherwise the UI
 * would hide a collection that exists, and retrying would create a second
 * one with the same name. In that partial-failure case the row is shown
 * unchecked with a count of 0 and the add error is reported.
 */
async function createAndAdd() {
	const name = newName.trim();
	if (!name || creating) return;
	creating = true;
	error = null;
	try {
		const created = await createCollection({ name });
		newName = '';

		let added = true;
		try {
			await addMangaToCollection(created.id, mangaId);
		} catch (e) {
			added = false;
			error = e instanceof Error ? e.message : String(e);
		}

		// The list endpoint sorts by updated_at DESC, so the new
		// collection belongs at the top. Prepend a synthetic summary
		// instead of waiting for a refetch.
		collections = [
			{
				...created,
				manga_count: added ? 1 : 0,
				sample_covers: []
			},
			...collections
		];
		if (added) containingIds = withAdd(containingIds, created.id);
	} catch (e) {
		// createCollection itself failed; nothing was created.
		error = e instanceof Error ? e.message : String(e);
	} finally {
		creating = false;
	}
}
|
||||
|
||||
/** Submit handler for the create form: cancels the native submit and starts `createAndAdd()` without awaiting it. */
function onCreateSubmit(e: SubmitEvent) {
	e.preventDefault();
	// Fire-and-forget: createAndAdd() catches failures and stores them in `error`.
	void createAndAdd();
}
|
||||
</script>
|
||||
|
||||
<Modal {open} {onClose} title="Add to collection" size="md" testid="add-to-collection-modal">
|
||||
{#if loading}
|
||||
<p class="status">Loading your collections…</p>
|
||||
{:else if error}
|
||||
<p class="error" role="alert" data-testid="add-to-collection-error">{error}</p>
|
||||
{:else if collections.length === 0}
|
||||
<p class="status" data-testid="no-collections">
|
||||
You don't have any collections yet. Create one below to get started.
|
||||
</p>
|
||||
{:else}
|
||||
<ul class="collection-list">
|
||||
{#each collections as c (c.id)}
|
||||
{@const checked = containingIds.has(c.id)}
|
||||
{@const busy = busyIds.has(c.id)}
|
||||
<li>
|
||||
<label class="row" class:checked>
|
||||
<input
|
||||
type="checkbox"
|
||||
{checked}
|
||||
disabled={busy}
|
||||
onchange={() => toggle(c)}
|
||||
data-testid={`collection-toggle-${c.id}`}
|
||||
/>
|
||||
<span class="row-label">
|
||||
<span class="row-name">{c.name}</span>
|
||||
<span class="row-count">
|
||||
{c.manga_count}
|
||||
{c.manga_count === 1 ? 'manga' : 'mangas'}
|
||||
</span>
|
||||
</span>
|
||||
</label>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{/if}
|
||||
|
||||
<form
|
||||
class="create-form"
|
||||
onsubmit={onCreateSubmit}
|
||||
action="javascript:void(0)"
|
||||
>
|
||||
<input
|
||||
type="text"
|
||||
bind:value={newName}
|
||||
maxlength="64"
|
||||
placeholder="Create new collection"
|
||||
aria-label="New collection name"
|
||||
data-testid="new-collection-name"
|
||||
/>
|
||||
<button
|
||||
type="submit"
|
||||
class="create-btn"
|
||||
disabled={!newName.trim() || creating}
|
||||
data-testid="create-collection-btn"
|
||||
>
|
||||
<Plus size={14} aria-hidden="true" />
|
||||
<span>{creating ? 'Creating…' : 'Create + add'}</span>
|
||||
</button>
|
||||
</form>
|
||||
</Modal>
|
||||
|
||||
<style>
|
||||
.status {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.error {
|
||||
color: var(--danger);
|
||||
margin: 0 0 var(--space-2);
|
||||
}
|
||||
|
||||
.collection-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0 0 var(--space-3);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--space-1);
|
||||
max-height: 16rem;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--space-2);
|
||||
padding: var(--space-2);
|
||||
border-radius: var(--radius-md);
|
||||
cursor: pointer;
|
||||
transition: background var(--transition);
|
||||
}
|
||||
|
||||
.row:hover {
|
||||
background: var(--surface-elevated);
|
||||
}
|
||||
|
||||
.row.checked {
|
||||
background: var(--primary-soft-bg);
|
||||
}
|
||||
|
||||
.row-label {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.row-name {
|
||||
color: var(--text);
|
||||
font-weight: var(--weight-medium);
|
||||
}
|
||||
|
||||
.row-count {
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-xs);
|
||||
}
|
||||
|
||||
.create-form {
|
||||
display: flex;
|
||||
gap: var(--space-2);
|
||||
align-items: center;
|
||||
padding-top: var(--space-3);
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.create-form input {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.create-btn {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: var(--space-1);
|
||||
background: var(--primary);
|
||||
color: var(--primary-contrast);
|
||||
border: 1px solid var(--primary);
|
||||
padding: 0 var(--space-3);
|
||||
height: 36px;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.create-btn:hover:not(:disabled) {
|
||||
background: var(--primary-hover);
|
||||
border-color: var(--primary-hover);
|
||||
}
|
||||
</style>
|
||||
129
frontend/src/lib/components/BookmarkList.svelte
Normal file
129
frontend/src/lib/components/BookmarkList.svelte
Normal file
@@ -0,0 +1,129 @@
|
||||
<script lang="ts">
|
||||
import { fileUrl } from '$lib/api/client';
|
||||
import type { Bookmark } from '$lib/api/bookmarks';
|
||||
import BookImage from '@lucide/svelte/icons/book-image';
|
||||
|
||||
let {
|
||||
bookmarks,
|
||||
testid
|
||||
}: {
|
||||
bookmarks: Bookmark[];
|
||||
testid?: string;
|
||||
} = $props();
|
||||
</script>
|
||||
|
||||
<ul class="bookmark-list" data-testid={testid ?? 'bookmark-list'}>
|
||||
{#each bookmarks as b (b.id)}
|
||||
<li class="bookmark">
|
||||
<a href="/manga/{b.manga_id}" class="cover-link" aria-hidden="true" tabindex="-1">
|
||||
{#if b.manga_cover_image_path}
|
||||
<img
|
||||
src={fileUrl(b.manga_cover_image_path)}
|
||||
alt=""
|
||||
class="cover"
|
||||
loading="lazy"
|
||||
/>
|
||||
{:else}
|
||||
<div class="cover cover-placeholder">
|
||||
<BookImage size={22} aria-hidden="true" />
|
||||
</div>
|
||||
{/if}
|
||||
</a>
|
||||
<div class="meta">
|
||||
<a
|
||||
href="/manga/{b.manga_id}"
|
||||
class="title"
|
||||
data-testid="bookmark-title"
|
||||
>
|
||||
{b.manga_title ?? 'Unknown manga'}
|
||||
</a>
|
||||
{#if b.chapter_id && b.chapter_number != null}
|
||||
<a
|
||||
href="/manga/{b.manga_id}/chapter/{b.chapter_id}"
|
||||
class="target"
|
||||
>
|
||||
Chapter {b.chapter_number}{#if b.page != null && b.page > 0} — page {b.page}{/if}
|
||||
</a>
|
||||
{:else if b.chapter_id}
|
||||
<!-- Chapter bookmark whose chapter was deleted;
|
||||
chapter_id != null but chapter_number == null
|
||||
because the LEFT JOIN found nothing. -->
|
||||
<span class="target muted">(chapter removed)</span>
|
||||
{:else}
|
||||
<span class="target muted">Whole manga</span>
|
||||
{/if}
|
||||
<span class="created">
|
||||
Bookmarked {new Date(b.created_at).toLocaleDateString()}
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
|
||||
<style>
|
||||
.bookmark-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.bookmark {
|
||||
display: grid;
|
||||
grid-template-columns: 64px 1fr;
|
||||
gap: var(--space-4);
|
||||
align-items: start;
|
||||
padding: var(--space-3) 0;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.cover-link {
|
||||
display: block;
|
||||
line-height: 0;
|
||||
}
|
||||
|
||||
.cover {
|
||||
width: 64px;
|
||||
height: 96px;
|
||||
object-fit: cover;
|
||||
border-radius: var(--radius-md);
|
||||
background: var(--surface);
|
||||
}
|
||||
|
||||
.cover-placeholder {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: var(--text-muted);
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--space-1);
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.title {
|
||||
font-weight: var(--weight-semibold);
|
||||
font-size: var(--font-base);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.title:hover {
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
.target {
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.muted {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.created {
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-xs);
|
||||
}
|
||||
</style>
|
||||
337
frontend/src/lib/components/ChapterPagesEditor.svelte
Normal file
337
frontend/src/lib/components/ChapterPagesEditor.svelte
Normal file
@@ -0,0 +1,337 @@
|
||||
<script lang="ts" module>
|
||||
/**
 * A page that has been staged but not yet uploaded. The array of these is
 * owned by the parent, which reads and writes it through `bind:pages`.
 * This component manages the `previewUrl` lifecycle: the URL is created
 * when the page is added and revoked on removal or unmount.
 */
export type PendingPage = {
	/** Unique key for the staged page. */
	id: string;
	/** The file picked or dropped by the user. */
	file: File;
	/** Result of `validateImageFile(file)`; `null` means no error. */
	error: string | null;
	/** Object URL created from `file`. */
	previewUrl: string;
};
|
||||
</script>
|
||||
|
||||
<script lang="ts">
|
||||
import { onDestroy } from 'svelte';
|
||||
import { formatBytes, validateImageFile } from '$lib/upload-validation';
|
||||
import Modal from './Modal.svelte';
|
||||
import ArrowUp from '@lucide/svelte/icons/arrow-up';
|
||||
import ArrowDown from '@lucide/svelte/icons/arrow-down';
|
||||
import Trash2 from '@lucide/svelte/icons/trash-2';
|
||||
import UploadCloud from '@lucide/svelte/icons/upload-cloud';
|
||||
|
||||
let {
|
||||
pages = $bindable<PendingPage[]>([]),
|
||||
testidPrefix = 'pages'
|
||||
}: {
|
||||
pages?: PendingPage[];
|
||||
testidPrefix?: string;
|
||||
} = $props();
|
||||
|
||||
let isDragOver = $state(false);
|
||||
let previewIndex = $state<number | null>(null);
|
||||
const previewPage = $derived(
|
||||
previewIndex != null ? pages[previewIndex] ?? null : null
|
||||
);
|
||||
|
||||
/**
 * Stages every file in `files` as a new `PendingPage` appended to `pages`.
 * Each entry gets a random UUID, its validation result and an object URL
 * for previewing.
 */
function addFiles(files: File[] | FileList) {
	const additions: PendingPage[] = [];
	for (const file of Array.from(files)) {
		additions.push({
			id: crypto.randomUUID(),
			file,
			error: validateImageFile(file),
			previewUrl: URL.createObjectURL(file)
		});
	}
	pages = [...pages, ...additions];
}
|
||||
|
||||
/**
 * Drops the staged page with the given id and revokes its preview URL.
 * Does nothing when no page has that id.
 */
function removePage(id: string) {
	const target = pages.find((p) => p.id === id);
	if (!target) return;
	URL.revokeObjectURL(target.previewUrl);
	pages = pages.filter((p) => p.id !== id);
}
|
||||
|
||||
/**
 * Swaps the page with the given id with its neighbour one step up
 * (`dir === -1`) or down (`dir === 1`). Unknown ids and moves past either
 * end of the list are ignored. `pages` is replaced with a new array.
 */
function movePage(id: string, dir: -1 | 1) {
	const from = pages.findIndex((p) => p.id === id);
	if (from < 0) return;
	const to = from + dir;
	if (to < 0 || to >= pages.length) return;
	const reordered = [...pages];
	const moved = reordered[from];
	reordered[from] = reordered[to];
	reordered[to] = moved;
	pages = reordered;
}
|
||||
|
||||
/**
 * Change handler for the file input: stages the chosen files, then clears
 * the input's value.
 */
function onPagesInputChange(e: Event) {
	const picker = e.target as HTMLInputElement;
	const chosen = picker.files;
	if (chosen) addFiles(chosen);
	// NOTE(review): presumably cleared so re-selecting the same file fires
	// `change` again — confirm.
	picker.value = '';
}
|
||||
|
||||
/** Drop handler: cancels the default action, clears the drag highlight and stages any dropped files. */
function onDrop(e: DragEvent) {
	e.preventDefault();
	isDragOver = false;
	const dropped = e.dataTransfer?.files;
	if (dropped) addFiles(dropped);
}
|
||||
|
||||
/** Dragover handler: cancels the default action and turns the drag highlight on. */
function onDragOver(e: DragEvent) {
	isDragOver = true;
	e.preventDefault();
}
|
||||
|
||||
/** Dragleave handler: turns the drag highlight off. */
function onDragLeave(): void {
	isDragOver = false;
}
|
||||
|
||||
/**
 * Human-readable label for the page at zero-based index `i`: the
 * one-based position, left-padded with zeros to at least three digits,
 * e.g. `Page 001`.
 */
function pageLabel(i: number): string {
	// Meant to mirror the server's storage naming so the visible label
	// matches the file name on disk.
	// NOTE(review): the original comment cites a `{nnnn}` convention while
	// the padding here is three digits — confirm which width is correct.
	const ordinal = (i + 1).toString().padStart(3, '0');
	return 'Page ' + ordinal;
}
|
||||
|
||||
// On teardown, release every preview object URL still referenced by
// `pages` so the browser can free the image data behind them. A full
// page unload would reclaim them anyway; explicit cleanup matters when
// this component lives inside a long-running single-page app.
onDestroy(() => {
	pages.forEach((page) => URL.revokeObjectURL(page.previewUrl));
});
|
||||
</script>
|
||||
|
||||
<div
|
||||
class="drop-zone"
|
||||
class:drag-over={isDragOver}
|
||||
ondrop={onDrop}
|
||||
ondragover={onDragOver}
|
||||
ondragleave={onDragLeave}
|
||||
role="region"
|
||||
aria-label="page upload"
|
||||
data-testid="{testidPrefix}-drop-zone"
|
||||
>
|
||||
<UploadCloud size={32} aria-hidden="true" class="drop-icon" />
|
||||
<p>
|
||||
Drop pages here, or
|
||||
<label class="file-link">
|
||||
browse
|
||||
<input
|
||||
type="file"
|
||||
accept="image/*"
|
||||
multiple
|
||||
onchange={onPagesInputChange}
|
||||
data-testid="{testidPrefix}-input"
|
||||
/>
|
||||
</label>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{#if pages.length > 0}
|
||||
<ol class="pages" data-testid="{testidPrefix}-list">
|
||||
{#each pages as p, i (p.id)}
|
||||
<li class:invalid={p.error} data-testid="{testidPrefix}-row">
|
||||
<button
|
||||
type="button"
|
||||
class="thumb-btn"
|
||||
onclick={() => (previewIndex = i)}
|
||||
aria-label="Preview {pageLabel(i)}"
|
||||
title="Preview"
|
||||
data-testid="{testidPrefix}-thumb"
|
||||
>
|
||||
<img src={p.previewUrl} alt="" class="thumb" loading="lazy" />
|
||||
</button>
|
||||
<div class="page-meta">
|
||||
<span class="page-label">{pageLabel(i)}</span>
|
||||
<span class="page-origin" title={p.file.name}>
|
||||
from {p.file.name} · {formatBytes(p.file.size)}
|
||||
</span>
|
||||
</div>
|
||||
<button
|
||||
class="icon-btn"
|
||||
type="button"
|
||||
onclick={() => movePage(p.id, -1)}
|
||||
disabled={i === 0}
|
||||
aria-label="Move {pageLabel(i)} up"
|
||||
title="Move up"
|
||||
>
|
||||
<ArrowUp size={16} aria-hidden="true" />
|
||||
</button>
|
||||
<button
|
||||
class="icon-btn"
|
||||
type="button"
|
||||
onclick={() => movePage(p.id, 1)}
|
||||
disabled={i === pages.length - 1}
|
||||
aria-label="Move {pageLabel(i)} down"
|
||||
title="Move down"
|
||||
>
|
||||
<ArrowDown size={16} aria-hidden="true" />
|
||||
</button>
|
||||
<button
|
||||
class="icon-btn danger"
|
||||
type="button"
|
||||
onclick={() => removePage(p.id)}
|
||||
aria-label="Remove {pageLabel(i)}"
|
||||
title="Remove page"
|
||||
data-testid="{testidPrefix}-remove"
|
||||
>
|
||||
<Trash2 size={16} aria-hidden="true" />
|
||||
</button>
|
||||
{#if p.error}
|
||||
<span class="field-error" role="alert">{p.error}</span>
|
||||
{/if}
|
||||
</li>
|
||||
{/each}
|
||||
</ol>
|
||||
{/if}
|
||||
|
||||
<Modal
|
||||
open={previewIndex != null}
|
||||
title={previewPage ? pageLabel(previewIndex ?? 0) : 'Preview'}
|
||||
onClose={() => (previewIndex = null)}
|
||||
size="lg"
|
||||
closeOnBackdrop={true}
|
||||
testid="page-preview-modal"
|
||||
>
|
||||
{#if previewPage}
|
||||
<img
|
||||
src={previewPage.previewUrl}
|
||||
alt={pageLabel(previewIndex ?? 0)}
|
||||
class="preview-large"
|
||||
/>
|
||||
{/if}
|
||||
</Modal>
|
||||
|
||||
<style>
|
||||
.drop-zone {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: var(--space-2);
|
||||
border: 2px dashed var(--border-strong);
|
||||
border-radius: var(--radius-md);
|
||||
padding: var(--space-6);
|
||||
text-align: center;
|
||||
background: var(--surface);
|
||||
color: var(--text-muted);
|
||||
transition:
|
||||
background var(--transition),
|
||||
border-color var(--transition);
|
||||
}
|
||||
|
||||
.drop-zone :global(.drop-icon) {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.drop-zone.drag-over {
|
||||
background: var(--primary-soft-bg);
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.file-link input[type='file'] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.file-link {
|
||||
color: var(--primary);
|
||||
text-decoration: underline;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.pages {
|
||||
padding: 0;
|
||||
margin: var(--space-3) 0 0;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.pages li {
|
||||
display: grid;
|
||||
grid-template-columns: 56px 1fr auto auto auto;
|
||||
align-items: center;
|
||||
gap: var(--space-2);
|
||||
padding: var(--space-1) var(--space-2);
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.pages li.invalid {
|
||||
background: var(--danger-soft-bg);
|
||||
}
|
||||
|
||||
.thumb-btn {
|
||||
padding: 0;
|
||||
background: transparent;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-sm);
|
||||
cursor: pointer;
|
||||
line-height: 0;
|
||||
overflow: hidden;
|
||||
width: 56px;
|
||||
height: 80px;
|
||||
}
|
||||
|
||||
.thumb-btn:hover {
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.thumb {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.page-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.page-label {
|
||||
font-weight: var(--weight-semibold);
|
||||
color: var(--text);
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.page-origin {
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-xs);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.icon-btn {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
padding: 0;
|
||||
background: transparent;
|
||||
color: var(--text-muted);
|
||||
border: 1px solid transparent;
|
||||
border-radius: var(--radius-sm);
|
||||
}
|
||||
|
||||
.icon-btn:hover:not(:disabled) {
|
||||
background: var(--surface-elevated);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.icon-btn.danger:hover:not(:disabled) {
|
||||
color: var(--danger);
|
||||
}
|
||||
|
||||
.field-error {
|
||||
grid-column: 1 / -1;
|
||||
color: var(--danger);
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.preview-large {
|
||||
display: block;
|
||||
max-width: 100%;
|
||||
max-height: 75vh;
|
||||
margin: 0 auto;
|
||||
object-fit: contain;
|
||||
background: var(--surface-elevated);
|
||||
border-radius: var(--radius-sm);
|
||||
}
|
||||
</style>
|
||||
132
frontend/src/lib/components/CollectionsGrid.svelte
Normal file
132
frontend/src/lib/components/CollectionsGrid.svelte
Normal file
@@ -0,0 +1,132 @@
|
||||
<script lang="ts">
|
||||
import { fileUrl } from '$lib/api/client';
|
||||
import type { CollectionSummary } from '$lib/api/collections';
|
||||
import FolderOpen from '@lucide/svelte/icons/folder-open';
|
||||
|
||||
let {
|
||||
collections
|
||||
}: {
|
||||
collections: CollectionSummary[];
|
||||
} = $props();
|
||||
</script>
|
||||
|
||||
<ul class="grid" data-testid="collections-list">
|
||||
{#each collections as c (c.id)}
|
||||
<li class="card">
|
||||
<a href="/collections/{c.id}" class="cover-link" tabindex="-1" aria-hidden="true">
|
||||
<div class="collage">
|
||||
{#if c.sample_covers.length === 0}
|
||||
<div class="collage-empty">
|
||||
<FolderOpen size={36} aria-hidden="true" />
|
||||
</div>
|
||||
{:else}
|
||||
{#each c.sample_covers as cover (cover)}
|
||||
<img
|
||||
src={fileUrl(cover)}
|
||||
alt=""
|
||||
class="collage-cover"
|
||||
loading="lazy"
|
||||
/>
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
</a>
|
||||
<div class="meta">
|
||||
<a href="/collections/{c.id}" class="name" data-testid={`collection-${c.id}`}>
|
||||
{c.name}
|
||||
</a>
|
||||
<span class="count">
|
||||
{c.manga_count}
|
||||
{c.manga_count === 1 ? 'manga' : 'mangas'}
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
|
||||
<style>
|
||||
.grid {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
|
||||
gap: var(--space-4);
|
||||
}
|
||||
|
||||
.card {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.cover-link {
|
||||
display: block;
|
||||
line-height: 0;
|
||||
}
|
||||
|
||||
.collage {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
grid-template-rows: 1fr 1fr;
|
||||
gap: 2px;
|
||||
aspect-ratio: 2 / 3;
|
||||
border-radius: var(--radius-md);
|
||||
overflow: hidden;
|
||||
background: var(--surface);
|
||||
}
|
||||
|
||||
.collage-empty {
|
||||
grid-column: 1 / -1;
|
||||
grid-row: 1 / -1;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.collage-cover {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.collage-cover:only-child {
|
||||
grid-column: 1 / -1;
|
||||
grid-row: 1 / -1;
|
||||
}
|
||||
|
||||
.collage-cover:first-child:nth-last-child(2),
|
||||
.collage-cover:first-child:nth-last-child(2) ~ .collage-cover {
|
||||
grid-row: 1 / -1;
|
||||
}
|
||||
|
||||
.collage-cover:first-child:nth-last-child(3) {
|
||||
grid-row: 1 / -1;
|
||||
}
|
||||
|
||||
.meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-width: 0;
|
||||
gap: var(--space-1);
|
||||
}
|
||||
|
||||
.name {
|
||||
font-weight: var(--weight-semibold);
|
||||
color: var(--text);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.name:hover {
|
||||
color: var(--primary);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.count {
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-xs);
|
||||
}
|
||||
</style>
|
||||
221
frontend/src/lib/components/Modal.svelte
Normal file
221
frontend/src/lib/components/Modal.svelte
Normal file
@@ -0,0 +1,221 @@
|
||||
<script lang="ts">
|
||||
import { onMount } from 'svelte';
|
||||
import type { Snippet } from 'svelte';
|
||||
import X from '@lucide/svelte/icons/x';
|
||||
|
||||
let {
|
||||
open,
|
||||
title,
|
||||
onClose,
|
||||
children,
|
||||
footer,
|
||||
size = 'md',
|
||||
closeOnBackdrop = false,
|
||||
testid
|
||||
}: {
|
||||
open: boolean;
|
||||
title: string;
|
||||
onClose: () => void;
|
||||
children: Snippet;
|
||||
footer?: Snippet;
|
||||
size?: 'sm' | 'md' | 'lg';
|
||||
/**
|
||||
* Whether clicking the dim backdrop closes the modal. Off by
|
||||
* default — forms with unsaved input would discard typed data
|
||||
* on a misclick. Opt-in for confirm dialogs and read-only
|
||||
* popovers.
|
||||
*/
|
||||
closeOnBackdrop?: boolean;
|
||||
testid?: string;
|
||||
} = $props();
|
||||
|
||||
let dialog: HTMLDivElement | undefined = $state();
|
||||
|
||||
// Track previous focus so we can restore it on close — a basic
|
||||
// requirement for any focus-trapping modal.
|
||||
let previouslyFocused: HTMLElement | null = null;
|
||||
|
||||
// Focus management tied to `open`:
//  - closing: hand focus back to whatever had it before, once;
//  - opening: remember the currently focused element and move focus
//    onto the dialog container.
$effect(() => {
	if (!open) {
		if (previouslyFocused) {
			previouslyFocused.focus();
			previouslyFocused = null;
		}
		return;
	}
	previouslyFocused = document.activeElement as HTMLElement | null;
	// The dialog element is not bound yet at this point; focus it on
	// the next microtask.
	queueMicrotask(() => dialog?.focus());
});
|
||||
|
||||
/**
 * Collect the elements inside the dialog that Tab can land on, in
 * document order.
 *
 * @returns The tabbable descendants of the dialog, or an empty array
 *          when the dialog is not mounted.
 */
function focusable(): HTMLElement[] {
	if (!dialog) return [];
	// Standard set of "tab can land here" elements, minus disabled
	// ones. Sufficient for our forms.
	const selector = [
		'a[href]',
		'button:not([disabled])',
		'input:not([disabled]):not([type="hidden"])',
		'select:not([disabled])',
		'textarea:not([disabled])',
		'[tabindex]:not([tabindex="-1"])'
	].join(',');
	// The selector's `tabindex="-1"` exclusion only covers the generic
	// `[tabindex]` clause; a link or button carrying `tabindex="-1"`
	// still matches its own clause. Filter on the resolved `tabIndex`
	// so elements removed from the tab order never become the trap's
	// first/last stop.
	return Array.from(dialog.querySelectorAll<HTMLElement>(selector)).filter(
		(el) => el.tabIndex >= 0
	);
}
|
||||
|
||||
/**
 * Document-level keydown handler, active only while the modal is open.
 *
 * - Escape: stops propagation and calls `onClose`.
 * - Tab / Shift+Tab: keeps focus cycling inside the dialog.
 *
 * The trap has to account for three positions of the current focus:
 * on a tabbable item inside the dialog, on the dialog container itself
 * (where focus is placed on open; it has `tabindex="-1"`), and outside
 * the dialog altogether (e.g. on `<body>` after a backdrop click).
 */
function onKeydown(e: KeyboardEvent) {
	if (!open) return;

	if (e.key === 'Escape') {
		e.stopPropagation();
		onClose();
		return;
	}
	if (e.key !== 'Tab') return;

	const items = focusable();
	if (items.length === 0) {
		// Nothing tabbable inside: pin focus on the container.
		e.preventDefault();
		dialog?.focus();
		return;
	}

	const first = items[0];
	const last = items[items.length - 1];
	const active = document.activeElement;
	const onContainer = active === dialog;
	const outside = !dialog || !dialog.contains(active);

	if (e.shiftKey) {
		// Going backwards from the first item, from the container
		// itself, or from outside would leave the dialog — wrap to the
		// last item instead.
		if (active === first || onContainer || outside) {
			e.preventDefault();
			last.focus();
		}
	} else if (active === last || outside) {
		// Going forwards from the last item, or from outside, must land
		// on the first item. From the container the browser's default
		// already moves to the first tabbable descendant.
		e.preventDefault();
		first.focus();
	}
}
|
||||
|
||||
// Listen for keydown on the whole document for as long as the
// component is mounted; the returned callback detaches the listener.
onMount(() => {
	const eventName = 'keydown';
	document.addEventListener(eventName, onKeydown);
	return () => {
		document.removeEventListener(eventName, onKeydown);
	};
});
|
||||
|
||||
/**
 * Click handler for the backdrop. Closes the modal only when
 * `closeOnBackdrop` is enabled and the click hit the backdrop element
 * itself rather than something rendered inside it.
 */
function onBackdropClick(e: MouseEvent) {
	const hitBackdropItself = e.target === e.currentTarget;
	if (closeOnBackdrop && hitBackdropItself) onClose();
}
|
||||
</script>
|
||||
|
||||
{#if open}
|
||||
<div
|
||||
class="backdrop"
|
||||
onclick={onBackdropClick}
|
||||
role="presentation"
|
||||
data-testid={testid ? `${testid}-backdrop` : undefined}
|
||||
>
|
||||
<div
|
||||
class="dialog size-{size}"
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby="modal-title"
|
||||
tabindex="-1"
|
||||
bind:this={dialog}
|
||||
data-testid={testid}
|
||||
>
|
||||
<header class="header">
|
||||
<h2 id="modal-title">{title}</h2>
|
||||
<button
|
||||
type="button"
|
||||
class="close"
|
||||
onclick={onClose}
|
||||
aria-label="Close"
|
||||
title="Close"
|
||||
data-testid={testid ? `${testid}-close` : undefined}
|
||||
>
|
||||
<X size={18} aria-hidden="true" />
|
||||
</button>
|
||||
</header>
|
||||
<div class="body">{@render children()}</div>
|
||||
{#if footer}
|
||||
<footer class="footer">{@render footer()}</footer>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
.backdrop {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.4);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: var(--space-4);
|
||||
z-index: var(--z-modal);
|
||||
}
|
||||
|
||||
.dialog {
|
||||
background: var(--surface);
|
||||
color: var(--text);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-lg);
|
||||
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
|
||||
max-height: 90vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
width: 100%;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
.size-sm {
|
||||
max-width: 24rem;
|
||||
}
|
||||
.size-md {
|
||||
max-width: 32rem;
|
||||
}
|
||||
.size-lg {
|
||||
max-width: 48rem;
|
||||
}
|
||||
|
||||
.header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: var(--space-3) var(--space-4);
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.header h2 {
|
||||
margin: 0;
|
||||
font-size: var(--font-lg);
|
||||
}
|
||||
|
||||
.close {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
padding: 0;
|
||||
background: transparent;
|
||||
color: var(--text-muted);
|
||||
border: 1px solid transparent;
|
||||
border-radius: var(--radius-sm);
|
||||
}
|
||||
|
||||
.close:hover {
|
||||
background: var(--surface-elevated);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.body {
|
||||
padding: var(--space-4);
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.footer {
|
||||
padding: var(--space-3) var(--space-4);
|
||||
border-top: 1px solid var(--border);
|
||||
display: flex;
|
||||
gap: var(--space-2);
|
||||
justify-content: flex-end;
|
||||
}
|
||||
</style>
|
||||
33
frontend/src/lib/reader-fullscreen.svelte.ts
Normal file
33
frontend/src/lib/reader-fullscreen.svelte.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* Cross-component flag for the reader's "hide all chrome" view.
|
||||
*
|
||||
* The reader page toggles this; the root layout reads it to hide the
|
||||
* top app navbar; the reader itself reads it to hide its own nav bar
|
||||
* and bottom chapter bar. CSS handles the slide animations via a
|
||||
* `data-reader-fullscreen` attribute on `<html>` so the entire frame
|
||||
* (layout chrome included) stays synchronised.
|
||||
*
|
||||
* Always reset on reader unmount — letting the flag leak across
|
||||
* navigation would orphan a hidden app navbar on other pages.
|
||||
*/
|
||||
let active = $state(false);
|
||||
|
||||
/**
 * Single write path for the flag: updates the reactive state and, when
 * a DOM is available, mirrors it onto `<html>` as the
 * `data-reader-fullscreen` attribute (set to 'true' when on, removed
 * when off).
 */
function applyReaderFullscreen(v: boolean): void {
	active = v;
	if (typeof document === 'undefined') return;
	if (v) document.documentElement.dataset.readerFullscreen = 'true';
	else delete document.documentElement.dataset.readerFullscreen;
}

/**
 * Public handle on the reader-fullscreen flag.
 *
 * `toggle` and `reset` do not rely on `this`, so they keep working when
 * detached from the object (passed directly as an event handler or
 * destructured).
 */
export const readerFullscreen = {
	get value() {
		return active;
	},
	set value(v: boolean) {
		applyReaderFullscreen(v);
	},
	/** Flip the flag. */
	toggle(): void {
		applyReaderFullscreen(!active);
	},
	/** Force off — call from the reader's onDestroy. */
	reset(): void {
		applyReaderFullscreen(false);
	}
};
|
||||
@@ -60,6 +60,15 @@
|
||||
--icon-md: 18px;
|
||||
--icon-lg: 22px;
|
||||
|
||||
/* App-frame heights (fixed-position bars at the top and bottom of
|
||||
the viewport). These are first-paint fallbacks — the real
|
||||
values are written by ResizeObservers on the actual elements
|
||||
in +layout.svelte and the reader, so they reflect rendered
|
||||
size and survive font / zoom / wrap changes. */
|
||||
--app-header-h: 60px;
|
||||
--reader-nav-h: 56px;
|
||||
--reader-bar-h: 56px;
|
||||
|
||||
--z-dropdown: 10;
|
||||
--z-sticky: 50;
|
||||
--z-modal: 100;
|
||||
|
||||
@@ -6,18 +6,40 @@
|
||||
import { session } from '$lib/session.svelte';
|
||||
import { theme } from '$lib/theme.svelte';
|
||||
import Upload from '@lucide/svelte/icons/upload';
|
||||
import UserCircle from '@lucide/svelte/icons/user-circle';
|
||||
import Bookmark from '@lucide/svelte/icons/bookmark';
|
||||
import Settings from '@lucide/svelte/icons/settings';
|
||||
import FolderOpen from '@lucide/svelte/icons/folder-open';
|
||||
import LogOut from '@lucide/svelte/icons/log-out';
|
||||
import '$lib/styles/tokens.css';
|
||||
|
||||
let { children } = $props();
|
||||
let loggingOut = $state(false);
|
||||
let headerEl: HTMLElement | undefined = $state();
|
||||
|
||||
onMount(() => {
|
||||
theme.init();
|
||||
preferences.init();
|
||||
if (!session.loaded) session.refresh();
|
||||
|
||||
// Publish the header's measured height as a CSS custom
|
||||
// property so sticky descendants (e.g. the reader nav) can
|
||||
// pin themselves directly below it without guessing. A
|
||||
// ResizeObserver keeps it in sync as the viewport reflows
|
||||
// (the nav `flex-wrap: wrap`s on narrow widths), the user
|
||||
// zooms, or fonts swap. Hard-coded pixel offsets in tokens
|
||||
// are wrong in principle — actual height varies with all
|
||||
// of the above.
|
||||
if (!headerEl) return;
|
||||
const publish = () => {
|
||||
document.documentElement.style.setProperty(
|
||||
'--app-header-h',
|
||||
`${headerEl!.offsetHeight}px`
|
||||
);
|
||||
};
|
||||
publish();
|
||||
const ro = new ResizeObserver(publish);
|
||||
ro.observe(headerEl);
|
||||
return () => ro.disconnect();
|
||||
});
|
||||
|
||||
// Pull fresh server preferences whenever the user changes (login,
|
||||
@@ -45,27 +67,31 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<header>
|
||||
<header bind:this={headerEl}>
|
||||
<nav aria-label="primary">
|
||||
<a class="brand" href="/">Mangalord</a>
|
||||
<a class="nav-link" href="/upload">
|
||||
<Upload size={18} aria-hidden="true" />
|
||||
<span>Upload</span>
|
||||
</a>
|
||||
<a class="nav-link" href="/profile" data-testid="nav-profile">
|
||||
<UserCircle size={18} aria-hidden="true" />
|
||||
<span>Profile</span>
|
||||
</a>
|
||||
<a class="nav-link" href="/bookmarks">
|
||||
<Bookmark size={18} aria-hidden="true" />
|
||||
<span>Bookmarks</span>
|
||||
</a>
|
||||
<a class="nav-link" href="/collections">
|
||||
<FolderOpen size={18} aria-hidden="true" />
|
||||
<span>Collections</span>
|
||||
</a>
|
||||
</nav>
|
||||
<div class="session" data-testid="session-area">
|
||||
{#if !session.loaded}
|
||||
<span data-testid="session-loading" aria-busy="true">…</span>
|
||||
{:else if session.user}
|
||||
<span class="username" data-testid="session-user">{session.user.username}</span>
|
||||
<a class="nav-link" href="/settings" data-testid="nav-settings">
|
||||
<Settings size={18} aria-hidden="true" />
|
||||
<span>Settings</span>
|
||||
</a>
|
||||
<button
|
||||
class="icon-btn"
|
||||
type="button"
|
||||
@@ -144,6 +170,24 @@
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
/* App frame: header is fixed at the viewport top with a slide
|
||||
transition so reader fullscreen (set via `data-reader-fullscreen`
|
||||
on `<html>`) can hide it without jolting the layout. `main` pays
|
||||
the gap with a matching padding-top that animates in lockstep. */
|
||||
header {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
z-index: var(--z-sticky);
|
||||
transform: translateY(0);
|
||||
transition: transform 220ms ease-out;
|
||||
}
|
||||
|
||||
:global(html[data-reader-fullscreen='true']) header {
|
||||
transform: translateY(-100%);
|
||||
}
|
||||
|
||||
.session {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
@@ -181,7 +225,19 @@
|
||||
|
||||
main {
|
||||
padding: var(--space-4);
|
||||
/* Reserve room for the fixed header so its presence doesn't
|
||||
overlap content. The header height comes from a runtime
|
||||
ResizeObserver (see onMount above) so this always tracks
|
||||
the rendered size. */
|
||||
padding-top: calc(var(--app-header-h) + var(--space-4));
|
||||
max-width: 64rem;
|
||||
margin: 0 auto;
|
||||
transition: padding-top 220ms ease-out;
|
||||
}
|
||||
|
||||
:global(html[data-reader-fullscreen='true']) main {
|
||||
/* No top reservation in focus mode — the chapter image runs
|
||||
edge-to-edge once the header has slid off. */
|
||||
padding-top: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { fileUrl } from '$lib/api/client';
|
||||
import BookImage from '@lucide/svelte/icons/book-image';
|
||||
import BookmarkList from '$lib/components/BookmarkList.svelte';
|
||||
|
||||
let { data } = $props();
|
||||
const authenticated = $derived(data.authenticated);
|
||||
@@ -25,122 +24,10 @@
|
||||
{:else if bookmarks.length === 0}
|
||||
<p class="hint" data-testid="bookmarks-empty">No bookmarks yet.</p>
|
||||
{:else}
|
||||
<ul class="bookmark-list" data-testid="bookmark-list">
|
||||
{#each bookmarks as b (b.id)}
|
||||
<li class="bookmark">
|
||||
<a href="/manga/{b.manga_id}" class="cover-link" aria-hidden="true" tabindex="-1">
|
||||
{#if b.manga_cover_image_path}
|
||||
<img
|
||||
src={fileUrl(b.manga_cover_image_path)}
|
||||
alt=""
|
||||
class="cover"
|
||||
loading="lazy"
|
||||
/>
|
||||
{:else}
|
||||
<div class="cover cover-placeholder">
|
||||
<BookImage size={22} aria-hidden="true" />
|
||||
</div>
|
||||
{/if}
|
||||
</a>
|
||||
<div class="meta">
|
||||
<a
|
||||
href="/manga/{b.manga_id}"
|
||||
class="title"
|
||||
data-testid="bookmark-title"
|
||||
>
|
||||
{b.manga_title ?? 'Unknown manga'}
|
||||
</a>
|
||||
{#if b.chapter_id && b.chapter_number != null}
|
||||
<a
|
||||
href="/manga/{b.manga_id}/chapter/{b.chapter_number}"
|
||||
class="target"
|
||||
>
|
||||
Chapter {b.chapter_number}{#if b.page != null && b.page > 0} — page {b.page}{/if}
|
||||
</a>
|
||||
{:else if b.chapter_id}
|
||||
<!-- Chapter bookmark whose chapter was deleted;
|
||||
chapter_id != null but chapter_number == null
|
||||
because the LEFT JOIN found nothing. -->
|
||||
<span class="target muted">(chapter removed)</span>
|
||||
{:else}
|
||||
<span class="target muted">Whole manga</span>
|
||||
{/if}
|
||||
<span class="created">
|
||||
Bookmarked {new Date(b.created_at).toLocaleDateString()}
|
||||
</span>
|
||||
</div>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
<BookmarkList {bookmarks} />
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
.bookmark-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.bookmark {
|
||||
display: grid;
|
||||
grid-template-columns: 64px 1fr;
|
||||
gap: var(--space-4);
|
||||
align-items: start;
|
||||
padding: var(--space-3) 0;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.cover-link {
|
||||
display: block;
|
||||
line-height: 0;
|
||||
}
|
||||
|
||||
.cover {
|
||||
width: 64px;
|
||||
height: 96px;
|
||||
object-fit: cover;
|
||||
border-radius: var(--radius-md);
|
||||
background: var(--surface);
|
||||
}
|
||||
|
||||
.cover-placeholder {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: var(--text-muted);
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--space-1);
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.title {
|
||||
font-weight: var(--weight-semibold);
|
||||
font-size: var(--font-base);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.title:hover {
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
.target {
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.muted {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.created {
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-xs);
|
||||
}
|
||||
|
||||
.error {
|
||||
color: var(--danger);
|
||||
}
|
||||
|
||||
37
frontend/src/routes/collections/+page.svelte
Normal file
37
frontend/src/routes/collections/+page.svelte
Normal file
@@ -0,0 +1,37 @@
|
||||
<script lang="ts">
|
||||
import CollectionsGrid from '$lib/components/CollectionsGrid.svelte';
|
||||
|
||||
let { data } = $props();
|
||||
const collections = $derived(data.collections);
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Collections — Mangalord</title>
|
||||
</svelte:head>
|
||||
|
||||
<h1>Collections</h1>
|
||||
|
||||
{#if !data.authenticated}
|
||||
<p class="status">
|
||||
<a href="/login">Sign in</a> to see and manage your collections.
|
||||
</p>
|
||||
{:else if data.error}
|
||||
<p class="error" role="alert">{data.error}</p>
|
||||
{:else if collections.length === 0}
|
||||
<p class="status" data-testid="collections-empty">
|
||||
You don't have any collections yet. Open any manga and use
|
||||
<strong>Add to collection</strong> to start one.
|
||||
</p>
|
||||
{:else}
|
||||
<CollectionsGrid {collections} />
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
.status {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.error {
|
||||
color: var(--danger);
|
||||
}
|
||||
</style>
|
||||
20
frontend/src/routes/collections/+page.ts
Normal file
20
frontend/src/routes/collections/+page.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { ApiError } from '$lib/api/client';
|
||||
import { listMyCollections } from '$lib/api/collections';
|
||||
import type { PageLoad } from './$types';
|
||||
|
||||
export const ssr = false;
|
||||
|
||||
/**
 * Load the signed-in user's collections (up to 200).
 *
 * API failures are turned into page data rather than thrown:
 * a 401 yields `authenticated: false`, any other `ApiError` yields its
 * message in `error`. Anything that is not an `ApiError` is rethrown.
 */
export const load: PageLoad = async () => {
	try {
		const { items } = await listMyCollections({ limit: 200 });
		return { collections: items, authenticated: true, error: null };
	} catch (e) {
		if (!(e instanceof ApiError)) throw e;
		if (e.status === 401) {
			return { collections: [], authenticated: false, error: null };
		}
		return { collections: [], authenticated: true, error: e.message };
	}
};
|
||||
313
frontend/src/routes/collections/[id]/+page.svelte
Normal file
313
frontend/src/routes/collections/[id]/+page.svelte
Normal file
@@ -0,0 +1,313 @@
|
||||
<script lang="ts">
|
||||
import { goto } from '$app/navigation';
|
||||
import {
|
||||
deleteCollection,
|
||||
removeMangaFromCollection,
|
||||
updateCollection
|
||||
} from '$lib/api/collections';
|
||||
import type { Manga } from '$lib/api/client';
|
||||
import MangaCard from '$lib/components/MangaCard.svelte';
|
||||
import ArrowLeft from '@lucide/svelte/icons/arrow-left';
|
||||
import Pencil from '@lucide/svelte/icons/pencil';
|
||||
import Check from '@lucide/svelte/icons/check';
|
||||
import Trash2 from '@lucide/svelte/icons/trash-2';
|
||||
import X from '@lucide/svelte/icons/x';
|
||||
|
||||
let { data } = $props();
|
||||
// svelte-ignore state_referenced_locally
|
||||
let collection = $state({ ...data.collection });
|
||||
// svelte-ignore state_referenced_locally
|
||||
let mangas = $state<Manga[]>([...data.mangas]);
|
||||
|
||||
let editing = $state(false);
|
||||
let editName = $state('');
|
||||
let editDescription = $state('');
|
||||
let editError: string | null = $state(null);
|
||||
let editBusy = $state(false);
|
||||
|
||||
/**
 * Enter edit mode: seed the form fields from the current collection
 * (a missing description becomes an empty string) and clear any
 * previous error.
 */
function startEdit() {
	const { name, description } = collection;
	editError = null;
	editName = name;
	editDescription = description ?? '';
	editing = true;
}
|
||||
|
||||
/**
 * Persist the edited name/description.
 *
 * Re-entrant calls are ignored while a save is in flight. The name is
 * sent trimmed; a description that is empty after trimming is sent as
 * `null`. On success the local `collection` is replaced with the
 * server's response and edit mode is left; on failure the message is
 * surfaced through `editError` and edit mode stays open.
 */
async function saveEdit() {
	if (editBusy) return;
	editBusy = true;
	editError = null;
	try {
		collection = await updateCollection(collection.id, {
			name: editName.trim(),
			description: editDescription.trim() || null
		});
		editing = false;
	} catch (e) {
		// A caught value is `unknown`: only Errors have `.message`.
		// Stringify anything else so the failure is never shown blank.
		editError = e instanceof Error ? e.message : String(e);
	} finally {
		editBusy = false;
	}
}
|
||||
|
||||
/**
 * Delete the collection after a browser confirmation prompt, then
 * navigate back to the collections list. Failures (of the delete call
 * or of the navigation) are surfaced through `editError`.
 */
async function onDeleteCollection() {
	const confirmed = confirm(`Delete collection "${collection.name}"? This cannot be undone.`);
	if (!confirmed) return;
	try {
		await deleteCollection(collection.id);
		// Awaited so a failed navigation is reported instead of
		// becoming an unhandled rejection.
		await goto('/collections');
	} catch (e) {
		// A caught value is `unknown`: only Errors have `.message`.
		editError = e instanceof Error ? e.message : String(e);
	}
}
|
||||
|
||||
/**
 * Optimistically remove a manga from the collection.
 *
 * The manga disappears from the local list immediately. If the server
 * call fails, only that manga is put back, at its former position, and
 * the error is surfaced through `editError`. Restoring a snapshot of
 * the whole list would also bring back other mangas whose removal
 * succeeded while this request was still in flight.
 *
 * @param m The manga to remove; ignored if it is not in the list.
 */
async function onRemoveManga(m: Manga) {
	const formerIndex = mangas.findIndex((x) => x.id === m.id);
	// Not in the list (e.g. a removal is already in flight): nothing to do.
	if (formerIndex < 0) return;

	mangas = mangas.filter((x) => x.id !== m.id);
	try {
		await removeMangaFromCollection(collection.id, m.id);
	} catch (e) {
		if (!mangas.some((x) => x.id === m.id)) {
			const restored = [...mangas];
			// The list may have shrunk in the meantime; clamp the index.
			restored.splice(Math.min(formerIndex, restored.length), 0, m);
			mangas = restored;
		}
		// A caught value is `unknown`: only Errors have `.message`.
		editError = e instanceof Error ? e.message : String(e);
	}
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>{collection.name} — Mangalord</title>
|
||||
</svelte:head>
|
||||
|
||||
<nav class="back">
|
||||
<a href="/collections" class="back-link">
|
||||
<ArrowLeft size={16} aria-hidden="true" />
|
||||
<span>All collections</span>
|
||||
</a>
|
||||
</nav>
|
||||
|
||||
<header class="overview">
|
||||
{#if editing}
|
||||
<form
|
||||
class="edit-form"
|
||||
onsubmit={(e) => {
|
||||
e.preventDefault();
|
||||
void saveEdit();
|
||||
}}
|
||||
action="javascript:void(0)"
|
||||
>
|
||||
<input
|
||||
type="text"
|
||||
bind:value={editName}
|
||||
maxlength="64"
|
||||
required
|
||||
aria-label="Collection name"
|
||||
data-testid="collection-edit-name"
|
||||
/>
|
||||
<textarea
|
||||
bind:value={editDescription}
|
||||
rows="2"
|
||||
maxlength="1024"
|
||||
placeholder="Description (optional)"
|
||||
aria-label="Collection description"
|
||||
data-testid="collection-edit-description"
|
||||
></textarea>
|
||||
<div class="edit-actions">
|
||||
<button
|
||||
type="submit"
|
||||
class="primary"
|
||||
disabled={!editName.trim() || editBusy}
|
||||
data-testid="collection-edit-save"
|
||||
>
|
||||
<Check size={14} aria-hidden="true" />
|
||||
<span>Save</span>
|
||||
</button>
|
||||
<button type="button" onclick={() => (editing = false)} disabled={editBusy}>
|
||||
<X size={14} aria-hidden="true" />
|
||||
<span>Cancel</span>
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
{:else}
|
||||
<div class="title-row">
|
||||
<h1 data-testid="collection-name">{collection.name}</h1>
|
||||
<button
|
||||
type="button"
|
||||
class="icon-btn"
|
||||
onclick={startEdit}
|
||||
aria-label="Edit collection"
|
||||
title="Edit"
|
||||
data-testid="collection-edit-open"
|
||||
>
|
||||
<Pencil size={16} aria-hidden="true" />
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
class="icon-btn danger"
|
||||
onclick={onDeleteCollection}
|
||||
aria-label="Delete collection"
|
||||
title="Delete"
|
||||
data-testid="collection-delete"
|
||||
>
|
||||
<Trash2 size={16} aria-hidden="true" />
|
||||
</button>
|
||||
</div>
|
||||
{#if collection.description}
|
||||
<p class="description" data-testid="collection-description">
|
||||
{collection.description}
|
||||
</p>
|
||||
{/if}
|
||||
{/if}
|
||||
{#if editError}
|
||||
<p class="error" role="alert">{editError}</p>
|
||||
{/if}
|
||||
</header>
|
||||
|
||||
{#if mangas.length === 0}
|
||||
<p class="status" data-testid="collection-empty">
|
||||
This collection is empty.
|
||||
</p>
|
||||
{:else}
|
||||
<ul class="manga-grid" data-testid="collection-manga-list">
|
||||
{#each mangas as m (m.id)}
|
||||
<li class="card-with-remove">
|
||||
<MangaCard manga={m} testid={`collection-manga-${m.id}`} />
|
||||
<button
|
||||
type="button"
|
||||
class="remove"
|
||||
onclick={() => onRemoveManga(m)}
|
||||
aria-label={`Remove ${m.title} from collection`}
|
||||
title="Remove from collection"
|
||||
data-testid={`collection-remove-manga-${m.id}`}
|
||||
>
|
||||
<X size={14} aria-hidden="true" />
|
||||
</button>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
.back {
|
||||
margin-bottom: var(--space-3);
|
||||
}
|
||||
|
||||
.back-link {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: var(--space-1);
|
||||
color: var(--text-muted);
|
||||
font-size: var(--font-sm);
|
||||
}
|
||||
|
||||
.overview {
|
||||
margin-bottom: var(--space-5);
|
||||
}
|
||||
|
||||
.title-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.title-row h1 {
|
||||
margin: 0;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.description {
|
||||
color: var(--text-muted);
|
||||
margin: var(--space-2) 0 0;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
.edit-form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.edit-actions {
|
||||
display: flex;
|
||||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
.primary {
|
||||
background: var(--primary);
|
||||
color: var(--primary-contrast);
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.primary:hover:not(:disabled) {
|
||||
background: var(--primary-hover);
|
||||
border-color: var(--primary-hover);
|
||||
}
|
||||
|
||||
.error {
|
||||
color: var(--danger);
|
||||
margin: var(--space-2) 0 0;
|
||||
}
|
||||
|
||||
.status {
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.manga-grid {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
|
||||
gap: var(--space-4);
|
||||
}
|
||||
|
||||
.card-with-remove {
|
||||
position: relative;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.remove {
|
||||
position: absolute;
|
||||
top: var(--space-1);
|
||||
right: var(--space-1);
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 24px;
|
||||
height: 24px;
|
||||
padding: 0;
|
||||
background: rgba(0, 0, 0, 0.6);
|
||||
color: white;
|
||||
border: 0;
|
||||
border-radius: 50%;
|
||||
cursor: pointer;
|
||||
opacity: 0;
|
||||
transition: opacity var(--transition);
|
||||
}
|
||||
|
||||
.card-with-remove:hover .remove,
|
||||
.card-with-remove:focus-within .remove {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.icon-btn {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
padding: 0;
|
||||
background: transparent;
|
||||
color: var(--text-muted);
|
||||
border: 1px solid transparent;
|
||||
border-radius: var(--radius-sm);
|
||||
}
|
||||
|
||||
.icon-btn:hover {
|
||||
background: var(--surface-elevated);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.icon-btn.danger:hover {
|
||||
color: var(--danger);
|
||||
}
|
||||
</style>
|
||||
40
frontend/src/routes/collections/[id]/+page.ts
Normal file
40
frontend/src/routes/collections/[id]/+page.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { error, redirect } from '@sveltejs/kit';
|
||||
import { ApiError } from '$lib/api/client';
|
||||
import {
|
||||
getCollection,
|
||||
listCollectionMangas
|
||||
} from '$lib/api/collections';
|
||||
import type { PageLoad } from './$types';
|
||||
|
||||
export const ssr = false;
|
||||
|
||||
/**
 * Loads the collection identified by `params.id` together with its mangas.
 *
 * Both requests run in parallel. Only one page of mangas (limit 200) is
 * requested; `total` is the count reported by that response's page metadata.
 *
 * Error mapping for `ApiError`:
 * - 401: redirect to `/login`, passing the requested path and query string in
 *   `next`.
 * - 403 / 404: render the standard 404 page.
 * Every other error is rethrown unchanged.
 */
export const load: PageLoad = async ({ params, url }) => {
    try {
        const [collection, mangas] = await Promise.all([
            getCollection(params.id),
            listCollectionMangas(params.id, { limit: 200 })
        ]);
        return {
            collection,
            mangas: mangas.items,
            total: mangas.page.total
        };
    } catch (e) {
        if (e instanceof ApiError) {
            // 401 means the user's session is gone — bounce to login and
            // preserve where they wanted to go, query string included.
            if (e.status === 401) {
                const next = encodeURIComponent(url.pathname + url.search);
                redirect(302, `/login?next=${next}`);
            }
            // 403 (post-Phase-3-polish the backend collapses this to 404
            // already, but keep the branch for defense-in-depth) and 404
            // both render the standard not-found page so the URL doesn't
            // disclose collection existence to non-owners.
            if (e.status === 404 || e.status === 403) {
                error(404, 'Collection not found');
            }
        }
        throw e;
    }
};
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user