Compare commits

..

1 Commits

Author SHA1 Message Date
MechaCat02
9b4f3525f6 feat: incremental crawl mode with seed-completion gate (0.33.0)
Daemon now auto-detects mode per source: Backfill until the first
full walk records `seed_completed:<source>` in `crawler_state`, then
Incremental (newest-first, stops after N consecutive Unchanged
upserts). `CRAWLER_MODE` overrides to a fixed mode; CLI rejects
`auto` since it has no pre-run DB state.

`Source::discover` returns a lazy `DiscoverWalk` so Incremental can
break out mid-walk without prefetching pages. The drop pass and seed
marker are now gated on a true full walk — fixes a latent soft-drop
of the index tail under partial sweeps.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 06:41:16 +02:00
14 changed files with 34 additions and 374 deletions

View File

@@ -1,71 +0,0 @@
# Gitea Actions
The [`deploy`](workflows/deploy.yml) workflow runs on every push to `main`
(and via manual `workflow_dispatch`). It tests, builds, pushes the images
to a private registry, and rolls the stack over by SSH on the target host.
## Required secrets
Set under *Repo Settings → Actions → Secrets*:
| Name | Example | Purpose |
| -------------------- | ------------------------ | ---------------------------------------------------------------- |
| `REGISTRY_URL` | `registry.example.com` | Registry host. No scheme, no trailing slash. |
| `REGISTRY_USERNAME` | `mangalord-ci` | `docker login` user. |
| `REGISTRY_PASSWORD` | `<token>` | `docker login` token/password. |
| `SSH_HOST` | `mangalord.example.com` | Deploy target hostname/IP. |
| `SSH_USER` | `deploy` | SSH user on the target (must be in the `docker` group). |
| `SSH_PRIVATE_KEY` | `-----BEGIN OPENSSH...` | Private key authorised in the target user's `authorized_keys`. |
| `SSH_PORT` | `22` | Optional. Defaults to `22` if unset. |
## Required variables
Set under *Repo Settings → Actions → Variables* (not secrets — they appear
in logs):
| Name | Example | Purpose |
| ------------- | ------------------------ | ---------------------------------------------------------------------- |
| `DEPLOY_PATH` | `/srv/mangalord` | Directory on target holding `docker-compose.yml`, `.env`, and the prod overlay. |
## One-time host setup
The workflow assumes the deploy target already has:
1. Docker + Docker Compose v2 installed and the `SSH_USER` in the `docker` group.
2. `$DEPLOY_PATH/docker-compose.yml` (copy of the repo's [docker-compose.yml](../docker-compose.yml)).
3. `$DEPLOY_PATH/docker-compose.prod.yml` (copy of the repo's [docker-compose.prod.yml](../docker-compose.prod.yml)).
4. `$DEPLOY_PATH/.env` populated from [.env.example](../.env.example) with production values (real `POSTGRES_PASSWORD`, `COOKIE_SECURE=true`, etc.).
Bootstrap once:
```bash
ssh deploy@mangalord.example.com
sudo mkdir -p /srv/mangalord && sudo chown deploy:deploy /srv/mangalord
cd /srv/mangalord
# place docker-compose.yml, docker-compose.prod.yml, and .env here
```
The first workflow run will pull the images, bring the stack up, and run
the embedded migrations on startup.
## Image tags
Every push produces three tags per image:
- `mangalord-{backend,frontend}:latest`
- `mangalord-{backend,frontend}:<git-sha>` — used by the deploy job; lets
you pin a deploy to a specific commit
- `mangalord-{backend,frontend}:<version>` — the version from
[backend/Cargo.toml](../backend/Cargo.toml) (verified in lockstep with
[frontend/package.json](../frontend/package.json))
## Rollback
SSH to the target, set `IMAGE_TAG` to a previous commit SHA, and re-up:
```bash
cd /srv/mangalord
export REGISTRY_URL=registry.example.com
export IMAGE_TAG=<previous-sha>
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
```

View File

@@ -1,144 +0,0 @@
# CI/CD pipeline: test both halves of the app, build and push the
# container images, then roll the stack over on the deploy host via SSH.
# Triggered by pushes to `main` and by manual `workflow_dispatch`.
name: deploy

on:
  push:
    branches: [main]
  workflow_dispatch:

jobs:
  # Backend tests run inside a Rust container next to a throwaway
  # Postgres service; DATABASE_URL points the test run at that service.
  test-backend:
    runs-on: ubuntu-latest
    container:
      image: rust:1-slim
    services:
      postgres:
        image: postgres:16-alpine
        env:
          POSTGRES_USER: mangalord
          POSTGRES_PASSWORD: mangalord
          POSTGRES_DB: mangalord
        options: >-
          --health-cmd "pg_isready -U mangalord"
          --health-interval 5s
          --health-timeout 5s
          --health-retries 10
    env:
      DATABASE_URL: postgres://mangalord:mangalord@postgres:5432/mangalord
    steps:
      - uses: actions/checkout@v4
      - name: Install build deps
        run: |
          apt-get update
          apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates
      # Cache key follows Cargo.lock; restore-keys allows a partial hit
      # when the lockfile changes.
      - name: Cache cargo registry and target
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            backend/target
          key: cargo-${{ runner.os }}-${{ hashFiles('backend/Cargo.lock') }}
          restore-keys: |
            cargo-${{ runner.os }}-
      - name: cargo test
        working-directory: backend
        run: cargo test --locked

  test-frontend:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '22'
          cache: npm
          cache-dependency-path: frontend/package-lock.json
      - name: npm ci
        working-directory: frontend
        run: npm ci
      - name: vitest
        working-directory: frontend
        run: npm test

  # Builds and pushes both images once both test jobs pass. Exposes the
  # commit-SHA tag and the package version as job outputs.
  build-and-push:
    runs-on: ubuntu-latest
    needs: [test-backend, test-frontend]
    outputs:
      image_tag: ${{ steps.meta.outputs.image_tag }}
      version: ${{ steps.meta.outputs.version }}
    steps:
      - uses: actions/checkout@v4
      # Fails the job when backend/Cargo.toml and frontend/package.json
      # disagree on the version, so the version tag is unambiguous.
      - name: Resolve image tags
        id: meta
        run: |
          version="$(grep -m1 '^version' backend/Cargo.toml | cut -d'"' -f2)"
          frontend_version="$(grep -m1 '"version"' frontend/package.json | cut -d'"' -f4)"
          if [ "$version" != "$frontend_version" ]; then
            echo "Version mismatch: backend=$version frontend=$frontend_version" >&2
            exit 1
          fi
          echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
          echo "version=${version}" >> "$GITHUB_OUTPUT"
      - uses: docker/setup-buildx-action@v3
      - name: docker login
        uses: docker/login-action@v3
        with:
          registry: ${{ secrets.REGISTRY_URL }}
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_PASSWORD }}
      # Each image is pushed under three tags: latest, commit SHA, version.
      - name: Build & push backend
        uses: docker/build-push-action@v5
        with:
          context: ./backend
          push: true
          tags: |
            ${{ secrets.REGISTRY_URL }}/mangalord-backend:latest
            ${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.image_tag }}
            ${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.version }}
          cache-from: type=gha,scope=backend
          cache-to: type=gha,mode=max,scope=backend
      - name: Build & push frontend
        uses: docker/build-push-action@v5
        with:
          context: ./frontend
          push: true
          tags: |
            ${{ secrets.REGISTRY_URL }}/mangalord-frontend:latest
            ${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.image_tag }}
            ${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.version }}
          cache-from: type=gha,scope=frontend
          cache-to: type=gha,mode=max,scope=frontend

  # Pulls the freshly pushed images on the target host and re-ups the
  # stack, pinned to the commit SHA produced by build-and-push.
  deploy:
    runs-on: ubuntu-latest
    needs: build-and-push
    steps:
      - name: SSH deploy
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ secrets.SSH_PORT || 22 }}
          # Names of the step-level env vars (below) forwarded to the
          # remote shell.
          envs: REGISTRY_URL,REGISTRY_USERNAME,REGISTRY_PASSWORD,IMAGE_TAG,DEPLOY_PATH
          script_stop: true
          # The EXIT trap is installed right after login so the registry
          # credentials are removed from the host even when `pull` or
          # `up` fails and `set -e` aborts the script early. Previously
          # the trailing `docker logout` was skipped on any failure.
          script: |
            set -euo pipefail
            cd "$DEPLOY_PATH"
            echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" -u "$REGISTRY_USERNAME" --password-stdin
            trap 'docker logout "$REGISTRY_URL"' EXIT
            export REGISTRY_URL IMAGE_TAG
            docker compose -f docker-compose.yml -f docker-compose.prod.yml pull
            docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
            docker image prune -f
        env:
          REGISTRY_URL: ${{ secrets.REGISTRY_URL }}
          REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
          IMAGE_TAG: ${{ needs.build-and-push.outputs.image_tag }}
          DEPLOY_PATH: ${{ vars.DEPLOY_PATH }}

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "mangalord"
version = "0.34.1"
version = "0.33.0"
dependencies = [
"anyhow",
"argon2",

View File

@@ -1,6 +1,6 @@
[package]
name = "mangalord"
version = "0.34.1"
version = "0.33.0"
edition = "2021"
default-run = "mangalord"

View File

@@ -230,24 +230,8 @@ async fn create_token(
Json(input): Json<CreateTokenInput>,
) -> AppResult<impl IntoResponse> {
let name = input.name.trim();
// Both arms use `ValidationFailed` (422 with field details) to
// match the structured-error shape `attach_tag` returns for the
// same kind of free-form-identifier validation. The other
// /auth/* handlers in this file use `InvalidInput` (400); the
// divergence is pre-existing and would warrant a project-wide
// pass to flip them all if the client side wants uniform per-
// field error rendering.
if name.is_empty() {
return Err(AppError::ValidationFailed {
message: "token name is required".into(),
details: serde_json::json!({ "name": "required" }),
});
}
if name.chars().count() > 64 {
return Err(AppError::ValidationFailed {
message: "token name too long".into(),
details: serde_json::json!({ "name": "max 64 characters" }),
});
return Err(AppError::InvalidInput("token name is required".into()));
}
let (raw, hash) = generate_token();
let token = repo::api_token::create(&state.db, user.id, name, &hash).await?;

View File

@@ -348,7 +348,6 @@ async fn attach_tag(
Path(id): Path<Uuid>,
Json(body): Json<AttachTagBody>,
) -> AppResult<(StatusCode, Json<TagRef>)> {
validate_tag_name(&body.name)?;
if !repo::manga::exists(&state.db, id).await? {
return Err(AppError::NotFound);
}
@@ -395,27 +394,6 @@ async fn detach_tag(
}
}
/// Validates the `name` field of a `POST /mangas/:id/tags` body before
/// any database work happens.
///
/// The name is trimmed first. An empty result, or one longer than 64
/// characters (counted as `char`s, not bytes), yields
/// `AppError::ValidationFailed` with a per-field `details` object keyed
/// by `"name"`. The 64-char limit is the same one
/// `repo::tag::upsert_by_name` applies; checking here keeps the error
/// envelope in the shape other validations use.
fn validate_tag_name(name: &str) -> AppResult<()> {
    let (message, reason) = match name.trim().chars().count() {
        0 => ("tag name cannot be empty", "required"),
        len if len > 64 => ("tag name too long", "max 64 characters"),
        _ => return Ok(()),
    };
    Err(AppError::ValidationFailed {
        message: message.into(),
        details: json!({ "name": reason }),
    })
}
fn validate_new_manga(input: &NewManga) -> AppResult<()> {
if input.title.trim().is_empty() {
return Err(AppError::ValidationFailed {

View File

@@ -16,13 +16,6 @@ impl LocalStorage {
}
fn resolve(&self, key: &str) -> Result<PathBuf, StorageError> {
// NUL bytes are rejected by the Linux syscall layer, but the
// error surfaces as an opaque IO failure rather than the
// explicit `BadKey` the rest of the contract uses. Catch it
// here so the error path is consistent.
if key.contains('\0') {
return Err(StorageError::BadKey);
}
let key = key.trim_start_matches('/');
if key.is_empty() {
return Err(StorageError::BadKey);
@@ -121,9 +114,6 @@ mod tests {
assert!(matches!(s.get(".").await, Err(StorageError::BadKey)));
// Empty segment via doubled slash.
assert!(matches!(s.get("a//b").await, Err(StorageError::BadKey)));
// NUL byte (rejected explicitly so callers see BadKey rather
// than an opaque IO error from the kernel).
assert!(matches!(s.put("a\0b", b"x").await, Err(StorageError::BadKey)));
}
#[tokio::test]

View File

@@ -581,27 +581,3 @@ async fn delete_unknown_token_is_404(pool: PgPool) {
.unwrap();
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
}
/// A bot token name is a free-form, user-supplied string; before the
/// cap existed a 10 MB name was accepted. Names are limited to 64
/// chars, in line with the other free-form identifier caps (tags,
/// collection names). The rejection is a `ValidationFailed` (422 with
/// per-field details), the same shape clients already handle for
/// `attach_tag`.
#[sqlx::test(migrations = "./migrations")]
async fn create_token_rejects_name_over_64_chars(pool: PgPool) {
    let harness = common::harness(pool);
    let (_, session_cookie) = common::register_user(&harness.app).await;

    // One character past the limit.
    let oversized_name = "x".repeat(65);
    let request = common::post_json_with_cookie(
        "/api/v1/auth/tokens",
        json!({ "name": oversized_name }),
        &session_cookie,
    );

    let response = harness.app.oneshot(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::UNPROCESSABLE_ENTITY);

    let payload = common::body_json(response).await;
    assert_eq!(payload["error"]["code"], "validation_failed");
    assert!(payload["error"]["details"]["name"].is_string());
}

View File

@@ -59,31 +59,6 @@ async fn reattach_same_tag_is_idempotent_and_returns_200(pool: PgPool) {
assert_eq!(second.status(), StatusCode::OK);
}
/// Attaching a tag whose name exceeds 64 chars must fail at the handler
/// boundary. The repo layer enforces the same cap, but rejecting in the
/// handler keeps the error envelope consistent with the other
/// validation paths (username, collection name, etc.).
#[sqlx::test(migrations = "./migrations")]
async fn attach_rejects_tag_name_over_64_chars(pool: PgPool) {
    let harness = common::harness(pool);
    let (_, session_cookie) = common::register_user(&harness.app).await;
    let manga_id = common::seed_manga_via_api(&harness.app, &session_cookie, "Berserk").await;

    // One character past the limit.
    let oversized_name: String = "x".repeat(65);
    let uri = format!("/api/v1/mangas/{manga_id}/tags");
    let request = common::post_json_with_cookie(
        &uri,
        json!({ "name": oversized_name }),
        &session_cookie,
    );

    let response = harness.app.oneshot(request).await.unwrap();
    assert_eq!(response.status(), StatusCode::UNPROCESSABLE_ENTITY);

    let payload = common::body_json(response).await;
    assert_eq!(payload["error"]["code"], "validation_failed");
}
#[sqlx::test(migrations = "./migrations")]
async fn tag_names_dedup_case_insensitively(pool: PgPool) {
let h = common::harness(pool);

View File

@@ -1,22 +0,0 @@
# Production overlay for the deploy host. Apply it on top of
# docker-compose.yml so backend and frontend run from pre-built registry
# images rather than being built locally:
#
#   docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
#
# .gitea/workflows/deploy.yml injects REGISTRY_URL and IMAGE_TAG at
# deploy time. IMAGE_TAG falls back to `latest`, so a manual
# `docker compose ... up -d` on the host still works.
services:
  backend:
    # Drop the base file's build section; this service is image-only here.
    build: !reset null
    image: ${REGISTRY_URL}/mangalord-backend:${IMAGE_TAG:-latest}
    pull_policy: always
    restart: unless-stopped

  frontend:
    # Drop the base file's build section; this service is image-only here.
    build: !reset null
    image: ${REGISTRY_URL}/mangalord-frontend:${IMAGE_TAG:-latest}
    pull_policy: always
    restart: unless-stopped

View File

@@ -1,6 +1,6 @@
{
"name": "mangalord-frontend",
"version": "0.34.1",
"version": "0.33.0",
"private": true,
"type": "module",
"scripts": {

View File

@@ -94,11 +94,6 @@ describe('auth api client', () => {
expect(url).toMatch(/\/v1\/auth\/logout$/);
const init = fetchSpy.mock.calls[0][1] as RequestInit;
expect(init.method).toBe('POST');
// Consistent content-type for all mutation requests, matching
// the rest of the module — axum doesn't require it but the
// header keeps the request style uniform.
const headers = new Headers(init.headers);
expect(headers.get('content-type')).toBe('application/json');
});
it('me returns the user on 200', async () => {

View File

@@ -32,14 +32,7 @@ export async function login(creds: Credentials): Promise<User> {
}
export async function logout(): Promise<void> {
await request<void>('/v1/auth/logout', {
method: 'POST',
// Consistent with the other POST/PATCH helpers in this module.
// axum doesn't require it (no body), but keeping the header
// on every mutation request avoids the false-flag in logs and
// matches the project's style.
headers: { 'content-type': 'application/json' }
});
await request<void>('/v1/auth/logout', { method: 'POST' });
}
export type ChangePassword = {

View File

@@ -350,48 +350,54 @@
});
/**
* Flush read-progress as the tab is closing. A plain `fetch()`
* during `pagehide` / `beforeunload` is cancelled by every
* browser; `fetch(..., { keepalive: true })` is the supported
* escape hatch and survives the close.
*
* `sendBeacon` would be the textbook alternative, but it's
* POST-only and `/me/read-progress` takes PUT — so a beacon
* always 405s, adds server-log noise, then falls through to this
* same keepalive path anyway. The beacon was dropped; the
* keepalive fetch is the only path.
* `fetch()` initiated during `pagehide` / `beforeunload` is
* cancelled by every browser by default. `sendBeacon` is the
* supported way to ship a small payload during unload — it's
* guaranteed to survive even if the tab is closing. Failure here
* is silent because the API is fire-and-forget.
*/
function flushFinalProgress() {
function beaconFinalProgress() {
if (!session.user) return;
const body = JSON.stringify({
manga_id: manga.id,
chapter_id: chapter.id,
page: progressPage
});
const blob = new Blob([body], { type: 'application/json' });
// sendBeacon only supports POST — the server's PUT route is
// strict on method. The dedicated POST alias is omitted; in
// practice the in-app navigation path (back-link, chapter
// links) already covers the common-case unmount via the
// onDestroy fetch. Fall through to fetch+keepalive for browser
// implementations that don't honor sendBeacon for this endpoint.
try {
void fetch('/api/v1/me/read-progress', {
method: 'PUT',
headers: { 'content-type': 'application/json' },
body,
keepalive: true,
credentials: 'include'
});
const ok = navigator.sendBeacon('/api/v1/me/read-progress', blob);
if (!ok) throw new Error('sendBeacon rejected');
} catch {
// keepalive fetch was rejected (very old Firefox etc.);
// the in-app onDestroy flush below catches the SPA-
// navigation case, which is the common one anyway.
try {
void fetch('/api/v1/me/read-progress', {
method: 'PUT',
headers: { 'content-type': 'application/json' },
body,
keepalive: true,
credentials: 'include'
});
} catch {
// Final fallback failed; the in-app onDestroy flush
// below catches the SPA-navigation case.
}
}
}
onMount(() => {
window.addEventListener('pagehide', flushFinalProgress);
window.addEventListener('pagehide', beaconFinalProgress);
});
onDestroy(() => {
observer?.disconnect();
if (progressTimer) clearTimeout(progressTimer);
if (typeof window !== 'undefined') {
window.removeEventListener('pagehide', flushFinalProgress);
window.removeEventListener('pagehide', beaconFinalProgress);
}
// Don't let the fullscreen flag leak to non-reader pages —
// otherwise the layout header would stay slid-off on /upload