Adds a `tor` service to the compose stack (dockurr/tor) with a torrc tuned for the crawler — SOCKS5 on 9050 with IsolateDestAddr + IsolateDestPort so NEWNYM picks up promptly, control port on 9051 with cookie auth, MaxCircuitDirtiness 60. Backend defaults CRAWLER_PROXY → socks5h://tor:9050 and CRAWLER_TOR_CONTROL_URL → tcp://tor:9051 so TOR + recircuit are on out-of-the-box. Operators can override both to empty in .env to opt out without removing the service. The tor-data named volume is mounted ro on the backend so it can read /var/lib/tor/control_auth_cookie; CookieAuthFileGroupReadable handles the permissions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
98 lines
3.8 KiB
YAML
98 lines
3.8 KiB
YAML
# Production-like compose. Requires a populated `.env` next to this
# file: at minimum POSTGRES_PASSWORD must be set to a non-empty value
# (the `${VAR:?message}` form below fails fast otherwise). The
# frontend container expects HTTPS in front (Caddy/Traefik/nginx)
# because, with COOKIE_SECURE=true, browsers will refuse to send the
# session cookie over plain HTTP.
services:
  # PostgreSQL database. Credentials come from `.env`; POSTGRES_PASSWORD
  # has no fallback, so compose refuses to start when it is unset or empty.
  postgres:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-mangalord}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}
      POSTGRES_DB: ${POSTGRES_DB:-mangalord}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    healthcheck:
      # Probe the configured database explicitly. Without `-d`, pg_isready
      # targets a database named after the user, so the server logs a
      # failed connection attempt on every probe whenever POSTGRES_DB
      # differs from POSTGRES_USER.
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-mangalord} -d ${POSTGRES_DB:-mangalord}"]
      interval: 5s
      timeout: 5s
      retries: 10

  # SOCKS5 proxy for the crawler, plus a control port so the backend
  # can signal NEWNYM on bad pages. See tor/torrc for the daemon
  # config; both ports are only `expose`d (compose-internal), never
  # bound on the host.
  tor:
    image: dockurr/tor:latest
    volumes:
      - ./tor/torrc:/etc/tor/torrc:ro
      # Tor state directory; the backend mounts this same volume read-only.
      - tor-data:/var/lib/tor
    expose:
      # SOCKS5 proxy port.
      - "9050"
      # Control port.
      - "9051"
    restart: unless-stopped

  # API server. Waits for a healthy database and a started tor container.
  backend:
    build: ./backend
    depends_on:
      postgres:
        condition: service_healthy
      tor:
        condition: service_started
    environment:
      DATABASE_URL: postgres://${POSTGRES_USER:-mangalord}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}@postgres:5432/${POSTGRES_DB:-mangalord}
      BIND_ADDRESS: 0.0.0.0:8080
      STORAGE_DIR: /var/lib/mangalord/storage
      RUST_LOG: ${RUST_LOG:-info,mangalord=debug}
      # Auth / cookies — see .env.example for context.
      COOKIE_SECURE: ${COOKIE_SECURE:-true}
      COOKIE_DOMAIN: ${COOKIE_DOMAIN:-}
      SESSION_TTL_DAYS: ${SESSION_TTL_DAYS:-30}
      # CORS — same-origin by default; populate when serving the API on
      # a different host than the frontend.
      CORS_ALLOWED_ORIGINS: ${CORS_ALLOWED_ORIGINS:-}
      # Upload limits, in bytes (defaults: 200 MiB per request, 20 MiB
      # per file).
      MAX_REQUEST_BYTES: ${MAX_REQUEST_BYTES:-209715200}
      MAX_FILE_BYTES: ${MAX_FILE_BYTES:-20971520}
      # System-chromium override for the crawler. Leave blank to use the
      # bundled fetcher; set to e.g. /usr/bin/chromium-headless-shell on
      # arm64 deployments. Pair with `--build-arg INSTALL_CHROMIUM=true`
      # so the image actually contains the binary.
      CRAWLER_CHROMIUM_BINARY: ${CRAWLER_CHROMIUM_BINARY:-}
      # TOR proxy + NEWNYM recircuit (see .env.example for details).
      # Defaults assume the bundled `tor` service above. The first three
      # variables use `${VAR-default}` (no colon) on purpose: the default
      # applies only when the variable is unset, so overriding them to
      # empty strings in .env disables the feature.
      CRAWLER_PROXY: ${CRAWLER_PROXY-socks5h://tor:9050}
      CRAWLER_TOR_CONTROL_URL: ${CRAWLER_TOR_CONTROL_URL-tcp://tor:9051}
      CRAWLER_TOR_CONTROL_COOKIE_PATH: ${CRAWLER_TOR_CONTROL_COOKIE_PATH-/var/lib/tor/control_auth_cookie}
      CRAWLER_TOR_CONTROL_PASSWORD: ${CRAWLER_TOR_CONTROL_PASSWORD:-}
      CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS: ${CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS:-3}
    volumes:
      - storage-data:/var/lib/mangalord/storage
      # Read the TOR control-auth cookie from the shared named volume.
      # Read-only on the backend side; the tor service is the writer.
      - tor-data:/var/lib/tor:ro
    # No host port mapping in the default setup — the frontend proxies
    # /api/* through its hooks.server.ts. Publish :8080 under `ports`
    # only if you want to hit the API directly from the host (e.g., bot
    # scripts during development).
    expose:
      - "8080"

  # Web UI; the only service published on the host.
  frontend:
    build: ./frontend
    depends_on:
      - backend
    environment:
      # SvelteKit's hooks.server.ts proxies /api/* to this URL so the
      # browser only ever talks to :3000 and cookies stay same-origin.
      BACKEND_URL: http://backend:8080
    ports:
      - "3000:3000"

volumes:
  # PostgreSQL data directory, mounted by the `postgres` service.
  postgres-data: {}
  # Backend storage, mounted at /var/lib/mangalord/storage.
  storage-data: {}
  # Tor state directory: written by `tor`, mounted read-only by `backend`.
  tor-data: {}