Compare commits

..

1 Commits

Author SHA1 Message Date
MechaCat02
fab63f9f8c chore: drop dead 'failed' branch from crawler_jobs partial index
0012_crawler.sql's partial index on `state IN ('pending','failed')`
indexes a state that no code path ever writes — ack_failed in
crawler/jobs.rs only ever moves jobs to 'dead' or 'pending'. The
'failed' branch costs a write on every state change without ever
matching a query. Drop it; the CHECK still allows 'failed' so a
future migration can re-introduce it cleanly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 19:23:55 +02:00
3 changed files with 20 additions and 58 deletions

View File

@@ -19,49 +19,12 @@ COPY migrations ./migrations
RUN touch src/main.rs src/lib.rs && cargo build --locked --release
FROM debian:bookworm-slim

# Runtime packages:
#   ca-certificates - outbound HTTPS (crawler covers/pages)
#   curl            - probe used by the container HEALTHCHECK below
# Both are installed by a single `apt-get install` so each package is
# resolved once, and the apt lists are removed in the same layer so
# they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Non-root runtime user. The API binary doesn't need any root
# privilege; the crawler daemon's Chromium launcher uses --no-sandbox
# precisely because user-namespace sandboxing is fragile, so dropping
# privileges costs nothing operationally and shrinks the blast radius
# of any RCE.
# The UID/GID are build args so operators can align them with the
# ownership of an existing volume.
ARG APP_UID=10001
ARG APP_GID=10001
RUN groupadd --system --gid ${APP_GID} app \
    && useradd --system --uid ${APP_UID} --gid app --home-dir /home/app --create-home --shell /usr/sbin/nologin app

WORKDIR /app
COPY --from=builder /app/target/release/mangalord /usr/local/bin/mangalord
COPY --from=builder /app/migrations /app/migrations

ENV STORAGE_DIR=/var/lib/mangalord/storage

# Pre-create the storage dir so the entrypoint doesn't need to
# mkdir-as-root and so the named volume mount inherits the right
# ownership.
#
# UPGRADE NOTE for operators: if you're moving from an older image
# that ran as root, the existing `storage-data` volume has files owned
# by UID 0 and the new UID-10001 user can't write them. Run once
# before the upgrade:
#   docker compose run --rm --user 0 backend \
#     chown -R 10001:10001 /var/lib/mangalord/storage
# (Postgres is unaffected — that image's `postgres` user UID hasn't
# changed.)
RUN mkdir -p ${STORAGE_DIR} \
    && chown -R app:app ${STORAGE_DIR} /app /home/app

# Everything that needs root is done; drop privileges for runtime.
USER app
EXPOSE 8080

# `--start-period` is generous because first boot runs sqlx::migrate
# against postgres which can take a few seconds; subsequent restarts
# are sub-second.
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
    CMD curl -fsS http://localhost:8080/api/v1/health > /dev/null || exit 1

CMD ["mangalord"]

View File

@@ -0,0 +1,15 @@
-- Narrow crawler_jobs_ready_idx to the only state it can ever match.
--
-- Migration 0012 defined this partial index over
-- `state IN ('pending','failed')`. However, `ack_failed` in
-- src/crawler/jobs.rs moves a job to either `dead` or `pending` and
-- never to `failed`, so the `failed` half of the predicate matches no
-- row and only adds index maintenance on writes.
--
-- The index is dropped and rebuilt with a `pending`-only predicate.
-- The CHECK constraint on `state` is left alone and still accepts
-- `'failed'`, so a later migration can start using that
-- terminal-but-retryable state without another schema change.
DROP INDEX IF EXISTS crawler_jobs_ready_idx;

CREATE INDEX crawler_jobs_ready_idx
    ON crawler_jobs (scheduled_at)
    WHERE state = 'pending';

View File

@@ -1,11 +1,7 @@
FROM node:22-alpine AS builder
WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached
# until package.json / package-lock.json change.
COPY package.json package-lock.json* ./

# `npm ci` installs the locked versions exactly; `npm install` would
# silently rewrite package-lock.json mid-build. CI (.gitea/workflows)
# also uses `npm ci`, so this keeps the image build deterministic and
# matches what the test job validated. Do not follow it with
# `npm install`: that is redundant and reintroduces the lockfile drift.
RUN npm ci

# Bring in the application source and produce the production bundle.
COPY . .
RUN npm run build
@@ -14,20 +10,8 @@ WORKDIR /app
ENV NODE_ENV=production
ENV HOST=0.0.0.0
ENV PORT=3000

# node:22-alpine ships a `node` user (UID 1000); use it instead of
# running the SvelteKit server as root.
# Each artifact is copied exactly once, with `--chown`: COPY creates
# files as UID/GID 0 regardless of the active USER, so a second plain
# COPY of the same paths would hand ownership back to root.
COPY --from=builder --chown=node:node /app/build ./build
COPY --from=builder --chown=node:node /app/node_modules ./node_modules
COPY --from=builder --chown=node:node /app/package.json ./
USER node

EXPOSE 3000

# Alpine's busybox `wget` is the canonical lightweight HTTP probe.
# `--spider` doesn't follow redirects; `node build` serves a 200 on
# `/` for the homepage so this works without a dedicated /health.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD wget -q --spider http://localhost:3000/ || exit 1

CMD ["node", "build"]