Compare commits
1 Commits
chore/cont
...
chore/craw
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fab63f9f8c |
@@ -19,49 +19,12 @@ COPY migrations ./migrations
|
|||||||
RUN touch src/main.rs src/lib.rs && cargo build --locked --release
|
RUN touch src/main.rs src/lib.rs && cargo build --locked --release
|
||||||
|
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim
|
||||||
# `curl` is for the container HEALTHCHECK; `ca-certificates` is for
|
|
||||||
# outbound HTTPS (crawler covers/pages).
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends ca-certificates curl \
|
&& apt-get install -y --no-install-recommends ca-certificates \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Non-root runtime user. The API binary doesn't need any root
|
|
||||||
# privilege; the crawler daemon's Chromium launcher uses --no-sandbox
|
|
||||||
# precisely because user-namespace sandboxing is fragile, so dropping
|
|
||||||
# privileges costs nothing operationally and shrinks the blast radius
|
|
||||||
# of any RCE.
|
|
||||||
ARG APP_UID=10001
|
|
||||||
ARG APP_GID=10001
|
|
||||||
RUN groupadd --system --gid ${APP_GID} app \
|
|
||||||
&& useradd --system --uid ${APP_UID} --gid app --home-dir /home/app --create-home --shell /usr/sbin/nologin app
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY --from=builder /app/target/release/mangalord /usr/local/bin/mangalord
|
COPY --from=builder /app/target/release/mangalord /usr/local/bin/mangalord
|
||||||
COPY --from=builder /app/migrations /app/migrations
|
COPY --from=builder /app/migrations /app/migrations
|
||||||
|
|
||||||
ENV STORAGE_DIR=/var/lib/mangalord/storage
|
ENV STORAGE_DIR=/var/lib/mangalord/storage
|
||||||
# Pre-create the storage dir so the entrypoint doesn't need to
|
|
||||||
# mkdir-as-root and so the named volume mount inherits the right
|
|
||||||
# ownership.
|
|
||||||
#
|
|
||||||
# UPGRADE NOTE for operators: if you're moving from an older image
|
|
||||||
# that ran as root, the existing `storage-data` volume has files owned
|
|
||||||
# by UID 0 and the new UID-10001 user can't write them. Run once
|
|
||||||
# before the upgrade:
|
|
||||||
# docker compose run --rm --user 0 backend \
|
|
||||||
# chown -R 10001:10001 /var/lib/mangalord/storage
|
|
||||||
# (Postgres is unaffected — that image's `postgres` user UID hasn't
|
|
||||||
# changed.)
|
|
||||||
RUN mkdir -p ${STORAGE_DIR} \
|
|
||||||
&& chown -R app:app ${STORAGE_DIR} /app /home/app
|
|
||||||
|
|
||||||
USER app
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|
||||||
# `--start-period` is generous because first boot runs sqlx::migrate
|
|
||||||
# against postgres which can take a few seconds; subsequent restarts
|
|
||||||
# are sub-second.
|
|
||||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
|
|
||||||
CMD curl -fsS http://localhost:8080/api/v1/health > /dev/null || exit 1
|
|
||||||
|
|
||||||
CMD ["mangalord"]
|
CMD ["mangalord"]
|
||||||
|
|||||||
15
backend/migrations/0016_crawler_jobs_drop_failed_state.sql
Normal file
15
backend/migrations/0016_crawler_jobs_drop_failed_state.sql
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
-- The original 0012 partial index covers `state IN ('pending','failed')`,
|
||||||
|
-- but `ack_failed` in src/crawler/jobs.rs only writes `dead` or
|
||||||
|
-- `pending` — `failed` is never set. The index branch on `failed`
|
||||||
|
-- never matches any row, so it's dead weight on every write.
|
||||||
|
--
|
||||||
|
-- Drop and recreate the index without the dead branch. The CHECK
|
||||||
|
-- constraint on `state` still allows `'failed'` so a future migration
|
||||||
|
-- can adopt that terminal-but-retryable state without a second
|
||||||
|
-- schema change.
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS crawler_jobs_ready_idx;
|
||||||
|
|
||||||
|
CREATE INDEX crawler_jobs_ready_idx
|
||||||
|
ON crawler_jobs (scheduled_at)
|
||||||
|
WHERE state = 'pending';
|
||||||
@@ -1,11 +1,7 @@
|
|||||||
FROM node:22-alpine AS builder
|
FROM node:22-alpine AS builder
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY package.json package-lock.json* ./
|
COPY package.json package-lock.json* ./
|
||||||
# `npm ci` installs the locked versions exactly; `npm install` would
|
RUN npm install
|
||||||
# silently rewrite package-lock.json mid-build. CI (.gitea/workflows)
|
|
||||||
# also uses `npm ci`, so this keeps the image build deterministic and
|
|
||||||
# matches what the test job validated.
|
|
||||||
RUN npm ci
|
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
@@ -14,20 +10,8 @@ WORKDIR /app
|
|||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
ENV HOST=0.0.0.0
|
ENV HOST=0.0.0.0
|
||||||
ENV PORT=3000
|
ENV PORT=3000
|
||||||
|
COPY --from=builder /app/build ./build
|
||||||
# node:22-alpine ships a `node` user (UID 1000); use it instead of
|
COPY --from=builder /app/node_modules ./node_modules
|
||||||
# running the SvelteKit server as root.
|
COPY --from=builder /app/package.json ./
|
||||||
COPY --from=builder --chown=node:node /app/build ./build
|
|
||||||
COPY --from=builder --chown=node:node /app/node_modules ./node_modules
|
|
||||||
COPY --from=builder --chown=node:node /app/package.json ./
|
|
||||||
|
|
||||||
USER node
|
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
# Alpine's busybox `wget` is the canonical lightweight HTTP probe.
|
|
||||||
# `--spider` doesn't follow redirects; `node build` serves a 200 on
|
|
||||||
# `/` for the homepage so this works without a dedicated /health.
|
|
||||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
|
||||||
CMD wget -q --spider http://localhost:3000/ || exit 1
|
|
||||||
|
|
||||||
CMD ["node", "build"]
|
CMD ["node", "build"]
|
||||||
|
|||||||
Reference in New Issue
Block a user