Compare commits

...

3 Commits

Author SHA1 Message Date
074ab25f8c ci(test-backend): run on ubuntu-latest + rustup instead of rust:1-slim
All checks were successful
deploy / test-backend (pull_request) Successful in 18m36s
deploy / test-frontend (pull_request) Successful in 9m42s
deploy / build-and-push (pull_request) Has been skipped
deploy / deploy (pull_request) Has been skipped
act_runner runs JS actions (checkout/cache) with node inside the job
container; rust:1-slim ships no node, so every JS action failed with
exit 127 ("node: not found"). Drop the container, run on the
node-equipped ubuntu-latest image, install Rust via rustup. The postgres
service is still reachable by name (act_runner containerises the job).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 18:18:19 +02:00
6b49a47d0a feat(crawler): system Chromium via CRAWLER_CHROMIUM_BINARY (0.45.0) (#2)
Some checks failed
deploy / test-backend (push) Failing after 7s
deploy / test-frontend (push) Failing after 33s
deploy / build-and-push (push) Has been skipped
deploy / deploy (push) Has been skipped
2026-05-31 15:47:47 +00:00
e851355f28 Merge pull request 'ci: no-SSH local deploy + Dockerfile build fixes' (#1) from fix/ci-deploy-pipeline into main
Some checks failed
deploy / test-backend (push) Failing after 7s
deploy / test-frontend (push) Failing after 30s
deploy / build-and-push (push) Has been skipped
deploy / deploy (push) Has been skipped
2026-05-31 15:43:54 +00:00
9 changed files with 133 additions and 32 deletions

View File

@@ -74,6 +74,14 @@ CRAWLER_DOWNLOAD_ALLOWLIST=
CRAWLER_ALLOW_ANY_HOST=false CRAWLER_ALLOW_ANY_HOST=false
# Hard cap on a single image body. Default 32 MiB. # Hard cap on a single image body. Default 32 MiB.
CRAWLER_MAX_IMAGE_BYTES=33554432 CRAWLER_MAX_IMAGE_BYTES=33554432
# Path to a system Chromium binary. When set, the crawler skips the
# bundled-fetcher download. Required on platforms without a usable
# upstream Chromium build (notably Linux_arm64 / Raspberry Pi). On
# Debian: /usr/bin/chromium-headless-shell or /usr/bin/chromium. On
# Ubuntu the package is chromium-browser (different path). Pair with
# `docker compose build --build-arg INSTALL_CHROMIUM=true backend` so
# the image actually contains the binary.
CRAWLER_CHROMIUM_BINARY=
# ----- Frontend ----- # ----- Frontend -----
# The frontend container runs SvelteKit's Node adapter on :3000 and # The frontend container runs SvelteKit's Node adapter on :3000 and

View File

@@ -10,8 +10,6 @@ on:
jobs: jobs:
test-backend: test-backend:
runs-on: ubuntu-latest runs-on: ubuntu-latest
container:
image: rust:1-slim
services: services:
postgres: postgres:
image: postgres:16-alpine image: postgres:16-alpine
@@ -28,10 +26,18 @@ jobs:
DATABASE_URL: postgres://mangalord:mangalord@postgres:5432/mangalord DATABASE_URL: postgres://mangalord:mangalord@postgres:5432/mangalord
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Install build deps # ubuntu-latest has node (so JS actions like checkout/cache run) but no
# Rust. We intentionally avoid `container: rust:1-slim` because act_runner
# runs JS actions with node *inside* the job container, and the slim Rust
# image ships no node (checkout would fail with exit 127).
- name: Install Rust + build deps
run: | run: |
apt-get update set -eu
apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates SUDO=""; [ "$(id -u)" = "0" ] || SUDO="sudo"
$SUDO apt-get update
$SUDO apt-get install -y --no-install-recommends pkg-config libssl-dev ca-certificates curl
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Cache cargo registry and target - name: Cache cargo registry and target
uses: actions/cache@v4 uses: actions/cache@v4
with: with:

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "mangalord" name = "mangalord"
version = "0.44.0" version = "0.45.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"argon2", "argon2",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "mangalord" name = "mangalord"
version = "0.44.0" version = "0.45.0"
edition = "2021" edition = "2021"
default-run = "mangalord" default-run = "mangalord"

View File

@@ -25,8 +25,23 @@ FROM debian:trixie-slim
# binary ("GLIBC_2.39 not found"). Keep these two in lockstep on bumps. # binary ("GLIBC_2.39 not found"). Keep these two in lockstep on bumps.
# `curl` is for the container HEALTHCHECK; `ca-certificates` is for # `curl` is for the container HEALTHCHECK; `ca-certificates` is for
# outbound HTTPS (crawler covers/pages). # outbound HTTPS (crawler covers/pages).
#
# INSTALL_CHROMIUM is an opt-in for deployments that can't use the
# chromiumoxide fetcher path (notably Linux_arm64 / Raspberry Pi, where
# the upstream snapshot bucket has no usable build). When `true`, adds
# Debian's apt-packaged headless chromium plus a baseline font set —
# pair with `CRAWLER_CHROMIUM_BINARY=/usr/bin/chromium-headless-shell`
# at runtime so the launcher uses it. Default `false` keeps cloud/x86
# images slim.
#
# Build the Pi image with:
# docker compose build --build-arg INSTALL_CHROMIUM=true backend
ARG INSTALL_CHROMIUM=false
RUN apt-get update \ RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates curl \ && apt-get install -y --no-install-recommends ca-certificates curl \
&& if [ "$INSTALL_CHROMIUM" = "true" ]; then \
apt-get install -y --no-install-recommends chromium-headless-shell fonts-liberation; \
fi \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Non-root runtime user. The API binary doesn't need any root # Non-root runtime user. The API binary doesn't need any root

View File

@@ -1,10 +1,17 @@
//! Chromium launcher and lifecycle. //! Chromium launcher and lifecycle.
//! //!
//! Uses `chromiumoxide`'s `fetcher` feature so we don't depend on a //! By default uses `chromiumoxide`'s `fetcher` feature — first call
//! system Chrome install — first call downloads a known-good revision //! downloads a known-good revision into a cache dir and reuses it
//! into a cache dir and reuses it forever after. `BrowserMode` toggles //! forever after. Set `CRAWLER_CHROMIUM_BINARY` to skip the fetcher
//! headed vs headless; the headed path needs a display (real `$DISPLAY` //! and use a system-installed Chromium instead; required on platforms
//! or `xvfb-run`). //! where the upstream snapshot bucket has no usable build (notably
//! `Linux_arm64` / Raspberry Pi). Debian's package is at
//! `/usr/bin/chromium` or `/usr/bin/chromium-headless-shell`; Ubuntu
//! ships it as `chromium-browser` at a different path — don't paste
//! the wrong one.
//!
//! `BrowserMode` toggles headed vs headless; the headed path needs a
//! display (real `$DISPLAY` or `xvfb-run`).
//! //!
//! Extra Chromium command-line flags can be supplied through //! Extra Chromium command-line flags can be supplied through
//! [`LaunchOptions::extra_args`] in code, or via the //! [`LaunchOptions::extra_args`] in code, or via the
@@ -165,31 +172,41 @@ where
} }
} }
/// Launches Chromium. Downloads it on first run via the `fetcher` /// Launches Chromium. If `CRAWLER_CHROMIUM_BINARY` is set, uses that
/// feature; subsequent runs hit the cache. The cache dir is /// path directly. Otherwise downloads via the `fetcher` feature on
/// first run and hits the cache after that. The fetcher cache dir is
/// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`, /// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`,
/// else `./.chromium-cache` as a last-resort repo-local fallback. /// else `./.chromium-cache` as a last-resort repo-local fallback.
pub async fn launch(options: LaunchOptions) -> anyhow::Result<Handle> { pub async fn launch(options: LaunchOptions) -> anyhow::Result<Handle> {
let cache = cache_dir()?; let executable = match system_chromium_path_from_env() {
tokio::fs::create_dir_all(&cache) Some(path) => {
.await tracing::info!(path = %path.display(), "using system chromium (CRAWLER_CHROMIUM_BINARY)");
.with_context(|| format!("create cache dir {}", cache.display()))?; path
}
None => {
let cache = cache_dir()?;
tokio::fs::create_dir_all(&cache)
.await
.with_context(|| format!("create cache dir {}", cache.display()))?;
let fetcher = BrowserFetcher::new( let fetcher = BrowserFetcher::new(
BrowserFetcherOptions::builder() BrowserFetcherOptions::builder()
.with_path(&cache) .with_path(&cache)
.build() .build()
.map_err(|e| anyhow::anyhow!("fetcher options: {e}"))?, .map_err(|e| anyhow::anyhow!("fetcher options: {e}"))?,
); );
tracing::info!(path = %cache.display(), "ensuring chromium revision is present"); tracing::info!(path = %cache.display(), "ensuring chromium revision is present");
let info = fetcher let info = fetcher
.fetch() .fetch()
.await .await
.context("download chromium via fetcher")?; .context("download chromium via fetcher")?;
tracing::info!(executable = %info.executable_path.display(), "chromium ready"); tracing::info!(executable = %info.executable_path.display(), "chromium ready");
info.executable_path
}
};
let mut builder = BrowserConfig::builder() let mut builder = BrowserConfig::builder()
.chrome_executable(info.executable_path) .chrome_executable(executable)
// Linux containers / CI commonly lack the user namespaces // Linux containers / CI commonly lack the user namespaces
// Chromium's sandbox wants. Disable it; the crawler runs in its // Chromium's sandbox wants. Disable it; the crawler runs in its
// own container anyway. // own container anyway.
@@ -246,6 +263,24 @@ fn cache_dir() -> anyhow::Result<PathBuf> {
Ok(PathBuf::from("./.chromium-cache")) Ok(PathBuf::from("./.chromium-cache"))
} }
/// Looks up `CRAWLER_CHROMIUM_BINARY` in the process environment and
/// forwards the raw value to [`system_chromium_path_from_value`].
///
/// The environment read lives in this thin wrapper so the decision
/// logic itself can be unit-tested without mutating the process
/// environment.
fn system_chromium_path_from_env() -> Option<PathBuf> {
    let raw = std::env::var_os("CRAWLER_CHROMIUM_BINARY");
    system_chromium_path_from_value(raw.as_deref())
}
/// Converts the raw `CRAWLER_CHROMIUM_BINARY` value into a path.
///
/// Yields `Some(path)` only for a value that is both present and
/// non-empty. An exported-but-blank variable (common with compose
/// `${VAR:-}` patterns when the operator didn't fill it in) is treated
/// exactly like an unset one, so chromiumoxide is never handed an empty
/// path that would fail launch in a confusing way.
pub(crate) fn system_chromium_path_from_value(
    raw: Option<&std::ffi::OsStr>,
) -> Option<PathBuf> {
    match raw {
        Some(value) if !value.is_empty() => Some(PathBuf::from(value)),
        _ => None,
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -273,6 +308,33 @@ mod tests {
assert!(parse_args(" \t\n").is_empty()); assert!(parse_args(" \t\n").is_empty());
} }
#[test]
fn system_chromium_path_returns_some_when_value_set() {
    // A populated value is passed through unchanged as the binary path.
    let configured = std::ffi::OsStr::new("/usr/bin/chromium-headless-shell");
    let resolved = system_chromium_path_from_value(Some(configured));
    let expected = Some(PathBuf::from("/usr/bin/chromium-headless-shell"));
    assert_eq!(resolved, expected);
}
#[test]
fn system_chromium_path_returns_none_when_unset() {
    // With no value at all the helper reports no system binary.
    let resolved = system_chromium_path_from_value(None);
    assert!(resolved.is_none());
}
#[test]
fn system_chromium_path_treats_empty_as_unset() {
    // Compose's `${VAR:-}` substitution yields an exported-but-empty env
    // var when the operator left it blank. That must count as unset so
    // the launcher falls back to the fetcher path rather than handing
    // chromiumoxide an empty path.
    let blank = std::ffi::OsStr::new("");
    let resolved = system_chromium_path_from_value(Some(blank));
    assert!(resolved.is_none());
}
#[test] #[test]
fn default_launch_options_are_headless() { fn default_launch_options_are_headless() {
// Headless is the production-safe default — no display required, // Headless is the production-safe default — no display required,

View File

@@ -10,6 +10,11 @@
//! //!
//! Override the cache location with `CRAWLER_CHROMIUM_DIR=/some/path` if //! Override the cache location with `CRAWLER_CHROMIUM_DIR=/some/path` if
//! `$HOME/.cache/mangalord/chromium` isn't writable. //! `$HOME/.cache/mangalord/chromium` isn't writable.
//!
//! Set `CRAWLER_CHROMIUM_BINARY=/usr/bin/chromium-headless-shell` (or
//! another system chromium path) to exercise the system-chromium
//! launch path instead of the fetcher download — this is the path the
//! Raspberry Pi deployment takes.
use mangalord::crawler::browser::{self, LaunchOptions}; use mangalord::crawler::browser::{self, LaunchOptions};

View File

@@ -39,6 +39,11 @@ services:
# Upload limits. # Upload limits.
MAX_REQUEST_BYTES: ${MAX_REQUEST_BYTES:-209715200} MAX_REQUEST_BYTES: ${MAX_REQUEST_BYTES:-209715200}
MAX_FILE_BYTES: ${MAX_FILE_BYTES:-20971520} MAX_FILE_BYTES: ${MAX_FILE_BYTES:-20971520}
# System-chromium override for the crawler. Leave blank to use the
# bundled fetcher; set to e.g. /usr/bin/chromium-headless-shell on
# arm64 deployments. Pair with `--build-arg INSTALL_CHROMIUM=true`
# so the image actually contains the binary.
CRAWLER_CHROMIUM_BINARY: ${CRAWLER_CHROMIUM_BINARY:-}
volumes: volumes:
- storage-data:/var/lib/mangalord/storage - storage-data:/var/lib/mangalord/storage
# No host port mapping in the default setup — the frontend proxies # No host port mapping in the default setup — the frontend proxies

View File

@@ -1,6 +1,6 @@
{ {
"name": "mangalord-frontend", "name": "mangalord-frontend",
"version": "0.44.0", "version": "0.45.0",
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {