feat: default crawler browser to headless (0.32.0)

LaunchOptions::from_env() and LaunchOptions::default() now return
BrowserMode::Headless. The in-process daemon (via CrawlerConfig::from_env)
and the standalone crawler binary both pick this up — no display
required for production runs, smaller resource footprint.

`Headed` stays as an explicit opt-in via CRAWLER_BROWSER_MODE=headed
for debugging or sites that fingerprint headless Chrome. A new unit
test locks the `LaunchOptions::default()` mode in place.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-27 20:27:05 +02:00
parent fa0a7da311
commit 51f42b03e9
4 changed files with 22 additions and 12 deletions

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "mangalord"
version = "0.31.0"
version = "0.32.0"
dependencies = [
"anyhow",
"argon2",

View File

@@ -1,6 +1,6 @@
[package]
name = "mangalord"
version = "0.31.0"
version = "0.32.0"
edition = "2021"
default-run = "mangalord"

View File

@@ -27,12 +27,12 @@ use tokio::task::JoinHandle;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BrowserMode {
/// Real window. Needs `$DISPLAY` (or `xvfb-run` wrapping the
/// binary). This is the default the old Puppeteer crawler used and
/// the assumed mode for the target site until we prove headless
/// works against it.
/// binary). Opt-in via `CRAWLER_BROWSER_MODE=headed` — useful for
/// debugging a flow visually or for sites that fingerprint
/// headless Chrome. Not used in production.
Headed,
/// No window. Faster, lower resource use, but more likely to trip
/// fingerprinting on hostile sites.
/// No window. Faster, lower resource use, runs without a display.
/// This is the default for both `from_env()` and `Default`.
Headless,
}
@@ -65,13 +65,13 @@ impl LaunchOptions {
}
/// Reads `CRAWLER_BROWSER_MODE` (`headless`|`headed`, default
/// `headed`) and `CRAWLER_BROWSER_ARGS` (whitespace-separated
/// `headless`) and `CRAWLER_BROWSER_ARGS` (whitespace-separated
/// Chromium flags). Flags containing whitespace aren't supported
/// through the env var — use the programmatic API for those.
pub fn from_env() -> Self {
let mode = match std::env::var("CRAWLER_BROWSER_MODE").as_deref() {
Ok("headless") => BrowserMode::Headless,
_ => BrowserMode::Headed,
Ok("headed") => BrowserMode::Headed,
_ => BrowserMode::Headless,
};
let extra_args = std::env::var("CRAWLER_BROWSER_ARGS")
.map(|s| parse_args(&s))
@@ -82,7 +82,7 @@ impl LaunchOptions {
impl Default for LaunchOptions {
fn default() -> Self {
Self::headed()
Self::headless()
}
}
@@ -251,4 +251,14 @@ mod tests {
assert!(parse_args("").is_empty());
assert!(parse_args(" \t\n").is_empty());
}
#[test]
fn default_launch_options_are_headless() {
// Headless is the production-safe default — no display required,
// smaller resource footprint. `Headed` stays available as an
// opt-in for debugging via CRAWLER_BROWSER_MODE=headed.
assert_eq!(LaunchOptions::default().mode, BrowserMode::Headless);
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
}
}