feat: CRAWLER_KEEP_BROWSER_OPEN waits for Ctrl+C in headed mode (0.26.0)
Debug aid: when set in headed mode, the crawler blocks on Ctrl+C at every shutdown point (early auth bails + normal completion) instead of closing the browser immediately. Operator can inspect DOM, cookies, and network state in the visible Chromium window before exit. Ignored in headless (no window to inspect) — logged as a warning if set under headless so the operator doesn't sit waiting. chromiumoxide's `Browser` is `kill_on_drop`, so the close-or-wait helper must await Ctrl+C *before* the Handle is dropped — otherwise the Chromium child gets killed out from under the operator. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1448,7 +1448,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mangalord"
|
||||
version = "0.25.0"
|
||||
version = "0.26.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.25.0"
|
||||
version = "0.26.0"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
|
||||
@@ -48,6 +48,11 @@
|
||||
//! Chromium (`--proxy-server`) and `reqwest::Proxy::all`. Supports
|
||||
//! `http://`, `https://`, and `socks5://` (with optional user:pass).
|
||||
//! Example: `socks5://user:pass@host:1080`. Unset → direct.
|
||||
//! - **Keep browser open**: `CRAWLER_KEEP_BROWSER_OPEN=1` — when
|
||||
//! running headed, block on Ctrl+C at every shutdown point so the
|
||||
//! operator can inspect DOM state, cookies, or network calls in the
|
||||
//! visible Chromium window before exit. Ignored in headless mode
|
||||
//! (no window to inspect).
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
@@ -110,6 +115,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
let proxy_url = std::env::var("CRAWLER_PROXY")
|
||||
.ok()
|
||||
.filter(|s| !s.trim().is_empty());
|
||||
let keep_browser_open = env_bool("CRAWLER_KEEP_BROWSER_OPEN", false);
|
||||
|
||||
let db = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
@@ -149,6 +155,19 @@ async fn main() -> anyhow::Result<()> {
|
||||
if let Some(proxy) = &proxy_url {
|
||||
options.extra_args.push(format!("--proxy-server={proxy}"));
|
||||
}
|
||||
// Keep-open is a debug aid; only meaningful when there's a window
|
||||
// to inspect. Warn loudly if the operator set it under headless so
|
||||
// they don't sit waiting for a Ctrl+C that won't show anything.
|
||||
let keep_open = match (keep_browser_open, options.mode) {
|
||||
(true, browser::BrowserMode::Headed) => true,
|
||||
(true, browser::BrowserMode::Headless) => {
|
||||
tracing::warn!(
|
||||
"CRAWLER_KEEP_BROWSER_OPEN ignored in headless mode (no window to inspect)"
|
||||
);
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
tracing::info!(
|
||||
?options,
|
||||
%start_url,
|
||||
@@ -164,6 +183,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
cookie_domain = ?cookie_domain,
|
||||
user_agent = ?user_agent,
|
||||
proxy = ?proxy_url,
|
||||
keep_open,
|
||||
storage_dir = %storage_dir.display(),
|
||||
"starting crawler"
|
||||
);
|
||||
@@ -177,13 +197,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
// instead of 30 min into a backfill.
|
||||
let session_ready = if let (Some(sid), Some(domain)) = (&phpsessid, &cookie_domain) {
|
||||
if let Err(e) = session::inject_phpsessid(handle.browser(), sid, domain).await {
|
||||
handle.close().await.ok();
|
||||
close_or_wait(handle, keep_open).await;
|
||||
return Err(e);
|
||||
}
|
||||
match session::verify_session(handle.browser(), &start_url).await {
|
||||
Ok(()) => true,
|
||||
Err(e) => {
|
||||
handle.close().await.ok();
|
||||
close_or_wait(handle, keep_open).await;
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
@@ -208,10 +228,27 @@ async fn main() -> anyhow::Result<()> {
|
||||
force_refetch_chapters,
|
||||
)
|
||||
.await;
|
||||
handle.close().await.ok();
|
||||
close_or_wait(handle, keep_open).await;
|
||||
result
|
||||
}
|
||||
|
||||
/// Either close the browser immediately or wait for Ctrl+C first.
|
||||
/// `keep_open=true` is only ever passed when the browser is headed, so
|
||||
/// the operator has a real window to poke at. Browser is dropped at
|
||||
/// the end of this fn in both cases — chromiumoxide's `Browser` is
|
||||
/// `kill_on_drop`, so we must wait for the Ctrl+C *before* the drop
|
||||
/// or the Chromium child gets killed out from under the operator.
|
||||
async fn close_or_wait(handle: browser::Handle, keep_open: bool) {
|
||||
if keep_open {
|
||||
tracing::info!(
|
||||
"crawler finished; browser kept open. Press Ctrl+C to close and exit."
|
||||
);
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
tracing::info!("Ctrl+C received; closing browser");
|
||||
}
|
||||
let _ = handle.close().await;
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn run(
|
||||
browser: &chromiumoxide::Browser,
|
||||
|
||||
Reference in New Issue
Block a user