feat: CRAWLER_KEEP_BROWSER_OPEN waits for Ctrl+C in headed mode (0.26.0)
Debug aid: when `CRAWLER_KEEP_BROWSER_OPEN` is set in headed mode, the crawler blocks on Ctrl+C at every shutdown point (the early authentication-failure exits as well as normal completion) instead of closing the browser immediately. The operator can then inspect the DOM, cookies, and network state in the visible Chromium window before exit. The setting is ignored in headless mode (there is no window to inspect); if it is set under headless, a warning is logged so the operator doesn't sit waiting. chromiumoxide's `Browser` is `kill_on_drop`, so the close-or-wait helper must await Ctrl+C *before* the browser `Handle` is dropped — otherwise the Chromium child gets killed out from under the operator. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1448,7 +1448,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.25.0"
|
version = "0.26.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"argon2",
|
"argon2",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.25.0"
|
version = "0.26.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
default-run = "mangalord"
|
default-run = "mangalord"
|
||||||
|
|
||||||
|
|||||||
@@ -48,6 +48,11 @@
|
|||||||
//! Chromium (`--proxy-server`) and `reqwest::Proxy::all`. Supports
|
//! Chromium (`--proxy-server`) and `reqwest::Proxy::all`. Supports
|
||||||
//! `http://`, `https://`, and `socks5://` (with optional user:pass).
|
//! `http://`, `https://`, and `socks5://` (with optional user:pass).
|
||||||
//! Example: `socks5://user:pass@host:1080`. Unset → direct.
|
//! Example: `socks5://user:pass@host:1080`. Unset → direct.
|
||||||
|
//! - **Keep browser open**: `CRAWLER_KEEP_BROWSER_OPEN=1` — when
|
||||||
|
//! running headed, block on Ctrl+C at every shutdown point so the
|
||||||
|
//! operator can inspect DOM state, cookies, or network calls in the
|
||||||
|
//! visible Chromium window before exit. Ignored in headless mode
|
||||||
|
//! (no window to inspect).
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -110,6 +115,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let proxy_url = std::env::var("CRAWLER_PROXY")
|
let proxy_url = std::env::var("CRAWLER_PROXY")
|
||||||
.ok()
|
.ok()
|
||||||
.filter(|s| !s.trim().is_empty());
|
.filter(|s| !s.trim().is_empty());
|
||||||
|
let keep_browser_open = env_bool("CRAWLER_KEEP_BROWSER_OPEN", false);
|
||||||
|
|
||||||
let db = PgPoolOptions::new()
|
let db = PgPoolOptions::new()
|
||||||
.max_connections(5)
|
.max_connections(5)
|
||||||
@@ -149,6 +155,19 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
if let Some(proxy) = &proxy_url {
|
if let Some(proxy) = &proxy_url {
|
||||||
options.extra_args.push(format!("--proxy-server={proxy}"));
|
options.extra_args.push(format!("--proxy-server={proxy}"));
|
||||||
}
|
}
|
||||||
|
// Keep-open is a debug aid; only meaningful when there's a window
|
||||||
|
// to inspect. Warn loudly if the operator set it under headless so
|
||||||
|
// they don't sit waiting for a Ctrl+C that won't show anything.
|
||||||
|
let keep_open = match (keep_browser_open, options.mode) {
|
||||||
|
(true, browser::BrowserMode::Headed) => true,
|
||||||
|
(true, browser::BrowserMode::Headless) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"CRAWLER_KEEP_BROWSER_OPEN ignored in headless mode (no window to inspect)"
|
||||||
|
);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
?options,
|
?options,
|
||||||
%start_url,
|
%start_url,
|
||||||
@@ -164,6 +183,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
cookie_domain = ?cookie_domain,
|
cookie_domain = ?cookie_domain,
|
||||||
user_agent = ?user_agent,
|
user_agent = ?user_agent,
|
||||||
proxy = ?proxy_url,
|
proxy = ?proxy_url,
|
||||||
|
keep_open,
|
||||||
storage_dir = %storage_dir.display(),
|
storage_dir = %storage_dir.display(),
|
||||||
"starting crawler"
|
"starting crawler"
|
||||||
);
|
);
|
||||||
@@ -177,13 +197,13 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// instead of 30 min into a backfill.
|
// instead of 30 min into a backfill.
|
||||||
let session_ready = if let (Some(sid), Some(domain)) = (&phpsessid, &cookie_domain) {
|
let session_ready = if let (Some(sid), Some(domain)) = (&phpsessid, &cookie_domain) {
|
||||||
if let Err(e) = session::inject_phpsessid(handle.browser(), sid, domain).await {
|
if let Err(e) = session::inject_phpsessid(handle.browser(), sid, domain).await {
|
||||||
handle.close().await.ok();
|
close_or_wait(handle, keep_open).await;
|
||||||
return Err(e);
|
return Err(e);
|
||||||
}
|
}
|
||||||
match session::verify_session(handle.browser(), &start_url).await {
|
match session::verify_session(handle.browser(), &start_url).await {
|
||||||
Ok(()) => true,
|
Ok(()) => true,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
handle.close().await.ok();
|
close_or_wait(handle, keep_open).await;
|
||||||
return Err(e);
|
return Err(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -208,10 +228,27 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
force_refetch_chapters,
|
force_refetch_chapters,
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
handle.close().await.ok();
|
close_or_wait(handle, keep_open).await;
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Either close the browser immediately or wait for Ctrl+C first.
|
||||||
|
/// `keep_open=true` is only ever passed when the browser is headed, so
|
||||||
|
/// the operator has a real window to poke at. Browser is dropped at
|
||||||
|
/// the end of this fn in both cases — chromiumoxide's `Browser` is
|
||||||
|
/// `kill_on_drop`, so we must wait for the Ctrl+C *before* the drop
|
||||||
|
/// or the Chromium child gets killed out from under the operator.
|
||||||
|
async fn close_or_wait(handle: browser::Handle, keep_open: bool) {
|
||||||
|
if keep_open {
|
||||||
|
tracing::info!(
|
||||||
|
"crawler finished; browser kept open. Press Ctrl+C to close and exit."
|
||||||
|
);
|
||||||
|
let _ = tokio::signal::ctrl_c().await;
|
||||||
|
tracing::info!("Ctrl+C received; closing browser");
|
||||||
|
}
|
||||||
|
let _ = handle.close().await;
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
async fn run(
|
async fn run(
|
||||||
browser: &chromiumoxide::Browser,
|
browser: &chromiumoxide::Browser,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "mangalord-frontend",
|
"name": "mangalord-frontend",
|
||||||
"version": "0.25.0",
|
"version": "0.26.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
Reference in New Issue
Block a user