diff --git a/backend/Cargo.lock b/backend/Cargo.lock index f3e5b80..5979f8d 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -1448,7 +1448,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "mangalord" -version = "0.25.0" +version = "0.26.0" dependencies = [ "anyhow", "argon2", diff --git a/backend/Cargo.toml b/backend/Cargo.toml index b44b7f8..de08c88 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.25.0" +version = "0.26.0" edition = "2021" default-run = "mangalord" diff --git a/backend/src/bin/crawler.rs b/backend/src/bin/crawler.rs index d2510fe..88f95d8 100644 --- a/backend/src/bin/crawler.rs +++ b/backend/src/bin/crawler.rs @@ -48,6 +48,11 @@ //! Chromium (`--proxy-server`) and `reqwest::Proxy::all`. Supports //! `http://`, `https://`, and `socks5://` (with optional user:pass). //! Example: `socks5://user:pass@host:1080`. Unset → direct. +//! - **Keep browser open**: `CRAWLER_KEEP_BROWSER_OPEN=1` — when +//! running headed, block on Ctrl+C at every shutdown point so the +//! operator can inspect DOM state, cookies, or network calls in the +//! visible Chromium window before exit. Ignored in headless mode +//! (no window to inspect). use std::path::PathBuf; use std::sync::Arc; @@ -110,6 +115,7 @@ async fn main() -> anyhow::Result<()> { let proxy_url = std::env::var("CRAWLER_PROXY") .ok() .filter(|s| !s.trim().is_empty()); + let keep_browser_open = env_bool("CRAWLER_KEEP_BROWSER_OPEN", false); let db = PgPoolOptions::new() .max_connections(5) @@ -149,6 +155,19 @@ async fn main() -> anyhow::Result<()> { if let Some(proxy) = &proxy_url { options.extra_args.push(format!("--proxy-server={proxy}")); } + // Keep-open is a debug aid; only meaningful when there's a window + // to inspect. Warn loudly if the operator set it under headless so + // they don't sit waiting for a Ctrl+C that won't show anything. 
+ let keep_open = match (keep_browser_open, options.mode) { + (true, browser::BrowserMode::Headed) => true, + (true, browser::BrowserMode::Headless) => { + tracing::warn!( + "CRAWLER_KEEP_BROWSER_OPEN ignored in headless mode (no window to inspect)" + ); + false + } + _ => false, + }; tracing::info!( ?options, %start_url, @@ -164,6 +183,7 @@ async fn main() -> anyhow::Result<()> { cookie_domain = ?cookie_domain, user_agent = ?user_agent, proxy = ?proxy_url, + keep_open, storage_dir = %storage_dir.display(), "starting crawler" ); @@ -177,13 +197,13 @@ async fn main() -> anyhow::Result<()> { // instead of 30 min into a backfill. let session_ready = if let (Some(sid), Some(domain)) = (&phpsessid, &cookie_domain) { if let Err(e) = session::inject_phpsessid(handle.browser(), sid, domain).await { - handle.close().await.ok(); + close_or_wait(handle, keep_open).await; return Err(e); } match session::verify_session(handle.browser(), &start_url).await { Ok(()) => true, Err(e) => { - handle.close().await.ok(); + close_or_wait(handle, keep_open).await; return Err(e); } } @@ -208,10 +228,27 @@ async fn main() -> anyhow::Result<()> { force_refetch_chapters, ) .await; - handle.close().await.ok(); + close_or_wait(handle, keep_open).await; result } +/// Closes the browser, first blocking on Ctrl+C when `keep_open` is true. `main` only passes +/// `keep_open=true` for a headed browser, so the operator has a real window to poke at. The +/// handle is consumed here in both cases — chromiumoxide's `Browser` is `kill_on_drop`, so the +/// Ctrl+C wait must come *before* the drop or the Chromium child gets killed out from under the +/// operator. Best-effort: the results of `ctrl_c()` and `close()` are discarded. NOTE(review): also +/// runs on session-setup failures, so "crawler finished" is logged before that error is returned. +async fn close_or_wait(handle: browser::Handle, keep_open: bool) { + if keep_open { + tracing::info!( + "crawler finished; browser kept open. Press Ctrl+C to close and exit." 
+ ); + let _ = tokio::signal::ctrl_c().await; + tracing::info!("Ctrl+C received; closing browser"); + } + let _ = handle.close().await; +} + #[allow(clippy::too_many_arguments)] async fn run( browser: &chromiumoxide::Browser, diff --git a/frontend/package.json b/frontend/package.json index 8867d66..ee2158a 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.25.0", + "version": "0.26.0", "private": true, "type": "module", "scripts": {