From 1eebb90e25d9a0948929179d254b75f9877d71ae Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sun, 31 May 2026 14:47:36 +0200 Subject: [PATCH] fix(crawler): unhang shutdown on lingering Arc, silence WS noise (0.43.1) - Handle::close aborts its chromiumoxide driver task when another Arc outlives the call, so shutdown returns instead of hanging on a stream that never terminates. Generic close_or_abort helper with regression tests covering both Arc paths. - daemon.shutdown() is wrapped in a 5s timeout in main as defense in depth. - Default RUST_LOG silences chromiumoxide::conn / chromiumoxide::handler WS-deserialize ERROR spam. Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 2 +- backend/Cargo.lock | 2 +- backend/Cargo.toml | 2 +- backend/src/crawler/browser.rs | 107 +++++++++++++++++++++++++++------ backend/src/main.rs | 24 +++++++- frontend/package.json | 2 +- 6 files changed, 114 insertions(+), 25 deletions(-) diff --git a/.env.example b/.env.example index 6d0ca9e..b7da316 100644 --- a/.env.example +++ b/.env.example @@ -24,7 +24,7 @@ POSTGRES_DB=mangalord DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord BIND_ADDRESS=0.0.0.0:8080 STORAGE_DIR=/var/lib/mangalord/storage -RUST_LOG=info,mangalord=debug +RUST_LOG=info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off # ----- Auth / cookies ----- # COOKIE_SECURE controls whether the `Secure` flag is set on the session diff --git a/backend/Cargo.lock b/backend/Cargo.lock index b8174fb..caf45c0 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "mangalord" -version = "0.43.0" +version = "0.43.1" dependencies = [ "anyhow", "argon2", diff --git a/backend/Cargo.toml b/backend/Cargo.toml index ed3f95f..152dd80 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mangalord" -version = "0.43.0" +version = "0.43.1" edition = "2021" default-run = "mangalord" diff --git a/backend/src/crawler/browser.rs b/backend/src/crawler/browser.rs index 8974bc5..08ea598 100644 --- a/backend/src/crawler/browser.rs +++ b/backend/src/crawler/browser.rs @@ -121,29 +121,50 @@ impl Handle { } /// Closes the browser and awaits the driver task. If other Arcs to - /// the browser are still alive we fall back to drop-kills-Chromium - /// semantics and just join the driver — this is the rare case where - /// shutdown raced an outstanding worker; the OS-level kill is the - /// safety net. + /// the browser are still alive we can't issue a clean CDP `close`, + /// so we abort the driver task instead — otherwise `handler.next()` + /// keeps polling forever and `Handle::close` hangs (chromiumoxide's + /// handler stream doesn't end on its own when the underlying WS + /// dies). Chromium itself is reaped by kill-on-drop once the last + /// `Arc` is dropped. pub async fn close(self) -> anyhow::Result<()> { - match Arc::try_unwrap(self.browser) { - Ok(mut owned) => { - let _ = owned.close().await; - let _ = owned.wait().await; - } - Err(shared) => { - tracing::warn!( - strong_count = Arc::strong_count(&shared), - "Handle::close while Arc still shared — relying on kill-on-drop" - ); - drop(shared); - } - } - let _ = self.driver.await; + close_or_abort(self.browser, self.driver, |mut owned| async move { + let _ = owned.close().await; + let _ = owned.wait().await; + }) + .await; Ok(()) } } +/// Shutdown core for [`Handle::close`], extracted so it can be unit- +/// tested without launching real Chromium. When `arc` is uniquely owned, +/// `on_owned` runs against the owned value and the driver is awaited +/// normally. When other Arc holders exist, the driver is aborted before +/// awaiting it so shutdown returns promptly. +async fn close_or_abort(arc: Arc, driver: JoinHandle<()>, on_owned: F) +where + T: Send + 'static, + F: FnOnce(T) -> Fut + Send, + Fut: std::future::Future + Send, +{ + match Arc::try_unwrap(arc) { + Ok(owned) => { + on_owned(owned).await; + let _ = driver.await; + } + Err(shared) => { + tracing::warn!( + strong_count = Arc::strong_count(&shared), + "Handle::close while Arc still shared — aborting driver, relying on kill-on-drop" + ); + drop(shared); + driver.abort(); + let _ = driver.await; + } + } +} + /// Launches Chromium. Downloads it on first run via the `fetcher` /// feature; subsequent runs hit the cache. The cache dir is /// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`, @@ -261,4 +282,54 @@ mod tests { assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless); assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed); } + + // Regression: if another Arc outlives `Handle::close`, the + // old code awaited the driver task forever because the chromiumoxide + // handler stream doesn't return None on its own. Aborting the driver + // unblocks shutdown even when kill-on-drop can't fire yet. + #[tokio::test] + async fn close_or_abort_returns_when_arc_is_shared() { + use std::sync::atomic::{AtomicBool, Ordering}; + use std::time::Duration; + + let arc = Arc::new(()); + let _keepalive = Arc::clone(&arc); // forces try_unwrap to fail + let driver = tokio::spawn(std::future::pending::<()>()); + let on_owned_ran = Arc::new(AtomicBool::new(false)); + + let flag = Arc::clone(&on_owned_ran); + let fut = close_or_abort(arc, driver, move |_| { + let flag = Arc::clone(&flag); + async move { flag.store(true, Ordering::Release) } + }); + + tokio::time::timeout(Duration::from_secs(2), fut) + .await + .expect("close_or_abort must not hang when driver is pending and Arc is shared"); + assert!( + !on_owned_ran.load(Ordering::Acquire), + "on_owned must not run when the Arc is still shared" + ); + } + + #[tokio::test] + async fn close_or_abort_runs_on_owned_when_arc_is_unique() { + use std::sync::atomic::{AtomicBool, Ordering}; + + let arc = Arc::new(()); + let driver = tokio::spawn(async {}); // completes immediately + let on_owned_ran = Arc::new(AtomicBool::new(false)); + + let flag = Arc::clone(&on_owned_ran); + close_or_abort(arc, driver, move |_| { + let flag = Arc::clone(&flag); + async move { flag.store(true, Ordering::Release) } + }) + .await; + + assert!( + on_owned_ran.load(Ordering::Acquire), + "on_owned must run when the Arc is unique" + ); + } } diff --git a/backend/src/main.rs b/backend/src/main.rs index b760157..19464dd 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -1,12 +1,21 @@ use std::net::SocketAddr; +use std::time::Duration; use tracing_subscriber::EnvFilter; +/// Upper bound on how long we're willing to wait for the crawler daemon +/// to drain before letting `main` return. Without it a wedged background +/// task (e.g. a chromiumoxide handler stuck on a dead WS) blocks the +/// process from exiting after Ctrl-C / SIGTERM. +const CRAWLER_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); + #[tokio::main] async fn main() -> anyhow::Result<()> { dotenvy::dotenv().ok(); tracing_subscriber::fmt() .with_env_filter( - EnvFilter::try_from_default_env().unwrap_or_else(|_| "info,mangalord=debug".into()), + EnvFilter::try_from_default_env().unwrap_or_else(|_| { + "info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off".into() + }), ) .init(); @@ -21,9 +30,18 @@ async fn main() -> anyhow::Result<()> { .await?; // Drain background tasks (crawler daemon) before exiting so Chromium - // gets a clean shutdown rather than relying on kill-on-drop. + // gets a clean shutdown rather than relying on kill-on-drop. Bounded + // by a timeout so a wedged shutdown path can't trap the process. if let Some(d) = daemon { - d.shutdown().await; + if tokio::time::timeout(CRAWLER_SHUTDOWN_TIMEOUT, d.shutdown()) + .await + .is_err() + { + tracing::warn!( + timeout_s = CRAWLER_SHUTDOWN_TIMEOUT.as_secs(), + "crawler daemon shutdown exceeded timeout; abandoning" + ); + } } Ok(()) } diff --git a/frontend/package.json b/frontend/package.json index 5db3b50..9019161 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "mangalord-frontend", - "version": "0.43.0", + "version": "0.43.1", "private": true, "type": "module", "scripts": {