fix(crawler): unhang shutdown on lingering Arc<Browser>, silence WS noise (0.43.1)
- Handle::close aborts its chromiumoxide driver task when another Arc<Browser> outlives the call, so shutdown returns instead of hanging on a stream that never terminates. Generic close_or_abort helper with regression tests covering both Arc paths. - daemon.shutdown() is wrapped in a 5s timeout in main as defense in depth. - Default RUST_LOG silences chromiumoxide::conn / chromiumoxide::handler WS-deserialize ERROR spam. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mangalord"
|
||||
version = "0.43.0"
|
||||
version = "0.43.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "mangalord"
|
||||
version = "0.43.0"
|
||||
version = "0.43.1"
|
||||
edition = "2021"
|
||||
default-run = "mangalord"
|
||||
|
||||
|
||||
@@ -121,29 +121,50 @@ impl Handle {
|
||||
}
|
||||
|
||||
/// Closes the browser and awaits the driver task. If other Arcs to
|
||||
/// the browser are still alive we fall back to drop-kills-Chromium
|
||||
/// semantics and just join the driver — this is the rare case where
|
||||
/// shutdown raced an outstanding worker; the OS-level kill is the
|
||||
/// safety net.
|
||||
/// the browser are still alive we can't issue a clean CDP `close`,
|
||||
/// so we abort the driver task instead — otherwise `handler.next()`
|
||||
/// keeps polling forever and `Handle::close` hangs (chromiumoxide's
|
||||
/// handler stream doesn't end on its own when the underlying WS
|
||||
/// dies). Chromium itself is reaped by kill-on-drop once the last
|
||||
/// `Arc<Browser>` is dropped.
|
||||
pub async fn close(self) -> anyhow::Result<()> {
|
||||
match Arc::try_unwrap(self.browser) {
|
||||
Ok(mut owned) => {
|
||||
let _ = owned.close().await;
|
||||
let _ = owned.wait().await;
|
||||
}
|
||||
Err(shared) => {
|
||||
tracing::warn!(
|
||||
strong_count = Arc::strong_count(&shared),
|
||||
"Handle::close while Arc<Browser> still shared — relying on kill-on-drop"
|
||||
);
|
||||
drop(shared);
|
||||
}
|
||||
}
|
||||
let _ = self.driver.await;
|
||||
close_or_abort(self.browser, self.driver, |mut owned| async move {
|
||||
let _ = owned.close().await;
|
||||
let _ = owned.wait().await;
|
||||
})
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Shutdown core for [`Handle::close`], extracted so it can be unit-
|
||||
/// tested without launching real Chromium. When `arc` is uniquely owned,
|
||||
/// `on_owned` runs against the owned value and the driver is awaited
|
||||
/// normally. When other Arc holders exist, the driver is aborted before
|
||||
/// awaiting it so shutdown returns promptly.
|
||||
async fn close_or_abort<T, F, Fut>(arc: Arc<T>, driver: JoinHandle<()>, on_owned: F)
|
||||
where
|
||||
T: Send + 'static,
|
||||
F: FnOnce(T) -> Fut + Send,
|
||||
Fut: std::future::Future<Output = ()> + Send,
|
||||
{
|
||||
match Arc::try_unwrap(arc) {
|
||||
Ok(owned) => {
|
||||
on_owned(owned).await;
|
||||
let _ = driver.await;
|
||||
}
|
||||
Err(shared) => {
|
||||
tracing::warn!(
|
||||
strong_count = Arc::strong_count(&shared),
|
||||
"Handle::close while Arc still shared — aborting driver, relying on kill-on-drop"
|
||||
);
|
||||
drop(shared);
|
||||
driver.abort();
|
||||
let _ = driver.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Launches Chromium. Downloads it on first run via the `fetcher`
|
||||
/// feature; subsequent runs hit the cache. The cache dir is
|
||||
/// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`,
|
||||
@@ -261,4 +282,54 @@ mod tests {
|
||||
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
|
||||
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
|
||||
}
|
||||
|
||||
// Regression: if another Arc<Browser> outlives `Handle::close`, the
|
||||
// old code awaited the driver task forever because the chromiumoxide
|
||||
// handler stream doesn't return None on its own. Aborting the driver
|
||||
// unblocks shutdown even when kill-on-drop can't fire yet.
|
||||
#[tokio::test]
|
||||
async fn close_or_abort_returns_when_arc_is_shared() {
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::time::Duration;
|
||||
|
||||
let arc = Arc::new(());
|
||||
let _keepalive = Arc::clone(&arc); // forces try_unwrap to fail
|
||||
let driver = tokio::spawn(std::future::pending::<()>());
|
||||
let on_owned_ran = Arc::new(AtomicBool::new(false));
|
||||
|
||||
let flag = Arc::clone(&on_owned_ran);
|
||||
let fut = close_or_abort(arc, driver, move |_| {
|
||||
let flag = Arc::clone(&flag);
|
||||
async move { flag.store(true, Ordering::Release) }
|
||||
});
|
||||
|
||||
tokio::time::timeout(Duration::from_secs(2), fut)
|
||||
.await
|
||||
.expect("close_or_abort must not hang when driver is pending and Arc is shared");
|
||||
assert!(
|
||||
!on_owned_ran.load(Ordering::Acquire),
|
||||
"on_owned must not run when the Arc is still shared"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn close_or_abort_runs_on_owned_when_arc_is_unique() {
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
let arc = Arc::new(());
|
||||
let driver = tokio::spawn(async {}); // completes immediately
|
||||
let on_owned_ran = Arc::new(AtomicBool::new(false));
|
||||
|
||||
let flag = Arc::clone(&on_owned_ran);
|
||||
close_or_abort(arc, driver, move |_| {
|
||||
let flag = Arc::clone(&flag);
|
||||
async move { flag.store(true, Ordering::Release) }
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
on_owned_ran.load(Ordering::Acquire),
|
||||
"on_owned must run when the Arc is unique"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::time::Duration;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
/// Upper bound on how long we're willing to wait for the crawler daemon
|
||||
/// to drain before letting `main` return. Without it a wedged background
|
||||
/// task (e.g. a chromiumoxide handler stuck on a dead WS) blocks the
|
||||
/// process from exiting after Ctrl-C / SIGTERM.
|
||||
const CRAWLER_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
dotenvy::dotenv().ok();
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| "info,mangalord=debug".into()),
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| {
|
||||
"info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off".into()
|
||||
}),
|
||||
)
|
||||
.init();
|
||||
|
||||
@@ -21,9 +30,18 @@ async fn main() -> anyhow::Result<()> {
|
||||
.await?;
|
||||
|
||||
// Drain background tasks (crawler daemon) before exiting so Chromium
|
||||
// gets a clean shutdown rather than relying on kill-on-drop.
|
||||
// gets a clean shutdown rather than relying on kill-on-drop. Bounded
|
||||
// by a timeout so a wedged shutdown path can't trap the process.
|
||||
if let Some(d) = daemon {
|
||||
d.shutdown().await;
|
||||
if tokio::time::timeout(CRAWLER_SHUTDOWN_TIMEOUT, d.shutdown())
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
tracing::warn!(
|
||||
timeout_s = CRAWLER_SHUTDOWN_TIMEOUT.as_secs(),
|
||||
"crawler daemon shutdown exceeded timeout; abandoning"
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user