fix(crawler): unhang shutdown on lingering Arc<Browser>, silence WS noise (0.43.1)
- Handle::close aborts its chromiumoxide driver task when another Arc<Browser> outlives the call, so shutdown returns instead of hanging on a stream that never terminates. Generic close_or_abort helper with regression tests covering both Arc paths. - daemon.shutdown() is wrapped in a 5s timeout in main as defense in depth. - Default RUST_LOG silences chromiumoxide::conn / chromiumoxide::handler WS-deserialize ERROR spam. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -24,7 +24,7 @@ POSTGRES_DB=mangalord
|
|||||||
DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord
|
DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord
|
||||||
BIND_ADDRESS=0.0.0.0:8080
|
BIND_ADDRESS=0.0.0.0:8080
|
||||||
STORAGE_DIR=/var/lib/mangalord/storage
|
STORAGE_DIR=/var/lib/mangalord/storage
|
||||||
RUST_LOG=info,mangalord=debug
|
RUST_LOG=info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off
|
||||||
|
|
||||||
# ----- Auth / cookies -----
|
# ----- Auth / cookies -----
|
||||||
# COOKIE_SECURE controls whether the `Secure` flag is set on the session
|
# COOKIE_SECURE controls whether the `Secure` flag is set on the session
|
||||||
|
|||||||
2
backend/Cargo.lock
generated
2
backend/Cargo.lock
generated
@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.43.0"
|
version = "0.43.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"argon2",
|
"argon2",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "mangalord"
|
name = "mangalord"
|
||||||
version = "0.43.0"
|
version = "0.43.1"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
default-run = "mangalord"
|
default-run = "mangalord"
|
||||||
|
|
||||||
|
|||||||
@@ -121,27 +121,48 @@ impl Handle {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Closes the browser and awaits the driver task. If other Arcs to
|
/// Closes the browser and awaits the driver task. If other Arcs to
|
||||||
/// the browser are still alive we fall back to drop-kills-Chromium
|
/// the browser are still alive we can't issue a clean CDP `close`,
|
||||||
/// semantics and just join the driver — this is the rare case where
|
/// so we abort the driver task instead — otherwise `handler.next()`
|
||||||
/// shutdown raced an outstanding worker; the OS-level kill is the
|
/// keeps polling forever and `Handle::close` hangs (chromiumoxide's
|
||||||
/// safety net.
|
/// handler stream doesn't end on its own when the underlying WS
|
||||||
|
/// dies). Chromium itself is reaped by kill-on-drop once the last
|
||||||
|
/// `Arc<Browser>` is dropped.
|
||||||
pub async fn close(self) -> anyhow::Result<()> {
|
pub async fn close(self) -> anyhow::Result<()> {
|
||||||
match Arc::try_unwrap(self.browser) {
|
close_or_abort(self.browser, self.driver, |mut owned| async move {
|
||||||
Ok(mut owned) => {
|
|
||||||
let _ = owned.close().await;
|
let _ = owned.close().await;
|
||||||
let _ = owned.wait().await;
|
let _ = owned.wait().await;
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shutdown core for [`Handle::close`], extracted so it can be unit-
|
||||||
|
/// tested without launching real Chromium. When `arc` is uniquely owned,
|
||||||
|
/// `on_owned` runs against the owned value and the driver is awaited
|
||||||
|
/// normally. When other Arc holders exist, the driver is aborted before
|
||||||
|
/// awaiting it so shutdown returns promptly.
|
||||||
|
async fn close_or_abort<T, F, Fut>(arc: Arc<T>, driver: JoinHandle<()>, on_owned: F)
|
||||||
|
where
|
||||||
|
T: Send + 'static,
|
||||||
|
F: FnOnce(T) -> Fut + Send,
|
||||||
|
Fut: std::future::Future<Output = ()> + Send,
|
||||||
|
{
|
||||||
|
match Arc::try_unwrap(arc) {
|
||||||
|
Ok(owned) => {
|
||||||
|
on_owned(owned).await;
|
||||||
|
let _ = driver.await;
|
||||||
}
|
}
|
||||||
Err(shared) => {
|
Err(shared) => {
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
strong_count = Arc::strong_count(&shared),
|
strong_count = Arc::strong_count(&shared),
|
||||||
"Handle::close while Arc<Browser> still shared — relying on kill-on-drop"
|
"Handle::close while Arc still shared — aborting driver, relying on kill-on-drop"
|
||||||
);
|
);
|
||||||
drop(shared);
|
drop(shared);
|
||||||
|
driver.abort();
|
||||||
|
let _ = driver.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let _ = self.driver.await;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Launches Chromium. Downloads it on first run via the `fetcher`
|
/// Launches Chromium. Downloads it on first run via the `fetcher`
|
||||||
@@ -261,4 +282,54 @@ mod tests {
|
|||||||
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
|
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
|
||||||
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
|
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression: if another Arc<Browser> outlives `Handle::close`, the
|
||||||
|
// old code awaited the driver task forever because the chromiumoxide
|
||||||
|
// handler stream doesn't return None on its own. Aborting the driver
|
||||||
|
// unblocks shutdown even when kill-on-drop can't fire yet.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn close_or_abort_returns_when_arc_is_shared() {
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
let arc = Arc::new(());
|
||||||
|
let _keepalive = Arc::clone(&arc); // forces try_unwrap to fail
|
||||||
|
let driver = tokio::spawn(std::future::pending::<()>());
|
||||||
|
let on_owned_ran = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
|
let flag = Arc::clone(&on_owned_ran);
|
||||||
|
let fut = close_or_abort(arc, driver, move |_| {
|
||||||
|
let flag = Arc::clone(&flag);
|
||||||
|
async move { flag.store(true, Ordering::Release) }
|
||||||
|
});
|
||||||
|
|
||||||
|
tokio::time::timeout(Duration::from_secs(2), fut)
|
||||||
|
.await
|
||||||
|
.expect("close_or_abort must not hang when driver is pending and Arc is shared");
|
||||||
|
assert!(
|
||||||
|
!on_owned_ran.load(Ordering::Acquire),
|
||||||
|
"on_owned must not run when the Arc is still shared"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn close_or_abort_runs_on_owned_when_arc_is_unique() {
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
|
let arc = Arc::new(());
|
||||||
|
let driver = tokio::spawn(async {}); // completes immediately
|
||||||
|
let on_owned_ran = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
|
let flag = Arc::clone(&on_owned_ran);
|
||||||
|
close_or_abort(arc, driver, move |_| {
|
||||||
|
let flag = Arc::clone(&flag);
|
||||||
|
async move { flag.store(true, Ordering::Release) }
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
on_owned_ran.load(Ordering::Acquire),
|
||||||
|
"on_owned must run when the Arc is unique"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,21 @@
|
|||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
|
use std::time::Duration;
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
|
/// Upper bound on how long we're willing to wait for the crawler daemon
|
||||||
|
/// to drain before letting `main` return. Without it a wedged background
|
||||||
|
/// task (e.g. a chromiumoxide handler stuck on a dead WS) blocks the
|
||||||
|
/// process from exiting after Ctrl-C / SIGTERM.
|
||||||
|
const CRAWLER_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
dotenvy::dotenv().ok();
|
dotenvy::dotenv().ok();
|
||||||
tracing_subscriber::fmt()
|
tracing_subscriber::fmt()
|
||||||
.with_env_filter(
|
.with_env_filter(
|
||||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| "info,mangalord=debug".into()),
|
EnvFilter::try_from_default_env().unwrap_or_else(|_| {
|
||||||
|
"info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off".into()
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
@@ -21,9 +30,18 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
// Drain background tasks (crawler daemon) before exiting so Chromium
|
// Drain background tasks (crawler daemon) before exiting so Chromium
|
||||||
// gets a clean shutdown rather than relying on kill-on-drop.
|
// gets a clean shutdown rather than relying on kill-on-drop. Bounded
|
||||||
|
// by a timeout so a wedged shutdown path can't trap the process.
|
||||||
if let Some(d) = daemon {
|
if let Some(d) = daemon {
|
||||||
d.shutdown().await;
|
if tokio::time::timeout(CRAWLER_SHUTDOWN_TIMEOUT, d.shutdown())
|
||||||
|
.await
|
||||||
|
.is_err()
|
||||||
|
{
|
||||||
|
tracing::warn!(
|
||||||
|
timeout_s = CRAWLER_SHUTDOWN_TIMEOUT.as_secs(),
|
||||||
|
"crawler daemon shutdown exceeded timeout; abandoning"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "mangalord-frontend",
|
"name": "mangalord-frontend",
|
||||||
"version": "0.43.0",
|
"version": "0.43.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
Reference in New Issue
Block a user