fix(crawler): unhang shutdown on lingering Arc<Browser>, silence WS noise (0.43.1)
Some checks failed
deploy / test-backend (push) Failing after 6s
deploy / test-frontend (push) Failing after 40s
deploy / build-and-push (push) Has been skipped
deploy / deploy (push) Has been skipped

- Handle::close aborts its chromiumoxide driver task when another
  Arc<Browser> outlives the call, so shutdown returns instead of
  hanging on a stream that never terminates. Generic close_or_abort
  helper with regression tests covering both Arc paths.
- daemon.shutdown() is wrapped in a 5s timeout in main as defense
  in depth.
- Default RUST_LOG silences chromiumoxide::conn / chromiumoxide::handler
  WS-deserialize ERROR spam.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-31 14:47:36 +02:00
parent 030b27754b
commit 1eebb90e25
6 changed files with 114 additions and 25 deletions

View File

@@ -24,7 +24,7 @@ POSTGRES_DB=mangalord
DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord DATABASE_URL=postgres://mangalord:mangalord@postgres:5432/mangalord
BIND_ADDRESS=0.0.0.0:8080 BIND_ADDRESS=0.0.0.0:8080
STORAGE_DIR=/var/lib/mangalord/storage STORAGE_DIR=/var/lib/mangalord/storage
RUST_LOG=info,mangalord=debug RUST_LOG=info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off
# ----- Auth / cookies ----- # ----- Auth / cookies -----
# COOKIE_SECURE controls whether the `Secure` flag is set on the session # COOKIE_SECURE controls whether the `Secure` flag is set on the session

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "mangalord" name = "mangalord"
version = "0.43.0" version = "0.43.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"argon2", "argon2",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "mangalord" name = "mangalord"
version = "0.43.0" version = "0.43.1"
edition = "2021" edition = "2021"
default-run = "mangalord" default-run = "mangalord"

View File

@@ -121,29 +121,50 @@ impl Handle {
} }
/// Closes the browser and awaits the driver task. If other Arcs to /// Closes the browser and awaits the driver task. If other Arcs to
/// the browser are still alive we fall back to drop-kills-Chromium /// the browser are still alive we can't issue a clean CDP `close`,
/// semantics and just join the driver — this is the rare case where /// so we abort the driver task instead — otherwise `handler.next()`
/// shutdown raced an outstanding worker; the OS-level kill is the /// keeps polling forever and `Handle::close` hangs (chromiumoxide's
/// safety net. /// handler stream doesn't end on its own when the underlying WS
/// dies). Chromium itself is reaped by kill-on-drop once the last
/// `Arc<Browser>` is dropped.
pub async fn close(self) -> anyhow::Result<()> { pub async fn close(self) -> anyhow::Result<()> {
match Arc::try_unwrap(self.browser) { close_or_abort(self.browser, self.driver, |mut owned| async move {
Ok(mut owned) => { let _ = owned.close().await;
let _ = owned.close().await; let _ = owned.wait().await;
let _ = owned.wait().await; })
} .await;
Err(shared) => {
tracing::warn!(
strong_count = Arc::strong_count(&shared),
"Handle::close while Arc<Browser> still shared — relying on kill-on-drop"
);
drop(shared);
}
}
let _ = self.driver.await;
Ok(()) Ok(())
} }
} }
/// Shutdown core for [`Handle::close`], extracted so it can be unit-
/// tested without launching real Chromium. When `arc` is uniquely owned,
/// `on_owned` runs against the owned value and the driver is awaited
/// normally. When other Arc holders exist, the driver is aborted before
/// awaiting it so shutdown returns promptly.
async fn close_or_abort<T, F, Fut>(arc: Arc<T>, driver: JoinHandle<()>, on_owned: F)
where
T: Send + 'static,
F: FnOnce(T) -> Fut + Send,
Fut: std::future::Future<Output = ()> + Send,
{
match Arc::try_unwrap(arc) {
Ok(owned) => {
on_owned(owned).await;
let _ = driver.await;
}
Err(shared) => {
tracing::warn!(
strong_count = Arc::strong_count(&shared),
"Handle::close while Arc still shared — aborting driver, relying on kill-on-drop"
);
drop(shared);
driver.abort();
let _ = driver.await;
}
}
}
/// Launches Chromium. Downloads it on first run via the `fetcher` /// Launches Chromium. Downloads it on first run via the `fetcher`
/// feature; subsequent runs hit the cache. The cache dir is /// feature; subsequent runs hit the cache. The cache dir is
/// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`, /// `$CRAWLER_CHROMIUM_DIR` if set, else `$HOME/.cache/mangalord/chromium`,
@@ -261,4 +282,54 @@ mod tests {
assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless); assert_eq!(LaunchOptions::headless().mode, BrowserMode::Headless);
assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed); assert_eq!(LaunchOptions::headed().mode, BrowserMode::Headed);
} }
// Regression: if another Arc<Browser> outlives `Handle::close`, the
// old code awaited the driver task forever because the chromiumoxide
// handler stream doesn't return None on its own. Aborting the driver
// unblocks shutdown even when kill-on-drop can't fire yet.
#[tokio::test]
async fn close_or_abort_returns_when_arc_is_shared() {
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
let arc = Arc::new(());
let _keepalive = Arc::clone(&arc); // forces try_unwrap to fail
let driver = tokio::spawn(std::future::pending::<()>());
let on_owned_ran = Arc::new(AtomicBool::new(false));
let flag = Arc::clone(&on_owned_ran);
let fut = close_or_abort(arc, driver, move |_| {
let flag = Arc::clone(&flag);
async move { flag.store(true, Ordering::Release) }
});
tokio::time::timeout(Duration::from_secs(2), fut)
.await
.expect("close_or_abort must not hang when driver is pending and Arc is shared");
assert!(
!on_owned_ran.load(Ordering::Acquire),
"on_owned must not run when the Arc is still shared"
);
}
#[tokio::test]
async fn close_or_abort_runs_on_owned_when_arc_is_unique() {
use std::sync::atomic::{AtomicBool, Ordering};
let arc = Arc::new(());
let driver = tokio::spawn(async {}); // completes immediately
let on_owned_ran = Arc::new(AtomicBool::new(false));
let flag = Arc::clone(&on_owned_ran);
close_or_abort(arc, driver, move |_| {
let flag = Arc::clone(&flag);
async move { flag.store(true, Ordering::Release) }
})
.await;
assert!(
on_owned_ran.load(Ordering::Acquire),
"on_owned must run when the Arc is unique"
);
}
} }

View File

@@ -1,12 +1,21 @@
use std::net::SocketAddr; use std::net::SocketAddr;
use std::time::Duration;
use tracing_subscriber::EnvFilter; use tracing_subscriber::EnvFilter;
/// Upper bound on how long we're willing to wait for the crawler daemon
/// to drain before letting `main` return. Without it a wedged background
/// task (e.g. a chromiumoxide handler stuck on a dead WS) blocks the
/// process from exiting after Ctrl-C / SIGTERM.
const CRAWLER_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
dotenvy::dotenv().ok(); dotenvy::dotenv().ok();
tracing_subscriber::fmt() tracing_subscriber::fmt()
.with_env_filter( .with_env_filter(
EnvFilter::try_from_default_env().unwrap_or_else(|_| "info,mangalord=debug".into()), EnvFilter::try_from_default_env().unwrap_or_else(|_| {
"info,mangalord=debug,chromiumoxide::conn=off,chromiumoxide::handler=off".into()
}),
) )
.init(); .init();
@@ -21,9 +30,18 @@ async fn main() -> anyhow::Result<()> {
.await?; .await?;
// Drain background tasks (crawler daemon) before exiting so Chromium // Drain background tasks (crawler daemon) before exiting so Chromium
// gets a clean shutdown rather than relying on kill-on-drop. // gets a clean shutdown rather than relying on kill-on-drop. Bounded
// by a timeout so a wedged shutdown path can't trap the process.
if let Some(d) = daemon { if let Some(d) = daemon {
d.shutdown().await; if tokio::time::timeout(CRAWLER_SHUTDOWN_TIMEOUT, d.shutdown())
.await
.is_err()
{
tracing::warn!(
timeout_s = CRAWLER_SHUTDOWN_TIMEOUT.as_secs(),
"crawler daemon shutdown exceeded timeout; abandoning"
);
}
} }
Ok(()) Ok(())
} }

View File

@@ -1,6 +1,6 @@
{ {
"name": "mangalord-frontend", "name": "mangalord-frontend",
"version": "0.43.0", "version": "0.43.1",
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {