fix(crawler): wrap wait_for_navigation in 30s timeout (0.36.1)

A hung TLS handshake or a page that never fires load could wedge a
worker (or the cron metadata pass) indefinitely — chromiumoxide
imposes no navigation timeout of its own.

New crawler::nav::wait_for_nav caps each navigation at NAV_TIMEOUT
(30s) and returns a typed NavError so timeouts surface as transient
(retryable) errors. Wired at the three navigation sites:
- source::target::navigate (catalog/detail/pagination)
- content::sync_chapter_content (chapter reader)
- session::fetch_probe_html (session probe)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-30 18:10:51 +02:00
parent 9f56f283d4
commit e2bd1462ba
8 changed files with 90 additions and 8 deletions

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "mangalord" name = "mangalord"
version = "0.36.0" version = "0.36.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"argon2", "argon2",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "mangalord" name = "mangalord"
version = "0.36.0" version = "0.36.1"
edition = "2021" edition = "2021"
default-run = "mangalord" default-run = "mangalord"

View File

@@ -111,7 +111,9 @@ pub async fn sync_chapter_content(
.new_page(source_url) .new_page(source_url)
.await .await
.with_context(|| format!("open chapter page {source_url}"))?; .with_context(|| format!("open chapter page {source_url}"))?;
page.wait_for_navigation().await.context("wait for chapter nav")?; crate::crawler::nav::wait_for_nav(&page)
.await
.context("wait for chapter nav")?;
let html = page.content().await.context("read chapter html")?; let html = page.content().await.context("read chapter html")?;
page.close().await.ok(); page.close().await.ok();

View File

@@ -20,6 +20,7 @@ pub mod daemon;
pub mod detect; pub mod detect;
pub mod diff; pub mod diff;
pub mod jobs; pub mod jobs;
pub mod nav;
pub mod pipeline; pub mod pipeline;
pub mod rate_limit; pub mod rate_limit;
pub mod safety; pub mod safety;

View File

@@ -0,0 +1,68 @@
//! Page navigation helpers — wrap `chromiumoxide` `wait_for_navigation`
//! with a timeout so a hung TLS handshake or a page that never fires
//! `load` cannot wedge a worker (or the cron metadata pass) forever.
//!
//! [`NAV_TIMEOUT`] is the global budget. Callers in the crawler use
//! [`wait_for_nav`] to get back a typed error so transient timeouts can
//! be reported separately from underlying CDP errors.
use std::time::Duration;
use chromiumoxide::error::CdpError;
use chromiumoxide::Page;
use thiserror::Error;
/// Maximum wall-clock time we'll wait for a single page navigation. A
/// healthy Chromium reaches `load` in well under a second on the target
/// site; a 30-second cap is generous enough for slow TLS handshakes on
/// the first request after a fresh process while still catching real
/// hangs before they wedge the daemon.
///
/// [`wait_for_nav`] uses this as its deadline and reports the same value
/// back inside [`NavError::Timeout`] when the deadline is missed.
pub const NAV_TIMEOUT: Duration = Duration::from_secs(30);
/// Ways a timeout-capped navigation wait can fail — either the deadline
/// passed or chromiumoxide itself reported an error. `Timeout` is the
/// transient signal callers translate into a retry-friendly error
/// ([`crate::crawler::detect::PageError::Transient`] in the source path,
/// a context'd anyhow elsewhere). `Cdp` carries the underlying
/// chromiumoxide error unchanged.
#[derive(Debug, Error)]
pub enum NavError {
/// The wait did not finish within the carried budget. The budget is
/// rendered with `{:?}` in the message, e.g. `30s`.
#[error("navigation timed out after {0:?}")]
Timeout(Duration),
/// chromiumoxide returned an error before the deadline. `#[from]`
/// provides the `From<CdpError>` conversion so `?` can lift a
/// `CdpError` into this variant.
#[error(transparent)]
Cdp(#[from] CdpError),
}
/// Await the next navigation on `page`, giving up after [`NAV_TIMEOUT`].
///
/// Intended as the crawler-wide replacement for a bare
/// `page.wait_for_navigation().await`, which has no deadline of its own
/// here.
///
/// # Errors
///
/// * [`NavError::Timeout`] — the deadline passed before the navigation
///   wait resolved; carries [`NAV_TIMEOUT`].
/// * [`NavError::Cdp`] — the wait resolved in time but chromiumoxide
///   returned an error, which is passed through untouched.
pub async fn wait_for_nav(page: &Page) -> Result<(), NavError> {
    // Outer layer: did the deadline fire before the wait resolved?
    let nav_outcome = tokio::time::timeout(NAV_TIMEOUT, page.wait_for_navigation())
        .await
        .map_err(|_elapsed| NavError::Timeout(NAV_TIMEOUT))?;

    // Inner layer: the wait resolved; discard the success value and
    // wrap any CDP failure.
    nav_outcome.map(|_page| ()).map_err(NavError::Cdp)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::future::pending;

    /// Guards the primitive [`wait_for_nav`] is built on: wrapping a
    /// future that never completes in `tokio::time::timeout` must come
    /// back as an error once the budget is spent. A real `Page` is not
    /// practical to construct in a unit test, so the helper itself is
    /// not called here. The runtime starts with its clock paused
    /// (`start_paused = true`).
    #[tokio::test(flavor = "current_thread", start_paused = true)]
    async fn timeout_elapses_on_a_future_that_never_resolves() {
        let budget = Duration::from_millis(50);
        let never_ready = pending::<()>();

        let outcome = tokio::time::timeout(budget, never_ready).await;

        assert!(outcome.is_err(), "expected Elapsed on a hung future");
    }

    /// The `Timeout` variant's display text must embed the budget it
    /// carries.
    #[test]
    fn nav_error_timeout_message_includes_duration() {
        let rendered = NavError::Timeout(Duration::from_secs(30)).to_string();

        assert_eq!(rendered, "navigation timed out after 30s");
    }
}

View File

@@ -203,7 +203,9 @@ async fn fetch_probe_html(browser: &Browser, probe_url: &str) -> anyhow::Result<
.new_page(probe_url) .new_page(probe_url)
.await .await
.with_context(|| format!("open probe page {probe_url}"))?; .with_context(|| format!("open probe page {probe_url}"))?;
page.wait_for_navigation().await.context("wait for nav on probe")?; crate::crawler::nav::wait_for_nav(&page)
.await
.context("wait for nav on probe")?;
let html = page.content().await.context("read probe html")?; let html = page.content().await.context("read probe html")?;
page.close().await.ok(); page.close().await.ok();
Ok(html) Ok(html)

View File

@@ -21,6 +21,7 @@ use super::{
use crate::crawler::detect::{ use crate::crawler::detect::{
has_logo_sentinel, is_broken_page_body, retry_on_transient, PageError, has_logo_sentinel, is_broken_page_body, retry_on_transient, PageError,
}; };
use crate::crawler::nav::{wait_for_nav, NavError};
/// `sources.id` value for this Source impl. Exposed as a const so the /// `sources.id` value for this Source impl. Exposed as a const so the
/// daemon can look up per-source state (e.g. the recovery flag) before /// daemon can look up per-source state (e.g. the recovery flag) before
@@ -216,9 +217,17 @@ async fn navigate(ctx: &FetchContext<'_>, url: &str) -> Result<String, PageError
.new_page(url) .new_page(url)
.await .await
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?; .map_err(|e| PageError::Other(anyhow::Error::from(e)))?;
page.wait_for_navigation() match wait_for_nav(&page).await {
.await Ok(()) => {}
.map_err(|e| PageError::Other(anyhow::Error::from(e)))?; Err(NavError::Timeout(_)) => {
page.close().await.ok();
return Err(PageError::transient("nav timeout"));
}
Err(NavError::Cdp(e)) => {
page.close().await.ok();
return Err(PageError::Other(anyhow::Error::from(e)));
}
}
// Stopgap until we wait on a specific selector per page type — // Stopgap until we wait on a specific selector per page type —
// gives any post-load JS a beat to finish injecting content. // gives any post-load JS a beat to finish injecting content.
tokio::time::sleep(Duration::from_secs(1)).await; tokio::time::sleep(Duration::from_secs(1)).await;

View File

@@ -1,6 +1,6 @@
{ {
"name": "mangalord-frontend", "name": "mangalord-frontend",
"version": "0.36.0", "version": "0.36.1",
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {