fix(crawler): unify recircuit budget semantics — N = total attempts
The three retry-with-recircuit sites disagreed: detect.rs's retry_on_transient_with_hook used "N = total attempts" (3 → 3 fetches), but session.rs's unauth branch and content.rs's chapter loop used "N = recircuits" (3 → 4 fetches). At the same wall-clock "max=3", different sites hit the upstream a different number of times. Unify on N = total attempts (matching the existing retry_on_transient convention). The CRAWLER_TOR_RECIRCUIT_MAX_ATTEMPTS env var now means exactly what its name suggests. Disabling the recircuit feature collapses to max_attempts=1 (single attempt, no retry) — bit-for-bit pre-TOR behavior preserved. Adds a debug_assert!(max >= 1) on both helpers and a new content.rs test exercising the mixed Transient → Unauth → Ok sequence to lock in the shared-counter invariant. Audit ref: #5. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -73,9 +73,10 @@ pub enum SyncOutcome {
|
|||||||
SessionExpired,
|
SessionExpired,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Per-chapter recircuit budget for both transient pages and
|
/// Per-chapter max fetch attempts when TOR is configured. `N = 3` means
|
||||||
/// `Unauthenticated` outcomes. When TOR is not configured the budget
|
/// up to 3 total page fetches with 2 NEWNYM signals between them. When
|
||||||
/// is effectively 0 (no recircuit attempted; original behavior).
|
/// TOR is not configured the effective budget collapses to 1 (single
|
||||||
|
/// attempt, no retry, no recircuit — bit-for-bit pre-TOR behavior).
|
||||||
const CHAPTER_RECIRCUIT_MAX_ATTEMPTS: u32 = 3;
|
const CHAPTER_RECIRCUIT_MAX_ATTEMPTS: u32 = 3;
|
||||||
|
|
||||||
/// Outcome of [`fetch_chapter_html_with_recircuit`]. `Ok` carries the
|
/// Outcome of [`fetch_chapter_html_with_recircuit`]. `Ok` carries the
|
||||||
@@ -125,15 +126,23 @@ async fn fetch_chapter_html_once(
|
|||||||
Ok(html)
|
Ok(html)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pure-over-IO loop: fetch + classify, with up to `recircuit_budget`
|
/// Pure-over-IO loop: fetch + classify, up to `max_attempts` total
|
||||||
/// NEWNYM-and-retry cycles after a `Transient` or `Unauthenticated`
|
/// fetches. Between attempts, `recircuit` is invoked (a no-op when
|
||||||
/// outcome. `recircuit_budget = 0` collapses to the original
|
/// TOR isn't configured). `max_attempts = 1` collapses to the
|
||||||
/// single-shot behavior — `Unauthenticated` → `SessionExpired`,
|
/// original single-shot behavior — `Unauthenticated` →
|
||||||
/// `Transient` → `PersistentTransient` on the first hit, no recircuit.
|
/// `SessionExpired`, `Transient` → `PersistentTransient` on the first
|
||||||
|
/// hit, no recircuit.
|
||||||
|
///
|
||||||
|
/// Semantics match [`crate::crawler::detect::retry_on_transient`] and
|
||||||
|
/// [`run_session_probe_loop`]: `N` is **total attempts including the
|
||||||
|
/// first**, so `N = 3` means 3 fetches and up to 2 NEWNYM calls.
|
||||||
|
/// `Unauthenticated` and `Transient` share the budget — the loop
|
||||||
|
/// doesn't distinguish, so a sequence like Transient → Unauth → Ok
|
||||||
|
/// counts as 3 attempts.
|
||||||
async fn fetch_chapter_html_with_recircuit<F, Fut, R, RFut>(
|
async fn fetch_chapter_html_with_recircuit<F, Fut, R, RFut>(
|
||||||
mut fetch: F,
|
mut fetch: F,
|
||||||
mut recircuit: R,
|
mut recircuit: R,
|
||||||
recircuit_budget: u32,
|
max_attempts: u32,
|
||||||
source_url_for_msg: &str,
|
source_url_for_msg: &str,
|
||||||
) -> anyhow::Result<ChapterFetchOutcome>
|
) -> anyhow::Result<ChapterFetchOutcome>
|
||||||
where
|
where
|
||||||
@@ -142,38 +151,36 @@ where
|
|||||||
R: FnMut() -> RFut,
|
R: FnMut() -> RFut,
|
||||||
RFut: std::future::Future<Output = ()>,
|
RFut: std::future::Future<Output = ()>,
|
||||||
{
|
{
|
||||||
let mut recircuits = 0u32;
|
debug_assert!(max_attempts >= 1, "max_attempts must be at least 1");
|
||||||
|
let mut attempt = 0u32;
|
||||||
loop {
|
loop {
|
||||||
|
attempt += 1;
|
||||||
let html = fetch().await?;
|
let html = fetch().await?;
|
||||||
match session::classify_chapter_probe(&html) {
|
match session::classify_chapter_probe(&html) {
|
||||||
ChapterProbe::Ok => return Ok(ChapterFetchOutcome::Ok(html)),
|
ChapterProbe::Ok => return Ok(ChapterFetchOutcome::Ok(html)),
|
||||||
ChapterProbe::Unauthenticated => {
|
ChapterProbe::Unauthenticated => {
|
||||||
if recircuits < recircuit_budget {
|
if attempt >= max_attempts {
|
||||||
recircuits += 1;
|
return Ok(ChapterFetchOutcome::SessionExpired);
|
||||||
|
}
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
attempt = recircuits,
|
attempt,
|
||||||
max = recircuit_budget,
|
max = max_attempts,
|
||||||
url = source_url_for_msg,
|
url = source_url_for_msg,
|
||||||
"chapter probe Unauthenticated; signaling TOR NEWNYM and retrying"
|
"chapter probe Unauthenticated; signaling TOR NEWNYM and retrying"
|
||||||
);
|
);
|
||||||
recircuit().await;
|
recircuit().await;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return Ok(ChapterFetchOutcome::SessionExpired);
|
|
||||||
}
|
}
|
||||||
ChapterProbe::Transient => {
|
ChapterProbe::Transient => {
|
||||||
if recircuits < recircuit_budget {
|
if attempt >= max_attempts {
|
||||||
recircuits += 1;
|
return Ok(ChapterFetchOutcome::PersistentTransient);
|
||||||
|
}
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
attempt = recircuits,
|
attempt,
|
||||||
max = recircuit_budget,
|
max = max_attempts,
|
||||||
url = source_url_for_msg,
|
url = source_url_for_msg,
|
||||||
"chapter probe Transient; signaling TOR NEWNYM and retrying"
|
"chapter probe Transient; signaling TOR NEWNYM and retrying"
|
||||||
);
|
);
|
||||||
recircuit().await;
|
recircuit().await;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return Ok(ChapterFetchOutcome::PersistentTransient);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -212,10 +219,11 @@ pub async fn sync_chapter_content(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch + classify with a recircuit budget when TOR is configured.
|
// Fetch + classify. With TOR configured, allow up to
|
||||||
// Without TOR the closure-recircuit is a no-op and the loop reduces
|
// CHAPTER_RECIRCUIT_MAX_ATTEMPTS total page fetches with NEWNYM
|
||||||
// to the original single-attempt behavior.
|
// between each. Without TOR, collapse to 1 attempt (no retry, no
|
||||||
let recircuit_budget = if tor.is_some() { CHAPTER_RECIRCUIT_MAX_ATTEMPTS } else { 0 };
|
// recircuit) — matches the pre-TOR single-shot behavior bit-for-bit.
|
||||||
|
let max_attempts = if tor.is_some() { CHAPTER_RECIRCUIT_MAX_ATTEMPTS } else { 1 };
|
||||||
let html = match fetch_chapter_html_with_recircuit(
|
let html = match fetch_chapter_html_with_recircuit(
|
||||||
|| fetch_chapter_html_once(browser, rate, source_url),
|
|| fetch_chapter_html_once(browser, rate, source_url),
|
||||||
|| async {
|
|| async {
|
||||||
@@ -225,7 +233,7 @@ pub async fn sync_chapter_content(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
recircuit_budget,
|
max_attempts,
|
||||||
source_url,
|
source_url,
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
@@ -238,7 +246,7 @@ pub async fn sync_chapter_content(
|
|||||||
// session-expired sticky flag).
|
// session-expired sticky flag).
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
"chapter page at {source_url} returned a transient response after \
|
"chapter page at {source_url} returned a transient response after \
|
||||||
{recircuit_budget} TOR recircuit(s); will retry"
|
{max_attempts} attempt(s); will retry"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -431,7 +439,8 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn recircuit_loop_unauth_with_zero_budget_returns_session_expired() {
|
async fn recircuit_loop_unauth_with_single_attempt_returns_session_expired() {
|
||||||
|
// max_attempts=1 = TOR disabled, fail-fast on first Unauthenticated.
|
||||||
let mut recircuits = 0u32;
|
let mut recircuits = 0u32;
|
||||||
let mut fetches = 0u32;
|
let mut fetches = 0u32;
|
||||||
let outcome = fetch_chapter_html_with_recircuit(
|
let outcome = fetch_chapter_html_with_recircuit(
|
||||||
@@ -443,18 +452,19 @@ mod tests {
|
|||||||
recircuits += 1;
|
recircuits += 1;
|
||||||
async {}
|
async {}
|
||||||
},
|
},
|
||||||
0,
|
1,
|
||||||
"https://example/c",
|
"https://example/c",
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.expect("ok-result");
|
.expect("ok-result");
|
||||||
assert!(matches!(outcome, ChapterFetchOutcome::SessionExpired));
|
assert!(matches!(outcome, ChapterFetchOutcome::SessionExpired));
|
||||||
assert_eq!(fetches, 1);
|
assert_eq!(fetches, 1);
|
||||||
assert_eq!(recircuits, 0, "no recircuit when budget is 0 (TOR disabled)");
|
assert_eq!(recircuits, 0, "no recircuit when budget is 1 (TOR disabled)");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn recircuit_loop_unauth_then_ok_within_budget() {
|
async fn recircuit_loop_unauth_then_ok_within_budget() {
|
||||||
|
// max_attempts=3 = up to 3 fetches with 2 recircuits between.
|
||||||
let mut recircuits = 0u32;
|
let mut recircuits = 0u32;
|
||||||
let mut fetch_n = 0u32;
|
let mut fetch_n = 0u32;
|
||||||
let outcome = fetch_chapter_html_with_recircuit(
|
let outcome = fetch_chapter_html_with_recircuit(
|
||||||
@@ -496,15 +506,14 @@ mod tests {
|
|||||||
recircuits += 1;
|
recircuits += 1;
|
||||||
async {}
|
async {}
|
||||||
},
|
},
|
||||||
2,
|
3,
|
||||||
"https://example/c",
|
"https://example/c",
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.expect("ok-result");
|
.expect("ok-result");
|
||||||
assert!(matches!(outcome, ChapterFetchOutcome::SessionExpired));
|
assert!(matches!(outcome, ChapterFetchOutcome::SessionExpired));
|
||||||
// budget=2 → initial + 2 recircuit-and-retry = 3 fetches.
|
assert_eq!(fetch_n, 3, "max_attempts=3 → 3 fetches total");
|
||||||
assert_eq!(fetch_n, 3);
|
assert_eq!(recircuits, 2, "2 recircuits between 3 fetches");
|
||||||
assert_eq!(recircuits, 2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -556,8 +565,40 @@ mod tests {
|
|||||||
.await
|
.await
|
||||||
.expect("ok-result");
|
.expect("ok-result");
|
||||||
assert!(matches!(outcome, ChapterFetchOutcome::PersistentTransient));
|
assert!(matches!(outcome, ChapterFetchOutcome::PersistentTransient));
|
||||||
assert_eq!(fetch_n, 4, "budget=3 → 1 initial + 3 retries");
|
assert_eq!(fetch_n, 3, "max_attempts=3 → 3 fetches total");
|
||||||
assert_eq!(recircuits, 3);
|
assert_eq!(recircuits, 2, "2 recircuits between 3 fetches");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn recircuit_loop_mixed_transient_then_unauth_then_ok_shares_budget() {
|
||||||
|
// Audit-prompted regression: outcomes share the attempt counter.
|
||||||
|
// Sequence: Transient (attempt 1) → Unauth (attempt 2) → Ok (3).
|
||||||
|
let mut recircuits = 0u32;
|
||||||
|
let mut fetch_n = 0u32;
|
||||||
|
let outcome = fetch_chapter_html_with_recircuit(
|
||||||
|
|| {
|
||||||
|
fetch_n += 1;
|
||||||
|
let n = fetch_n;
|
||||||
|
async move {
|
||||||
|
match n {
|
||||||
|
1 => Ok(TRANSIENT_HTML.to_string()),
|
||||||
|
2 => Ok(UNAUTH_HTML.to_string()),
|
||||||
|
_ => Ok(OK_HTML.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|| {
|
||||||
|
recircuits += 1;
|
||||||
|
async {}
|
||||||
|
},
|
||||||
|
3,
|
||||||
|
"https://example/c",
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("ok");
|
||||||
|
assert!(matches!(outcome, ChapterFetchOutcome::Ok(_)));
|
||||||
|
assert_eq!(fetch_n, 3);
|
||||||
|
assert_eq!(recircuits, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
|
|||||||
@@ -167,19 +167,19 @@ pub async fn verify_session(browser: &Browser, probe_url: &str) -> anyhow::Resul
|
|||||||
|
|
||||||
/// Like [`verify_session`] but, when `tor` is `Some`, signals
|
/// Like [`verify_session`] but, when `tor` is `Some`, signals
|
||||||
/// `SIGNAL NEWNYM` between retries on transient pages AND treats
|
/// `SIGNAL NEWNYM` between retries on transient pages AND treats
|
||||||
/// `Unauthenticated` as a recoverable failure (up to
|
/// `Unauthenticated` as recoverable (up to `tor_max_attempts` total
|
||||||
/// `unauth_max_recircuit` recircuit cycles before giving up). The bare
|
/// probes, calling NEWNYM between each).
|
||||||
/// `verify_session` is `verify_session_with_recircuit(..., None, 0)`.
|
|
||||||
///
|
///
|
||||||
/// When `tor` is `None`, `unauth_max_recircuit` is ignored — `Unauth`
|
/// `verify_session` is `verify_session_with_recircuit(..., None, _)`,
|
||||||
/// stays a hard fail, matching the original behavior.
|
/// which collapses the `Unauthenticated` budget to 1 attempt — i.e.
|
||||||
|
/// fail-fast, exactly the pre-TOR behavior.
|
||||||
pub async fn verify_session_with_recircuit(
|
pub async fn verify_session_with_recircuit(
|
||||||
browser: &Browser,
|
browser: &Browser,
|
||||||
probe_url: &str,
|
probe_url: &str,
|
||||||
tor: Option<&crate::crawler::tor::TorController>,
|
tor: Option<&crate::crawler::tor::TorController>,
|
||||||
unauth_max_recircuit: u32,
|
tor_max_attempts: u32,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let effective_unauth_budget = if tor.is_some() { unauth_max_recircuit } else { 0 };
|
let unauth_max_attempts = if tor.is_some() { tor_max_attempts.max(1) } else { 1 };
|
||||||
run_session_probe_loop(
|
run_session_probe_loop(
|
||||||
|| fetch_probe_html(browser, probe_url),
|
|| fetch_probe_html(browser, probe_url),
|
||||||
|| async {
|
|| async {
|
||||||
@@ -190,7 +190,7 @@ pub async fn verify_session_with_recircuit(
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
PROBE_MAX_ATTEMPTS,
|
PROBE_MAX_ATTEMPTS,
|
||||||
effective_unauth_budget,
|
unauth_max_attempts,
|
||||||
PROBE_RETRY_DELAY,
|
PROBE_RETRY_DELAY,
|
||||||
probe_url,
|
probe_url,
|
||||||
)
|
)
|
||||||
@@ -201,20 +201,25 @@ pub async fn verify_session_with_recircuit(
|
|||||||
/// fetch and recircuit closures so it can be unit-tested without a
|
/// fetch and recircuit closures so it can be unit-tested without a
|
||||||
/// real browser or TOR daemon.
|
/// real browser or TOR daemon.
|
||||||
///
|
///
|
||||||
/// Semantics:
|
/// Both budgets count **total attempts**, including the first — so
|
||||||
|
/// `transient_max_attempts = 3` allows 3 fetches and 2 recircuits
|
||||||
|
/// between them, and `unauth_max_attempts = 1` means "fail-fast, no
|
||||||
|
/// retry". This matches [`crate::crawler::detect::retry_on_transient`]
|
||||||
|
/// and the content-path recircuit loop.
|
||||||
|
///
|
||||||
|
/// Outcomes:
|
||||||
/// - `SessionProbe::Ok` → return `Ok(())`.
|
/// - `SessionProbe::Ok` → return `Ok(())`.
|
||||||
/// - `SessionProbe::Unauthenticated` → if `unauth_max_recircuit > 0`
|
/// - `SessionProbe::Unauthenticated` → recircuit + retry while
|
||||||
/// and budget remaining, call `recircuit` + sleep + retry. Otherwise
|
/// under the unauth budget. After the cap, bail with the
|
||||||
/// bail with the "PHPSESSID expired" diagnostic, mentioning the
|
/// "PHPSESSID expired" diagnostic, mentioning the attempt count so
|
||||||
/// recircuit count so a TOR-misconfig diagnosis is easier.
|
/// a TOR-misconfig diagnosis is easier.
|
||||||
/// - `SessionProbe::Transient` → up to `transient_max_attempts` total
|
/// - `SessionProbe::Transient` → same shape against the transient
|
||||||
/// tries, calling `recircuit` between each. After the cap, bail with
|
/// budget; bails with "site down or rate-limiting" after the cap.
|
||||||
/// the "site down or rate-limiting" diagnostic.
|
|
||||||
async fn run_session_probe_loop<F, Fut, R, RFut>(
|
async fn run_session_probe_loop<F, Fut, R, RFut>(
|
||||||
mut fetch_html: F,
|
mut fetch_html: F,
|
||||||
mut recircuit: R,
|
mut recircuit: R,
|
||||||
transient_max_attempts: u32,
|
transient_max_attempts: u32,
|
||||||
unauth_max_recircuit: u32,
|
unauth_max_attempts: u32,
|
||||||
retry_delay: Duration,
|
retry_delay: Duration,
|
||||||
probe_url_for_msg: &str,
|
probe_url_for_msg: &str,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
@@ -224,37 +229,38 @@ where
|
|||||||
R: FnMut() -> RFut,
|
R: FnMut() -> RFut,
|
||||||
RFut: std::future::Future<Output = ()>,
|
RFut: std::future::Future<Output = ()>,
|
||||||
{
|
{
|
||||||
|
debug_assert!(transient_max_attempts >= 1);
|
||||||
|
debug_assert!(unauth_max_attempts >= 1);
|
||||||
let mut transient_attempts = 0u32;
|
let mut transient_attempts = 0u32;
|
||||||
let mut unauth_recircuits = 0u32;
|
let mut unauth_attempts = 0u32;
|
||||||
loop {
|
loop {
|
||||||
let html = fetch_html().await?;
|
let html = fetch_html().await?;
|
||||||
match classify_probe(&html) {
|
match classify_probe(&html) {
|
||||||
SessionProbe::Ok => {
|
SessionProbe::Ok => {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
transient_attempts,
|
transient_attempts,
|
||||||
unauth_recircuits,
|
unauth_attempts,
|
||||||
"session probe ok — #logo + #avatar_menu present"
|
"session probe ok — #logo + #avatar_menu present"
|
||||||
);
|
);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
SessionProbe::Unauthenticated => {
|
SessionProbe::Unauthenticated => {
|
||||||
if unauth_recircuits < unauth_max_recircuit {
|
unauth_attempts += 1;
|
||||||
unauth_recircuits += 1;
|
if unauth_attempts >= unauth_max_attempts {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"session probe failed — #avatar_menu not present at {probe_url_for_msg} \
|
||||||
|
after {unauth_attempts} attempt(s); PHPSESSID is missing, \
|
||||||
|
expired, or revoked. Refresh CRAWLER_PHPSESSID and re-run."
|
||||||
|
));
|
||||||
|
}
|
||||||
tracing::warn!(
|
tracing::warn!(
|
||||||
attempt = unauth_recircuits,
|
attempt = unauth_attempts,
|
||||||
max = unauth_max_recircuit,
|
max_attempts = unauth_max_attempts,
|
||||||
"session probe Unauthenticated despite PHPSESSID; signaling TOR \
|
"session probe Unauthenticated despite PHPSESSID; signaling TOR \
|
||||||
NEWNYM and retrying"
|
NEWNYM and retrying"
|
||||||
);
|
);
|
||||||
recircuit().await;
|
recircuit().await;
|
||||||
tokio::time::sleep(retry_delay).await;
|
tokio::time::sleep(retry_delay).await;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return Err(anyhow!(
|
|
||||||
"session probe failed — #avatar_menu not present at {probe_url_for_msg} \
|
|
||||||
after {unauth_recircuits} TOR recircuit(s); PHPSESSID is missing, \
|
|
||||||
expired, or revoked. Refresh CRAWLER_PHPSESSID and re-run."
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
SessionProbe::Transient => {
|
SessionProbe::Transient => {
|
||||||
transient_attempts += 1;
|
transient_attempts += 1;
|
||||||
@@ -451,7 +457,8 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn probe_loop_unauth_then_ok_when_recircuit_budget_available() {
|
async fn probe_loop_unauth_then_ok_when_attempt_budget_available() {
|
||||||
|
// Budget = 3 total attempts. Unauth on call 1, ok on call 2.
|
||||||
let mut recircuits = 0u32;
|
let mut recircuits = 0u32;
|
||||||
let mut call = 0u32;
|
let mut call = 0u32;
|
||||||
run_session_probe_loop(
|
run_session_probe_loop(
|
||||||
@@ -482,7 +489,8 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn probe_loop_unauth_with_zero_recircuit_budget_fails_fast() {
|
async fn probe_loop_unauth_with_single_attempt_budget_fails_fast() {
|
||||||
|
// Budget = 1 total attempt = no retry (matches no-TOR behavior).
|
||||||
let mut recircuits = 0u32;
|
let mut recircuits = 0u32;
|
||||||
let mut call = 0u32;
|
let mut call = 0u32;
|
||||||
let err = run_session_probe_loop(
|
let err = run_session_probe_loop(
|
||||||
@@ -495,20 +503,21 @@ mod tests {
|
|||||||
async {}
|
async {}
|
||||||
},
|
},
|
||||||
3,
|
3,
|
||||||
0,
|
1,
|
||||||
Duration::from_millis(0),
|
Duration::from_millis(0),
|
||||||
"https://example/probe",
|
"https://example/probe",
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.expect_err("zero budget → fail");
|
.expect_err("budget=1 → fail-fast");
|
||||||
assert_eq!(call, 1, "no retry when budget is 0");
|
assert_eq!(call, 1, "no retry when budget is 1");
|
||||||
assert_eq!(recircuits, 0);
|
assert_eq!(recircuits, 0);
|
||||||
let msg = format!("{err:#}");
|
let msg = format!("{err:#}");
|
||||||
assert!(msg.contains("Refresh CRAWLER_PHPSESSID"), "msg: {msg}");
|
assert!(msg.contains("Refresh CRAWLER_PHPSESSID"), "msg: {msg}");
|
||||||
|
assert!(msg.contains("after 1 attempt"), "expected attempt count in msg: {msg}");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn probe_loop_unauth_after_exhausting_budget_emits_recircuit_count() {
|
async fn probe_loop_unauth_after_exhausting_budget_emits_attempt_count() {
|
||||||
let mut recircuits = 0u32;
|
let mut recircuits = 0u32;
|
||||||
let mut call = 0u32;
|
let mut call = 0u32;
|
||||||
let err = run_session_probe_loop(
|
let err = run_session_probe_loop(
|
||||||
@@ -521,17 +530,16 @@ mod tests {
|
|||||||
async {}
|
async {}
|
||||||
},
|
},
|
||||||
10, // transient budget irrelevant here
|
10, // transient budget irrelevant here
|
||||||
2,
|
3, // 3 attempts total, 2 recircuits between
|
||||||
Duration::from_millis(0),
|
Duration::from_millis(0),
|
||||||
"https://example/probe",
|
"https://example/probe",
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.expect_err("exhausts unauth budget");
|
.expect_err("exhausts unauth budget");
|
||||||
// 3 fetches total: initial + 2 recircuit-and-retry
|
|
||||||
assert_eq!(call, 3);
|
assert_eq!(call, 3);
|
||||||
assert_eq!(recircuits, 2);
|
assert_eq!(recircuits, 2);
|
||||||
let msg = format!("{err:#}");
|
let msg = format!("{err:#}");
|
||||||
assert!(msg.contains("2 TOR recircuit"), "expected recircuit count in error, got: {msg}");
|
assert!(msg.contains("after 3 attempt"), "expected attempt count in error, got: {msg}");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -548,14 +556,14 @@ mod tests {
|
|||||||
async {}
|
async {}
|
||||||
},
|
},
|
||||||
3,
|
3,
|
||||||
0,
|
1,
|
||||||
Duration::from_millis(0),
|
Duration::from_millis(0),
|
||||||
"https://example/probe",
|
"https://example/probe",
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.expect_err("transient until max → fail");
|
.expect_err("transient until max → fail");
|
||||||
assert_eq!(call, 3);
|
assert_eq!(call, 3);
|
||||||
// recircuit fires between attempts: 3 attempts → 2 recircuits.
|
// Recircuit fires between attempts: 3 attempts → 2 recircuits.
|
||||||
assert_eq!(recircuits, 2);
|
assert_eq!(recircuits, 2);
|
||||||
let msg = format!("{err:#}");
|
let msg = format!("{err:#}");
|
||||||
assert!(msg.contains("broken-page response after 3 attempts"), "msg: {msg}");
|
assert!(msg.contains("broken-page response after 3 attempts"), "msg: {msg}");
|
||||||
@@ -582,7 +590,7 @@ mod tests {
|
|||||||
async {}
|
async {}
|
||||||
},
|
},
|
||||||
3,
|
3,
|
||||||
0,
|
1,
|
||||||
Duration::from_millis(0),
|
Duration::from_millis(0),
|
||||||
"https://example/probe",
|
"https://example/probe",
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user