fix(crawler): redact TorAuth::Password in Debug, drop NEWNYM info→debug

The startup log line in app.rs and bin/crawler.rs `?t`-debug-formats
the TorController, which through the derived Debug on TorAuth would
expand TorAuth::Password(p) and leak the plaintext password to logs.
Implement Debug manually on TorAuth — None / Password(<redacted>) /
Cookie(<path>) — and lock the redaction with a regression test.

Drop the per-NEWNYM success log from info to debug: a busy crawl
rotates circuits many times per minute. Failed NEWNYMs already log
at warn — those stay loud.

Tightens the closed_connection_mid_reply_is_an_error assertion which
was tautological (`closed connection` OR `AUTHENTICATE`) by driving
the mock to read the AUTH line then drop, exercising only the
EOF-mid-reply path.

Audit ref: #7, #9, nit on tautological test.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-05-31 20:25:36 +02:00
parent c30c7a546f
commit d47e832613

View File

@@ -44,13 +44,28 @@ const READ_TIMEOUT: Duration = Duration::from_secs(5);
/// reachable (different gid, no shared volume, etc.). `None` matches a /// reachable (different gid, no shared volume, etc.). `None` matches a
/// torrc with no `CookieAuthentication 1` and no `HashedControlPassword` /// torrc with no `CookieAuthentication 1` and no `HashedControlPassword`
/// — useful for local experimentation, not for production. /// — useful for local experimentation, not for production.
#[derive(Debug, Clone)] ///
/// `Debug` is implemented manually to redact the password (and the
/// cookie path, which is non-sensitive but uninteresting in logs).
/// Don't add `#[derive(Debug)]` — the controller is `?`-logged at
/// startup and a derive would expand the password into the trace.
#[derive(Clone)]
pub enum TorAuth { pub enum TorAuth {
None, None,
Password(String), Password(String),
Cookie(PathBuf), Cookie(PathBuf),
} }
impl std::fmt::Debug for TorAuth {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TorAuth::None => f.write_str("None"),
TorAuth::Password(_) => f.write_str("Password(<redacted>)"),
TorAuth::Cookie(_) => f.write_str("Cookie(<path>)"),
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct TorController { pub struct TorController {
/// `host:port` string. Kept as a string (not a `SocketAddr`) so /// `host:port` string. Kept as a string (not a `SocketAddr`) so
@@ -114,7 +129,9 @@ impl TorController {
// QUIT is courtesy; ignore errors — the daemon may close the // QUIT is courtesy; ignore errors — the daemon may close the
// socket before our QUIT lands and that's perfectly fine. // socket before our QUIT lands and that's perfectly fine.
let _ = write_line(&mut write, "QUIT").await; let _ = write_line(&mut write, "QUIT").await;
tracing::info!(addr = %self.addr, "TOR NEWNYM signaled"); // Debug-level: a busy crawl can rotate circuits many times per
// minute, INFO is too chatty. Failures still log at WARN.
tracing::debug!(addr = %self.addr, "TOR NEWNYM signaled");
Ok(()) Ok(())
} }
@@ -313,19 +330,27 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn closed_connection_mid_reply_is_an_error() { async fn closed_connection_mid_reply_is_an_error() {
// Listener accepts and immediately drops without replying. // Listener accepts the AUTH line then drops without replying
// this exercises the EOF-mid-reply path in expect_250 (rather
// than tor's own error replies which are covered elsewhere).
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap().to_string(); let addr = listener.local_addr().unwrap().to_string();
tokio::spawn(async move { tokio::spawn(async move {
let _ = listener.accept().await; if let Ok((sock, _)) = listener.accept().await {
// Drop the accepted socket immediately. let (r, _w) = sock.into_split();
let mut r = BufReader::new(r);
let mut line = String::new();
let _ = r.read_line(&mut line).await; // read AUTH, ignore
// Drop _w (and the read half via scope exit) so the
// peer sees an immediate EOF on the next read.
}
}); });
let controller = TorController::new(addr, TorAuth::None); let controller = TorController::new(addr, TorAuth::None);
let err = controller.new_identity().await.expect_err("should fail"); let err = controller.new_identity().await.expect_err("should fail");
let msg = format!("{err:#}"); let msg = format!("{err:#}");
assert!( assert!(
msg.contains("closed connection") || msg.contains("AUTHENTICATE"), msg.contains("closed connection"),
"expected close-or-AUTH error, got: {msg}" "expected EOF-mid-reply error, got: {msg}"
); );
} }
@@ -395,6 +420,25 @@ mod tests {
assert_eq!(escape_quoted(r#"a"b\c"#), r#"a\"b\\c"#); assert_eq!(escape_quoted(r#"a"b\c"#), r#"a\"b\\c"#);
} }
#[test]
fn debug_format_redacts_password_and_cookie_path() {
// Regression: app.rs / bin/crawler.rs log the controller at
// startup via `tracing::info!(?t, ...)`. A derived Debug on
// TorAuth would expand TorAuth::Password(p) and leak the
// plaintext into logs.
let c = TorController::new("tor:9051", TorAuth::Password("super-secret".into()));
let dbg = format!("{c:?}");
assert!(!dbg.contains("super-secret"), "password leaked: {dbg}");
assert!(dbg.contains("<redacted>"), "expected <redacted>, got: {dbg}");
let c = TorController::new(
"tor:9051",
TorAuth::Cookie("/var/lib/tor/control_auth_cookie".into()),
);
let dbg = format!("{c:?}");
assert!(!dbg.contains("control_auth_cookie"), "cookie path leaked: {dbg}");
}
#[test] #[test]
fn hex_encode_zero_pads_low_bytes() { fn hex_encode_zero_pads_low_bytes() {
assert_eq!(hex_encode(&[0x00, 0x0f, 0xff]), "000fff"); assert_eq!(hex_encode(&[0x00, 0x0f, 0xff]), "000fff");