From d47e8326136c4d5fda52d86dbcd619cc6e987c1d Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sun, 31 May 2026 20:25:36 +0200 Subject: [PATCH] =?UTF-8?q?fix(crawler):=20redact=20TorAuth::Password=20in?= =?UTF-8?q?=20Debug,=20drop=20NEWNYM=20info=E2=86=92debug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The startup log line in app.rs and bin/crawler.rs `?t`-debug-formats the TorController, which through the derived Debug on TorAuth would expand TorAuth::Password(p) and leak the plaintext password to logs. Implement Debug manually on TorAuth — None / Password() / Cookie() — and lock the redaction with a regression test. Drop the per-NEWNYM success log from info to debug: a busy crawl rotates circuits many times per minute. Failed NEWNYMs already log at warn — those stay loud. Tightens the closed_connection_mid_reply_is_an_error assertion which was tautological (`closed connection` OR `AUTHENTICATE`) by driving the mock to read the AUTH line then drop, exercising only the EOF-mid-reply path. Audit ref: #7, #9, nit on tautological test. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/src/crawler/tor.rs | 58 +++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/backend/src/crawler/tor.rs b/backend/src/crawler/tor.rs index 777e676..acb4773 100644 --- a/backend/src/crawler/tor.rs +++ b/backend/src/crawler/tor.rs @@ -44,13 +44,28 @@ const READ_TIMEOUT: Duration = Duration::from_secs(5); /// reachable (different gid, no shared volume, etc.). `None` matches a /// torrc with no `CookieAuthentication 1` and no `HashedControlPassword` /// — useful for local experimentation, not for production. -#[derive(Debug, Clone)] +/// +/// `Debug` is implemented manually to redact the password (and the +/// cookie path, which is non-sensitive but uninteresting in logs). +/// Don't add `#[derive(Debug)]` — the controller is `?`-logged at +/// startup and a derive would expand the password into the trace. +#[derive(Clone)] pub enum TorAuth { None, Password(String), Cookie(PathBuf), } +impl std::fmt::Debug for TorAuth { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TorAuth::None => f.write_str("None"), + TorAuth::Password(_) => f.write_str("Password()"), + TorAuth::Cookie(_) => f.write_str("Cookie()"), + } + } +} + #[derive(Debug, Clone)] pub struct TorController { /// `host:port` string. Kept as a string (not a `SocketAddr`) so @@ -114,7 +129,9 @@ impl TorController { // QUIT is courtesy; ignore errors — the daemon may close the // socket before our QUIT lands and that's perfectly fine. let _ = write_line(&mut write, "QUIT").await; - tracing::info!(addr = %self.addr, "TOR NEWNYM signaled"); + // Debug-level: a busy crawl can rotate circuits many times per + // minute, INFO is too chatty. Failures still log at WARN. + tracing::debug!(addr = %self.addr, "TOR NEWNYM signaled"); Ok(()) } @@ -313,19 +330,27 @@ mod tests { #[tokio::test] async fn closed_connection_mid_reply_is_an_error() { - // Listener accepts and immediately drops without replying. + // Listener accepts the AUTH line then drops without replying — + // this exercises the EOF-mid-reply path in expect_250 (rather + // than tor's own error replies which are covered elsewhere). let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap().to_string(); tokio::spawn(async move { - let _ = listener.accept().await; - // Drop the accepted socket immediately. + if let Ok((sock, _)) = listener.accept().await { + let (r, _w) = sock.into_split(); + let mut r = BufReader::new(r); + let mut line = String::new(); + let _ = r.read_line(&mut line).await; // read AUTH, ignore + // Drop _w (and the read half via scope exit) so the + // peer sees an immediate EOF on the next read. + } }); let controller = TorController::new(addr, TorAuth::None); let err = controller.new_identity().await.expect_err("should fail"); let msg = format!("{err:#}"); assert!( - msg.contains("closed connection") || msg.contains("AUTHENTICATE"), - "expected close-or-AUTH error, got: {msg}" + msg.contains("closed connection"), + "expected EOF-mid-reply error, got: {msg}" ); } @@ -395,6 +420,25 @@ mod tests { assert_eq!(escape_quoted(r#"a"b\c"#), r#"a\"b\\c"#); } + #[test] + fn debug_format_redacts_password_and_cookie_path() { + // Regression: app.rs / bin/crawler.rs log the controller at + // startup via `tracing::info!(?t, ...)`. A derived Debug on + // TorAuth would expand TorAuth::Password(p) and leak the + // plaintext into logs. + let c = TorController::new("tor:9051", TorAuth::Password("super-secret".into())); + let dbg = format!("{c:?}"); + assert!(!dbg.contains("super-secret"), "password leaked: {dbg}"); + assert!(dbg.contains(""), "expected , got: {dbg}"); + + let c = TorController::new( + "tor:9051", + TorAuth::Cookie("/var/lib/tor/control_auth_cookie".into()), + ); + let dbg = format!("{c:?}"); + assert!(!dbg.contains("control_auth_cookie"), "cookie path leaked: {dbg}"); + } + #[test] fn hex_encode_zero_pads_low_bytes() { assert_eq!(hex_encode(&[0x00, 0x0f, 0xff]), "000fff");