fix(crawler): translate socks5h:// → socks5:// for Chromium --proxy-server
Chromium doesn't know the socks5h scheme (curl/reqwest convention) and bails navigations with ERR_NO_SUPPORTED_PROXIES. It does, however, send destination hostnames over SOCKS5 by default, so stripping the `h` is a pure scheme rename — remote-DNS behaviour is preserved. reqwest keeps the user's original CRAWLER_PROXY string (`socks5h://...` remains valid and meaningful for it). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -91,6 +91,26 @@ pub fn registrable_domain(url: &str) -> Option<String> {
|
||||
Some(format!(".{}", registrable.join(".")))
|
||||
}
|
||||
|
||||
/// Normalise a SOCKS proxy URL for Chromium's `--proxy-server=` flag.
///
/// reqwest accepts both `socks5://` (resolve locally) and `socks5h://`
/// (resolve via the SOCKS server — important when the proxy is TOR and we
/// don't want the host's resolver to see the target hostname). Chromium
/// does **not** know the `socks5h` scheme and refuses navigations with
/// `ERR_NO_SUPPORTED_PROXIES`. It already sends destination hostnames over
/// SOCKS5 by default regardless, so rewriting the scheme to `socks5` is a
/// pure rename — the remote-DNS behaviour is preserved.
///
/// The scheme is matched ASCII-case-insensitively (URI schemes are
/// case-insensitive), so `SOCKS5H://host` is rewritten as well; the emitted
/// scheme is always lowercase `socks5://`. The part after `://` is copied
/// verbatim.
///
/// Any other input — including `socks5://` and non-SOCKS schemes — is
/// returned unchanged.
pub fn chromium_proxy_arg(proxy: &str) -> String {
    const SOCKS5H: &str = "socks5h://";
    let cut = SOCKS5H.len();

    // `str::get` returns `None` when the input is shorter than the prefix or
    // when `cut` falls inside a multi-byte character, so arbitrary input can
    // never cause a slicing panic here.
    match (proxy.get(..cut), proxy.get(cut..)) {
        (Some(scheme), Some(rest)) if scheme.eq_ignore_ascii_case(SOCKS5H) => {
            format!("socks5://{rest}")
        }
        _ => proxy.to_string(),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -191,4 +211,34 @@ mod tests {
|
||||
Some("[2001:db8::1]")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn chromium_proxy_arg_strips_socks5h_to_socks5() {
    // Regression guard: handing a socks5h:// URL to Chromium makes every
    // navigation fail with ERR_NO_SUPPORTED_PROXIES, so the scheme must be
    // rewritten for both IP-literal and hostname proxies.
    let cases = [
        ("socks5h://127.0.0.1:9050", "socks5://127.0.0.1:9050"),
        ("socks5h://tor:9050", "socks5://tor:9050"),
    ];

    for (input, expected) in cases {
        assert_eq!(chromium_proxy_arg(input), expected);
    }
}
|
||||
|
||||
#[test]
fn chromium_proxy_arg_passes_socks5_unchanged() {
    // A plain socks5:// URL is already in the form Chromium accepts, so the
    // output must equal the input.
    let already_valid = "socks5://127.0.0.1:9050";
    let produced = chromium_proxy_arg(already_valid);
    assert_eq!(produced, already_valid);
}
|
||||
|
||||
#[test]
fn chromium_proxy_arg_passes_non_socks_unchanged() {
    // Non-SOCKS schemes are outside this helper's remit and must come back
    // exactly as supplied.
    let http_proxy = "http://proxy.example:8080";
    let produced = chromium_proxy_arg(http_proxy);
    assert_eq!(produced, http_proxy);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user