fix(crawler): translate socks5h:// → socks5:// for Chromium --proxy-server
Chromium doesn't know the socks5h scheme (curl/reqwest convention) and bails navigations with ERR_NO_SUPPORTED_PROXIES. It does, however, send destination hostnames over SOCKS5 by default, so stripping the `h` is a pure scheme rename — remote-DNS behaviour is preserved. reqwest keeps the user's original CRAWLER_PROXY string (`socks5h://...` remains valid and meaningful for it). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -140,7 +140,8 @@ async fn spawn_crawler_daemon(
|
|||||||
// authenticated without operator action.
|
// authenticated without operator action.
|
||||||
let mut launch_opts = cfg.browser.clone();
|
let mut launch_opts = cfg.browser.clone();
|
||||||
if let Some(proxy) = &cfg.proxy {
|
if let Some(proxy) = &cfg.proxy {
|
||||||
launch_opts.extra_args.push(format!("--proxy-server={proxy}"));
|
let chromium_proxy = crate::crawler::url_utils::chromium_proxy_arg(proxy);
|
||||||
|
launch_opts.extra_args.push(format!("--proxy-server={chromium_proxy}"));
|
||||||
}
|
}
|
||||||
let on_launch = match (&cfg.phpsessid, &cfg.cookie_domain, &cfg.start_url) {
|
let on_launch = match (&cfg.phpsessid, &cfg.cookie_domain, &cfg.start_url) {
|
||||||
(Some(sid), Some(domain), Some(start_url)) => {
|
(Some(sid), Some(domain), Some(start_url)) => {
|
||||||
|
|||||||
@@ -127,7 +127,8 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
let mut options = LaunchOptions::from_env();
|
let mut options = LaunchOptions::from_env();
|
||||||
if let Some(proxy) = &proxy_url {
|
if let Some(proxy) = &proxy_url {
|
||||||
options.extra_args.push(format!("--proxy-server={proxy}"));
|
let chromium_proxy = mangalord::crawler::url_utils::chromium_proxy_arg(proxy);
|
||||||
|
options.extra_args.push(format!("--proxy-server={chromium_proxy}"));
|
||||||
}
|
}
|
||||||
let keep_open = match (keep_browser_open, options.mode) {
|
let keep_open = match (keep_browser_open, options.mode) {
|
||||||
(true, BrowserMode::Headed) => true,
|
(true, BrowserMode::Headed) => true,
|
||||||
|
|||||||
@@ -91,6 +91,26 @@ pub fn registrable_domain(url: &str) -> Option<String> {
|
|||||||
Some(format!(".{}", registrable.join(".")))
|
Some(format!(".{}", registrable.join(".")))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Normalise a SOCKS proxy URL for Chromium's `--proxy-server=` flag.
///
/// reqwest accepts both `socks5://` (resolve locally) and
/// `socks5h://` (resolve via the SOCKS server — important when the
/// proxy is TOR and we don't want the host's resolver to see the
/// target hostname). Chromium does **not** know the `socks5h` scheme
/// and refuses navigations with `ERR_NO_SUPPORTED_PROXIES`. It
/// already sends destination hostnames over SOCKS5 by default
/// regardless, so stripping the `h` is a pure scheme rename — the
/// remote-DNS behaviour is preserved.
///
/// The scheme is matched ASCII-case-insensitively (URL schemes are
/// case-insensitive per RFC 3986 §3.1), so `SOCKS5H://host:port` is
/// rewritten as well. The rewritten URL always carries the canonical
/// lowercase `socks5://` scheme; everything after `://` is copied
/// verbatim.
///
/// Any input that does not start with a `socks5h://` scheme (in any
/// letter case) passes through unchanged, including plain `socks5://`
/// and non-SOCKS schemes.
pub fn chromium_proxy_arg(proxy: &str) -> String {
    const REMOTE_DNS_SCHEME: &str = "socks5h://";

    // `str::get` yields `None` when the input is shorter than the prefix or
    // when the cut would land inside a multi-byte character, so this never
    // panics on odd input — it simply falls through to the pass-through arm.
    match proxy.get(..REMOTE_DNS_SCHEME.len()) {
        Some(scheme) if scheme.eq_ignore_ascii_case(REMOTE_DNS_SCHEME) => {
            // The `get` above proved this offset is a valid char boundary.
            let rest = &proxy[REMOTE_DNS_SCHEME.len()..];
            format!("socks5://{rest}")
        }
        _ => proxy.to_string(),
    }
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -191,4 +211,34 @@ mod tests {
|
|||||||
Some("[2001:db8::1]")
|
Some("[2001:db8::1]")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn chromium_proxy_arg_strips_socks5h_to_socks5() {
    // Regression guard: handing a socks5h:// URL to Chromium yields
    // ERR_NO_SUPPORTED_PROXIES at navigation time, so the helper must
    // rename the scheme to socks5:// and keep the rest intact.
    let cases = [
        ("socks5h://127.0.0.1:9050", "socks5://127.0.0.1:9050"),
        ("socks5h://tor:9050", "socks5://tor:9050"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(chromium_proxy_arg(input), expected);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn chromium_proxy_arg_passes_socks5_unchanged() {
    // A plain socks5:// URL must come back exactly as it went in.
    let rewritten = chromium_proxy_arg("socks5://127.0.0.1:9050");
    assert_eq!(rewritten, "socks5://127.0.0.1:9050");
}
|
||||||
|
|
||||||
|
#[test]
fn chromium_proxy_arg_passes_non_socks_unchanged() {
    // Non-SOCKS schemes are outside the helper's remit and pass through untouched.
    let rewritten = chromium_proxy_arg("http://proxy.example:8080");
    assert_eq!(rewritten, "http://proxy.example:8080");
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user