//! `HttpServiceImpl` — reqwest-backed outbound HTTP for the v1.1.4
//! `http::*` SDK.
//!
//! Mirrors the v1.1.1+ stateful-service shape (`KvServiceImpl`):
//! script-as-gate authz (`AppHttpRequest`, skipped when
//! `cx.principal` is `None`), with the backend talking to the network
//! instead of Postgres. The reqwest client is built once at startup
//! with the [`crate::ssrf::SsrfResolver`] wired in via
//! `dns_resolver`, so the SSRF deny-list applies at every connection —
//! including each redirect hop, since redirects are followed manually
//! through the same client.
//!
//! Layering vs the raw client:
//! 1. URL validation: scheme must be http/https; ports 22/25/465/587
//!    are blocked. (IP-level filtering is the resolver's job.)
//! 2. Body-size caps on both request and response (stream-with-cap on
//!    the response, checking `Content-Length` first).
//! 3. Total-request timeout (default 30s, max 60s) on top of the
//!    client's 10s connect timeout.
//! 4. Default `User-Agent` unless the caller set one.
//!
//! Bodies/headers are never logged (PII): only url + status + duration
//! at debug level.

use std::collections::BTreeMap;
use std::env;
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use picloud_shared::{HttpError, HttpRequest, HttpResponse, HttpService, SdkCallCx};
use reqwest::header::{HeaderMap, HeaderName, HeaderValue, CONTENT_TYPE, LOCATION, USER_AGENT};
use reqwest::{Client, Method, StatusCode};

use crate::authz::{self, AuthzRepo, Capability};
use crate::ssrf::{self, SsrfPolicy, SSRF_BLOCK_PREFIX};

/// Default per-request timeout (ms) when the script omits `timeout_ms`.
pub const DEFAULT_TIMEOUT_MS: u32 = 30_000;
/// Hard ceiling on the per-request timeout. Values above this are
/// rejected by the bridge (not silently clamped).
pub const MAX_TIMEOUT_MS: u32 = 60_000;
/// Default redirect cap.
pub const DEFAULT_MAX_REDIRECTS: u32 = 5;
/// Hard ceiling on redirects.
pub const MAX_REDIRECTS_CEILING: u32 = 10; /// 10 MB default body cap on both directions. const DEFAULT_BODY_LIMIT_BYTES: usize = 10 * 1024 * 1024; /// DNS + connect + TLS hard cap. const CONNECT_TIMEOUT: Duration = Duration::from_secs(10); /// Outbound-HTTP tunables. Env-overridable following the same pattern /// as `TriggerConfig::from_env`. #[derive(Debug, Clone, Copy)] pub struct HttpConfig { /// Disables the SSRF deny-list entirely. Dev/test only — the binary /// logs a startup warning when this is set. pub allow_private: bool, pub max_request_body_bytes: usize, pub max_response_body_bytes: usize, } impl HttpConfig { #[must_use] pub const fn conservative() -> Self { Self { allow_private: false, max_request_body_bytes: DEFAULT_BODY_LIMIT_BYTES, max_response_body_bytes: DEFAULT_BODY_LIMIT_BYTES, } } #[must_use] pub fn from_env() -> Self { let mut c = Self::conservative(); if let Ok(v) = env::var("PICLOUD_HTTP_ALLOW_PRIVATE") { c.allow_private = matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes"); } load_usize( &mut c.max_request_body_bytes, "PICLOUD_HTTP_MAX_REQUEST_BODY_BYTES", ); load_usize( &mut c.max_response_body_bytes, "PICLOUD_HTTP_MAX_RESPONSE_BODY_BYTES", ); c } } impl Default for HttpConfig { fn default() -> Self { Self::conservative() } } fn load_usize(dst: &mut usize, key: &str) { if let Ok(v) = env::var(key) { match v.parse::() { Ok(n) => *dst = n, Err(e) => { tracing::warn!(env = key, error = %e, "ignoring invalid http-config value"); } } } } pub struct HttpServiceImpl { client: Client, authz: Arc, config: HttpConfig, /// Same policy wired into the DNS resolver. Held here too because /// reqwest only routes *hostnames* through the custom resolver — a /// URL with a **literal IP** host bypasses it, so literal IPs are /// checked directly at URL-validation time. policy: SsrfPolicy, } impl HttpServiceImpl { /// Build the service, constructing the reqwest client with the SSRF /// resolver. 
Redirects are followed manually (so per-request limits /// are honored and every hop re-resolves through the SSRF /// resolver), hence `redirect(Policy::none())`. /// /// # Panics /// /// Panics if the reqwest client fails to build — this is a /// startup-time invariant, not a runtime path. #[must_use] pub fn new(config: HttpConfig, authz: Arc) -> Self { let policy = SsrfPolicy::new(config.allow_private); let client = Client::builder() .dns_resolver(ssrf::resolver(policy)) .connect_timeout(CONNECT_TIMEOUT) .redirect(reqwest::redirect::Policy::none()) .build() .expect("build outbound http client"); Self { client, authz, config, policy, } } async fn check_request(&self, cx: &SdkCallCx) -> Result<(), HttpError> { if let Some(ref principal) = cx.principal { authz::require( &*self.authz, principal, Capability::AppHttpRequest(cx.app_id), ) .await .map_err(|_| HttpError::Forbidden)?; } Ok(()) } } #[async_trait] impl HttpService for HttpServiceImpl { async fn request(&self, cx: &SdkCallCx, req: HttpRequest) -> Result { self.check_request(cx).await?; // Request body cap. if let Some(ref body) = req.body { if body.len() > self.config.max_request_body_bytes { return Err(HttpError::BodyTooLarge("request")); } } let timeout = Duration::from_millis(u64::from(req.timeout_ms.min(MAX_TIMEOUT_MS))); let started = std::time::Instant::now(); let url_for_log = req.url.clone(); // Whole-request budget (DNS + connect + TLS + all redirect hops // + body read). Connect alone is further bounded by the // client's CONNECT_TIMEOUT. 
let outcome = match tokio::time::timeout(timeout, self.run(req)).await { Ok(r) => r, Err(_) => Err(HttpError::Timeout), }; let duration_ms = u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX); match &outcome { Ok(resp) => tracing::debug!( url = %url_for_log, status = resp.status, duration_ms, "outbound http" ), Err(err) => tracing::debug!( url = %url_for_log, error = %err, duration_ms, "outbound http failed" ), } outcome } } impl HttpServiceImpl { /// Core request path: validate, build headers, follow redirects /// manually, read the response body with a cap. async fn run(&self, req: HttpRequest) -> Result { let method = Method::from_bytes(req.method.as_bytes()) .map_err(|_| HttpError::Backend(format!("invalid method: {}", req.method)))?; let mut current = url::Url::parse(&req.url) .map_err(|e| HttpError::InvalidUrl(format!("{}: {e}", req.url)))?; validate_url(¤t, self.policy)?; let mut header_map = build_headers(&req, ¤t)?; let mut method = method; let mut body = req.body.clone(); let mut redirects: u32 = 0; let max_redirects = req.max_redirects.min(MAX_REDIRECTS_CEILING); loop { // Re-validate scheme/port (and literal-IP SSRF) on each hop. // Hostname IP filtering is the resolver's job and runs // automatically at connect time. 
validate_url(¤t, self.policy)?; let mut rb = self.client.request(method.clone(), current.clone()); rb = rb.headers(header_map.clone()); if let Some(ref b) = body { rb = rb.body(b.clone()); } let resp = rb.send().await.map_err(map_reqwest_err)?; let status = resp.status(); if req.follow_redirects && is_redirect(status) { if let Some(loc) = resp.headers().get(LOCATION) { if redirects >= max_redirects { return Err(HttpError::Backend(format!( "too many redirects (max {max_redirects})" ))); } redirects += 1; let loc_str = loc.to_str().map_err(|_| { HttpError::Backend("redirect Location not valid UTF-8".into()) })?; current = current .join(loc_str) .map_err(|e| HttpError::InvalidUrl(format!("redirect target: {e}")))?; // 303 always → GET; 301/302 historically downgrade // POST→GET (matches browsers). 307/308 preserve. if matches!(status.as_u16(), 301..=303) { method = Method::GET; body = None; header_map.remove(CONTENT_TYPE); } continue; } } return self.read_capped(resp).await; } } async fn read_capped(&self, resp: reqwest::Response) -> Result { let status = resp.status().as_u16(); let mut headers = BTreeMap::new(); for (name, value) in resp.headers() { // Header names lowercased per the documented response shape. headers.insert( name.as_str().to_ascii_lowercase(), value.to_str().unwrap_or("").to_string(), ); } let cap = self.config.max_response_body_bytes; if let Some(len) = resp.content_length() { if len > cap as u64 { return Err(HttpError::BodyTooLarge("response")); } } let mut buf: Vec = Vec::new(); let mut resp = resp; while let Some(chunk) = resp.chunk().await.map_err(map_reqwest_err)? 
{ if buf.len() + chunk.len() > cap { return Err(HttpError::BodyTooLarge("response")); } buf.extend_from_slice(&chunk); } let body_raw = String::from_utf8_lossy(&buf).into_owned(); Ok(HttpResponse { status, headers, body_raw, }) } } /// http/https only; block the SSH + SMTP ports; apply the SSRF policy /// to **literal-IP** hosts (hostnames are filtered by the DNS resolver /// at connect time, but literal IPs never reach the resolver). fn validate_url(url: &url::Url, policy: SsrfPolicy) -> Result<(), HttpError> { match url.scheme() { "http" | "https" => {} other => return Err(HttpError::BlockedScheme(other.to_string())), } match url.host() { None => return Err(HttpError::InvalidUrl("missing host".into())), Some(url::Host::Ipv4(ip)) => { policy .check(std::net::IpAddr::V4(ip)) .map_err(|reason| HttpError::Ssrf(reason.to_string()))?; } Some(url::Host::Ipv6(ip)) => { policy .check(std::net::IpAddr::V6(ip)) .map_err(|reason| HttpError::Ssrf(reason.to_string()))?; } Some(url::Host::Domain(_)) => {} } let port = url .port_or_known_default() .unwrap_or(if url.scheme() == "https" { 443 } else { 80 }); if matches!(port, 22 | 25 | 465 | 587) { return Err(HttpError::BlockedPort(port)); } Ok(()) } /// Build the request header map: merge caller headers, then apply the /// default `User-Agent` (unless overridden) and the bridge-chosen /// `Content-Type` (unless overridden). 
fn build_headers(req: &HttpRequest, _url: &url::Url) -> Result { let mut map = HeaderMap::new(); let mut has_user_agent = false; let mut has_content_type = false; for (k, v) in &req.headers { let name = HeaderName::from_bytes(k.as_bytes()) .map_err(|_| HttpError::Backend(format!("invalid header name: {k}")))?; let value = HeaderValue::from_str(v) .map_err(|_| HttpError::Backend(format!("invalid header value for {k}")))?; if name == USER_AGENT { has_user_agent = true; } if name == CONTENT_TYPE { has_content_type = true; } map.append(name, value); } if !has_user_agent { let script = req.script_id.as_deref().unwrap_or("unknown"); let ua = format!( "picloud/{} (script:{})", picloud_shared::PRODUCT_VERSION, script ); if let Ok(value) = HeaderValue::from_str(&ua) { map.insert(USER_AGENT, value); } } if !has_content_type { if let Some(ref ct) = req.content_type { if let Ok(value) = HeaderValue::from_str(ct) { map.insert(CONTENT_TYPE, value); } } } Ok(map) } const fn is_redirect(status: StatusCode) -> bool { matches!(status.as_u16(), 301..=303 | 307 | 308) } /// Map a reqwest error to an `HttpError`, never leaking the resolved /// IP. SSRF blocks are detected by scanning the error source chain for /// the resolver's marker prefix. fn map_reqwest_err(err: reqwest::Error) -> HttpError { if let Some(reason) = ssrf_reason(&err) { return HttpError::Ssrf(reason); } if err.is_timeout() { return HttpError::Timeout; } if err.is_connect() { return HttpError::Network("connection failed".into()); } if err.is_request() { return HttpError::Network("request failed".into()); } HttpError::Network("network error".into()) } /// Walk the error source chain looking for the SSRF marker the resolver /// embeds. Returns the category reason (no IP) when found. 
fn ssrf_reason(err: &reqwest::Error) -> Option { let mut src: Option<&(dyn std::error::Error + 'static)> = Some(err); while let Some(e) = src { let s = e.to_string(); if let Some(idx) = s.find(SSRF_BLOCK_PREFIX) { return Some(s[idx + SSRF_BLOCK_PREFIX.len()..].to_string()); } src = e.source(); } None } #[cfg(test)] mod tests { use super::*; use crate::authz::AuthzError; use async_trait::async_trait; use picloud_shared::{ AdminUserId, AppId, AppRole, ExecutionId, InstanceRole, Principal, RequestId, ScriptId, UserId, }; use std::collections::BTreeMap; use std::io::Write as _; use std::net::SocketAddr; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::TcpListener; struct AllowAuthz; #[async_trait] impl AuthzRepo for AllowAuthz { async fn membership(&self, _u: UserId, _a: AppId) -> Result, AuthzError> { Ok(Some(AppRole::Editor)) } } struct DenyAuthz; #[async_trait] impl AuthzRepo for DenyAuthz { async fn membership(&self, _u: UserId, _a: AppId) -> Result, AuthzError> { Ok(None) } } fn dev_service(authz: Arc) -> HttpServiceImpl { // allow_private so the test TcpListener on 127.0.0.1 is reachable. let mut config = HttpConfig::conservative(); config.allow_private = true; HttpServiceImpl::new(config, authz) } fn anon_cx() -> SdkCallCx { SdkCallCx { app_id: AppId::new(), script_id: ScriptId::new(), principal: None, execution_id: ExecutionId::new(), request_id: RequestId::new(), trigger_depth: 0, root_execution_id: ExecutionId::new(), is_dead_letter_handler: false, event: None, } } fn member_cx() -> SdkCallCx { let mut cx = anon_cx(); cx.principal = Some(Principal { user_id: AdminUserId::new(), instance_role: InstanceRole::Member, scopes: None, app_binding: None, }); cx } fn req(method: &str, url: String) -> HttpRequest { HttpRequest { method: method.into(), url, headers: BTreeMap::new(), body: None, content_type: None, timeout_ms: 5000, follow_redirects: true, max_redirects: 5, script_id: Some("test-script".into()), } } /// Minimal single-shot HTTP/1.1 server. 
Reads the request, runs /// `handler` to produce the raw response bytes, writes them, closes. /// Returns the bound address. async fn spawn_server(handler: F) -> SocketAddr where F: Fn(String) -> Vec + Send + Sync + 'static, { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); tokio::spawn(async move { loop { let Ok((mut sock, _)) = listener.accept().await else { break; }; let mut buf = vec![0u8; 65536]; let n = sock.read(&mut buf).await.unwrap_or(0); let request = String::from_utf8_lossy(&buf[..n]).to_string(); let response = handler(request); let _ = sock.write_all(&response).await; let _ = sock.flush().await; } }); addr } fn ok_response(body: &str, content_type: &str) -> Vec { let mut v = Vec::new(); write!( v, "HTTP/1.1 200 OK\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", body.len() ) .unwrap(); v } #[tokio::test] async fn get_round_trip() { let addr = spawn_server(|_req| ok_response("hello", "text/plain")).await; let svc = dev_service(Arc::new(AllowAuthz)); let resp = svc .request(&anon_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap(); assert_eq!(resp.status, 200); assert_eq!(resp.body_raw, "hello"); assert_eq!( resp.headers.get("content-type").map(String::as_str), Some("text/plain") ); } #[tokio::test] async fn post_sends_body_and_default_user_agent() { let addr = spawn_server(|request| { // Echo back whether the body + default UA were present. 
let has_ua = request.to_lowercase().contains("user-agent: picloud/"); let has_body = request.contains("xyzzy"); ok_response(&format!("ua={has_ua},body={has_body}"), "text/plain") }) .await; let svc = dev_service(Arc::new(AllowAuthz)); let mut r = req("POST", format!("http://{addr}/")); r.body = Some(b"xyzzy".to_vec()); r.content_type = Some("text/plain".into()); let resp = svc.request(&anon_cx(), r).await.unwrap(); assert_eq!(resp.body_raw, "ua=true,body=true"); } #[tokio::test] async fn custom_user_agent_overrides_default() { let addr = spawn_server(|request| { let has_custom = request.to_lowercase().contains("user-agent: my-agent"); let has_default = request.to_lowercase().contains("picloud/"); ok_response( &format!("custom={has_custom},default={has_default}"), "text/plain", ) }) .await; let svc = dev_service(Arc::new(AllowAuthz)); let mut r = req("GET", format!("http://{addr}/")); r.headers.insert("User-Agent".into(), "my-agent".into()); let resp = svc.request(&anon_cx(), r).await.unwrap(); assert_eq!(resp.body_raw, "custom=true,default=false"); } #[tokio::test] async fn empty_body_response() { let addr = spawn_server(|_r| { b"HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n".to_vec() }) .await; let svc = dev_service(Arc::new(AllowAuthz)); let resp = svc .request(&anon_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap(); assert_eq!(resp.status, 204); assert_eq!(resp.body_raw, ""); } #[tokio::test] async fn non_2xx_does_not_error() { let addr = spawn_server(|_r| { b"HTTP/1.1 500 Internal Server Error\r\nContent-Length: 3\r\nConnection: close\r\n\r\nerr".to_vec() }) .await; let svc = dev_service(Arc::new(AllowAuthz)); let resp = svc .request(&anon_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap(); assert_eq!(resp.status, 500); assert_eq!(resp.body_raw, "err"); } #[tokio::test] async fn response_over_content_length_cap_rejected() { let addr = spawn_server(|_r| ok_response("0123456789", "text/plain")).await; let mut 
config = HttpConfig::conservative(); config.allow_private = true; config.max_response_body_bytes = 5; // body is 10 bytes let svc = HttpServiceImpl::new(config, Arc::new(AllowAuthz)); let err = svc .request(&anon_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap_err(); assert!(matches!(err, HttpError::BodyTooLarge("response"))); } #[tokio::test] async fn response_over_cap_without_content_length_rejected_mid_stream() { // No Content-Length header → must be caught while streaming. let addr = spawn_server(|_r| { b"HTTP/1.1 200 OK\r\nConnection: close\r\n\r\n0123456789ABCDEF".to_vec() }) .await; let mut config = HttpConfig::conservative(); config.allow_private = true; config.max_response_body_bytes = 4; let svc = HttpServiceImpl::new(config, Arc::new(AllowAuthz)); let err = svc .request(&anon_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap_err(); assert!(matches!(err, HttpError::BodyTooLarge("response"))); } #[tokio::test] async fn request_body_over_cap_rejected_before_send() { let mut config = HttpConfig::conservative(); config.allow_private = true; config.max_request_body_bytes = 3; let svc = HttpServiceImpl::new(config, Arc::new(AllowAuthz)); let mut r = req("POST", "http://127.0.0.1:1/".into()); r.body = Some(b"too long".to_vec()); let err = svc.request(&anon_cx(), r).await.unwrap_err(); assert!(matches!(err, HttpError::BodyTooLarge("request"))); } #[tokio::test] async fn redirect_followed_up_to_then_throws_beyond_max() { // Server always 302s to itself → unbounded redirect loop, // bounded by max_redirects. 
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); tokio::spawn(async move { loop { let Ok((mut sock, _)) = listener.accept().await else { break; }; let mut buf = vec![0u8; 4096]; let _ = sock.read(&mut buf).await; let body = format!( "HTTP/1.1 302 Found\r\nLocation: http://{addr}/next\r\nContent-Length: 0\r\nConnection: close\r\n\r\n" ); let _ = sock.write_all(body.as_bytes()).await; } }); let svc = dev_service(Arc::new(AllowAuthz)); let mut r = req("GET", format!("http://{addr}/")); r.max_redirects = 2; let err = svc.request(&anon_cx(), r).await.unwrap_err(); assert!( matches!(err, HttpError::Backend(ref m) if m.contains("too many redirects")), "expected too-many-redirects, got {err:?}" ); } #[tokio::test] async fn scheme_rejected() { let svc = dev_service(Arc::new(AllowAuthz)); for url in ["file:///etc/passwd", "ftp://host/x", "gopher://host/"] { let err = svc .request(&anon_cx(), req("GET", url.into())) .await .unwrap_err(); match err { HttpError::BlockedScheme(s) => { assert!(url.starts_with(&s), "scheme {s} not in url {url}"); } other => panic!("expected BlockedScheme for {url}, got {other:?}"), } } } #[tokio::test] async fn ports_rejected() { let svc = dev_service(Arc::new(AllowAuthz)); for port in [22u16, 25, 465, 587] { let err = svc .request( &anon_cx(), req("GET", format!("http://example.com:{port}/")), ) .await .unwrap_err(); assert!( matches!(err, HttpError::BlockedPort(p) if p == port), "port {port} should be blocked, got {err:?}" ); } } #[tokio::test] async fn ssrf_blocks_loopback_without_allow_private() { // Default config (deny-list ON). A request to a loopback host // must surface as Ssrf, not a generic network error. 
let svc = HttpServiceImpl::new(HttpConfig::conservative(), Arc::new(AllowAuthz)); let err = svc .request(&anon_cx(), req("GET", "http://127.0.0.1:9/".into())) .await .unwrap_err(); match err { HttpError::Ssrf(reason) => { assert_eq!(reason, "loopback"); assert!(!reason.contains("127.0.0.1"), "reason must not leak the IP"); } other => panic!("expected Ssrf, got {other:?}"), } } #[tokio::test] async fn ssrf_blocks_hostname_resolving_to_loopback() { // `localhost` resolves to 127.0.0.1 / ::1 — all denied. This // exercises the DNS-resolver path (vs the literal-IP path) and // must surface as Ssrf, not a generic DNS error. let svc = HttpServiceImpl::new(HttpConfig::conservative(), Arc::new(AllowAuthz)); let err = svc .request(&anon_cx(), req("GET", "http://localhost:9/".into())) .await .unwrap_err(); assert!( matches!(err, HttpError::Ssrf(_)), "expected Ssrf for localhost, got {err:?}" ); } #[tokio::test] async fn timeout_throws() { // Server that accepts then never responds. let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); tokio::spawn(async move { if let Ok((sock, _)) = listener.accept().await { // Hold the socket open without replying. tokio::time::sleep(Duration::from_secs(30)).await; drop(sock); } }); let svc = dev_service(Arc::new(AllowAuthz)); let mut r = req("GET", format!("http://{addr}/")); r.timeout_ms = 300; let err = svc.request(&anon_cx(), r).await.unwrap_err(); assert!(matches!(err, HttpError::Timeout), "got {err:?}"); } #[tokio::test] async fn anon_skips_authz_member_without_scope_forbidden() { let addr = spawn_server(|_r| ok_response("ok", "text/plain")).await; // Anonymous principal → authz skipped even with DenyAuthz. let svc = dev_service(Arc::new(DenyAuthz)); let ok = svc .request(&anon_cx(), req("GET", format!("http://{addr}/"))) .await; assert!(ok.is_ok()); // Authenticated member with no role → Forbidden. 
let err = svc .request(&member_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap_err(); assert!(matches!(err, HttpError::Forbidden)); } #[tokio::test] async fn member_with_role_allowed() { let addr = spawn_server(|_r| ok_response("ok", "text/plain")).await; let svc = dev_service(Arc::new(AllowAuthz)); let resp = svc .request(&member_cx(), req("GET", format!("http://{addr}/"))) .await .unwrap(); assert_eq!(resp.status, 200); } }