net.rs
1 //! Network I/O for the POC. 2 //! 3 //! - `fetch_html`: download an HTML document as text 4 //! - `download_to_dir`: manual download of a URL into a directory 5 6 use crate::util::pick_download_path; 7 use anyhow::Context; 8 use reqwest::blocking::Client; 9 use std::fs; 10 use std::path::{Path, PathBuf}; 11 use url::Url; 12 13 /// Result of a manual download. 14 #[derive(Debug, Clone)] 15 pub struct DownloadResult { 16 pub path: PathBuf, 17 pub size_bytes: u64, 18 pub content_type: Option<String>, 19 } 20 21 /// Build a reqwest blocking client with a reasonable UA and redirects enabled. 22 /// 23 /// Optionally accepts a proxy URL (e.g. socks5h://127.0.0.1:9050). 24 pub fn build_client(proxy: Option<&str>) -> anyhow::Result<Client> { 25 let mut builder = Client::builder() 26 .user_agent("text_lazy_browser/0.1 (manual-media; +https://example.invalid)") 27 .redirect(reqwest::redirect::Policy::limited(10)); 28 29 if let Some(proxy_url) = proxy { 30 let proxy = reqwest::Proxy::all(proxy_url) 31 .with_context(|| format!("Invalid proxy URL: {proxy_url}"))?; 32 builder = builder.proxy(proxy); 33 } 34 35 Ok(builder.build()?) 36 } 37 38 /// Fetch a URL as HTML text (best effort). 39 pub fn fetch_html(client: &Client, url: &Url) -> anyhow::Result<String> { 40 let resp = client 41 .get(url.clone()) 42 .header("Accept", "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8") 43 .send() 44 .with_context(|| format!("Failed to GET {}", url))? 45 .error_for_status() 46 .with_context(|| format!("HTTP error for {}", url))?; 47 48 let text = resp 49 .text() 50 .with_context(|| format!("Failed reading body for {}", url))?; 51 Ok(text) 52 } 53 54 /// Download a URL into `download_dir`. The filename is derived from the URL (sanitized) 55 /// and optionally from the Content-Type header. 56 pub fn download_to_dir( 57 client: &Client, 58 url: &Url, 59 download_dir: &Path, 60 fallback_stem: &str, 61 ) -> anyhow::Result<DownloadResult> { 62 let resp = client 63 .get(url.clone()) 64 .header("Accept", "image/*,*/*;q=0.8") 65 .send() 66 .with_context(|| format!("Failed to GET {}", url))? 67 .error_for_status() 68 .with_context(|| format!("HTTP error for {}", url))?; 69 70 let content_type = resp 71 .headers() 72 .get(reqwest::header::CONTENT_TYPE) 73 .and_then(|v| v.to_str().ok()) 74 .map(|s| s.to_string()); 75 76 let bytes = resp.bytes().context("Failed reading response bytes")?; 77 let size_bytes = bytes.len() as u64; 78 79 fs::create_dir_all(download_dir).with_context(|| { 80 format!( 81 "Failed creating download directory {}", 82 download_dir.display() 83 ) 84 })?; 85 86 let path = pick_download_path(download_dir, url, content_type.as_deref(), fallback_stem); 87 fs::write(&path, &bytes).with_context(|| format!("Failed writing {}", path.display()))?; 88 89 Ok(DownloadResult { 90 path, 91 size_bytes, 92 content_type, 93 }) 94 } 95 96 #[cfg(test)] 97 mod tests { 98 use super::*; 99 100 #[test] 101 fn build_client_rejects_bad_proxy() { 102 let err = build_client(Some("not a url")).unwrap_err(); 103 assert!(err.to_string().contains("Invalid proxy URL")); 104 } 105 }