/ src / net.rs
net.rs
//! Network I/O for the POC.
//!
//! - `build_client`: construct the blocking HTTP client, optionally behind a proxy
//! - `fetch_html`: download an HTML document as text
//! - `download_to_dir`: manually download a URL into a directory
  5  
  6  use crate::util::pick_download_path;
  7  use anyhow::Context;
  8  use reqwest::blocking::Client;
  9  use std::fs;
 10  use std::path::{Path, PathBuf};
 11  use url::Url;
 12  
 13  /// Result of a manual download.
 14  #[derive(Debug, Clone)]
 15  pub struct DownloadResult {
 16      pub path: PathBuf,
 17      pub size_bytes: u64,
 18      pub content_type: Option<String>,
 19  }
 20  
 21  /// Build a reqwest blocking client with a reasonable UA and redirects enabled.
 22  ///
 23  /// Optionally accepts a proxy URL (e.g. socks5h://127.0.0.1:9050).
 24  pub fn build_client(proxy: Option<&str>) -> anyhow::Result<Client> {
 25      let mut builder = Client::builder()
 26          .user_agent("text_lazy_browser/0.1 (manual-media; +https://example.invalid)")
 27          .redirect(reqwest::redirect::Policy::limited(10));
 28  
 29      if let Some(proxy_url) = proxy {
 30          let proxy = reqwest::Proxy::all(proxy_url)
 31              .with_context(|| format!("Invalid proxy URL: {proxy_url}"))?;
 32          builder = builder.proxy(proxy);
 33      }
 34  
 35      Ok(builder.build()?)
 36  }
 37  
 38  /// Fetch a URL as HTML text (best effort).
 39  pub fn fetch_html(client: &Client, url: &Url) -> anyhow::Result<String> {
 40      let resp = client
 41          .get(url.clone())
 42          .header("Accept", "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8")
 43          .send()
 44          .with_context(|| format!("Failed to GET {}", url))?
 45          .error_for_status()
 46          .with_context(|| format!("HTTP error for {}", url))?;
 47  
 48      let text = resp
 49          .text()
 50          .with_context(|| format!("Failed reading body for {}", url))?;
 51      Ok(text)
 52  }
 53  
 54  /// Download a URL into `download_dir`. The filename is derived from the URL (sanitized)
 55  /// and optionally from the Content-Type header.
 56  pub fn download_to_dir(
 57      client: &Client,
 58      url: &Url,
 59      download_dir: &Path,
 60      fallback_stem: &str,
 61  ) -> anyhow::Result<DownloadResult> {
 62      let resp = client
 63          .get(url.clone())
 64          .header("Accept", "image/*,*/*;q=0.8")
 65          .send()
 66          .with_context(|| format!("Failed to GET {}", url))?
 67          .error_for_status()
 68          .with_context(|| format!("HTTP error for {}", url))?;
 69  
 70      let content_type = resp
 71          .headers()
 72          .get(reqwest::header::CONTENT_TYPE)
 73          .and_then(|v| v.to_str().ok())
 74          .map(|s| s.to_string());
 75  
 76      let bytes = resp.bytes().context("Failed reading response bytes")?;
 77      let size_bytes = bytes.len() as u64;
 78  
 79      fs::create_dir_all(download_dir).with_context(|| {
 80          format!(
 81              "Failed creating download directory {}",
 82              download_dir.display()
 83          )
 84      })?;
 85  
 86      let path = pick_download_path(download_dir, url, content_type.as_deref(), fallback_stem);
 87      fs::write(&path, &bytes).with_context(|| format!("Failed writing {}", path.display()))?;
 88  
 89      Ok(DownloadResult {
 90          path,
 91          size_bytes,
 92          content_type,
 93      })
 94  }
 95  
 96  #[cfg(test)]
 97  mod tests {
 98      use super::*;
 99  
100      #[test]
101      fn build_client_rejects_bad_proxy() {
102          let err = build_client(Some("not a url")).unwrap_err();
103          assert!(err.to_string().contains("Invalid proxy URL"));
104      }
105  }