// search.rs
1 //! SearXNG search integration (JSON API). 2 //! 3 //! This module is intentionally small and side-effect free (aside from the HTTP request). 4 //! It’s meant to be called explicitly by the user (e.g. “Search” screen in the TUI), 5 //! consistent with the app’s “manual network actions” philosophy. 6 //! 7 //! Requires enabling JSON output on the SearXNG instance. Some public instances disable it. 8 //! 9 //! API shape used: 10 //! GET {searx_base}/search?q=...&format=json&pageno=1 11 //! 12 //! Docs: https://docs.searxng.org/dev/search_api.html 13 14 use anyhow::{anyhow, Context}; 15 use reqwest::blocking::Client; 16 use serde::Deserialize; 17 use url::Url; 18 19 /// A single search result. 20 #[derive(Debug, Clone, PartialEq, Eq)] 21 pub struct SearchHit { 22 /// 1-based index (for user-friendly display). 23 pub index: usize, 24 pub title: String, 25 pub url: Url, 26 pub content: String, 27 pub engine: Option<String>, 28 } 29 30 /// Parameters for a SearXNG query. 31 #[derive(Debug, Clone)] 32 pub struct SearchParams<'a> { 33 pub query: &'a str, 34 pub page: usize, 35 /// Optional language code (e.g. "en", "de"). If None, SearXNG defaults apply. 36 pub language: Option<&'a str>, 37 /// Optional safesearch level, typically 0..=2 depending on instance. 38 pub safesearch: Option<u8>, 39 } 40 41 impl<'a> SearchParams<'a> { 42 pub fn new(query: &'a str) -> Self { 43 Self { 44 query, 45 page: 1, 46 language: None, 47 safesearch: None, 48 } 49 } 50 } 51 52 /// Perform a search against a SearXNG instance using JSON output. 53 /// 54 /// `searx_base` examples: 55 /// - `http://localhost:8080` 56 /// - `https://search.example.com/` 57 /// - `https://example.com/searxng/` 58 /// 59 /// Returns results with validated URLs. 
60 pub fn search( 61 client: &Client, 62 searx_base: &Url, 63 params: SearchParams<'_>, 64 ) -> anyhow::Result<Vec<SearchHit>> { 65 let url = build_search_url(searx_base, ¶ms)?; 66 let resp = client 67 .get(url.clone()) 68 .header("Accept", "application/json") 69 .send() 70 .with_context(|| format!("Failed to GET {}", url))?; 71 72 // Handle common “JSON disabled” behavior: HTTP 403. 73 let status = resp.status(); 74 if status == reqwest::StatusCode::FORBIDDEN { 75 let body = resp.text().unwrap_or_default(); 76 return Err(anyhow!( 77 "SearXNG returned 403 Forbidden. This often means JSON output is disabled on that instance.\n\ 78 Enable it by adding `json` to `search.formats` in SearXNG settings.yml, then restart.\n\ 79 Response body (truncated): {}", 80 truncate(&body, 240) 81 )); 82 } 83 if status == reqwest::StatusCode::TOO_MANY_REQUESTS { 84 return Err(anyhow!("SearXNG rate-limited the request (429 Too Many Requests). Try again later or self-host.")); 85 } 86 87 let resp = resp 88 .error_for_status() 89 .with_context(|| format!("HTTP error for {}", url))?; 90 91 let body = resp.text().context("Failed reading JSON response body")?; 92 parse_search_json(&body).context("Failed to parse SearXNG JSON response") 93 } 94 95 /// Build the full URL for a SearXNG JSON search request. 
96 pub fn build_search_url(searx_base: &Url, params: &SearchParams<'_>) -> anyhow::Result<Url> { 97 let mut url = searx_base 98 .join("search") 99 .with_context(|| format!("Invalid SearXNG base URL: {}", searx_base))?; 100 101 { 102 let mut qp = url.query_pairs_mut(); 103 qp.append_pair("q", params.query); 104 qp.append_pair("format", "json"); 105 qp.append_pair("pageno", ¶ms.page.max(1).to_string()); 106 if let Some(lang) = params.language { 107 qp.append_pair("language", lang); 108 } 109 if let Some(safe) = params.safesearch { 110 qp.append_pair("safesearch", &safe.to_string()); 111 } 112 } 113 114 Ok(url) 115 } 116 117 /* ------------------------------- parsing ------------------------------- */ 118 119 #[derive(Debug, Deserialize)] 120 struct SearxResponse { 121 #[serde(default)] 122 results: Vec<SearxResult>, 123 } 124 125 #[derive(Debug, Deserialize)] 126 struct SearxResult { 127 title: Option<String>, 128 url: Option<String>, 129 content: Option<String>, 130 engine: Option<String>, 131 } 132 133 /// Parse JSON response into hits (pure function, easy to test). 
134 pub fn parse_search_json(json: &str) -> anyhow::Result<Vec<SearchHit>> { 135 let parsed: SearxResponse = serde_json::from_str(json).context("invalid JSON")?; 136 137 let mut hits = Vec::new(); 138 for r in parsed.results { 139 let title = r.title.unwrap_or_default().trim().to_string(); 140 let url_str = match r.url { 141 Some(u) if !u.trim().is_empty() => u, 142 _ => continue, 143 }; 144 let url = match Url::parse(url_str.trim()) { 145 Ok(u) => u, 146 Err(_) => continue, 147 }; 148 let content = r.content.unwrap_or_default().trim().to_string(); 149 let engine = r.engine.and_then(|e| { 150 let t = e.trim().to_string(); 151 if t.is_empty() { 152 None 153 } else { 154 Some(t) 155 } 156 }); 157 158 let index = hits.len() + 1; 159 hits.push(SearchHit { 160 index, 161 title, 162 url, 163 content, 164 engine, 165 }); 166 } 167 168 Ok(hits) 169 } 170 171 fn truncate(s: &str, max: usize) -> String { 172 if s.len() <= max { 173 return s.to_string(); 174 } 175 let mut out = s[..max].to_string(); 176 out.push('…'); 177 out 178 } 179 180 #[cfg(test)] 181 mod tests { 182 use super::*; 183 use pretty_assertions::assert_eq; 184 185 #[test] 186 fn build_search_url_basic() { 187 let base = Url::parse("http://localhost:8080").unwrap(); 188 let params = SearchParams::new("hello world"); 189 let url = build_search_url(&base, ¶ms).unwrap(); 190 assert!(url.as_str().starts_with("http://localhost:8080/search?")); 191 assert!(url.as_str().contains("q=hello+world")); 192 assert!(url.as_str().contains("format=json")); 193 assert!(url.as_str().contains("pageno=1")); 194 } 195 196 #[test] 197 fn build_search_url_with_subpath() { 198 let base = Url::parse("https://example.com/searxng/").unwrap(); 199 let mut params = SearchParams::new("rust"); 200 params.page = 2; 201 params.language = Some("en"); 202 params.safesearch = Some(1); 203 let url = build_search_url(&base, ¶ms).unwrap(); 204 assert!(url 205 .as_str() 206 .starts_with("https://example.com/searxng/search?")); 207 
assert!(url.as_str().contains("q=rust")); 208 assert!(url.as_str().contains("pageno=2")); 209 assert!(url.as_str().contains("language=en")); 210 assert!(url.as_str().contains("safesearch=1")); 211 } 212 213 #[test] 214 fn parse_search_json_extracts_hits() { 215 let sample = r#" 216 { 217 "query": "test", 218 "results": [ 219 {"title": "Example", "url": "https://example.org", "content": "snippet", "engine": "dummy"}, 220 {"title": "No URL", "content": "skip"}, 221 {"title": "Bad URL", "url": "not a url", "content": "skip"} 222 ] 223 }"#; 224 225 let hits = parse_search_json(sample).unwrap(); 226 assert_eq!(hits.len(), 1); 227 assert_eq!(hits[0].index, 1); 228 assert_eq!(hits[0].title, "Example"); 229 assert_eq!(hits[0].url.as_str(), "https://example.org/"); 230 assert_eq!(hits[0].content, "snippet"); 231 assert_eq!(hits[0].engine.as_deref(), Some("dummy")); 232 } 233 }