/ src / search.rs
search.rs
  1  //! SearXNG search integration (JSON API).
  2  //!
  3  //! This module is intentionally small and side-effect free (aside from the HTTP request).
  4  //! It’s meant to be called explicitly by the user (e.g. “Search” screen in the TUI),
  5  //! consistent with the app’s “manual network actions” philosophy.
  6  //!
  7  //! Requires enabling JSON output on the SearXNG instance. Some public instances disable it.
  8  //!
  9  //! API shape used:
 10  //!   GET {searx_base}/search?q=...&format=json&pageno=1
 11  //!
 12  //! Docs: https://docs.searxng.org/dev/search_api.html
 13  
 14  use anyhow::{anyhow, Context};
 15  use reqwest::blocking::Client;
 16  use serde::Deserialize;
 17  use url::Url;
 18  
 19  /// A single search result.
 20  #[derive(Debug, Clone, PartialEq, Eq)]
 21  pub struct SearchHit {
 22      /// 1-based index (for user-friendly display).
 23      pub index: usize,
 24      pub title: String,
 25      pub url: Url,
 26      pub content: String,
 27      pub engine: Option<String>,
 28  }
 29  
 30  /// Parameters for a SearXNG query.
 31  #[derive(Debug, Clone)]
 32  pub struct SearchParams<'a> {
 33      pub query: &'a str,
 34      pub page: usize,
 35      /// Optional language code (e.g. "en", "de"). If None, SearXNG defaults apply.
 36      pub language: Option<&'a str>,
 37      /// Optional safesearch level, typically 0..=2 depending on instance.
 38      pub safesearch: Option<u8>,
 39  }
 40  
 41  impl<'a> SearchParams<'a> {
 42      pub fn new(query: &'a str) -> Self {
 43          Self {
 44              query,
 45              page: 1,
 46              language: None,
 47              safesearch: None,
 48          }
 49      }
 50  }
 51  
 52  /// Perform a search against a SearXNG instance using JSON output.
 53  ///
 54  /// `searx_base` examples:
 55  /// - `http://localhost:8080`
 56  /// - `https://search.example.com/`
 57  /// - `https://example.com/searxng/`
 58  ///
 59  /// Returns results with validated URLs.
 60  pub fn search(
 61      client: &Client,
 62      searx_base: &Url,
 63      params: SearchParams<'_>,
 64  ) -> anyhow::Result<Vec<SearchHit>> {
 65      let url = build_search_url(searx_base, &params)?;
 66      let resp = client
 67          .get(url.clone())
 68          .header("Accept", "application/json")
 69          .send()
 70          .with_context(|| format!("Failed to GET {}", url))?;
 71  
 72      // Handle common “JSON disabled” behavior: HTTP 403.
 73      let status = resp.status();
 74      if status == reqwest::StatusCode::FORBIDDEN {
 75          let body = resp.text().unwrap_or_default();
 76          return Err(anyhow!(
 77              "SearXNG returned 403 Forbidden. This often means JSON output is disabled on that instance.\n\
 78               Enable it by adding `json` to `search.formats` in SearXNG settings.yml, then restart.\n\
 79               Response body (truncated): {}",
 80              truncate(&body, 240)
 81          ));
 82      }
 83      if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
 84          return Err(anyhow!("SearXNG rate-limited the request (429 Too Many Requests). Try again later or self-host."));
 85      }
 86  
 87      let resp = resp
 88          .error_for_status()
 89          .with_context(|| format!("HTTP error for {}", url))?;
 90  
 91      let body = resp.text().context("Failed reading JSON response body")?;
 92      parse_search_json(&body).context("Failed to parse SearXNG JSON response")
 93  }
 94  
 95  /// Build the full URL for a SearXNG JSON search request.
 96  pub fn build_search_url(searx_base: &Url, params: &SearchParams<'_>) -> anyhow::Result<Url> {
 97      let mut url = searx_base
 98          .join("search")
 99          .with_context(|| format!("Invalid SearXNG base URL: {}", searx_base))?;
100  
101      {
102          let mut qp = url.query_pairs_mut();
103          qp.append_pair("q", params.query);
104          qp.append_pair("format", "json");
105          qp.append_pair("pageno", &params.page.max(1).to_string());
106          if let Some(lang) = params.language {
107              qp.append_pair("language", lang);
108          }
109          if let Some(safe) = params.safesearch {
110              qp.append_pair("safesearch", &safe.to_string());
111          }
112      }
113  
114      Ok(url)
115  }
116  
117  /* ------------------------------- parsing ------------------------------- */
118  
119  #[derive(Debug, Deserialize)]
120  struct SearxResponse {
121      #[serde(default)]
122      results: Vec<SearxResult>,
123  }
124  
125  #[derive(Debug, Deserialize)]
126  struct SearxResult {
127      title: Option<String>,
128      url: Option<String>,
129      content: Option<String>,
130      engine: Option<String>,
131  }
132  
133  /// Parse JSON response into hits (pure function, easy to test).
134  pub fn parse_search_json(json: &str) -> anyhow::Result<Vec<SearchHit>> {
135      let parsed: SearxResponse = serde_json::from_str(json).context("invalid JSON")?;
136  
137      let mut hits = Vec::new();
138      for r in parsed.results {
139          let title = r.title.unwrap_or_default().trim().to_string();
140          let url_str = match r.url {
141              Some(u) if !u.trim().is_empty() => u,
142              _ => continue,
143          };
144          let url = match Url::parse(url_str.trim()) {
145              Ok(u) => u,
146              Err(_) => continue,
147          };
148          let content = r.content.unwrap_or_default().trim().to_string();
149          let engine = r.engine.and_then(|e| {
150              let t = e.trim().to_string();
151              if t.is_empty() {
152                  None
153              } else {
154                  Some(t)
155              }
156          });
157  
158          let index = hits.len() + 1;
159          hits.push(SearchHit {
160              index,
161              title,
162              url,
163              content,
164              engine,
165          });
166      }
167  
168      Ok(hits)
169  }
170  
/// Shorten `s` to at most `max` bytes, appending `…` when anything was cut.
///
/// Strings that already fit are returned unchanged. When `max` falls inside a
/// multi-byte UTF-8 character the cut moves back to the previous character
/// boundary, so the kept prefix may be slightly shorter than `max` bytes.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }

    // Slicing a `str` off a char boundary panics, and the input here is an
    // arbitrary response body. Index 0 is always a boundary, so this loop
    // terminates.
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }

    let mut out = String::with_capacity(end + '…'.len_utf8());
    out.push_str(&s[..end]);
    out.push('…');
    out
}
179  
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A bare host base yields `/search` with the mandatory query parameters.
    #[test]
    fn build_search_url_basic() {
        let base = Url::parse("http://localhost:8080").unwrap();
        let built = build_search_url(&base, &SearchParams::new("hello world")).unwrap();
        let rendered = built.as_str();

        assert!(rendered.starts_with("http://localhost:8080/search?"));
        assert!(rendered.contains("q=hello+world"));
        assert!(rendered.contains("format=json"));
        assert!(rendered.contains("pageno=1"));
    }

    /// A base with a sub-path keeps it, and optional parameters are emitted.
    #[test]
    fn build_search_url_with_subpath() {
        let base = Url::parse("https://example.com/searxng/").unwrap();
        let params = SearchParams {
            page: 2,
            language: Some("en"),
            safesearch: Some(1),
            ..SearchParams::new("rust")
        };
        let built = build_search_url(&base, &params).unwrap();
        let rendered = built.as_str();

        assert!(rendered.starts_with("https://example.com/searxng/search?"));
        assert!(rendered.contains("q=rust"));
        assert!(rendered.contains("pageno=2"));
        assert!(rendered.contains("language=en"));
        assert!(rendered.contains("safesearch=1"));
    }

    /// Entries without a URL or with an unparseable URL are dropped.
    #[test]
    fn parse_search_json_extracts_hits() {
        let sample = r#"
        {
          "query": "test",
          "results": [
            {"title": "Example", "url": "https://example.org", "content": "snippet", "engine": "dummy"},
            {"title": "No URL", "content": "skip"},
            {"title": "Bad URL", "url": "not a url", "content": "skip"}
          ]
        }"#;

        let hits = parse_search_json(sample).unwrap();
        assert_eq!(hits.len(), 1);

        let only = &hits[0];
        assert_eq!(only.index, 1);
        assert_eq!(only.title, "Example");
        assert_eq!(only.url.as_str(), "https://example.org/");
        assert_eq!(only.content, "snippet");
        assert_eq!(only.engine.as_deref(), Some("dummy"));
    }
}