# test_web_tools_tavily.py
"""Tests for Tavily web backend integration.

Coverage:
    _tavily_request() — API key handling, endpoint construction, error propagation.
    _normalize_tavily_search_results() — search response normalization.
    _normalize_tavily_documents() — extract/crawl response normalization, failed_results.
    web_search_tool / web_extract_tool / web_crawl_tool — Tavily dispatch paths.
"""

import asyncio
import json
import os
from unittest.mock import patch, MagicMock

import pytest


# ─── shared helpers ──────────────────────────────────────────────────────────

def _ok_response(payload):
    """Return a mock HTTP response whose ``.json()`` yields *payload*.

    ``raise_for_status`` is a plain mock, so calling it never raises.
    """
    response = MagicMock()
    response.json.return_value = payload
    response.raise_for_status = MagicMock()
    return response


def _run_async(coro):
    """Run *coro* to completion on a private event loop and return its result.

    A fresh loop is created and closed on every call, so these tests neither
    depend on nor modify the process-wide "current" event loop. Relying on
    ``asyncio.get_event_loop()`` to create a loop implicitly is deprecated in
    newer Python versions.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()


# ─── _tavily_request ─────────────────────────────────────────────────────────

class TestTavilyRequest:
    """Test suite for the _tavily_request helper."""

    def test_raises_without_api_key(self):
        """No TAVILY_API_KEY → ValueError with guidance."""
        # patch.dict restores the environment on exit, so the pop is temporary.
        with patch.dict(os.environ, {}, clear=False):
            os.environ.pop("TAVILY_API_KEY", None)
            from tools.web_tools import _tavily_request
            with pytest.raises(ValueError, match="TAVILY_API_KEY"):
                _tavily_request("search", {"query": "test"})

    def test_posts_with_api_key_in_body(self):
        """api_key is injected into the JSON payload."""
        mock_response = _ok_response({"results": []})

        with patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test-key"}), \
             patch("tools.web_tools.httpx.post", return_value=mock_response) as mock_post:
            from tools.web_tools import _tavily_request
            _tavily_request("search", {"query": "hello"})

            mock_post.assert_called_once()
            post_call = mock_post.call_args
            payload = post_call.kwargs["json"]
            assert payload["api_key"] == "tvly-test-key"
            assert payload["query"] == "hello"
            # The endpoint URL is the first positional argument to httpx.post.
            assert "api.tavily.com/search" in post_call.args[0]

    def test_raises_on_http_error(self):
        """Non-2xx responses propagate as httpx.HTTPStatusError."""
        import httpx as _httpx
        mock_response = MagicMock()
        mock_response.raise_for_status.side_effect = _httpx.HTTPStatusError(
            "401 Unauthorized", request=MagicMock(), response=mock_response
        )

        with patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-bad-key"}), \
             patch("tools.web_tools.httpx.post", return_value=mock_response):
            from tools.web_tools import _tavily_request
            with pytest.raises(_httpx.HTTPStatusError):
                _tavily_request("search", {"query": "test"})


# ─── _normalize_tavily_search_results ─────────────────────────────────────────

class TestNormalizeTavilySearchResults:
    """Test search result normalization."""

    def test_basic_normalization(self):
        """Results map to title/url/description entries with 1-based positions."""
        from tools.web_tools import _normalize_tavily_search_results
        raw = {
            "results": [
                {"title": "Python Docs", "url": "https://docs.python.org", "content": "Official docs", "score": 0.9},
                {"title": "Tutorial", "url": "https://example.com", "content": "A tutorial", "score": 0.8},
            ]
        }
        result = _normalize_tavily_search_results(raw)
        assert result["success"] is True
        web = result["data"]["web"]
        assert len(web) == 2
        assert web[0]["title"] == "Python Docs"
        assert web[0]["url"] == "https://docs.python.org"
        assert web[0]["description"] == "Official docs"
        assert web[0]["position"] == 1
        assert web[1]["position"] == 2

    def test_empty_results(self):
        """An empty result list still reports success with no web entries."""
        from tools.web_tools import _normalize_tavily_search_results
        result = _normalize_tavily_search_results({"results": []})
        assert result["success"] is True
        assert result["data"]["web"] == []

    def test_missing_fields(self):
        """Absent title/url/content fields default to empty strings."""
        from tools.web_tools import _normalize_tavily_search_results
        result = _normalize_tavily_search_results({"results": [{}]})
        web = result["data"]["web"]
        assert web[0]["title"] == ""
        assert web[0]["url"] == ""
        assert web[0]["description"] == ""


# ─── _normalize_tavily_documents ──────────────────────────────────────────────

class TestNormalizeTavilyDocuments:
    """Test extract/crawl document normalization."""

    def test_basic_document(self):
        """raw_content fills both content fields; metadata carries sourceURL."""
        from tools.web_tools import _normalize_tavily_documents
        raw = {
            "results": [{
                "url": "https://example.com",
                "title": "Example",
                "raw_content": "Full page content here",
            }]
        }
        docs = _normalize_tavily_documents(raw)
        assert len(docs) == 1
        assert docs[0]["url"] == "https://example.com"
        assert docs[0]["title"] == "Example"
        assert docs[0]["content"] == "Full page content here"
        assert docs[0]["raw_content"] == "Full page content here"
        assert docs[0]["metadata"]["sourceURL"] == "https://example.com"

    def test_falls_back_to_content_when_no_raw_content(self):
        """Without raw_content, the result's content field is used instead."""
        from tools.web_tools import _normalize_tavily_documents
        raw = {"results": [{"url": "https://example.com", "content": "Snippet"}]}
        docs = _normalize_tavily_documents(raw)
        assert docs[0]["content"] == "Snippet"

    def test_failed_results_included(self):
        """failed_results entries become documents with the error and empty content."""
        from tools.web_tools import _normalize_tavily_documents
        raw = {
            "results": [],
            "failed_results": [
                {"url": "https://fail.com", "error": "timeout"},
            ],
        }
        docs = _normalize_tavily_documents(raw)
        assert len(docs) == 1
        assert docs[0]["url"] == "https://fail.com"
        assert docs[0]["error"] == "timeout"
        assert docs[0]["content"] == ""

    def test_failed_urls_included(self):
        """Bare failed_urls become documents with an "extraction failed" error."""
        from tools.web_tools import _normalize_tavily_documents
        raw = {
            "results": [],
            "failed_urls": ["https://bad.com"],
        }
        docs = _normalize_tavily_documents(raw)
        assert len(docs) == 1
        assert docs[0]["url"] == "https://bad.com"
        assert docs[0]["error"] == "extraction failed"

    def test_fallback_url(self):
        """fallback_url supplies the url when a result does not carry one."""
        from tools.web_tools import _normalize_tavily_documents
        raw = {"results": [{"content": "data"}]}
        docs = _normalize_tavily_documents(raw, fallback_url="https://fallback.com")
        assert docs[0]["url"] == "https://fallback.com"


# ─── web_search_tool (Tavily dispatch) ────────────────────────────────────────

class TestWebSearchTavily:
    """Test web_search_tool dispatch to Tavily."""

    def test_search_dispatches_to_tavily(self):
        """With the backend forced to "tavily", search returns normalized results."""
        mock_response = _ok_response({
            "results": [{"title": "Result", "url": "https://r.com", "content": "desc", "score": 0.9}]
        })

        with patch("tools.web_tools._get_backend", return_value="tavily"), \
             patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
             patch("tools.web_tools.httpx.post", return_value=mock_response), \
             patch("tools.interrupt.is_interrupted", return_value=False):
            from tools.web_tools import web_search_tool
            result = json.loads(web_search_tool("test query", limit=3))
            assert result["success"] is True
            assert len(result["data"]["web"]) == 1
            assert result["data"]["web"][0]["title"] == "Result"


# ─── web_extract_tool (Tavily dispatch) ───────────────────────────────────────

class TestWebExtractTavily:
    """Test web_extract_tool dispatch to Tavily."""

    def test_extract_dispatches_to_tavily(self):
        """With the backend forced to "tavily", extract returns one document per URL."""
        mock_response = _ok_response({
            "results": [{"url": "https://example.com", "raw_content": "Extracted content", "title": "Page"}]
        })

        with patch("tools.web_tools._get_backend", return_value="tavily"), \
             patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
             patch("tools.web_tools.httpx.post", return_value=mock_response), \
             patch("tools.web_tools.process_content_with_llm", return_value=None):
            from tools.web_tools import web_extract_tool
            result = json.loads(_run_async(
                web_extract_tool(["https://example.com"], use_llm_processing=False)
            ))
            assert "results" in result
            assert len(result["results"]) == 1
            assert result["results"][0]["url"] == "https://example.com"


# ─── web_crawl_tool (Tavily dispatch) ─────────────────────────────────────────

class TestWebCrawlTavily:
    """Test web_crawl_tool dispatch to Tavily."""

    def test_crawl_dispatches_to_tavily(self):
        """With the backend forced to "tavily", crawl returns every crawled page."""
        mock_response = _ok_response({
            "results": [
                {"url": "https://example.com/page1", "raw_content": "Page 1 content", "title": "Page 1"},
                {"url": "https://example.com/page2", "raw_content": "Page 2 content", "title": "Page 2"},
            ]
        })

        with patch("tools.web_tools._get_backend", return_value="tavily"), \
             patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
             patch("tools.web_tools.httpx.post", return_value=mock_response), \
             patch("tools.web_tools.check_website_access", return_value=None), \
             patch("tools.web_tools.is_safe_url", return_value=True), \
             patch("tools.interrupt.is_interrupted", return_value=False):
            from tools.web_tools import web_crawl_tool
            result = json.loads(_run_async(
                web_crawl_tool("https://example.com", use_llm_processing=False)
            ))
            assert "results" in result
            assert len(result["results"]) == 2
            assert result["results"][0]["title"] == "Page 1"

    def test_crawl_sends_instructions(self):
        """Instructions are included in the Tavily crawl payload."""
        mock_response = _ok_response({"results": []})

        with patch("tools.web_tools._get_backend", return_value="tavily"), \
             patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
             patch("tools.web_tools.httpx.post", return_value=mock_response) as mock_post, \
             patch("tools.web_tools.check_website_access", return_value=None), \
             patch("tools.web_tools.is_safe_url", return_value=True), \
             patch("tools.interrupt.is_interrupted", return_value=False):
            from tools.web_tools import web_crawl_tool
            _run_async(
                web_crawl_tool("https://example.com", instructions="Find docs", use_llm_processing=False)
            )
            payload = mock_post.call_args.kwargs["json"]
            assert payload["instructions"] == "Find docs"
            assert payload["url"] == "https://example.com"