// web-fetch-fallback.test.ts
import { beforeEach, describe, expect, it, vi } from 'vitest'

import { safeFetch } from '@/lib/web-core'
import { performBrowserFetch } from '@/server/tools/browser-fetch'
import { performWebFetch } from '@/server/tools/web-fetch'

// Replace the browser fetcher with a bare mock so each test decides whether a
// fallback result exists and can assert on how (or whether) it was requested.
vi.mock('@/server/tools/browser-fetch', () => ({
  performBrowserFetch: vi.fn(),
}))

// Keep every real export of web-core and stub only `safeFetch`, whose resolved
// `Response` each test scripts.
vi.mock('@/lib/web-core', async () => {
  const actual =
    await vi.importActual<typeof import('@/lib/web-core')>('@/lib/web-core')
  return {
    ...actual,
    safeFetch: vi.fn(),
  }
})

/** Makes the mocked `safeFetch` resolve with a `Response` built from the given parts. */
function mockHttpResponse(
  body: string,
  init: { status: number; statusText: string; contentType: string },
): void {
  vi.mocked(safeFetch).mockResolvedValue(
    new Response(body, {
      status: init.status,
      statusText: init.statusText,
      headers: { 'content-type': init.contentType },
    }),
  )
}

/**
 * Makes the mocked `performBrowserFetch` resolve with an uncached markdown
 * result for the given page. Only the fields that differ between tests are
 * parameters; the rendered content is the same fixed string everywhere.
 */
function mockBrowserResult(page: {
  url: string
  finalUrl: string
  title: string
  fetchTimeMs: number
}): void {
  vi.mocked(performBrowserFetch).mockResolvedValue({
    cached: false,
    fetchTimeMs: page.fetchTimeMs,
    result: {
      url: page.url,
      finalUrl: page.finalUrl,
      title: page.title,
      content: 'Rendered browser content',
      contentType: 'markdown',
      truncated: false,
      bytesRead: 24,
      fetchMethod: 'browser',
    },
  })
}

/**
 * Arguments the tests expect `performWebFetch` to hand to the browser fetcher
 * when the caller supplies only `url` and `browserFallback`.
 * NOTE(review): 60 / 500000 / 'markdown' are presumably the defaults applied by
 * `performWebFetch` — confirm against its implementation.
 */
function expectedBrowserFetchArgs(url: string): {
  url: string
  timeoutSeconds: number
  maxBytes: number
  extractMode: string
} {
  return {
    url,
    timeoutSeconds: 60,
    maxBytes: 500000,
    extractMode: 'markdown',
  }
}

/**
 * Exercises `performWebFetch` with `safeFetch` and `performBrowserFetch`
 * mocked: when the browser fallback is used, when the HTTP response is used
 * directly, and what error surfaces when the fallback is not enabled.
 */
describe('performWebFetch browser fallback', () => {
  beforeEach(() => {
    // Reset (not just clear) so a resolved value scripted by one test cannot
    // leak into the next; `clearAllMocks` would keep mock implementations.
    vi.resetAllMocks()
  })

  it('falls back to browser fetch when HTTP fetch returns an error status', async () => {
    mockHttpResponse('Forbidden', {
      status: 403,
      statusText: 'Forbidden',
      contentType: 'text/html',
    })
    mockBrowserResult({
      url: 'https://blocked.example',
      finalUrl: 'https://blocked.example/article',
      title: 'Blocked Site',
      fetchTimeMs: 42,
    })

    const response = await performWebFetch({
      url: 'https://blocked.example',
      browserFallback: true,
    })

    expect(performBrowserFetch).toHaveBeenCalledWith(
      expectedBrowserFetchArgs('https://blocked.example'),
    )
    expect(response.result.fetchMethod).toBe('browser')
    expect(response.result.fallbackReason).toBe('primary HTTP fetch failed')
    expect(response.result.content).toContain('Rendered browser content')

    // The first HTTP attempt must advertise markdown in its Accept header.
    // The options argument is narrowed by hand because only `headers` matters here.
    const firstCallOptions = vi.mocked(safeFetch).mock.calls[0]?.[1] as
      | { headers?: Record<string, string> }
      | undefined
    expect(firstCallOptions?.headers?.Accept).toContain('text/markdown')
  })

  it('falls back to browser fetch for JS challenge pages with poor extraction', async () => {
    // A 200 response whose body is an interstitial: a heading plus scripts and
    // no article text. Whitespace inside the literal is part of the payload.
    const challengeHtml = `<!doctype html>
<html>
<head><title>Just a moment...</title></head>
<body>
<h1>Enable JavaScript and cookies to continue</h1>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
<script>console.log('challenge')</script>
</body>
</html>`

    mockHttpResponse(challengeHtml, {
      status: 200,
      statusText: 'OK',
      contentType: 'text/html',
    })
    mockBrowserResult({
      url: 'https://challenge.example',
      finalUrl: 'https://challenge.example/article',
      title: 'Challenge Site',
      fetchTimeMs: 31,
    })

    const response = await performWebFetch({
      url: 'https://challenge.example',
      browserFallback: true,
    })

    expect(performBrowserFetch).toHaveBeenCalledWith(
      expectedBrowserFetchArgs('https://challenge.example'),
    )
    expect(response.result.fetchMethod).toBe('browser')
    expect(response.result.fallbackReason).toBe(
      'page appears to require JS or bot verification',
    )
  })

  it('uses markdown response directly when server returns text/markdown', async () => {
    const markdown = '# Turndown\n\nA markdown response.'
    mockHttpResponse(markdown, {
      status: 200,
      statusText: 'OK',
      contentType: 'text/markdown; charset=utf-8',
    })

    const response = await performWebFetch({ url: 'https://markdown.example' })

    expect(performBrowserFetch).not.toHaveBeenCalled()
    expect(response.result.fetchMethod).toBe('http')
    expect(response.result.title).toBe('Turndown')
    expect(response.result.content).toContain(markdown)
  })

  it('returns a clear error for stylesheet-like payloads when browser fallback is disabled', async () => {
    // CSS rules served under an HTML content type, repeated to make a sizeable
    // body. Whitespace inside the literal is part of the payload.
    const cssPayload = `
.VUoKZ{display:none;position:absolute;z-index:1001;}
.TRHLAc{position:absolute;top:0;left:0;right:0;height:4px;}
.mTM26c .VUoKZ{display:block;}
.gm3-sys-color--on-error{color:#fff;background:#111;}
.abc{margin:0;padding:0;display:flex;transform:scaleX(0);}
`.repeat(12)

    mockHttpResponse(cssPayload, {
      status: 200,
      statusText: 'OK',
      contentType: 'text/html; charset=utf-8',
    })

    // `browserFallback` is intentionally omitted from the call.
    await expect(
      performWebFetch({ url: 'https://news.google.com/search?q=test' }),
    ).rejects.toThrow(
      'HTTP fetch returned stylesheet-like content; use browser_fetch for this page.',
    )
    expect(performBrowserFetch).not.toHaveBeenCalled()
  })
})