/ tests / web-fetch-fallback.test.ts
web-fetch-fallback.test.ts
  1  import { beforeEach, describe, expect, it, vi } from 'vitest'
  2  
  3  import { safeFetch } from '@/lib/web-core'
  4  import { performBrowserFetch } from '@/server/tools/browser-fetch'
  5  import { performWebFetch } from '@/server/tools/web-fetch'
  6  
  7  vi.mock('@/server/tools/browser-fetch', () => ({
  8    performBrowserFetch: vi.fn(),
  9  }))
 10  
 11  vi.mock('@/lib/web-core', async () => {
 12    const actual = await vi.importActual('@/lib/web-core')
 13    return {
 14      ...actual,
 15      safeFetch: vi.fn(),
 16    }
 17  })
 18  
 19  describe('performWebFetch browser fallback', () => {
 20    beforeEach(() => {
 21      vi.clearAllMocks()
 22    })
 23  
 24    it('falls back to browser fetch when HTTP fetch returns an error status', async () => {
 25      vi.mocked(safeFetch).mockResolvedValue(
 26        new Response('Forbidden', {
 27          status: 403,
 28          statusText: 'Forbidden',
 29          headers: { 'content-type': 'text/html' },
 30        }),
 31      )
 32  
 33      vi.mocked(performBrowserFetch).mockResolvedValue({
 34        cached: false,
 35        fetchTimeMs: 42,
 36        result: {
 37          url: 'https://blocked.example',
 38          finalUrl: 'https://blocked.example/article',
 39          title: 'Blocked Site',
 40          content: 'Rendered browser content',
 41          contentType: 'markdown',
 42          truncated: false,
 43          bytesRead: 24,
 44          fetchMethod: 'browser',
 45        },
 46      })
 47  
 48      const response = await performWebFetch({
 49        url: 'https://blocked.example',
 50        browserFallback: true,
 51      })
 52  
 53      expect(performBrowserFetch).toHaveBeenCalledWith({
 54        url: 'https://blocked.example',
 55        timeoutSeconds: 60,
 56        maxBytes: 500000,
 57        extractMode: 'markdown',
 58      })
 59      expect(response.result.fetchMethod).toBe('browser')
 60      expect(response.result.fallbackReason).toBe('primary HTTP fetch failed')
 61      expect(response.result.content).toContain('Rendered browser content')
 62      const firstCallOptions = vi.mocked(safeFetch).mock.calls[0]?.[1] as
 63        | { headers?: Record<string, string> }
 64        | undefined
 65      expect(firstCallOptions?.headers?.Accept).toContain('text/markdown')
 66    })
 67  
 68    it('falls back to browser fetch for JS challenge pages with poor extraction', async () => {
 69      const challengeHtml = `<!doctype html>
 70  <html>
 71    <head><title>Just a moment...</title></head>
 72    <body>
 73      <h1>Enable JavaScript and cookies to continue</h1>
 74      <script>console.log('challenge')</script>
 75      <script>console.log('challenge')</script>
 76      <script>console.log('challenge')</script>
 77      <script>console.log('challenge')</script>
 78      <script>console.log('challenge')</script>
 79      <script>console.log('challenge')</script>
 80      <script>console.log('challenge')</script>
 81      <script>console.log('challenge')</script>
 82    </body>
 83  </html>`
 84  
 85      vi.mocked(safeFetch).mockResolvedValue(
 86        new Response(challengeHtml, {
 87          status: 200,
 88          statusText: 'OK',
 89          headers: { 'content-type': 'text/html' },
 90        }),
 91      )
 92  
 93      vi.mocked(performBrowserFetch).mockResolvedValue({
 94        cached: false,
 95        fetchTimeMs: 31,
 96        result: {
 97          url: 'https://challenge.example',
 98          finalUrl: 'https://challenge.example/article',
 99          title: 'Challenge Site',
100          content: 'Rendered browser content',
101          contentType: 'markdown',
102          truncated: false,
103          bytesRead: 24,
104          fetchMethod: 'browser',
105        },
106      })
107  
108      const response = await performWebFetch({
109        url: 'https://challenge.example',
110        browserFallback: true,
111      })
112  
113      expect(performBrowserFetch).toHaveBeenCalledWith({
114        url: 'https://challenge.example',
115        timeoutSeconds: 60,
116        maxBytes: 500000,
117        extractMode: 'markdown',
118      })
119      expect(response.result.fetchMethod).toBe('browser')
120      expect(response.result.fallbackReason).toBe(
121        'page appears to require JS or bot verification',
122      )
123    })
124  
125    it('uses markdown response directly when server returns text/markdown', async () => {
126      const markdown = '# Turndown\n\nA markdown response.'
127      vi.mocked(safeFetch).mockResolvedValue(
128        new Response(markdown, {
129          status: 200,
130          statusText: 'OK',
131          headers: { 'content-type': 'text/markdown; charset=utf-8' },
132        }),
133      )
134  
135      const response = await performWebFetch({ url: 'https://markdown.example' })
136  
137      expect(performBrowserFetch).not.toHaveBeenCalled()
138      expect(response.result.fetchMethod).toBe('http')
139      expect(response.result.title).toBe('Turndown')
140      expect(response.result.content).toContain(markdown)
141    })
142  
143    it('returns a clear error for stylesheet-like payloads when browser fallback is disabled', async () => {
144      const cssPayload = `
145        .VUoKZ{display:none;position:absolute;z-index:1001;}
146        .TRHLAc{position:absolute;top:0;left:0;right:0;height:4px;}
147        .mTM26c .VUoKZ{display:block;}
148        .gm3-sys-color--on-error{color:#fff;background:#111;}
149        .abc{margin:0;padding:0;display:flex;transform:scaleX(0);}
150      `.repeat(12)
151  
152      vi.mocked(safeFetch).mockResolvedValue(
153        new Response(cssPayload, {
154          status: 200,
155          statusText: 'OK',
156          headers: { 'content-type': 'text/html; charset=utf-8' },
157        }),
158      )
159  
160      await expect(
161        performWebFetch({ url: 'https://news.google.com/search?q=test' }),
162      ).rejects.toThrow(
163        'HTTP fetch returned stylesheet-like content; use browser_fetch for this page.',
164      )
165      expect(performBrowserFetch).not.toHaveBeenCalled()
166    })
167  })