// tests/openai-compatible-textual-tool-fallback.test.ts
  1  import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
  2  import type { ProviderChatInput } from '@/server/providers/types'
  3  import { openAiCompatibleProvider } from '@/server/providers/openai-compatible'
  4  import { performWebFetch } from '@/server/tools/web-fetch'
  5  import { performWebSearch } from '@/server/tools/web-search'
  6  import { recordToolExecutionLog } from '@/server/storage/tool-execution-logs'
  7  
// Stub cost tracking so provider calls never touch the real ledger.
vi.mock('@/server/costs/ledger', () => ({
  recordApiCall: vi.fn(),
}))

// Replace local tool implementations with spies so tests can assert exactly
// which tools the provider executed and with which arguments.
vi.mock('@/server/tools/web-fetch', () => ({
  performWebFetch: vi.fn(),
}))

vi.mock('@/server/tools/web-search', () => ({
  performWebSearch: vi.fn(),
}))

// Spy on tool-execution logging so tests can assert log payloads.
vi.mock('@/server/storage/tool-execution-logs', () => ({
  recordToolExecutionLog: vi.fn(),
}))

// No uploads exist in these scenarios; attachment lookups return nothing.
vi.mock('@/server/storage/chat-store', () => ({
  getUploadById: vi.fn(() => null),
}))

vi.mock('@/server/uploads/files', () => ({
  readUploadFile: vi.fn(),
}))

// Snapshots restored in afterEach so env/fetch mutations never leak across tests.
const originalEnv = { ...process.env }
const originalFetch = global.fetch
 34  
 35  function jsonResponse(body: unknown, status = 200): Response {
 36    return new Response(JSON.stringify(body), {
 37      status,
 38      headers: {
 39        'Content-Type': 'application/json',
 40      },
 41    })
 42  }
 43  
 44  function wait(ms: number): Promise<void> {
 45    return new Promise((resolve) => {
 46      setTimeout(resolve, ms)
 47    })
 48  }
 49  
describe('openAiCompatibleProvider textual tool-call fallback (chat completions)', () => {
  beforeEach(() => {
    vi.clearAllMocks()
    // Point the provider at a fake backend. LLM_TIMEOUT_MS is overridden by
    // the timeout-sensitive cases, which set a much shorter value.
    process.env = {
      ...originalEnv,
      LLM_BASE_URL: 'https://example.test/v1',
      LLM_CHAT_MODEL: 'minimax-m2',
      LLM_TIMEOUT_MS: '30000',
    }
    // Fresh fetch mock per test so call counts and ordering start clean.
    global.fetch = vi.fn() as typeof fetch
  })

  afterEach(() => {
    // Restore the real environment and fetch implementation.
    process.env = originalEnv
    global.fetch = originalFetch
  })
 66  
  // Round 1 returns a minimax XML tool-call block in plain assistant text;
  // the provider must execute the tool locally, send results back in round 2,
  // and strip the XML tags from the conversation it forwards.
  it('executes minimax XML tool calls in chat-completions mode and strips tags from continuation text', async () => {
    vi.mocked(performWebFetch).mockResolvedValue({
      result: {
        url: 'https://news.ycombinator.com/',
        title: 'Hacker News',
        content: 'Top stories...',
        contentType: 'markdown',
        truncated: false,
        bytesRead: 1234,
      },
      cached: false,
      fetchTimeMs: 12,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'minimax-m2',
          choices: [
            {
              message: {
                content: `I will fetch it.\n<minimax:tool_call>\n<invoke name="web_fetch">\n<parameter name="url">https://news.ycombinator.com/</parameter>\n</invoke>\n</minimax:tool_call>`,
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'minimax-m2',
          choices: [
            {
              message: {
                content: 'Fetched the page successfully.',
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )

    const input: ProviderChatInput = {
      sessionId: 'session-tool-log-xml',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'can you try the fetch once more',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://example.test/v1',
        apiKey: null,
        chatEndpointMode: 'chat_completions',
      },
      modelOverride: 'minimax-m2',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)

    expect(result.provider).toBe('openai-compatible')
    expect(result.mocked).toBe(false)
    expect(result.text).toContain('Fetched the page successfully.')
    expect(result.text).toContain('<tool_result>')
    expect(result.text).toContain('Executed 1 local tool call:')
    expect(result.text).toContain('web_fetch (textual fallback): ok')

    expect(performWebFetch).toHaveBeenCalledTimes(1)
    expect(performWebFetch).toHaveBeenCalledWith({
      url: 'https://news.ycombinator.com/',
      maxBytes: 500000,
    })

    // Both rounds must go to /chat/completions (no endpoint switching).
    expect(fetchMock).toHaveBeenCalledTimes(2)
    expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/chat/completions')
    expect(String(fetchMock.mock.calls[1]?.[0])).toContain('/chat/completions')

    // Inspect the continuation request payload: tool results must be present
    // and the raw XML tool-call markup must have been stripped.
    const secondInit = fetchMock.mock.calls[1]?.[1] as RequestInit
    const secondPayload = JSON.parse(String(secondInit.body)) as {
      messages?: Array<{ role?: string; content?: string }>
    }
    const contents = (secondPayload.messages ?? [])
      .map((message) => (typeof message.content === 'string' ? message.content : ''))
      .join('\n\n')

    expect(contents).toContain('I will fetch it.')
    expect(contents).toContain('Tool results (textual tool-call compatibility mode):')
    expect(contents).not.toContain('<minimax:tool_call>')
    expect(contents).not.toContain('<invoke name="web_fetch">')
    expect(recordToolExecutionLog).toHaveBeenCalledWith(
      expect.objectContaining({
        sessionId: 'session-tool-log-xml',
        toolName: 'web_fetch',
        source: 'textual',
        status: 'ok',
      }),
    )
  })
171  
  // In 'auto' mode the provider tries /responses first; when that returns a
  // 400 tool-unsupported error it must retry on /chat/completions and still
  // run the textual tool loop there.
  it('falls back from failed responses tool-loop to chat-completions textual loop for non-openai providers', async () => {
    vi.mocked(performWebFetch).mockResolvedValue({
      result: {
        url: 'https://news.ycombinator.com/',
        title: 'Hacker News',
        content: 'Top stories...',
        contentType: 'markdown',
        truncated: false,
        bytesRead: 1234,
      },
      cached: false,
      fetchTimeMs: 10,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse(
          {
            error: {
              message: 'tools are unsupported for this backend on /responses',
            },
          },
          400,
        ),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'minimax-m2',
          choices: [
            {
              message: {
                content: `<minimax:tool_call>\n<invoke name='web_fetch'>\n<parameter name='url'>https://news.ycombinator.com/</parameter>\n</invoke>\n</minimax:tool_call>`,
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'minimax-m2',
          choices: [
            {
              message: {
                content: 'Done after fallback.',
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )

    const input: ProviderChatInput = {
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'please fetch https://news.ycombinator.com/ again',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'auto',
      },
      modelOverride: 'minimax-m2',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Done after fallback.')
    expect(result.text).toContain('<tool_result>')
    expect(performWebFetch).toHaveBeenCalledTimes(1)

    // First call hits /responses; both retry rounds hit /chat/completions.
    expect(fetchMock).toHaveBeenCalledTimes(3)
    expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/responses')
    expect(String(fetchMock.mock.calls[1]?.[0])).toContain('/chat/completions')
    expect(String(fetchMock.mock.calls[2]?.[0])).toContain('/chat/completions')
  })
255  
  // A /responses round that reports status=failed with an empty output array
  // (HTTP 200, no assistant text) must also trigger the chat-completions
  // fallback while local tool routing is active.
  it('falls back to chat-completions when responses returns status=failed with no assistant text during local tool routing', async () => {
    vi.mocked(performWebFetch).mockResolvedValue({
      result: {
        url: 'https://news.ycombinator.com/',
        title: 'Hacker News',
        content: 'Top stories...',
        contentType: 'markdown',
        truncated: false,
        bytesRead: 1234,
      },
      cached: false,
      fetchTimeMs: 9,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          id: 'resp_tmp_failed_round',
          model: 'arcee-ai/trinity-mini-20251201:free',
          status: 'failed',
          output: [],
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'arcee-ai/trinity-mini-20251201:free',
          choices: [
            {
              message: {
                content: `<minimax:tool_call>\n<invoke name='web_fetch'>\n<parameter name='url'>https://news.ycombinator.com/</parameter>\n</invoke>\n</minimax:tool_call>`,
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'arcee-ai/trinity-mini-20251201:free',
          choices: [
            {
              message: {
                content: 'Recovered after failed responses round.',
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )

    const input: ProviderChatInput = {
      sessionId: 'session-failed-round-fallback',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'please fetch https://news.ycombinator.com/',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://openrouter.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'auto',
      },
      modelOverride: 'arcee-ai/trinity-mini:free',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Recovered after failed responses round.')
    expect(result.text).toContain('web_fetch (textual fallback): ok')

    expect(fetchMock).toHaveBeenCalledTimes(3)
    expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/responses')
    expect(String(fetchMock.mock.calls[1]?.[0])).toContain('/chat/completions')
    expect(String(fetchMock.mock.calls[2]?.[0])).toContain('/chat/completions')
  })
337  
  // Same status=failed fallback, but for a plain chat turn with no tool use
  // and with chatEndpointMode pinned to 'responses' rather than 'auto'.
  it('falls back to chat-completions for non-tool prompts when responses returns status=failed with empty output', async () => {
    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          id: 'resp_tmp_failed_non_tool',
          model: 'arcee-ai/trinity-mini-20251201:free',
          status: 'failed',
          output: [],
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'arcee-ai/trinity-mini-20251201:free',
          choices: [
            {
              message: {
                content: 'Recovered from failed responses non-tool round.',
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )

    const input: ProviderChatInput = {
      sessionId: 'session-failed-non-tool-fallback',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'hello there',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://openrouter.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'responses',
      },
      modelOverride: 'arcee-ai/trinity-mini:free',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Recovered from failed responses non-tool round.')

    expect(fetchMock).toHaveBeenCalledTimes(2)
    expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/responses')
    expect(String(fetchMock.mock.calls[1]?.[0])).toContain('/chat/completions')
  })
391  
  // When the backend returns a proper OpenAI-style tool_calls array (rather
  // than textual XML), the provider executes the tool and must append a
  // role:'tool' message to the continuation request.
  it('executes chat-completions native function tool calls when returned by provider', async () => {
    vi.mocked(performWebFetch).mockResolvedValue({
      result: {
        url: 'https://news.ycombinator.com/',
        title: 'Hacker News',
        content: 'Top stories...',
        contentType: 'markdown',
        truncated: false,
        bytesRead: 1234,
      },
      cached: false,
      fetchTimeMs: 8,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'moonshotai/Kimi-K2.5',
          choices: [
            {
              message: {
                role: 'assistant',
                content: '',
                tool_calls: [
                  {
                    id: 'call_1',
                    type: 'function',
                    function: {
                      name: 'web_fetch',
                      arguments:
                        '{"url":"https://news.ycombinator.com/"}',
                    },
                  },
                ],
              },
              finish_reason: 'tool_calls',
            },
          ],
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'moonshotai/Kimi-K2.5',
          choices: [
            {
              message: {
                content: 'Fetched via native function call.',
              },
              finish_reason: 'stop',
            },
          ],
        }),
      )

    const input: ProviderChatInput = {
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'please fetch https://news.ycombinator.com/',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'chat_completions',
      },
      modelOverride: 'moonshotai/Kimi-K2.5',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Fetched via native function call.')
    expect(result.text).toContain('web_fetch (function): ok')
    expect(performWebFetch).toHaveBeenCalledTimes(1)
    expect(fetchMock).toHaveBeenCalledTimes(2)

    // The continuation payload must carry the tool result as a 'tool' role.
    const secondInit = fetchMock.mock.calls[1]?.[1] as RequestInit
    const secondPayload = JSON.parse(String(secondInit.body)) as {
      messages?: Array<{ role?: string }>
    }
    const messageRoles = (secondPayload.messages ?? [])
      .map((message) => message.role)
      .filter(Boolean)

    expect(messageRoles).toContain('tool')
  })
483  
  // Two rounds each slower than the per-round budget must still both complete:
  // the timeout window appears to reset per round as long as each round makes
  // progress (here the first round's text contains a textual tool call).
  it('keeps responses tool loops alive across multiple rounds when each round has activity', async () => {
    process.env.LLM_TIMEOUT_MS = '1000'

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockImplementationOnce(async () => {
        await wait(700)
        return jsonResponse({
          model: 'minimax-m2',
          output_text: 'Let me check. get_current_time()',
        })
      })
      .mockImplementationOnce(async () => {
        await wait(700)
        return jsonResponse({
          model: 'minimax-m2',
          output_text: 'Done after the second round.',
        })
      })

    const input: ProviderChatInput = {
      sessionId: 'session-timeout-reset',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'what time is it?',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'responses',
      },
      modelOverride: 'minimax-m2',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Done after the second round.')
    expect(result.text).toContain('get_current_time (textual fallback): ok')
    expect(fetchMock).toHaveBeenCalledTimes(2)
    expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/responses')
    expect(String(fetchMock.mock.calls[1]?.[0])).toContain('/responses')
  })
532  
  // Four consecutive textual tool calls (1 search + 3 fetches) followed by a
  // final answer: the loop must not be force-switched to chat-completions and
  // every round must stay on /responses.
  it('allows responses local tool loops to run beyond four rounds without forced fallback', async () => {
    vi.mocked(performWebSearch).mockResolvedValue({
      results: [
        {
          title: 'Result',
          url: 'https://example.com/result',
          snippet: 'Snippet',
        },
      ],
      provider: 'duckduckgo',
      cached: false,
      searchTimeMs: 5,
    })
    vi.mocked(performWebFetch).mockResolvedValue({
      result: {
        url: 'https://example.com/page',
        title: 'Example page',
        content: 'Example content',
        contentType: 'markdown',
        truncated: false,
        bytesRead: 512,
      },
      cached: false,
      fetchTimeMs: 8,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text:
            "web_search(query='Iran attacks February 28 2026', maxResults=5)",
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text:
            "web_fetch(url='https://apnews.com/live/live-updates-israel-iran-february-28-2026')",
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text:
            "web_fetch(url='https://www.cnn.com/world/live-news/israel-iran-attack-02-28-26-hnk-intl')",
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text: "web_fetch(url='https://example.com/another-source')",
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text: 'Done after five rounds.',
        }),
      )

    const input: ProviderChatInput = {
      sessionId: 'session-multi-round-tools',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'search current Iran attack updates and summarize from multiple sources',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'auto',
      },
      modelOverride: 'glm-4.7',
      allowDangerousBashTool: false,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Done after five rounds.')
    expect(result.text).toContain('Executed 4 local tool calls:')

    expect(vi.mocked(performWebSearch)).toHaveBeenCalledTimes(1)
    expect(performWebFetch).toHaveBeenCalledTimes(3)

    expect(fetchMock).toHaveBeenCalledTimes(5)
    for (const call of fetchMock.mock.calls) {
      expect(String(call[0])).toContain('/responses')
      expect(String(call[0])).not.toContain('/chat/completions')
    }
  })
629  
  // Streaming path: narrative text preceding a textual tool call must be
  // emitted as delta chunks BEFORE the tool_complete event for that round.
  it('streams assistant preview text before local tool rounds fully complete', async () => {
    vi.mocked(performWebSearch).mockResolvedValue({
      results: [
        {
          title: 'AP',
          url: 'https://apnews.com/example',
          snippet: 'update',
        },
      ],
      provider: 'duckduckgo',
      cached: false,
      searchTimeMs: 7,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text:
            "I'll look that up now. web_search(query='Iran attacks February 28 2026', maxResults=5)",
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text: 'Here are the key updates.',
        }),
      )

    const input: ProviderChatInput = {
      sessionId: 'session-stream-preview',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'search the latest Iran attack updates',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'auto',
      },
      modelOverride: 'glm-4.7',
      allowDangerousBashTool: false,
    }

    // Collect the ordered sequence of delta and tool_complete events.
    const streamReply = await openAiCompatibleProvider.streamReply(input)
    const chunks: Array<{ type: string; text?: string }> = []
    for await (const chunk of streamReply.stream) {
      if (chunk.type === 'delta') {
        chunks.push({ type: 'delta', text: chunk.delta })
        continue
      }
      if (chunk.type === 'tool_complete') {
        chunks.push({ type: 'tool_complete' })
      }
    }

    const firstPreviewDeltaIndex = chunks.findIndex(
      (chunk) =>
        chunk.type === 'delta' &&
        (chunk.text ?? '').includes("I'll look that up now."),
    )
    const firstToolCompleteIndex = chunks.findIndex(
      (chunk) => chunk.type === 'tool_complete',
    )

    // Preview text must arrive strictly before the tool-completion event.
    expect(firstPreviewDeltaIndex).toBeGreaterThanOrEqual(0)
    expect(firstToolCompleteIndex).toBeGreaterThan(firstPreviewDeltaIndex)
    expect(chunks.some((chunk) => chunk.text?.includes('Here are the key updates.'))).toBe(
      true,
    )
    expect(fetchMock).toHaveBeenCalledTimes(2)
  })
709  
  // <think>...</think> markup in a non-stream round must be routed to the
  // 'thinking' channel and removed from the visible delta text.
  it('extracts tagged thinking content from non-stream local rounds and emits it separately', async () => {
    const fetchMock = vi.mocked(global.fetch)
    fetchMock.mockResolvedValueOnce(
      jsonResponse({
        model: 'glm-4.7',
        output_text:
          '<think>Checking sources and validating timeline.</think>No notable updates yet.',
      }),
    )

    const input: ProviderChatInput = {
      sessionId: 'session-tagged-thinking',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'search for updates',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'auto',
      },
      modelOverride: 'glm-4.7',
      allowDangerousBashTool: false,
    }

    // Accumulate the delta and thinking channels separately.
    const streamReply = await openAiCompatibleProvider.streamReply(input)
    let streamedText = ''
    let streamedThinking = ''
    for await (const chunk of streamReply.stream) {
      if (chunk.type === 'delta') {
        streamedText += chunk.delta
        continue
      }
      if (chunk.type === 'thinking') {
        streamedThinking += chunk.thinking
      }
    }

    expect(streamedThinking).toContain('Checking sources and validating timeline.')
    expect(streamedText).toContain('No notable updates yet.')
    expect(streamedText).not.toContain('<think>')
    expect(fetchMock).toHaveBeenCalledTimes(1)
  })
759  
  // When round 1 still has a pending tool continuation, its narrative (which
  // duplicates the 'reasoning' field) must surface only on the thinking
  // channel, never in the user-visible text.
  it('routes intermediate round narrative to thinking only when a tool continuation is still pending', async () => {
    vi.mocked(performWebSearch).mockResolvedValue({
      results: [
        {
          title: 'AP',
          url: 'https://apnews.com/example',
          snippet: 'update',
        },
      ],
      provider: 'duckduckgo',
      cached: false,
      searchTimeMs: 6,
    })

    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text:
            "The user is asking about current attacks. web_search(query='Iran attacks February 28 2026', maxResults=3)",
          reasoning: 'The user is asking about current attacks.',
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          model: 'glm-4.7',
          output_text: 'Based on current reports, here is the summary.',
        }),
      )

    const input: ProviderChatInput = {
      sessionId: 'session-thinking-only-intermediate-round',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'search for latest attack updates and summarize',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'auto',
      },
      modelOverride: 'glm-4.7',
      allowDangerousBashTool: false,
    }

    const streamReply = await openAiCompatibleProvider.streamReply(input)
    let streamedText = ''
    let streamedThinking = ''
    for await (const chunk of streamReply.stream) {
      if (chunk.type === 'delta') {
        streamedText += chunk.delta
        continue
      }
      if (chunk.type === 'thinking') {
        streamedThinking += chunk.thinking
      }
    }

    expect(streamedThinking).toContain('The user is asking about current attacks.')
    expect(streamedText).not.toContain('The user is asking about current attacks.')
    expect(streamedText).toContain('Based on current reports, here is the summary.')
    expect(fetchMock).toHaveBeenCalledTimes(2)
  })
830  
  // A single round that runs more than twice the 1s budget (2.2s) must still
  // complete — the in-flight request is kept alive rather than aborted.
  it('keeps a slow in-flight responses tool-loop round alive via activity heartbeat', async () => {
    process.env.LLM_TIMEOUT_MS = '1000'

    const fetchMock = vi.mocked(global.fetch)
    fetchMock.mockImplementationOnce(async () => {
      await wait(2200)
      return jsonResponse({
        model: 'minimax-m2',
        output_text: 'Finished after a long round.',
      })
    })

    const input: ProviderChatInput = {
      sessionId: 'session-timeout-heartbeat',
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'say hello',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'responses',
      },
      modelOverride: 'minimax-m2',
      allowDangerousBashTool: true,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('Finished after a long round.')
    expect(fetchMock).toHaveBeenCalledTimes(1)
    expect(String(fetchMock.mock.calls[0]?.[0])).toContain('/responses')
  })
869  
  // With bash enabled but no tool invoked by the model, the reply must still
  // include a <tool_result> section stating that no local tools ran.
  it('adds a no-tool-executed summary when bash is enabled but model does not call a tool', async () => {
    const fetchMock = vi.mocked(global.fetch)
    fetchMock.mockResolvedValueOnce(
      jsonResponse({
        model: 'moonshotai/Kimi-K2.5',
        choices: [
          {
            message: {
              content: 'head -50 packages/web-core/src/external-content.ts',
            },
            finish_reason: 'stop',
          },
        ],
      }),
    )

    const input: ProviderChatInput = {
      systemPrompt: 'You are helpful.',
      compactedSummary: '',
      memories: [],
      messages: [
        {
          role: 'user',
          text: 'can you run the bash tool again?',
          attachments: [],
        },
      ],
      providerOverride: {
        baseUrl: 'https://router.example.com/v1',
        apiKey: null,
        chatEndpointMode: 'chat_completions',
      },
      modelOverride: 'moonshotai/Kimi-K2.5',
      allowDangerousBashTool: true,
    }

    const result = await openAiCompatibleProvider.generateReply(input)
    expect(result.text).toContain('head -50 packages/web-core/src/external-content.ts')
    expect(result.text).toContain('<tool_result>')
    expect(result.text).toContain('No local tools were executed in this turn.')
  })
})
911  })