openai-compatible-previous-response-id.test.ts
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

import type { ProviderChatInput } from '@/server/providers/types'
import { openAiCompatibleProvider } from '@/server/providers/openai-compatible'

vi.mock('@/server/costs/ledger', () => ({
  recordApiCall: vi.fn(),
}))

vi.mock('@/server/storage/tool-execution-logs', () => ({
  recordToolExecutionLog: vi.fn(),
}))

vi.mock('@/server/storage/chat-store', () => ({
  getUploadById: vi.fn(() => null),
}))

vi.mock('@/server/uploads/files', () => ({
  readUploadFile: vi.fn(),
}))

const originalEnv = { ...process.env }
const originalFetch = global.fetch

function jsonResponse(body: unknown, status = 200): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { 'Content-Type': 'application/json' },
  })
}

/**
 * A minimal two-round tool-loop sequence:
 * Round 1 → model returns a get_current_time function_call
 * Round 2 → model returns final text after receiving the tool result
 */
function toolCallRound1(responseId: string) {
  return jsonResponse({
    id: responseId,
    model: 'gpt-4.1',
    output: [
      {
        type: 'function_call',
        id: 'item_1',
        call_id: 'call_abc',
        name: 'get_current_time',
        arguments: '{}',
      },
    ],
    usage: { input_tokens: 200, output_tokens: 10, total_tokens: 210 },
  })
}

function finalRound(responseId: string, text: string) {
  return jsonResponse({
    id: responseId,
    model: 'gpt-4.1',
    output_text: text,
    usage: { input_tokens: 50, output_tokens: 20, total_tokens: 70 },
  })
}

function buildInput(baseUrl: string): ProviderChatInput {
  return {
    systemPrompt: 'You are helpful.',
    compactedSummary: '',
    memories: [],
    messages: [
      {
        role: 'user',
        text: 'what time is it',
        attachments: [],
      },
    ],
    providerOverride: {
      baseUrl,
      apiKey: 'sk-test',
      chatEndpointMode: 'auto',
    },
    modelOverride: 'gpt-4.1',
    allowDangerousBashTool: false,
  }
}

describe('openAiCompatibleProvider previous_response_id optimization', () => {
  beforeEach(() => {
    vi.clearAllMocks()
    process.env = {
      ...originalEnv,
      LLM_BASE_URL: 'https://api.openai.com/v1',
      LLM_CHAT_MODEL: 'gpt-4.1',
      LLM_TIMEOUT_MS: '30000',
    }
    global.fetch = vi.fn() as typeof fetch
  })

  afterEach(() => {
    process.env = originalEnv
    global.fetch = originalFetch
  })

  it('uses previous_response_id on round 2 and sends only new tool-result items for real OpenAI', async () => {
    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(toolCallRound1('resp_round1'))
      .mockResolvedValueOnce(finalRound('resp_round2', 'It is 3pm.'))

    const result = await openAiCompatibleProvider.generateReply(
      buildInput('https://api.openai.com/v1'),
    )

    expect(result.text).toContain('It is 3pm.')
    expect(fetchMock).toHaveBeenCalledTimes(2)

    // Round 1: no previous_response_id
    const round1Init = fetchMock.mock.calls[0]?.[1] as RequestInit
    const round1Payload = JSON.parse(String(round1Init.body)) as Record<string, unknown>
    expect(round1Payload.previous_response_id).toBeUndefined()

    // Round 2: must carry previous_response_id = 'resp_round1'
    const round2Init = fetchMock.mock.calls[1]?.[1] as RequestInit
    const round2Payload = JSON.parse(String(round2Init.body)) as Record<string, unknown>
    expect(round2Payload.previous_response_id).toBe('resp_round1')

    // Stateful round 2 must contain ONLY the tool results (function_call_output items).
    // OpenAI already holds the model's function_call record from resp_round1 server-side;
    // re-sending responseOutputItems would cause a "Duplicate item found" 400 error.
    const round2Input = round2Payload.input as unknown[]
    expect(Array.isArray(round2Input)).toBe(true)

    // Should be exactly 1 item: the tool result for get_current_time.
    // (NOT 2 items that would include the model's function_call item again.)
    expect(round2Input.length).toBe(1)

    // Confirm it is a function_call_output (tool result), not a function_call or user message
    const item = round2Input[0] as Record<string, unknown>
    expect(item.type).toBe('function_call_output')
  })

  it('does NOT use previous_response_id for non-OpenAI providers and sends full input each round', async () => {
    const fetchMock = vi.mocked(global.fetch)
    fetchMock
      .mockResolvedValueOnce(toolCallRound1('resp_round1'))
      .mockResolvedValueOnce(finalRound('resp_round2', 'All done.'))

    const result = await openAiCompatibleProvider.generateReply(
      buildInput('https://example.test/v1'),
    )

    expect(result.text).toContain('All done.')
    expect(fetchMock).toHaveBeenCalledTimes(2)

    // Non-OpenAI base URL: neither round should carry previous_response_id
    const round1Init = fetchMock.mock.calls[0]?.[1] as RequestInit
    const round2Init = fetchMock.mock.calls[1]?.[1] as RequestInit
    const round1Payload = JSON.parse(String(round1Init.body)) as Record<string, unknown>
    const round2Payload = JSON.parse(String(round2Init.body)) as Record<string, unknown>

    expect(round1Payload.previous_response_id).toBeUndefined()
    expect(round2Payload.previous_response_id).toBeUndefined()

    // Round 2 input must include the original user message (full history re-sent)
    const round2Input = round2Payload.input as unknown[]
    const hasUserMessage = round2Input.some(
      (item) =>
        typeof item === 'object' &&
        item !== null &&
        (item as Record<string, unknown>).role === 'user',
    )
    expect(hasUserMessage).toBe(true)
  })
})
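// ---------------------------------------------------------------------------
// For reference, a minimal sketch of the behavior the two tests above pin
// down. Everything in this block is illustrative only: the Sketch* names and
// the sketchIsStatefulEligible helper do not exist in the provider code, and
// any field not asserted above (e.g. `output`) is an assumption based on the
// Responses-style API shape the mocks emulate, not the real implementation.
// ---------------------------------------------------------------------------

// Observable gating behavior: the stateful previous_response_id path is taken
// only for the real OpenAI host, never for other OpenAI-compatible base URLs.
function sketchIsStatefulEligible(baseUrl: string): boolean {
  try {
    return new URL(baseUrl).hostname === 'api.openai.com'
  } catch {
    return false
  }
}

// Shape of the stateful round-2 request body asserted above: only the new
// function_call_output items ride along with previous_response_id; the
// round-1 function_call and user message are held server-side by OpenAI.
type SketchFunctionCallOutput = {
  type: 'function_call_output'
  call_id: string // pairs with the round-1 function_call's call_id ('call_abc')
  output: string // serialized tool result (assumed field, not asserted above)
}

type SketchStatefulRound2Body = {
  model: string
  previous_response_id: string // the round-1 response id ('resp_round1')
  input: SketchFunctionCallOutput[] // no re-sent function_call or user items
}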