// openai-compatible-bash-tool.test.ts
1 import { mkdtemp, rm, writeFile } from 'node:fs/promises' 2 import { tmpdir } from 'node:os' 3 import { join } from 'node:path' 4 5 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' 6 7 import type { ProviderChatInput } from '@/server/providers/types' 8 import { openAiCompatibleProvider } from '@/server/providers/openai-compatible' 9 10 vi.mock('@/server/costs/ledger', () => ({ 11 recordApiCall: vi.fn(), 12 })) 13 14 vi.mock('@/server/storage/tool-execution-logs', () => ({ 15 recordToolExecutionLog: vi.fn(), 16 })) 17 18 vi.mock('@/server/storage/chat-store', () => ({ 19 getUploadById: vi.fn(() => null), 20 })) 21 22 vi.mock('@/server/uploads/files', () => ({ 23 readUploadFile: vi.fn(), 24 })) 25 26 const originalEnv = { ...process.env } 27 const originalFetch = global.fetch 28 29 function jsonResponse(body: unknown, status = 200): Response { 30 return new Response(JSON.stringify(body), { 31 status, 32 headers: { 33 'Content-Type': 'application/json', 34 }, 35 }) 36 } 37 38 function buildResponsesInput(): ProviderChatInput { 39 return { 40 systemPrompt: 'You are helpful.', 41 compactedSummary: '', 42 memories: [], 43 messages: [ 44 { 45 role: 'user', 46 text: 'run this bash command', 47 attachments: [], 48 }, 49 ], 50 providerOverride: { 51 baseUrl: 'https://router.example.com/v1', 52 apiKey: null, 53 chatEndpointMode: 'auto', 54 }, 55 modelOverride: 'minimax-m2', 56 allowDangerousBashTool: true, 57 } 58 } 59 60 describe('openAiCompatibleProvider bash tool hardening', () => { 61 beforeEach(() => { 62 vi.clearAllMocks() 63 process.env = { 64 ...originalEnv, 65 LLM_BASE_URL: 'https://example.test/v1', 66 LLM_CHAT_MODEL: 'minimax-m2', 67 LLM_TIMEOUT_MS: '30000', 68 } 69 global.fetch = vi.fn() as typeof fetch 70 }) 71 72 afterEach(() => { 73 process.env = originalEnv 74 global.fetch = originalFetch 75 }) 76 77 it('blocks python script execution when preflight detects shell variable bleed', async () => { 78 const tempRoot = await 
mkdtemp(join(tmpdir(), 'helper-bash-preflight-')) 79 try { 80 await writeFile( 81 join(tempRoot, 'bad.py'), 82 ['import json', 'payload = $DM_JSON', 'print(payload)'].join('\n'), 83 'utf8', 84 ) 85 86 const fetchMock = vi.mocked(global.fetch) 87 fetchMock 88 .mockResolvedValueOnce( 89 jsonResponse({ 90 model: 'minimax-m2', 91 output_text: `<tool_call> 92 bash 93 <arg_key>command</arg_key><arg_value>python bad.py</arg_value> 94 <arg_key>cwd</arg_key><arg_value>${tempRoot}</arg_value> 95 </tool_call>`, 96 }), 97 ) 98 .mockResolvedValueOnce( 99 jsonResponse({ 100 model: 'minimax-m2', 101 output_text: 'Done.', 102 }), 103 ) 104 105 const result = await openAiCompatibleProvider.generateReply( 106 buildResponsesInput(), 107 ) 108 109 expect(result.text).toContain('Done.') 110 expect(result.text).toContain('bash (textual fallback): error') 111 expect(result.text).toContain( 112 'exec preflight: detected likely shell variable injection ($DM_JSON)', 113 ) 114 } finally { 115 await rm(tempRoot, { recursive: true, force: true }) 116 } 117 }) 118 119 it('surfaces command-not-found failure reason in bash tool summaries', async () => { 120 const fetchMock = vi.mocked(global.fetch) 121 fetchMock 122 .mockResolvedValueOnce( 123 jsonResponse({ 124 model: 'minimax-m2', 125 output_text: `<tool_call> 126 bash 127 <arg_key>command</arg_key><arg_value>__helper_missing_command_xyz__</arg_value> 128 </tool_call>`, 129 }), 130 ) 131 .mockResolvedValueOnce( 132 jsonResponse({ 133 model: 'minimax-m2', 134 output_text: 'Done.', 135 }), 136 ) 137 138 const result = await openAiCompatibleProvider.generateReply(buildResponsesInput()) 139 140 expect(result.text).toContain('Done.') 141 expect(result.text).toContain('bash (textual fallback): ok') 142 expect(result.text).toContain('reason: Command not found (exit 127).') 143 }) 144 })