// openai-compatible-bash-tool.test.ts
1 import { mkdtemp, rm, writeFile } from 'node:fs/promises' 2 import { tmpdir } from 'node:os' 3 import { join } from 'node:path' 4 5 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' 6 7 import type { ProviderChatInput } from '@/server/providers/types' 8 import { openAiCompatibleProvider } from '@/server/providers/openai-compatible' 9 10 vi.mock('@/server/costs/ledger', () => ({ 11 recordApiCall: vi.fn(), 12 })) 13 14 vi.mock('@/server/storage/tool-execution-logs', () => ({ 15 recordToolExecutionLog: vi.fn(), 16 })) 17 18 vi.mock('@/server/storage/chat-store', () => ({ 19 getUploadById: vi.fn(() => null), 20 })) 21 22 vi.mock('@/server/uploads/files', () => ({ 23 readUploadFile: vi.fn(), 24 })) 25 26 const originalEnv = { ...process.env } 27 const originalFetch = global.fetch 28 29 function jsonResponse(body: unknown, status = 200): Response { 30 return new Response(JSON.stringify(body), { 31 status, 32 headers: { 33 'Content-Type': 'application/json', 34 }, 35 }) 36 } 37 38 function buildResponsesInput(): ProviderChatInput { 39 return { 40 systemPrompt: 'You are helpful.', 41 compactedSummary: '', 42 memories: [], 43 messages: [ 44 { 45 role: 'user', 46 text: 'run this bash command', 47 attachments: [], 48 }, 49 ], 50 providerOverride: { 51 baseUrl: 'https://router.example.com/v1', 52 apiKey: null, 53 chatEndpointMode: 'auto', 54 }, 55 modelOverride: 'minimax-m2', 56 allowDangerousBashTool: true, 57 } 58 } 59 60 describe('openAiCompatibleProvider bash tool hardening', () => { 61 beforeEach(() => { 62 vi.clearAllMocks() 63 process.env = { 64 ...originalEnv, 65 LLM_BASE_URL: 'https://example.test/v1', 66 LLM_CHAT_MODEL: 'minimax-m2', 67 LLM_TIMEOUT_MS: '30000', 68 } 69 global.fetch = vi.fn() as typeof fetch 70 }) 71 72 afterEach(() => { 73 process.env = originalEnv 74 global.fetch = originalFetch 75 }) 76 77 it('blocks python script execution when preflight detects shell variable bleed', async () => { 78 const tempRoot = await 
mkdtemp(join(tmpdir(), 'helper-bash-preflight-')) 79 try { 80 await writeFile( 81 join(tempRoot, 'bad.py'), 82 ['import json', 'payload = $DM_JSON', 'print(payload)'].join('\n'), 83 'utf8', 84 ) 85 86 const fetchMock = vi.mocked(global.fetch) 87 fetchMock 88 .mockResolvedValueOnce( 89 jsonResponse({ 90 model: 'minimax-m2', 91 output_text: `<tool_call> 92 bash 93 <arg_key>command</arg_key><arg_value>python bad.py</arg_value> 94 <arg_key>cwd</arg_key><arg_value>${tempRoot}</arg_value> 95 </tool_call>`, 96 }), 97 ) 98 .mockResolvedValueOnce( 99 jsonResponse({ 100 model: 'minimax-m2', 101 output_text: 'Done.', 102 }), 103 ) 104 105 const result = await openAiCompatibleProvider.generateReply( 106 buildResponsesInput(), 107 ) 108 109 expect(result.text).toContain('Done.') 110 expect(result.text).toContain('bash (textual fallback): error') 111 expect(result.text).toContain( 112 'exec preflight: detected likely shell variable injection ($DM_JSON)', 113 ) 114 } finally { 115 await rm(tempRoot, { recursive: true, force: true }) 116 } 117 }) 118 119 it('surfaces command-not-found failure reason in bash tool summaries', async () => { 120 const fetchMock = vi.mocked(global.fetch) 121 fetchMock 122 .mockResolvedValueOnce( 123 jsonResponse({ 124 model: 'minimax-m2', 125 output_text: `<tool_call> 126 bash 127 <arg_key>command</arg_key><arg_value>__helper_missing_command_xyz__</arg_value> 128 </tool_call>`, 129 }), 130 ) 131 .mockResolvedValueOnce( 132 jsonResponse({ 133 model: 'minimax-m2', 134 output_text: 'Done.', 135 }), 136 ) 137 138 const result = await openAiCompatibleProvider.generateReply(buildResponsesInput()) 139 140 expect(result.text).toContain('Done.') 141 expect(result.text).toContain('bash (textual fallback): ok') 142 expect(result.text).toContain('reason: Command not found (exit 127).') 143 }) 144 })