llm-provider.js
/**
 * LLM Provider Abstraction
 * Routes all LLM calls through OpenRouter.
 * The Claude CLI orchestrator handles "free" Claude Max calls separately.
 */

import axios from 'axios';
import './load-env.js';
import Logger from './logger.js';
import { logLLMUsage, getDailySpend, getHourlySpend } from './llm-usage-tracker.js';

const logger = new Logger('llm-provider');

const { OPENROUTER_API_KEY } = process.env;
const provider = 'openrouter';

if (!OPENROUTER_API_KEY) {
  throw new Error(
    'OPENROUTER_API_KEY not set in environment. Please configure in .env'
  );
}

/**
 * Call LLM API with automatic provider selection and usage tracking
 * @param {Object} params - Request parameters
 * @param {string} params.model - Model name (OpenRouter format)
 * @param {Array} params.messages - Messages array
 * @param {number} params.temperature - Temperature (0-1)
 * @param {number} params.max_tokens - Max tokens to generate
 * @param {boolean} params.json_mode - Whether to request JSON output
 * @param {Object} params.headers - Additional headers (for OpenRouter)
 * @param {string} params.stage - Pipeline stage for usage tracking (e.g. 'scoring', 'proposals')
 * @param {number} params.siteId - Site ID for usage tracking
 * @returns {Promise<Object>} - Response with content and usage: {content: string, usage: {promptTokens, completionTokens}}
 */
export async function callLLM({
  model,
  messages,
  temperature = 0.7,
  max_tokens = 2000,
  json_mode = false,
  headers = {},
  stage = null,
  siteId = null,
}) {
  // Budget enforcement
  const dailyBudget = parseFloat(process.env.LLM_DAILY_BUDGET) || 50;
  const hourlyAlert = parseFloat(process.env.LLM_HOURLY_ALERT) || 10;

  try {
    const dailySpend = getDailySpend();
    if (dailySpend >= dailyBudget) {
      throw new Error(
        `LLM daily budget exceeded: $${dailySpend.toFixed(2)} spent today (budget: $${dailyBudget}). ` +
          `Set LLM_DAILY_BUDGET in .env to adjust.`
      );
    }

    const hourlySpend = getHourlySpend();
    if (hourlySpend >= hourlyAlert) {
      logger.warn(
        `LLM hourly spend alert: $${hourlySpend.toFixed(2)} in the last hour (threshold: $${hourlyAlert})`
      );
    }
  } catch (budgetErr) {
    // Re-throw budget exceeded errors, but don't let DB query failures block LLM calls
    if (budgetErr.message.includes('daily budget exceeded')) {
      throw budgetErr;
    }
    logger.warn(`Budget check failed (non-blocking): ${budgetErr.message}`);
  }

  const result = await callOpenRouterAPI({
    model,
    messages,
    temperature,
    max_tokens,
    json_mode,
    headers,
  });

  // Auto-track usage if stage is provided and usage data is available
  if (stage && result.usage && (result.usage.promptTokens || result.usage.completionTokens)) {
    try {
      logLLMUsage({
        siteId,
        stage,
        provider,
        model,
        promptTokens: result.usage.promptTokens,
        completionTokens: result.usage.completionTokens,
      });
    } catch (trackingErr) {
      // Never let tracking failures break LLM calls
      logger.warn(`LLM usage tracking failed: ${trackingErr.message}`);
    }
  }

  return result;
}

/**
 * Call OpenRouter API
 */
async function callOpenRouterAPI({ model, messages, temperature, max_tokens, json_mode, headers }) {
  const requestBody = {
    model,
    messages,
    temperature,
    max_tokens,
  };

  if (json_mode) {
    requestBody.response_format = { type: 'json_object' };
  }

  try {
    const response = await axios.post(
      'https://openrouter.ai/api/v1/chat/completions',
      requestBody,
      {
        headers: {
          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
          'HTTP-Referer': 'https://333method.local',
          'X-Title': '333 Method Automation',
          'Content-Type': 'application/json',
          ...headers,
        },
        timeout: 120000, // Match circuit breaker timeout (vision API with large screenshots can be slow)
      }
    );

    const choice = response.data.choices[0];
    const content = choice?.message?.content;
    if (!content) {
      throw new Error('No content in OpenRouter API response');
    }

    // Warn on truncated responses (finish_reason='length' means max_tokens hit)
    if (choice?.finish_reason === 'length') {
      const truncatedModel = requestBody.model || 'unknown';
      const tokenUsage = response.data.usage?.completion_tokens || '?';
      // Use a simple console.warn to avoid circular logger import issues
      console.warn(
        `[LLMProvider] WARN: Response truncated (finish_reason=length) model=${truncatedModel} completion_tokens=${tokenUsage}`
      );
    }

    // Return content and usage data
    const usage = response.data.usage || {};
    return {
      content,
      usage: {
        promptTokens: usage.prompt_tokens || 0,
        completionTokens: usage.completion_tokens || 0,
      },
    };
  } catch (err) {
    // Enhanced error logging for debugging 400 errors
    if (err.response) {
      const errorDetails = {
        status: err.response.status,
        statusText: err.response.statusText,
        errorMessage: err.response.data?.error?.message,
        errorType: err.response.data?.error?.type,
        data: err.response.data,
        model,
        hasVision: messages.some(
          m => Array.isArray(m.content) && m.content.some(c => c.type === 'image_url')
        ),
        hasTooManyTokens: messages.some(
          m => Array.isArray(m.content) && JSON.stringify(m).length > 1000000
        ),
        jsonMode: json_mode,
        requestBodySize: JSON.stringify(requestBody).length,
        timestamp: new Date().toISOString(),
      };
      logger.warn('API Error details', errorDetails);
    }
    throw err;
  }
}

/**
 * Get current provider name
 */
export function getProvider() {
  return provider;
}

/**
 * Get provider display name
 */
export function getProviderDisplayName() {
  return 'OpenRouter';
}
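
Below is a minimal caller sketch showing how a pipeline stage might use this module. It assumes the caller sits in a sibling ESM file (so top-level await is available); the model slug, stage name, and siteId are illustrative placeholders, not values taken from this repo.

// Hypothetical caller (illustrative only); values below are placeholders.
import { callLLM, getProviderDisplayName } from './llm-provider.js';

const { content, usage } = await callLLM({
  model: 'anthropic/claude-3.5-sonnet', // any model slug in OpenRouter format
  messages: [
    { role: 'system', content: 'You are a concise assistant.' },
    { role: 'user', content: 'Summarize this page in one sentence.' },
  ],
  temperature: 0.3,
  max_tokens: 500,
  json_mode: false,
  stage: 'scoring', // opts in to usage tracking via logLLMUsage
  siteId: 42,       // illustrative site ID
});

console.log(`Provider: ${getProviderDisplayName()}`);
console.log(content);
console.log(`Tokens: ${usage.promptTokens} prompt / ${usage.completionTokens} completion`);

Because budget enforcement and usage tracking live inside callLLM, a caller only needs to pass stage (and optionally siteId) to opt in to tracking; omitting stage skips the logLLMUsage call entirely.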