// src/utils/llm-provider.js
  1  /**
  2   * LLM Provider Abstraction
  3   * Routes all LLM calls through OpenRouter.
  4   * The Claude CLI orchestrator handles "free" Claude Max calls separately.
  5   */
  6  
  7  import axios from 'axios';
  8  import './load-env.js';
  9  import Logger from './logger.js';
 10  import { logLLMUsage, getDailySpend, getHourlySpend } from './llm-usage-tracker.js';
 11  
const logger = new Logger('llm-provider');

const { OPENROUTER_API_KEY } = process.env;
// Single fixed provider: every paid call in this module is routed through
// OpenRouter; this constant tags usage-tracking records.
const provider = 'openrouter';

// Fail fast at import time: without an API key every request would fail
// with an auth error, so surface the misconfiguration immediately.
if (!OPENROUTER_API_KEY) {
  throw new Error(
    'OPENROUTER_API_KEY not set in environment. Please configure in .env'
  );
}
 22  
 23  /**
 24   * Call LLM API with automatic provider selection and usage tracking
 25   * @param {Object} params - Request parameters
 26   * @param {string} params.model - Model name (OpenRouter format)
 27   * @param {Array} params.messages - Messages array
 28   * @param {number} params.temperature - Temperature (0-1)
 29   * @param {number} params.max_tokens - Max tokens to generate
 30   * @param {boolean} params.json_mode - Whether to request JSON output
 31   * @param {Object} params.headers - Additional headers (for OpenRouter)
 32   * @param {string} params.stage - Pipeline stage for usage tracking (e.g. 'scoring', 'proposals')
 33   * @param {number} params.siteId - Site ID for usage tracking
 34   * @returns {Promise<Object>} - Response with content and usage: {content: string, usage: {promptTokens, completionTokens}}
 35   */
 36  export async function callLLM({
 37    model,
 38    messages,
 39    temperature = 0.7,
 40    max_tokens = 2000,
 41    json_mode = false,
 42    headers = {},
 43    stage = null,
 44    siteId = null,
 45  }) {
 46    // Budget enforcement
 47    const dailyBudget = parseFloat(process.env.LLM_DAILY_BUDGET) || 50;
 48    const hourlyAlert = parseFloat(process.env.LLM_HOURLY_ALERT) || 10;
 49  
 50    try {
 51      const dailySpend = getDailySpend();
 52      if (dailySpend >= dailyBudget) {
 53        throw new Error(
 54          `LLM daily budget exceeded: $${dailySpend.toFixed(2)} spent today (budget: $${dailyBudget}). ` +
 55            `Set LLM_DAILY_BUDGET in .env to adjust.`
 56        );
 57      }
 58  
 59      const hourlySpend = getHourlySpend();
 60      if (hourlySpend >= hourlyAlert) {
 61        logger.warn(
 62          `LLM hourly spend alert: $${hourlySpend.toFixed(2)} in the last hour (threshold: $${hourlyAlert})`
 63        );
 64      }
 65    } catch (budgetErr) {
 66      // Re-throw budget exceeded errors, but don't let DB query failures block LLM calls
 67      if (budgetErr.message.includes('daily budget exceeded')) {
 68        throw budgetErr;
 69      }
 70      logger.warn(`Budget check failed (non-blocking): ${budgetErr.message}`);
 71    }
 72  
 73    const result = await callOpenRouterAPI({
 74      model,
 75      messages,
 76      temperature,
 77      max_tokens,
 78      json_mode,
 79      headers,
 80    });
 81  
 82    // Auto-track usage if stage is provided and usage data is available
 83    if (stage && result.usage && (result.usage.promptTokens || result.usage.completionTokens)) {
 84      try {
 85        logLLMUsage({
 86          siteId,
 87          stage,
 88          provider,
 89          model,
 90          promptTokens: result.usage.promptTokens,
 91          completionTokens: result.usage.completionTokens,
 92        });
 93      } catch (trackingErr) {
 94        // Never let tracking failures break LLM calls
 95        logger.warn(`LLM usage tracking failed: ${trackingErr.message}`);
 96      }
 97    }
 98  
 99    return result;
100  }
101  
102  /**
103   * Call OpenRouter API
104   */
105  async function callOpenRouterAPI({ model, messages, temperature, max_tokens, json_mode, headers }) {
106    const requestBody = {
107      model,
108      messages,
109      temperature,
110      max_tokens,
111    };
112  
113    if (json_mode) {
114      requestBody.response_format = { type: 'json_object' };
115    }
116  
117    try {
118      const response = await axios.post(
119        'https://openrouter.ai/api/v1/chat/completions',
120        requestBody,
121        {
122          headers: {
123            Authorization: `Bearer ${OPENROUTER_API_KEY}`,
124            'HTTP-Referer': 'https://333method.local',
125            'X-Title': '333 Method Automation',
126            'Content-Type': 'application/json',
127            ...headers,
128          },
129          timeout: 120000, // Match circuit breaker timeout (vision API with large screenshots can be slow)
130        }
131      );
132  
133      const choice = response.data.choices[0];
134      const content = choice?.message?.content;
135      if (!content) {
136        throw new Error('No content in OpenRouter API response');
137      }
138  
139      // Warn on truncated responses (finish_reason='length' means max_tokens hit)
140      if (choice?.finish_reason === 'length') {
141        const model = requestBody.model || 'unknown';
142        const tokenUsage = response.data.usage?.completion_tokens || '?';
143        // Use a simple console.warn to avoid circular logger import issues
144  
145        console.warn(
146          `[LLMProvider] WARN: Response truncated (finish_reason=length) model=${model} completion_tokens=${tokenUsage}`
147        );
148      }
149  
150      // Return content and usage data
151      const usage = response.data.usage || {};
152      return {
153        content,
154        usage: {
155          promptTokens: usage.prompt_tokens || 0,
156          completionTokens: usage.completion_tokens || 0,
157        },
158      };
159    } catch (err) {
160      // Enhanced error logging for debugging 400 errors
161      if (err.response) {
162        const errorDetails = {
163          status: err.response.status,
164          statusText: err.response.statusText,
165          errorMessage: err.response.data?.error?.message,
166          errorType: err.response.data?.error?.type,
167          data: err.response.data,
168          model,
169          hasVision: messages.some(
170            m => Array.isArray(m.content) && m.content.some(c => c.type === 'image_url')
171          ),
172          hasTooManyTokens: messages.some(
173            m => Array.isArray(m.content) && JSON.stringify(m).length > 1000000
174          ),
175          jsonMode: json_mode,
176          requestBodySize: JSON.stringify(requestBody).length,
177          timestamp: new Date().toISOString(),
178        };
179        logger.warn('API Error details', errorDetails);
180      }
181      throw err;
182    }
183  }
184  
185  /**
186   * Get current provider name
187   */
188  export function getProvider() {
189    return provider;
190  }
191  
192  /**
193   * Get provider display name
194   */
195  export function getProviderDisplayName() {
196    return 'OpenRouter';
197  }