/ scripts / claude-store-wrapper.js
claude-store-wrapper.js
  1  #!/usr/bin/env node
  2  /**
  3   * claude-store-wrapper.js — Extract and repair JSON from claude --output-format json
  4   * raw output, then pipe to claude-store.js.
  5   *
  6   * Usage: node scripts/claude-store-wrapper.js <raw_file> <batch_type>
  7   *
  8   * Handles:
  9   * - Outer claude JSON envelope ({"type":"result","result":"..."})
 10   * - Markdown code fence stripping
 11   * - Embedded unescaped quotes (iterativeRepair)
 12   * - Literal newlines/tabs inside JSON strings (escapeControlChars)
 13   * - Bare NDJSON without outer wrapper (NDJSON reconstructor)
 14   * - Invalid JSON → dumps fail file and exits with error JSON
 15   */
 16  
 17  import { readFileSync, writeFileSync } from 'fs';
 18  import { spawnSync } from 'child_process';
 19  import { join, dirname } from 'path';
 20  import { fileURLToPath } from 'url';
 21  
 22  const __dirname = dirname(fileURLToPath(import.meta.url));
 23  const PROJECT_ROOT = join(__dirname, '..');
 24  
 25  const rawFile = process.argv[2];
 26  const batchType = process.argv[3];
 27  
 28  if (!rawFile || !batchType) {
 29    process.stdout.write(
 30      JSON.stringify({ error: 'usage', msg: 'claude-store-wrapper.js <raw_file> <batch_type>' })
 31    );
 32    process.exit(1);
 33  }
 34  
 35  function escapeControlChars(s) {
 36    let out = '';
 37    let inStr = false;
 38    let esc = false;
 39    for (let i = 0; i < s.length; i++) {
 40      const c = s[i];
 41      if (esc) {
 42        out += c;
 43        esc = false;
 44        continue;
 45      }
 46      if (c === '\\' && inStr) {
 47        out += c;
 48        esc = true;
 49        continue;
 50      }
 51      if (c === '"') {
 52        inStr = !inStr;
 53        out += c;
 54        continue;
 55      }
 56      if (inStr && c === '\n') {
 57        out += '\\n';
 58        continue;
 59      }
 60      if (inStr && c === '\r') {
 61        out += '\\r';
 62        continue;
 63      }
 64      if (inStr && c === '\t') {
 65        out += '\\t';
 66        continue;
 67      }
 68      out += c;
 69    }
 70    return out;
 71  }
 72  
 73  function iterativeRepair(s, maxIter) {
 74    let text = s;
 75    for (let iter = 0; iter < maxIter; iter++) {
 76      let parseErr;
 77      try {
 78        JSON.parse(text);
 79        return text;
 80      } catch (e) {
 81        parseErr = e;
 82      }
 83      const m = parseErr.message.match(/position (\d+)/);
 84      if (!m) break;
 85      const pos = parseInt(m[1]);
 86      let fixed = false;
 87      for (let i = pos; i >= Math.max(0, pos - 300) && !fixed; i--) {
 88        if (text[i] === '"' && text[i - 1] !== '\\') {
 89          const candidate = `${text.slice(0, i)}\\"${text.slice(i + 1)}`;
 90          let nextPos;
 91          try {
 92            JSON.parse(candidate);
 93            nextPos = null;
 94          } catch (e2) {
 95            const m2 = e2.message.match(/position (\d+)/);
 96            nextPos = m2 ? parseInt(m2[1]) : null;
 97          }
 98          if (nextPos === null || nextPos !== pos) {
 99            text = candidate;
100            fixed = true;
101          }
102        }
103      }
104      if (!fixed) break;
105    }
106    return text;
107  }
108  
// Map batch types to llm_usage stage names
const BATCH_TO_STAGE = {
  proofread: 'outreach',
  proposals_email: 'proposals',
  proposals_sms: 'proposals',
  reply_responses: 'replies',
  classify_replies: 'replies',
  extract_names: 'name-extraction',
  oversee: 'overseer',
  score_semantic: 'scoring',
  score_sites: 'scoring',
  enrich_sites: 'enrichment',
  followup_generate: 'outreach',
};

/**
 * Report token and cost figures from a Claude CLI envelope to the usage tracker.
 *
 * Best-effort: any failure while reading the envelope, importing the tracker
 * or logging is written to stderr and never propagated to the caller.
 *
 * @param {object} envelope - Parsed CLI envelope. Reads `usage`, `modelUsage`,
 *   `total_cost_usd` and `session_id`.
 * @returns {Promise<void>}
 */
async function logClaudeCliUsage(envelope) {
  // batchType is the module-level const from process.argv[3]
  const stage = BATCH_TO_STAGE[batchType] || 'other';
  try {
    const usage = envelope.usage || {};
    const modelUsage = envelope.modelUsage || {};
    // The first model key in modelUsage is used; a fixed default applies when there is none.
    const model = Object.keys(modelUsage)[0] || 'claude-opus-4-6';
    const mu = modelUsage[model] || {};
    // Prompt tokens include both cache-creation and cache-read input tokens.
    const promptTokens =
      (usage.input_tokens || 0) +
      (usage.cache_creation_input_tokens || 0) +
      (usage.cache_read_input_tokens || 0);
    const completionTokens = usage.output_tokens || mu.outputTokens || 0;
    const costUsd = envelope.total_cost_usd || mu.costUSD || 0;

    const { logLLMUsage } = await import('../src/utils/llm-usage-tracker.js');
    // Await the tracker call: if it returns a promise, a rejection is handled by
    // the catch below and the write completes before the caller can call
    // process.exit(). Awaiting a non-promise value is harmless.
    await logLLMUsage({
      siteId: null,
      stage,
      provider: 'claude-cli',
      model,
      promptTokens,
      completionTokens,
      requestId: envelope.session_id || null,
      costOverride: costUsd || null,
    });
  } catch (e) {
    process.stderr.write(
      `[claude-store-wrapper] usage tracking failed for ${stage}: ${e.message}\n`
    );
  }
}
156  
// Load the raw CLI output. A read failure is reported as JSON on stdout and
// ends the process with status 0.
let raw;
try {
  raw = readFileSync(rawFile, 'utf8');
} catch (readErr) {
  const failure = { error: 'read_failed', msg: String(readErr.message).slice(0, 200) };
  process.stdout.write(JSON.stringify(failure));
  process.exit(0);
}

// Try to decode the whole file as a single JSON value (the CLI envelope).
// When that fails the file content itself is treated as the payload.
let envelope = null;
try {
  envelope = JSON.parse(raw);
} catch (_ignored) {
  /* not a single JSON value — fall through with envelope = null */
}

// The payload defaults to the raw file content and is replaced by
// envelope.result only when that field is a string.
let text = raw;
if (envelope) {
  // Log usage from Claude CLI envelope (best-effort, errors logged to stderr)
  if (envelope.usage) await logClaudeCliUsage(envelope);

  // An envelope flagged as an error short-circuits everything else.
  if (envelope.is_error) {
    const failure = { error: 'claude_error', msg: envelope.result || 'empty' };
    process.stdout.write(JSON.stringify(failure));
    process.exit(0);
  }

  if (typeof envelope.result === 'string') {
    text = envelope.result;
  }
}
188  
// Strip markdown code fences (an opening ``` or ```json at the start, a closing ``` at the end)
text = text
  .replace(/^\s*```(?:json)?\s*\n?/, '')
  .replace(/\n?\s*```\s*$/, '')
  .trim();

// Repair pipeline: try to parse, then extract a JSON block, then fix control
// chars and stray quotes on the whole text, and finally on the extracted block.
let parseOk = false;
try {
  JSON.parse(text);
  parseOk = true;
} catch (_ignored) {
  /* will repair below */
}

if (!parseOk) {
  // Step 1: try extracting clean JSON block before any repair (handles trailing prose).
  // The match is greedy: first "{" to last "}", or first "[" to last "]".
  const blockMatch = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
  if (blockMatch) {
    try {
      JSON.parse(blockMatch[1]);
      text = blockMatch[1];
      parseOk = true;
    } catch (_ignored) {
      /* block has its own issues — continue to repair */
    }
  }

  if (!parseOk) {
    // Step 2: repair the whole text.
    const unrepaired = text;
    text = iterativeRepair(escapeControlChars(unrepaired), 20);

    let stillBroken = false;
    try {
      JSON.parse(text);
    } catch (_ignored) {
      stillBroken = true;
    }

    if (stillBroken && blockMatch) {
      // Step 3: whole-text repair cannot succeed when prose surrounds the JSON,
      // so run the same repair on the extracted block. If that does not parse
      // either, hand the unrepaired block to the later stages, as before.
      let blockText = blockMatch[1];
      if (blockText !== unrepaired) {
        // Skip the retry when the block is the whole text: same input, same result.
        const repairedBlock = iterativeRepair(escapeControlChars(blockText), 20);
        try {
          JSON.parse(repairedBlock);
          blockText = repairedBlock;
        } catch (_ignored) {
          /* keep the unrepaired block */
        }
      }
      text = blockText;
    }
  }
}
232  
// Wrap bare array: when the payload parses to a top-level array, embed it as
// `results` in an object that also carries the batch type.
let bareResults = null;
try {
  const decoded = JSON.parse(text);
  if (Array.isArray(decoded)) {
    bareResults = decoded;
  }
} catch (_ignored) {
  /* not valid JSON — leave text alone */
}
if (bareResults !== null) {
  text = JSON.stringify({ batch_type: batchType, results: bareResults });
}
242  
// Last-resort NDJSON reconstructor: when the payload still does not parse and
// starts with a bare record ({"message_id": … or {"site_id": …), treat it as
// one record per line and rebuild the {batch_type, results:[…]} wrapper.
let ndjsonOk = false;
try {
  JSON.parse(text);
  ndjsonOk = true;
} catch (_ignored) {
  /* check ndjson */
}
if (!ndjsonOk && (text.startsWith('{"message_id":') || text.startsWith('{"site_id":'))) {
  // Repair each NDJSON line individually before wrapping
  const inner = text
    .replace(/\n?\]\}$/, '') // drop a dangling "]}" left over from a partial wrapper
    .split(/\r?\n/) // accept CRLF as well, so a trailing "," is really at the line end
    .map(line => {
      if (!line.trim()) return line;
      const hasComma = line.endsWith(',');
      const record = hasComma ? line.slice(0, -1) : line;
      try {
        JSON.parse(record);
        return line; // already valid
      } catch (_e) {
        const repaired = iterativeRepair(escapeControlChars(record), 20);
        try {
          JSON.parse(repaired);
          return repaired + (hasComma ? ',' : '');
        } catch (_e2) {
          return line; // leave as-is if repair failed
        }
      }
    })
    .join('\n');
  // JSON.stringify keeps the wrapper valid even if batchType contains quotes
  // or backslashes (the bare-array wrapper serializes it the same way).
  const wrapped = `{"batch_type":${JSON.stringify(batchType)},"results":[\n${inner}\n]}`;
  let wrappedOk = false;
  try {
    JSON.parse(wrapped);
    wrappedOk = true;
  } catch (_ignored) {
    /* leave as is */
  }
  if (wrappedOk) text = wrapped;
}
282  
// Final validation: if the payload is still not valid JSON, dump it to a fail
// file under logs/ and report the parse error as JSON on stdout.
let finalErr;
try {
  JSON.parse(text);
} catch (parseFailure) {
  finalErr = parseFailure;
}
if (finalErr) {
  const failFile = join(PROJECT_ROOT, 'logs', `orch-fail-${batchType}.json`);
  try {
    writeFileSync(failFile, text, 'utf8');
  } catch (_ignored) {
    /* best effort */
  }

  // Character offset from the parser message; 0 when the message carries none.
  const located = finalErr.message.match(/position (\d+)/);
  const pos = Number.parseInt(located ? located[1] : '0', 10);
  // Up to 60 characters of context on either side of the failure offset.
  const snippet = text.slice(Math.max(0, pos - 60), pos + 60);

  const report = {
    error: 'invalid_json',
    msg: finalErr.message.slice(0, 120),
    pos,
    snippet,
    failFile,
  };
  process.stdout.write(JSON.stringify(report));
  process.exit(0);
}
310  
// Pipe to claude-store.js: the validated payload goes to the child's stdin and
// the child runs from the project root.
const storeScript = join(__dirname, 'claude-store.js');
const spawnOptions = {
  input: text,
  encoding: 'utf8',
  cwd: PROJECT_ROOT,
  maxBuffer: 10 * 1024 * 1024,
};
const r = spawnSync('node', [storeScript], spawnOptions);

// Relay the child's stdout; fall back to its stderr, and finally to a
// synthetic spawn_failed report when neither stream produced anything.
let relayed = r.stdout;
if (!relayed) {
  relayed = r.stderr;
}
if (!relayed) {
  relayed = JSON.stringify({
    error: 'spawn_failed',
    status: r.status,
    spawnErr: String(r.error || ''),
  });
}
process.stdout.write(relayed);