/ scripts / repair-translated-templates.js
repair-translated-templates.js
  1  #!/usr/bin/env node
  2  /**
  3   * Repair translated template files that have bracket imbalances or
  4   * LLM-added email opt-out text.
  5   *
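 * Fixes applied:
 * 1. Strip STOP/opt-out paragraphs from email template bodies (LLM hallucination)
 * 2. Prepend missing { to greeting wrapper (net -1 with no opening brace)
 * 3. Strip stray trailing } (remaining net -1)
 * 4. Collapse a spurious leading {{ to { before a single-option greeting (net +1)
 * 5. Add missing closing } for MX dad_02 audit name spintax (net +1)
 *
 * Templates that are still unbalanced afterwards are only reported as needing
 * re-translation. NOTE(review): the JP email_dad_01 re-translation (LLM
 * self-correction hallucination) is not performed by the code in this file.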
 12   */
 13  
 14  import { readFileSync, writeFileSync } from 'fs';
 15  import { join, dirname } from 'path';
 16  import { fileURLToPath } from 'url';
 17  import dotenv from 'dotenv';
 18  
 19  dotenv.config();
 20  const __dirname = dirname(fileURLToPath(import.meta.url));
 21  const projectRoot = join(__dirname, '..');
 22  
 23  // ─── Helpers ──────────────────────────────────────────────────────────────────
 24  
 25  function braceNet(text) {
 26    let n = 0;
 27    for (const ch of text) {
 28      if (ch === '{') n++;
 29      else if (ch === '}') n--;
 30    }
 31    return n;
 32  }
 33  
 34  /** Strip any trailing STOP/opt-out paragraph from an email body. */
 35  function stripEmailOptOut(text) {
 36    // Match any final paragraph containing STOP (language-independent)
 37    // Handles: \n\n<optional text>STOP<optional text>.<optional trailing }>
 38    return text.replace(/\n\n[^\n]*\bSTOP\b[^\n]*\.?\}?\s*$/, '').trimEnd();
 39  }
 40  
 41  /** Load a template JSON file, apply repair, write back. Returns list of repairs. */
 42  function repairFile(filePath, channel) {
 43    const raw = readFileSync(filePath, 'utf-8');
 44    const data = JSON.parse(raw);
 45    const repairs = [];
 46  
 47    for (const t of data.templates) {
 48      if (!t.body_spintax) continue;
 49      let body = t.body_spintax;
 50      let changed = false;
 51  
 52      // 1. Strip LLM-added email opt-out
 53      if (channel === 'email' && body.includes('STOP')) {
 54        const fixed = stripEmailOptOut(body);
 55        if (fixed !== body) {
 56          repairs.push(`  ${t.id}: stripped email opt-out`);
 57          body = fixed;
 58          changed = true;
 59        }
 60      }
 61  
 62      // 2. Fix net = -1 cases
 63      let net = braceNet(body);
 64  
 65      if (net === -1) {
 66        // Case A: missing opening { (greeting wrapper stripped by LLM)
 67        if (!body.startsWith('{') && body.match(/^\S[^\n]+\n\n\|\}/)) {
 68          body = `{${body}`;
 69          repairs.push(`  ${t.id}: prepended missing {`);
 70          changed = true;
 71          net = braceNet(body);
 72        }
 73        // Case B: stray trailing }
 74        if (net === -1 && body.endsWith('}')) {
 75          body = body.slice(0, -1).trimEnd();
 76          repairs.push(`  ${t.id}: stripped stray trailing }`);
 77          changed = true;
 78          net = braceNet(body);
 79        }
 80      }
 81  
 82      // 3. Fix net = +1: single-greeting languages produce {{greeting [firstname|there]{p}\n\n|}
 83      //    The inner { before the greeting has no | separator and no closing } — it's spurious.
 84      //    Fix: collapse {{ at the start to single { when the pattern matches.
 85      if (net === 1 && body.match(/^\{\{[^\s{|][^|{]*\[firstname\|there\]\{[^}]+\}\n\n\|/)) {
 86        body = body.replace(/^\{\{([^\s{|][^|{]*\[firstname\|there\]\{[^}]+\})\n\n\|/, '{$1\n\n|');
 87        repairs.push(`  ${t.id}: removed spurious { before single-option greeting`);
 88        changed = true;
 89        net = braceNet(body);
 90      }
 91  
 92      // 4. Fix MX email_dad_02 specific: unclosed { in audit name
 93      if (t.id === 'email_dad_02_mx' && net === 1) {
 94        // The LLM left {Auditorías de {...} {...} unclosed — add the closing }
 95        body = body.replace(/(\{Auditorías de \{[^}]+\} \{[^}]+\})(["']|\s+sencillas)/, '$1}$2');
 96        repairs.push(`  ${t.id}: added closing } to audit name spintax`);
 97        changed = true;
 98        net = braceNet(body);
 99      }
100  
101      if (changed) {
102        t.body_spintax = body;
103      }
104  
105      // Report remaining issues
106      if (net !== 0) {
107        repairs.push(
108          `  ${t.id}: STILL UNBALANCED (net ${net > 0 ? '+' : ''}${net}) — needs re-translation`
109        );
110      }
111    }
112  
113    if (repairs.some(r => !r.includes('STILL UNBALANCED') && !r.startsWith('  //'))) {
114      writeFileSync(filePath, `${JSON.stringify(data, null, 2)}\n`);
115    }
116  
117    return repairs;
118  }
119  
120  // ─── Main ──────────────────────────────────────────────────────────────────────
121  
/** Sub-directories of data/templates whose email.json is repaired by this script. */
const TRANSLATED_EMAIL_DIRS = [
  'DE/de', 'DK/da', 'FR/fr', 'ID/id',
  'IN/hi', 'IT/it', 'JP/ja', 'KR/ko',
  'MX/es', 'NL/nl', 'PL/pl', 'SE/sv',
];

console.log('=== Repairing translated template files ===\n');

// Applied repairs across all files; "STILL UNBALANCED" report lines are not counted.
let totalRepairs = 0;

TRANSLATED_EMAIL_DIRS.forEach(dir => {
  const emailPath = join(projectRoot, 'data/templates', dir, 'email.json');
  try {
    const report = repairFile(emailPath, 'email');
    if (report.length === 0) return;

    console.log(`${dir}/email.json:`);
    for (const line of report) {
      console.log(line);
    }
    totalRepairs += report.reduce(
      (count, line) => (line.includes('STILL UNBALANCED') ? count : count + 1),
      0
    );
    console.log();
  } catch (e) {
    // A failure on one file is reported and the run continues with the next directory.
    console.log(`${dir}: ${e.message}`);
  }
});

console.log(`\nTotal repairs applied: ${totalRepairs}`);
console.log('\nRun check-all-brackets.cjs to verify.');