/ .github / scripts / content-guardian.js
content-guardian.js
  1  #!/usr/bin/env node
  2  
  3  /**
  4   * Content Guardian - Protects authentic CV content from AI hallucinations
  5   *
  6   * This module ensures that verified, authentic content in the CV is never
  7   * overwritten by AI-generated hallucinations. It maintains a protected content
  8   * registry and validates all AI enhancements against known facts.
  9   *
 10   * Features:
 11   * - Protected content registry with verification status
 12   * - AI enhancement validation against authentic data
  13   * - Detection and reporting of hallucinated content (automatic rollback is not implemented in this file)
 14   * - Audit trail of content changes
 15   * - Standalone validation (no --init required)
 16   * - CV structure validation (dates, URLs, required fields)
 17   *
 18   * Usage: node content-guardian.js --validate
 19   *
 20   * @author Adrian Wedd
 21   * @version 2.0.0
 22   */
 23  
 24  const fs = require('fs').promises;
 25  const path = require('path');
 26  
 27  class ContentGuardian {
 28      constructor() {
 29          this.dataDir = path.resolve(__dirname, '../../data');
 30          this.protectedContentPath = path.join(this.dataDir, 'protected-content.json');
 31          this.baseCvPath = path.join(this.dataDir, 'base-cv.json');
 32  
 33          // Define protected content categories that should never be fabricated
 34          this.protectedCategories = [
 35              'achievements',
 36              'experience',
 37              'education',
 38              'certifications',
 39              'professional_summary'
 40          ];
 41  
 42          // Hallucination indicators - patterns that suggest fabricated content
 43          this.hallucinationPatterns = [
 44              // Original patterns
 45              /AI Innovation Excellence Award/i,
 46              /15\+ AI-powered autonomous systems/i,
 47              /99\.5% average system reliability/i,
 48              /Published research.*patents/i,
 49              /Keynote speaker.*conferences/i,
 50              /Mentored \d+\+ junior developers/i,
 51              /\d+% average cost reduction/i,
 52              /Filed \d+ patents/i,
 53              /(\d+k\+|\d+,\d+\+) users?/i,
 54              /enterprises worldwide/i,
 55  
 56              // Superlative inflation
 57              /\bworld[- ]class\b/i,
 58              /\bindustry[- ]leading\b/i,
 59              /\bbest[- ]in[- ]class\b/i,
 60              /\bunparalleled\b/i,
 61              /\bgroundbreaking\b/i,
 62              /\brevolutionary\b/i,
 63              /\bpioneering\b(?!\s+use)/i, // allow "pioneering use of" but not generic "pioneering"
 64              /\btrailblazing\b/i,
 65              /\bcutting[- ]edge\b/i,
 66              /\bstate[- ]of[- ]the[- ]art\b/i,
 67  
 68              // Unverifiable metrics
 69              /\d+%\s+(?:improvement|increase|boost|reduction|decrease|growth)/i,
 70  
 71              // Meta-commentary (AI artifacts leaked into content)
 72              /\bAI[- ]enhanced\b/i,
 73              /\[AI\b/i,
 74              /\boptimized\s+by\s+(?:AI|Claude|GPT|LLM)/i,
 75              /\bgenerated\s+by\s+(?:AI|Claude|GPT|LLM)/i,
 76              /\benhanced\s+by\s+AI\b/i,
 77              /\[NOTE:/i,
 78              /\[TODO:/i,
 79              /\bTODO:/i,
 80  
 81              // Fabricated awards/recognitions
 82              /\baward[- ]winning\b/i,
 83              /\brecognized\s+(?:as|for)\s+(?:excellence|outstanding|exceptional)/i,
 84              /\breceived\s+(?:the|an?)\s+.*\baward\b/i,
 85              /\bnominated\s+for\b/i
 86          ];
 87  
 88          // Known legitimate certifications/recognitions (whitelist)
 89          this.knownCertifications = [
 90              'Google Analytics Individual Qualification',
 91              'Google AdWords Certification',
 92              'Bing Ads Accredited Professional'
 93          ];
 94      }
 95  
 96      /**
 97       * Build the default protected content registry inline.
 98       * Used when protected-content.json does not exist so --validate can work standalone.
 99       */
100      buildDefaultRegistry() {
101          return {
102              last_updated: new Date().toISOString(),
103              version: "2.0.0",
104              protection_level: "high",
105              verified_content: {
106                  experience: [
107                      {
108                          position: "Systems Analyst / Acting Senior Change Analyst",
109                          company: "Homes Tasmania (formerly Department of Communities Tasmania)",
110                          period: "2018 - Present",
111                          verified: true,
112                          source: "employment_records",
113                          protection_reason: "Current verified employment"
114                      },
115                      {
116                          position: "ITS Client Services Officer",
117                          company: "University of Tasmania",
118                          period: "2015 - 2018",
119                          verified: true,
120                          source: "employment_records",
121                          protection_reason: "Verified previous employment"
122                      },
123                      {
124                          position: "Director",
125                          company: "Digital Agency PTY LTD",
126                          period: "2015 - 2018",
127                          verified: true,
128                          source: "business_records",
129                          protection_reason: "Verified business ownership"
130                      },
131                      {
132                          position: "Second Level IT Support Engineer",
133                          company: "The Wilderness Society Inc.",
134                          period: "2012 - 2015",
135                          verified: true,
136                          source: "employment_records",
137                          protection_reason: "Verified NGO employment"
138                      },
139                      {
140                          position: "Communications and Logistics Coordinator",
141                          company: "Greenpeace Australia Pacific",
142                          period: "2010 - 2012",
143                          verified: true,
144                          source: "employment_records",
145                          protection_reason: "Verified environmental advocacy role"
146                      }
147                  ],
148                  achievements: [
149                      {
150                          title: "Systems Integration Excellence",
151                          verified: true,
152                          source: "work_portfolio",
153                          protection_reason: "Verified technical work at Homes Tasmania"
154                      },
155                      {
156                          title: "Cybersecurity Leadership",
157                          verified: true,
158                          source: "work_portfolio",
159                          protection_reason: "Verified security initiatives"
160                      },
161                      {
162                          title: "AI Innovation Pioneer",
163                          verified: true,
164                          source: "work_portfolio",
165                          protection_reason: "Verified AI implementation in public sector"
166                      },
167                      {
168                          title: "Environmental Campaign Technology Leadership",
169                          verified: true,
170                          source: "employment_records",
171                          protection_reason: "Verified NGO technical leadership"
172                      },
173                      {
174                          title: "Professional Certification Excellence",
175                          verified: true,
176                          source: "certification_records",
177                          protection_reason: "Verified Google/Bing certifications"
178                      },
179                      {
180                          title: "Automation & Process Improvement",
181                          verified: true,
182                          source: "work_portfolio",
183                          protection_reason: "Verified automation development"
184                      }
185                  ],
186                  certifications: [
187                      {
188                          name: "Google Analytics Individual Qualification",
189                          verified: true,
190                          source: "google_records"
191                      },
192                      {
193                          name: "Google AdWords Certification",
194                          verified: true,
195                          source: "google_records"
196                      },
197                      {
198                          name: "Bing Ads Accredited Professional",
199                          verified: true,
200                          source: "microsoft_records"
201                      }
202                  ]
203              },
204              forbidden_claims: [
205                  "AI Innovation Excellence Award",
206                  "15+ AI-powered autonomous systems",
207                  "99.5% average system reliability",
208                  "Published research on autonomous agent coordination",
209                  "Filed 3 patents for innovative AI system architectures",
210                  "Keynote speaker at major AI and technology conferences",
211                  "Mentored 20+ junior developers and AI engineers",
212                  "500+ stars and are used by enterprises worldwide",
213                  "10,000+ developers",
214                  "1M+ daily active users"
215              ]
216          };
217      }
218  
219      /**
220       * Initialize protected content registry
221       */
222      async initializeProtectedRegistry() {
223          console.log('🛡️ Initializing Content Guardian...');
224  
225          const protectedContent = this.buildDefaultRegistry();
226  
227          await fs.writeFile(this.protectedContentPath, JSON.stringify(protectedContent, null, 2), 'utf8');
228          console.log('✅ Protected content registry initialized');
229      }
230  
231      /**
232       * Load the protected content registry, creating it inline if it does not exist
233       */
234      async loadProtectedData() {
235          try {
236              const raw = await fs.readFile(this.protectedContentPath, 'utf8');
237              return JSON.parse(raw);
238          } catch {
239              console.log('⚠️ protected-content.json not found — using built-in default registry');
240              return this.buildDefaultRegistry();
241          }
242      }
243  
244      /**
245       * Validate CV content against protected registry
246       */
247      async validateContent() {
248          console.log('🔍 Validating CV content against protected registry...');
249  
250          try {
251              const cvData = JSON.parse(await fs.readFile(this.baseCvPath, 'utf8'));
252              const protectedData = await this.loadProtectedData();
253  
254              let violations = [];
255              const content = JSON.stringify(cvData);
256  
257              // Check for forbidden claims
258              for (const forbiddenClaim of protectedData.forbidden_claims) {
259                  if (content.includes(forbiddenClaim)) {
260                      violations.push({
261                          type: 'forbidden_claim',
262                          claim: forbiddenClaim,
263                          severity: 'high'
264                      });
265                  }
266              }
267  
268              // Check for hallucination patterns
269              for (const pattern of this.hallucinationPatterns) {
270                  if (pattern.test(content)) {
271                      violations.push({
272                          type: 'hallucination_pattern',
273                          match: pattern.toString(),
274                          severity: 'high'
275                      });
276                  }
277              }
278  
279              // Check for fabricated awards/recognitions not in the known list
280              const awardViolations = this.checkFabricatedAwards(cvData);
281              violations.push(...awardViolations);
282  
283              // Validate CV structure
284              const structureViolations = this.validateCVStructure(cvData);
285              violations.push(...structureViolations);
286  
287              if (violations.length > 0) {
288                  console.log('🚨 CONTENT VIOLATIONS DETECTED:');
289                  violations.forEach((v, i) => {
290                      console.log(`  ${i + 1}. ${v.type}: ${v.claim || v.match || v.field || v.message} [${v.severity}]`);
291                  });
292  
293                  // Log violations for audit trail
294                  await this.logViolations(violations);
295                  return { valid: false, violations };
296              } else {
297                  console.log('✅ Content validation passed - no hallucinations detected');
298                  return { valid: true, violations: [] };
299              }
300  
301          } catch (error) {
302              console.error('❌ Content validation failed:', error.message);
303              return { valid: false, error: error.message };
304          }
305      }
306  
307      /**
308       * Check for fabricated awards or recognitions not in the known list
309       */
310      checkFabricatedAwards(cvData) {
311          const violations = [];
312  
313          // Check achievements for award-like language
314          const achievements = cvData.achievements || [];
315          for (const ach of achievements) {
316              const text = `${ach.title || ''} ${ach.description || ''}`;
317              // Look for award/recognition language
318              if (/\baward\b/i.test(text) || /\brecognition\b/i.test(text) || /\bhonor(?:ed)?\b/i.test(text)) {
319                  // Check if this is a known certification/recognition
320                  const isKnown = this.knownCertifications.some(cert =>
321                      text.includes(cert)
322                  );
323                  if (!isKnown) {
324                      violations.push({
325                          type: 'fabricated_award',
326                          claim: ach.title,
327                          message: `Unverified award/recognition: "${ach.title}"`,
328                          severity: 'high'
329                      });
330                  }
331              }
332          }
333  
334          // Check certifications for unknown entries
335          const certifications = cvData.certifications || [];
336          for (const cert of certifications) {
337              const certName = cert.name || '';
338              const isKnown = this.knownCertifications.some(known =>
339                  certName.includes(known) || known.includes(certName)
340              );
341              if (!isKnown && certName.length > 0) {
342                  violations.push({
343                      type: 'unknown_certification',
344                      claim: certName,
345                      message: `Certification not in known list: "${certName}"`,
346                      severity: 'medium'
347                  });
348              }
349          }
350  
351          return violations;
352      }
353  
354      /**
355       * Validate CV structure: required fields, reasonable dates, URL formats
356       */
357      validateCVStructure(cvData) {
358          const violations = [];
359          const currentYear = new Date().getFullYear();
360  
361          // Required top-level fields
362          const requiredFields = ['personal_info', 'experience', 'skills'];
363          for (const field of requiredFields) {
364              if (!cvData[field]) {
365                  violations.push({
366                      type: 'missing_required_field',
367                      field: field,
368                      message: `Required field "${field}" is missing`,
369                      severity: 'high'
370                  });
371              }
372          }
373  
374          // Validate personal_info fields
375          if (cvData.personal_info) {
376              const pi = cvData.personal_info;
377              if (!pi.name || pi.name.trim().length === 0) {
378                  violations.push({
379                      type: 'missing_required_field',
380                      field: 'personal_info.name',
381                      message: 'Name is missing or empty',
382                      severity: 'high'
383                  });
384              }
385  
386              // Validate URL formats
387              const urlFields = ['website', 'github', 'linkedin'];
388              for (const urlField of urlFields) {
389                  if (pi[urlField]) {
390                      if (!/^https?:\/\/.+\..+/.test(pi[urlField])) {
391                          violations.push({
392                              type: 'invalid_url',
393                              field: `personal_info.${urlField}`,
394                              message: `Invalid URL format: "${pi[urlField]}"`,
395                              severity: 'medium'
396                          });
397                      }
398                  }
399              }
400  
401              // Validate email format
402              if (pi.email && !/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(pi.email)) {
403                  violations.push({
404                      type: 'invalid_email',
405                      field: 'personal_info.email',
406                      message: `Invalid email format: "${pi.email}"`,
407                      severity: 'medium'
408                  });
409              }
410          }
411  
412          // Validate experience dates
413          if (cvData.experience && Array.isArray(cvData.experience)) {
414              for (const exp of cvData.experience) {
415                  // Check required fields per experience entry
416                  if (!exp.position || !exp.company || !exp.period) {
417                      violations.push({
418                          type: 'incomplete_experience',
419                          field: `experience: ${exp.position || exp.company || 'unknown'}`,
420                          message: 'Experience entry missing position, company, or period',
421                          severity: 'medium'
422                      });
423                      continue;
424                  }
425  
426                  // Check for future start dates
427                  const yearMatches = exp.period.match(/\b(20\d{2}|19\d{2})\b/g);
428                  if (yearMatches) {
429                      const startYear = parseInt(yearMatches[0], 10);
430                      if (startYear > currentYear + 1) {
431                          violations.push({
432                              type: 'future_date',
433                              field: `experience: ${exp.position}`,
434                              message: `Start date is in the future: ${exp.period}`,
435                              severity: 'high'
436                          });
437                      }
438  
439                      // Check for end date before start date
440                      if (yearMatches.length >= 2) {
441                          const endYear = parseInt(yearMatches[1], 10);
442                          if (endYear < startYear) {
443                              violations.push({
444                                  type: 'invalid_date_range',
445                                  field: `experience: ${exp.position}`,
446                                  message: `End date before start date: ${exp.period}`,
447                                  severity: 'high'
448                              });
449                          }
450                      }
451                  }
452              }
453          }
454  
455          // Validate project dates and required fields
456          if (cvData.projects && Array.isArray(cvData.projects)) {
457              for (const proj of cvData.projects) {
458                  if (!proj.name) {
459                      violations.push({
460                          type: 'incomplete_project',
461                          field: 'projects',
462                          message: 'Project entry missing name',
463                          severity: 'medium'
464                      });
465                      continue;
466                  }
467  
468                  // Validate github URL format if present
469                  if (proj.github && !/^https?:\/\/.+\..+/.test(proj.github)) {
470                      violations.push({
471                          type: 'invalid_url',
472                          field: `projects: ${proj.name}`,
473                          message: `Invalid GitHub URL: "${proj.github}"`,
474                          severity: 'medium'
475                      });
476                  }
477  
478                  // Check for future start dates in projects
479                  if (proj.period) {
480                      const yearMatches = proj.period.match(/\b(20\d{2}|19\d{2})\b/g);
481                      if (yearMatches) {
482                          const startYear = parseInt(yearMatches[0], 10);
483                          if (startYear > currentYear + 1) {
484                              violations.push({
485                                  type: 'future_date',
486                                  field: `projects: ${proj.name}`,
487                                  message: `Start date is in the future: ${proj.period}`,
488                                  severity: 'high'
489                              });
490                          }
491                      }
492                  }
493              }
494          }
495  
496          // Validate skills have required fields
497          if (cvData.skills && Array.isArray(cvData.skills)) {
498              for (const skill of cvData.skills) {
499                  if (!skill.name) {
500                      violations.push({
501                          type: 'incomplete_skill',
502                          field: 'skills',
503                          message: 'Skill entry missing name',
504                          severity: 'medium'
505                      });
506                  }
507              }
508          }
509  
510          return violations;
511      }
512  
513      /**
514       * Log content violations for audit trail
515       */
516      async logViolations(violations) {
517          const auditPath = path.join(this.dataDir, 'content-audit.json');
518          let auditLog = [];
519  
520          try {
521              const existing = await fs.readFile(auditPath, 'utf8');
522              auditLog = JSON.parse(existing);
523          } catch {
524              // File doesn't exist, start fresh
525          }
526  
527          auditLog.push({
528              timestamp: new Date().toISOString(),
529              violations_count: violations.length,
530              violations: violations,
531              action: 'detected_and_logged'
532          });
533  
534          // Keep only last 50 audit entries
535          if (auditLog.length > 50) {
536              auditLog = auditLog.slice(-50);
537          }
538  
539          await fs.writeFile(auditPath, JSON.stringify(auditLog, null, 2), 'utf8');
540      }
541  
542      /**
543       * Add protection flags to achievements
544       */
545      async protectAchievements() {
546          console.log('🛡️ Adding protection flags to verified achievements...');
547  
548          try {
549              const cvData = JSON.parse(await fs.readFile(this.baseCvPath, 'utf8'));
550  
551              // Add protected flag to verified achievements
552              if (cvData.achievements) {
553                  cvData.achievements = cvData.achievements.map(achievement => ({
554                      ...achievement,
555                      protected: true,
556                      verified: true,
557                      source: "manual_verification",
558                      last_verified: new Date().toISOString()
559                  }));
560              }
561  
562              // Add metadata to indicate content protection is active
563              cvData.metadata = {
564                  ...cvData.metadata,
565                  content_protection: {
566                      enabled: true,
567                      guardian_version: "2.0.0",
568                      last_validation: new Date().toISOString(),
569                      protection_level: "high"
570                  }
571              };
572  
573              await fs.writeFile(this.baseCvPath, JSON.stringify(cvData, null, 2), 'utf8');
574              console.log('✅ Achievement protection flags added');
575  
576          } catch (error) {
577              console.error('❌ Failed to protect achievements:', error.message);
578          }
579      }
580  }
581  
582  // CLI interface
/**
 * Command-line entry point. Dispatches on the first recognised flag among
 * `--init`, `--validate` and `--protect`; prints usage when none is given.
 *
 * The exit status is set through `process.exitCode` rather than
 * `process.exit()`, so output that has already been written is flushed before
 * the process ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const guardian = new ContentGuardian();
    const args = process.argv.slice(2);

    if (args.includes('--init')) {
        await guardian.initializeProtectedRegistry();
    } else if (args.includes('--validate')) {
        const result = await guardian.validateContent();
        process.exitCode = result.valid ? 0 : 1;
    } else if (args.includes('--protect')) {
        const succeeded = await guardian.protectAchievements();
        // Only an explicit `false` is treated as failure; an implementation
        // that resolves to undefined leaves the exit status untouched.
        if (succeeded === false) {
            process.exitCode = 1;
        }
    } else {
        console.log('Content Guardian - Protects authentic CV content');
        console.log('');
        console.log('Usage:');
        console.log('  node content-guardian.js --init      Initialize protected content registry');
        console.log('  node content-guardian.js --validate  Validate current CV content');
        console.log('  node content-guardian.js --protect   Add protection flags to achievements');
    }
}
603  
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
    main().catch((error) => {
        console.error(error);
        // An unexpected failure must not be reported as success to the caller (e.g. CI).
        process.exitCode = 1;
    });
}

module.exports = ContentGuardian;