// content-guardian.js
/**
 * Content Guardian - Protects authentic CV content from AI hallucinations
 *
 * This module ensures that verified, authentic content in the CV is never
 * overwritten by AI-generated hallucinations. It maintains a protected content
 * registry and validates all AI enhancements against known facts.
 *
 * Features:
 * - Protected content registry with verification status
 * - AI enhancement validation against authentic data
 * - Audit trail of detected content violations
 * - Standalone validation (no --init required)
 * - CV structure validation (dates, URLs, required fields)
 *
 * Usage:
 *   node content-guardian.js --init       Initialize protected content registry
 *   node content-guardian.js --validate   Validate current CV content
 *   node content-guardian.js --protect    Add protection flags to achievements
 *
 * @author Adrian Wedd
 * @version 2.0.0
 */

const fs = require('fs').promises;
const path = require('path');

/** Minimal "looks like an http(s) URL" check shared by all URL fields. */
const URL_PATTERN = /^https?:\/\/.+\..+/;

/** Minimal "looks like an e-mail address" check. */
const EMAIL_PATTERN = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

/** Matches award/recognition wording inside an achievement. */
const AWARD_LANGUAGE_PATTERN = /\b(?:award|recognition|honor(?:ed)?)\b/i;

/** Number of audit entries retained in content-audit.json. */
const MAX_AUDIT_ENTRIES = 50;

/**
 * @param {*} value
 * @returns {boolean} true for non-null, non-array objects.
 */
function isPlainRecord(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Recursively collect every string VALUE found in a JSON-like structure.
 * Property keys are deliberately not collected: only authored content is scanned.
 *
 * @param {*} value - Any JSON-compatible value.
 * @param {string[]} [out] - Accumulator (used by the recursion).
 * @returns {string[]} All string values, in traversal order.
 */
function collectStrings(value, out = []) {
  if (typeof value === 'string') {
    out.push(value);
  } else if (Array.isArray(value)) {
    for (const item of value) {
      collectStrings(item, out);
    }
  } else if (value !== null && typeof value === 'object') {
    for (const item of Object.values(value)) {
      collectStrings(item, out);
    }
  }
  return out;
}

/**
 * Extract four-digit years (19xx / 20xx) from a period such as "2018 - Present".
 *
 * @param {*} period - Expected to be a string or number; anything else yields [].
 * @returns {number[]} Years in the order they appear.
 */
function extractYears(period) {
  if (typeof period !== 'string' && typeof period !== 'number') {
    return [];
  }
  const matches = String(period).match(/\b(20\d{2}|19\d{2})\b/g);
  return matches ? matches.map((year) => Number.parseInt(year, 10)) : [];
}

/**
 * Date sanity checks shared by experience and project entries.
 *
 * @param {*} period - The entry's period value.
 * @param {string} field - Label used in the violation's `field` property.
 * @param {number} currentYear - Year to compare against.
 * @param {boolean} checkRange - Also flag an end year earlier than the start year.
 * @returns {object[]} Zero or more violation records.
 */
function periodViolations(period, field, currentYear, checkRange) {
  const [startYear, endYear] = extractYears(period);
  if (startYear === undefined) {
    return [];
  }

  const found = [];
  // NOTE(review): a start year of currentYear + 1 is tolerated — confirm this slack is intended.
  if (startYear > currentYear + 1) {
    found.push({
      type: 'future_date',
      field,
      message: `Start date is in the future: ${period}`,
      severity: 'high'
    });
  }
  if (checkRange && endYear !== undefined && endYear < startYear) {
    found.push({
      type: 'invalid_date_range',
      field,
      message: `End date before start date: ${period}`,
      severity: 'high'
    });
  }
  return found;
}

class ContentGuardian {
  /**
   * @param {object} [options]
   * @param {string} [options.dataDir] - Directory holding base-cv.json,
   *   protected-content.json and content-audit.json. Defaults to `../../data`
   *   relative to this file.
   */
  constructor({ dataDir = path.resolve(__dirname, '../../data') } = {}) {
    this.dataDir = dataDir;
    this.protectedContentPath = path.join(this.dataDir, 'protected-content.json');
    this.baseCvPath = path.join(this.dataDir, 'base-cv.json');

    // Define protected content categories that should never be fabricated
    this.protectedCategories = [
      'achievements',
      'experience',
      'education',
      'certifications',
      'professional_summary'
    ];

    // Hallucination indicators - patterns that suggest fabricated content.
    // None of these use the /g flag, so .test() is stateless and reusable.
    this.hallucinationPatterns = [
      // Original patterns
      /AI Innovation Excellence Award/i,
      /15\+ AI-powered autonomous systems/i,
      /99\.5% average system reliability/i,
      /Published research.*patents/i,
      /Keynote speaker.*conferences/i,
      /Mentored \d+\+ junior developers/i,
      /\d+% average cost reduction/i,
      /Filed \d+ patents/i,
      /(\d+k\+|\d+,\d+\+) users?/i,
      /enterprises worldwide/i,

      // Superlative inflation
      /\bworld[- ]class\b/i,
      /\bindustry[- ]leading\b/i,
      /\bbest[- ]in[- ]class\b/i,
      /\bunparalleled\b/i,
      /\bgroundbreaking\b/i,
      /\brevolutionary\b/i,
      /\bpioneering\b(?!\s+use)/i, // allow "pioneering use of" but not generic "pioneering"
      /\btrailblazing\b/i,
      /\bcutting[- ]edge\b/i,
      /\bstate[- ]of[- ]the[- ]art\b/i,

      // Unverifiable metrics
      /\d+%\s+(?:improvement|increase|boost|reduction|decrease|growth)/i,

      // Meta-commentary (AI artifacts leaked into content)
      /\bAI[- ]enhanced\b/i,
      /\[AI\b/i,
      /\boptimized\s+by\s+(?:AI|Claude|GPT|LLM)/i,
      /\bgenerated\s+by\s+(?:AI|Claude|GPT|LLM)/i,
      /\benhanced\s+by\s+AI\b/i,
      /\[NOTE:/i,
      /\[TODO:/i,
      /\bTODO:/i,

      // Fabricated awards/recognitions
      /\baward[- ]winning\b/i,
      /\brecognized\s+(?:as|for)\s+(?:excellence|outstanding|exceptional)/i,
      /\breceived\s+(?:the|an?)\s+.*\baward\b/i,
      /\bnominated\s+for\b/i
    ];

    // Known legitimate certifications/recognitions (whitelist)
    this.knownCertifications = [
      'Google Analytics Individual Qualification',
      'Google AdWords Certification',
      'Bing Ads Accredited Professional'
    ];
  }

  /**
   * Build the default protected content registry inline.
   * Used when protected-content.json does not exist so --validate can work standalone.
   *
   * @returns {object} A fresh registry object (safe for the caller to mutate).
   */
  buildDefaultRegistry() {
    return {
      last_updated: new Date().toISOString(),
      version: '2.0.0',
      protection_level: 'high',
      verified_content: {
        experience: [
          {
            position: 'Systems Analyst / Acting Senior Change Analyst',
            company: 'Homes Tasmania (formerly Department of Communities Tasmania)',
            period: '2018 - Present',
            verified: true,
            source: 'employment_records',
            protection_reason: 'Current verified employment'
          },
          {
            position: 'ITS Client Services Officer',
            company: 'University of Tasmania',
            period: '2015 - 2018',
            verified: true,
            source: 'employment_records',
            protection_reason: 'Verified previous employment'
          },
          {
            position: 'Director',
            company: 'Digital Agency PTY LTD',
            period: '2015 - 2018',
            verified: true,
            source: 'business_records',
            protection_reason: 'Verified business ownership'
          },
          {
            position: 'Second Level IT Support Engineer',
            company: 'The Wilderness Society Inc.',
            period: '2012 - 2015',
            verified: true,
            source: 'employment_records',
            protection_reason: 'Verified NGO employment'
          },
          {
            position: 'Communications and Logistics Coordinator',
            company: 'Greenpeace Australia Pacific',
            period: '2010 - 2012',
            verified: true,
            source: 'employment_records',
            protection_reason: 'Verified environmental advocacy role'
          }
        ],
        achievements: [
          {
            title: 'Systems Integration Excellence',
            verified: true,
            source: 'work_portfolio',
            protection_reason: 'Verified technical work at Homes Tasmania'
          },
          {
            title: 'Cybersecurity Leadership',
            verified: true,
            source: 'work_portfolio',
            protection_reason: 'Verified security initiatives'
          },
          {
            title: 'AI Innovation Pioneer',
            verified: true,
            source: 'work_portfolio',
            protection_reason: 'Verified AI implementation in public sector'
          },
          {
            title: 'Environmental Campaign Technology Leadership',
            verified: true,
            source: 'employment_records',
            protection_reason: 'Verified NGO technical leadership'
          },
          {
            title: 'Professional Certification Excellence',
            verified: true,
            source: 'certification_records',
            protection_reason: 'Verified Google/Bing certifications'
          },
          {
            title: 'Automation & Process Improvement',
            verified: true,
            source: 'work_portfolio',
            protection_reason: 'Verified automation development'
          }
        ],
        certifications: [
          {
            name: 'Google Analytics Individual Qualification',
            verified: true,
            source: 'google_records'
          },
          {
            name: 'Google AdWords Certification',
            verified: true,
            source: 'google_records'
          },
          {
            name: 'Bing Ads Accredited Professional',
            verified: true,
            source: 'microsoft_records'
          }
        ]
      },
      forbidden_claims: [
        'AI Innovation Excellence Award',
        '15+ AI-powered autonomous systems',
        '99.5% average system reliability',
        'Published research on autonomous agent coordination',
        'Filed 3 patents for innovative AI system architectures',
        'Keynote speaker at major AI and technology conferences',
        'Mentored 20+ junior developers and AI engineers',
        '500+ stars and are used by enterprises worldwide',
        '10,000+ developers',
        '1M+ daily active users'
      ]
    };
  }

  /**
   * Initialize protected content registry by writing the default registry to
   * protected-content.json (overwriting any existing file).
   *
   * @returns {Promise<void>}
   * @throws If the data directory cannot be created or the file cannot be written.
   */
  async initializeProtectedRegistry() {
    console.log('🛡️ Initializing Content Guardian...');

    const protectedContent = this.buildDefaultRegistry();

    // A fresh checkout may not have the data directory yet.
    await fs.mkdir(this.dataDir, { recursive: true });
    await fs.writeFile(this.protectedContentPath, JSON.stringify(protectedContent, null, 2), 'utf8');
    console.log('✅ Protected content registry initialized');
  }

  /**
   * Load the protected content registry, falling back to the built-in default
   * when the file is missing, unreadable, or not a JSON object. The fallback is
   * deliberate (validation must work standalone) but the reason is reported
   * accurately instead of always claiming "not found".
   *
   * @returns {Promise<object>} The registry to validate against.
   */
  async loadProtectedData() {
    let raw;
    try {
      raw = await fs.readFile(this.protectedContentPath, 'utf8');
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.log('⚠️ protected-content.json not found — using built-in default registry');
      } else {
        console.warn(`⚠️ protected-content.json could not be read (${error.message}) — using built-in default registry`);
      }
      return this.buildDefaultRegistry();
    }

    try {
      const parsed = JSON.parse(raw);
      if (isPlainRecord(parsed)) {
        return parsed;
      }
      console.warn('⚠️ protected-content.json is not a JSON object — using built-in default registry');
    } catch (error) {
      console.warn(`⚠️ protected-content.json is not valid JSON (${error.message}) — using built-in default registry`);
    }
    return this.buildDefaultRegistry();
  }

  /**
   * Run every content check against already-loaded data. Pure: performs no I/O
   * and no logging, so it can be called directly by other tooling.
   *
   * Text checks run per string value rather than against one serialized blob,
   * so a pattern containing `.*` cannot match across unrelated fields and a
   * forbidden claim containing quotes is not defeated by JSON escaping.
   *
   * @param {*} cvData - Parsed CV document.
   * @param {object} [protectedData] - Registry; only `forbidden_claims` is read.
   * @returns {object[]} Violation records (empty when the content is clean).
   */
  findViolations(cvData, protectedData) {
    const violations = [];
    const texts = collectStrings(cvData);

    const forbiddenClaims =
      protectedData && Array.isArray(protectedData.forbidden_claims)
        ? protectedData.forbidden_claims
        : [];

    // Check for forbidden claims
    for (const forbiddenClaim of forbiddenClaims) {
      // An empty or non-string claim would otherwise match everything / throw.
      if (typeof forbiddenClaim !== 'string' || forbiddenClaim.length === 0) {
        continue;
      }
      if (texts.some((text) => text.includes(forbiddenClaim))) {
        violations.push({
          type: 'forbidden_claim',
          claim: forbiddenClaim,
          severity: 'high'
        });
      }
    }

    // Check for hallucination patterns (one violation per pattern, as before)
    for (const pattern of this.hallucinationPatterns) {
      if (texts.some((text) => pattern.test(text))) {
        violations.push({
          type: 'hallucination_pattern',
          match: pattern.toString(),
          severity: 'high'
        });
      }
    }

    // Check for fabricated awards/recognitions not in the known list
    violations.push(...this.checkFabricatedAwards(cvData));

    // Validate CV structure
    violations.push(...this.validateCVStructure(cvData));

    return violations;
  }

  /**
   * Validate CV content against protected registry.
   *
   * Reads base-cv.json, runs all checks, prints a report and appends detected
   * violations to the audit log.
   *
   * @returns {Promise<{valid: boolean, violations?: object[], error?: string}>}
   *   `valid: true` with an empty list when clean; `valid: false` with the
   *   violations when any are found; `valid: false` with `error` when the CV
   *   could not be read or parsed.
   */
  async validateContent() {
    console.log('🔍 Validating CV content against protected registry...');

    try {
      const cvData = JSON.parse(await fs.readFile(this.baseCvPath, 'utf8'));
      const protectedData = await this.loadProtectedData();
      const violations = this.findViolations(cvData, protectedData);

      if (violations.length === 0) {
        console.log('✅ Content validation passed - no hallucinations detected');
        return { valid: true, violations: [] };
      }

      console.log('🚨 CONTENT VIOLATIONS DETECTED:');
      violations.forEach((v, i) => {
        console.log(`  ${i + 1}. ${v.type}: ${v.claim || v.match || v.field || v.message} [${v.severity}]`);
      });

      // Log violations for audit trail. A failure to write the audit file must
      // not discard the violations that were just detected.
      try {
        await this.logViolations(violations);
      } catch (auditError) {
        console.error('❌ Failed to write audit log:', auditError.message);
      }
      return { valid: false, violations };
    } catch (error) {
      console.error('❌ Content validation failed:', error.message);
      return { valid: false, error: error.message };
    }
  }

  /**
   * Check for fabricated awards or recognitions not in the known list.
   *
   * Entries may be objects (`{ title, description }` / `{ name }`) or plain
   * strings; `null` and other malformed entries are skipped.
   *
   * @param {*} cvData - Parsed CV document.
   * @returns {object[]} `fabricated_award` and `unknown_certification` violations.
   */
  checkFabricatedAwards(cvData) {
    const violations = [];
    const data = isPlainRecord(cvData) ? cvData : {};

    // Check achievements for award-like language
    const achievements = Array.isArray(data.achievements) ? data.achievements : [];
    for (const ach of achievements) {
      let title;
      let description;
      if (typeof ach === 'string') {
        title = ach;
      } else if (isPlainRecord(ach)) {
        title = ach.title;
        description = ach.description;
      } else {
        continue;
      }

      const text = `${title || ''} ${description || ''}`;
      if (!AWARD_LANGUAGE_PATTERN.test(text)) {
        continue;
      }

      // Award wording is acceptable only when it names a known certification.
      const isKnown = this.knownCertifications.some((cert) => text.includes(cert));
      if (!isKnown) {
        // Fall back to the description so an untitled entry is still identifiable.
        const label = title || description;
        violations.push({
          type: 'fabricated_award',
          claim: label,
          message: `Unverified award/recognition: "${label}"`,
          severity: 'high'
        });
      }
    }

    // Check certifications for unknown entries
    const certifications = Array.isArray(data.certifications) ? data.certifications : [];
    for (const cert of certifications) {
      let certName = '';
      if (typeof cert === 'string') {
        certName = cert;
      } else if (isPlainRecord(cert) && typeof cert.name === 'string') {
        certName = cert.name;
      }
      if (certName.length === 0) {
        continue;
      }

      // Substring match in either direction tolerates longer or abbreviated names.
      const isKnown = this.knownCertifications.some(
        (known) => certName.includes(known) || known.includes(certName)
      );
      if (!isKnown) {
        violations.push({
          type: 'unknown_certification',
          claim: certName,
          message: `Certification not in known list: "${certName}"`,
          severity: 'medium'
        });
      }
    }

    return violations;
  }

  /**
   * Validate CV structure: required fields, reasonable dates, URL formats.
   *
   * Never throws on malformed input: wrong-typed fields and `null` entries are
   * reported as violations (or skipped) instead of aborting validation.
   *
   * @param {*} cvData - Parsed CV document.
   * @returns {object[]} Structural violation records.
   */
  validateCVStructure(cvData) {
    const violations = [];
    const data = isPlainRecord(cvData) ? cvData : {};
    const currentYear = new Date().getFullYear();

    // Required top-level fields
    const requiredFields = ['personal_info', 'experience', 'skills'];
    for (const field of requiredFields) {
      if (!data[field]) {
        violations.push({
          type: 'missing_required_field',
          field: field,
          message: `Required field "${field}" is missing`,
          severity: 'high'
        });
      }
    }

    // Validate personal_info fields
    if (data.personal_info) {
      const pi = data.personal_info;
      if (typeof pi.name !== 'string' || pi.name.trim().length === 0) {
        violations.push({
          type: 'missing_required_field',
          field: 'personal_info.name',
          message: 'Name is missing or empty',
          severity: 'high'
        });
      }

      // Validate URL formats
      const urlFields = ['website', 'github', 'linkedin'];
      for (const urlField of urlFields) {
        if (pi[urlField] && !URL_PATTERN.test(pi[urlField])) {
          violations.push({
            type: 'invalid_url',
            field: `personal_info.${urlField}`,
            message: `Invalid URL format: "${pi[urlField]}"`,
            severity: 'medium'
          });
        }
      }

      // Validate email format
      if (pi.email && !EMAIL_PATTERN.test(pi.email)) {
        violations.push({
          type: 'invalid_email',
          field: 'personal_info.email',
          message: `Invalid email format: "${pi.email}"`,
          severity: 'medium'
        });
      }
    }

    // Validate experience entries and their dates
    if (Array.isArray(data.experience)) {
      for (const rawEntry of data.experience) {
        const exp = isPlainRecord(rawEntry) ? rawEntry : {};

        // Check required fields per experience entry
        if (!exp.position || !exp.company || !exp.period) {
          violations.push({
            type: 'incomplete_experience',
            field: `experience: ${exp.position || exp.company || 'unknown'}`,
            message: 'Experience entry missing position, company, or period',
            severity: 'medium'
          });
          continue;
        }

        violations.push(
          ...periodViolations(exp.period, `experience: ${exp.position}`, currentYear, true)
        );
      }
    }

    // Validate project dates and required fields
    if (Array.isArray(data.projects)) {
      for (const rawEntry of data.projects) {
        const proj = isPlainRecord(rawEntry) ? rawEntry : {};

        if (!proj.name) {
          violations.push({
            type: 'incomplete_project',
            field: 'projects',
            message: 'Project entry missing name',
            severity: 'medium'
          });
          continue;
        }

        // Validate github URL format if present
        if (proj.github && !URL_PATTERN.test(proj.github)) {
          violations.push({
            type: 'invalid_url',
            field: `projects: ${proj.name}`,
            message: `Invalid GitHub URL: "${proj.github}"`,
            severity: 'medium'
          });
        }

        // Projects only get the future-start check; no end-before-start check.
        violations.push(
          ...periodViolations(proj.period, `projects: ${proj.name}`, currentYear, false)
        );
      }
    }

    // Validate skills have required fields
    if (Array.isArray(data.skills)) {
      for (const rawEntry of data.skills) {
        const skill = isPlainRecord(rawEntry) ? rawEntry : {};
        if (!skill.name) {
          violations.push({
            type: 'incomplete_skill',
            field: 'skills',
            message: 'Skill entry missing name',
            severity: 'medium'
          });
        }
      }
    }

    return violations;
  }

  /**
   * Log content violations for audit trail (content-audit.json in the data
   * directory). Only the most recent entries are kept.
   *
   * @param {object[]} violations - Violations detected in this run.
   * @returns {Promise<void>}
   * @throws If the audit file cannot be written.
   */
  async logViolations(violations) {
    const auditPath = path.join(this.dataDir, 'content-audit.json');
    let auditLog = [];

    try {
      const parsed = JSON.parse(await fs.readFile(auditPath, 'utf8'));
      // Anything other than an array cannot be appended to; start fresh.
      if (Array.isArray(parsed)) {
        auditLog = parsed;
      }
    } catch (error) {
      // A missing file is the normal first-run case; anything else is worth a warning.
      if (error.code !== 'ENOENT') {
        console.warn(`⚠️ Existing audit log unreadable (${error.message}) — starting a new one`);
      }
    }

    auditLog.push({
      timestamp: new Date().toISOString(),
      violations_count: violations.length,
      violations: violations,
      action: 'detected_and_logged'
    });

    // Keep only the last MAX_AUDIT_ENTRIES audit entries
    const retained = auditLog.slice(-MAX_AUDIT_ENTRIES);

    await fs.writeFile(auditPath, JSON.stringify(retained, null, 2), 'utf8');
  }

  /**
   * Add protection flags to achievements and record protection metadata in
   * base-cv.json.
   *
   * NOTE(review): every achievement object currently in the CV is stamped
   * `verified: true`; nothing here cross-checks it against the registry.
   * Confirm that is intended before running this on AI-edited content.
   *
   * @returns {Promise<boolean>} true when the CV was updated, false on failure
   *   (the error is logged, not thrown).
   */
  async protectAchievements() {
    console.log('🛡️ Adding protection flags to verified achievements...');

    try {
      const cvData = JSON.parse(await fs.readFile(this.baseCvPath, 'utf8'));

      // Add protected flag to achievements; leave malformed (non-object)
      // entries untouched rather than spreading them into garbage objects.
      if (Array.isArray(cvData.achievements)) {
        const verifiedAt = new Date().toISOString();
        cvData.achievements = cvData.achievements.map((achievement) =>
          isPlainRecord(achievement)
            ? {
                ...achievement,
                protected: true,
                verified: true,
                source: 'manual_verification',
                last_verified: verifiedAt
              }
            : achievement
        );
      }

      // Add metadata to indicate content protection is active
      cvData.metadata = {
        ...cvData.metadata,
        content_protection: {
          enabled: true,
          guardian_version: '2.0.0',
          last_validation: new Date().toISOString(),
          protection_level: 'high'
        }
      };

      await fs.writeFile(this.baseCvPath, JSON.stringify(cvData, null, 2), 'utf8');
      console.log('✅ Achievement protection flags added');
      return true;
    } catch (error) {
      console.error('❌ Failed to protect achievements:', error.message);
      return false;
    }
  }
}

/**
 * CLI interface. Sets `process.exitCode` instead of calling `process.exit()`
 * so pending stdout/stderr output is flushed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const guardian = new ContentGuardian();
  const args = process.argv.slice(2);

  if (args.includes('--init')) {
    await guardian.initializeProtectedRegistry();
  } else if (args.includes('--validate')) {
    const result = await guardian.validateContent();
    process.exitCode = result.valid ? 0 : 1;
  } else if (args.includes('--protect')) {
    const succeeded = await guardian.protectAchievements();
    if (!succeeded) {
      process.exitCode = 1;
    }
  } else {
    console.log('Content Guardian - Protects authentic CV content');
    console.log('');
    console.log('Usage:');
    console.log('  node content-guardian.js --init       Initialize protected content registry');
    console.log('  node content-guardian.js --validate   Validate current CV content');
    console.log('  node content-guardian.js --protect    Add protection flags to achievements');
  }
}

if (require.main === module) {
  main().catch((error) => {
    console.error(error);
    // An unexpected crash must not look like success to CI.
    process.exitCode = 1;
  });
}

module.exports = ContentGuardian;