// claim-verifier.js

/**
 * AI Claim Verification Engine
 *
 * Verifies AI-generated claims against actual GitHub data to ensure factual
 * accuracy and prevent exaggeration in CV content. The engine extracts
 * quantifiable claims from AI-enhanced content and cross-references them with
 * metrics read from the local `data/` directory.
 *
 * Features:
 * - Quantifiable claim extraction from text
 * - GitHub data comparison and validation
 * - Discrepancy detection and flagging
 * - Confidence scoring for claims
 * - Human review recommendations
 *
 * Usage: node claim-verifier.js
 */

const fs = require('fs').promises;
const path = require('path');

/**
 * Claim Verification System
 *
 * Analyzes AI-generated content to identify quantifiable claims and verifies
 * them against GitHub activity data stored under `<cwd>/data`.
 */
class ClaimVerifier {
  constructor() {
    this.dataDir = path.join(process.cwd(), 'data');
    this.verificationResults = this.createEmptyResults();
  }

  /**
   * Build a fresh, empty result container.
   *
   * @returns {object} Container with the three claim buckets, the per-claim
   *   confidence map and a recommendations list.
   */
  createEmptyResults() {
    return {
      verified_claims: [],
      unverified_claims: [],
      flagged_discrepancies: [],
      confidence_scores: {},
      recommendations: []
    };
  }

  /**
   * Main verification pipeline: load data, extract claims, verify each one,
   * then build and persist the report.
   *
   * @returns {Promise<object>} The generated verification report.
   * @throws Re-throws any error raised while verifying or saving.
   */
  async verifyAllClaims() {
    console.log('🔍 **AI CLAIM VERIFICATION INITIATED**');
    console.log('📊 Analyzing AI-generated content against GitHub data...');
    console.log('');

    // Start every run from a clean slate so that calling this method twice on
    // the same instance does not double-count claims from the previous run.
    this.verificationResults = this.createEmptyResults();

    try {
      // Load data sources
      const aiEnhancements = await this.loadAIEnhancements();
      const activityData = await this.loadActivityData();
      const githubMetrics = await this.extractGitHubMetrics(activityData);

      // Extract claims from AI-generated content
      const extractedClaims = await this.extractQuantifiableClaims(aiEnhancements);

      // Verify each claim against actual data
      for (const claim of extractedClaims) {
        const verification = await this.verifyClaim(claim, githubMetrics);
        this.categorizeVerification(claim, verification);
      }

      // Generate verification report
      const report = await this.generateVerificationReport();
      await this.saveVerificationResults(report);

      console.log('✅ Claim verification completed');
      console.log(`📊 Results: ${this.verificationResults.verified_claims.length} verified, ${this.verificationResults.unverified_claims.length} unverified, ${this.verificationResults.flagged_discrepancies.length} flagged`);

      return report;
    } catch (error) {
      console.error('❌ Claim verification failed:', error.message);
      throw error;
    }
  }

  /**
   * Load AI enhancements data from `ai-enhancements.json`.
   *
   * Loading is best-effort: on any failure a warning is printed and an empty
   * object is returned so the pipeline can continue.
   *
   * @returns {Promise<object>} Parsed file content, or `{}` on failure.
   */
  async loadAIEnhancements() {
    const enhancementsPath = path.join(this.dataDir, 'ai-enhancements.json');
    try {
      const content = await fs.readFile(enhancementsPath, 'utf8');
      return JSON.parse(content);
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.warn('⚠️ AI enhancements data not found');
      } else {
        // Malformed JSON or a permission problem is not "not found"; say so.
        console.warn(`⚠️ AI enhancements data could not be read: ${error.message}`);
      }
      return {};
    }
  }

  /**
   * Load activity data for verification.
   *
   * Reads `activity-summary.json` and, when the summary names a
   * `data_files.latest_activity` file, the detailed file under `activity/`.
   * Loading is best-effort: failures produce a warning and a reduced result.
   *
   * @returns {Promise<{summary?: object, detailed?: object}>}
   */
  async loadActivityData() {
    let summary;
    try {
      const summaryPath = path.join(this.dataDir, 'activity-summary.json');
      summary = JSON.parse(await fs.readFile(summaryPath, 'utf8'));
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.warn('⚠️ Activity data not found');
      } else {
        console.warn(`⚠️ Activity data could not be read: ${error.message}`);
      }
      return {};
    }

    // Also load detailed activity data if available
    const latestActivityFile = summary?.data_files?.latest_activity;
    if (!latestActivityFile) {
      return { summary };
    }

    try {
      const detailedPath = path.join(this.dataDir, 'activity', latestActivityFile);
      const detailed = JSON.parse(await fs.readFile(detailedPath, 'utf8'));
      return { summary, detailed };
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.warn('⚠️ Detailed activity data not found, using summary only');
      } else {
        console.warn(`⚠️ Detailed activity data could not be read (${error.message}), using summary only`);
      }
      return { summary };
    }
  }

  /**
   * Extract GitHub metrics from activity data.
   *
   * Missing values fall back to 0 (or 30 for the look-back period).
   *
   * @param {{summary?: object, detailed?: object}} activityData
   * @returns {Promise<object>} `{ commits, repositories, activity }` metrics.
   */
  async extractGitHubMetrics(activityData) {
    const summary = activityData?.summary;
    const repoSummary = activityData?.detailed?.repositories?.summary;

    // `languages.length` is used as the language count further down, so make
    // sure it is always an array. A plain object is treated as a
    // name -> value map (assumption about the data file; TODO confirm).
    const rawLanguages = repoSummary?.languages;
    let languages = [];
    if (Array.isArray(rawLanguages)) {
      languages = rawLanguages;
    } else if (rawLanguages !== null && typeof rawLanguages === 'object') {
      languages = Object.keys(rawLanguages);
    }

    const metrics = {
      commits: {
        total: summary?.total_commits || 0,
        period_days: summary?.lookback_period_days || 30
      },
      repositories: {
        total: repoSummary?.total_count || 0,
        languages,
        stars: repoSummary?.total_stars || 0,
        forks: repoSummary?.total_forks || 0
      },
      activity: {
        active_days: summary?.active_days || 0,
        net_lines: summary?.net_lines_contributed || 0
      }
    };

    console.log('📈 Extracted GitHub metrics:');
    console.log(` - Total commits: ${metrics.commits.total}`);
    console.log(` - Repositories: ${metrics.repositories.total}`);
    console.log(` - Languages: ${metrics.repositories.languages.length}`);
    console.log(` - Stars received: ${metrics.repositories.stars}`);
    console.log(` - Active days: ${metrics.activity.active_days}`);
    console.log('');

    return metrics;
  }

  /**
   * Extract quantifiable claims from AI-generated content.
   *
   * Scans `professional_summary.enhanced` as plain text and the `enhanced`
   * value of the skills/experience/project sections as serialized JSON.
   *
   * @param {object} aiEnhancements Parsed AI enhancements data.
   * @returns {Promise<object[]>} Extracted claim records.
   */
  async extractQuantifiableClaims(aiEnhancements) {
    const claims = [];

    // Define patterns for quantifiable claims
    const claimPatterns = [
      // Numbers with descriptors (projects, systems, etc.)
      '(\\d+)\\+?\\s*(projects?|systems?|applications?|repositories?|years?|languages?)',
      // Percentages with context
      '(\\d+)%(?:\\s+|\\s*)(improvement|reduction|increase|efficiency|accuracy|across|while)',
      // Performance improvement claims
      '(reduced|reducing|improving|increasing|optimizing|increased).*?(\\d+)%',
      // Reliability/system percentages
      '(\\d+\\.\\d+|\\d+)%\\s*(system|reliability)',
      // Delivery/creation claims
      '(delivered|created|built|developed)\\s+(\\d+)\\+?\\s*(\\w+)',
      // Scale descriptors
      '(dozens?|hundreds?|thousands?)\\s*of\\s*(\\w+)',
      // Years of experience
      '(\\d+)\\+?\\s*years?\\s*(of\\s+)?(experience|expertise)'
    ];

    // Extract from professional summary
    if (aiEnhancements?.professional_summary?.enhanced) {
      const text = aiEnhancements.professional_summary.enhanced;
      console.log(`🔍 Analyzing professional summary (${text.length} chars)`);
      const summaryClaims = this.extractClaimsFromText(text, 'professional_summary', claimPatterns);
      console.log(` Found ${summaryClaims.length} claims in professional summary`);
      claims.push(...summaryClaims);
    }

    // Extract from other AI-enhanced sections
    const sections = ['skills_enhancement', 'experience_enhancement', 'project_enhancement'];
    for (const section of sections) {
      // `?.` on the root as well: the data file may legally contain `null`.
      const enhanced = aiEnhancements?.[section]?.enhanced;
      if (enhanced) {
        const text = JSON.stringify(enhanced);
        claims.push(...this.extractClaimsFromText(text, section, claimPatterns));
      }
    }

    console.log(`🔍 Extracted ${claims.length} quantifiable claims for verification`);
    return claims;
  }

  /**
   * Extract claims from text using patterns.
   *
   * Several patterns can hit the same statement (e.g. "5+ years" and
   * "5+ years of experience"). Such hits are merged: claims are keyed by the
   * text offset of the number they are about, and the longest match wins.
   * The result is ordered by position in the text.
   *
   * @param {string} text Text to scan.
   * @param {string} source Label stored on each claim (section name).
   * @param {Array<string|RegExp>} patterns Regular-expression sources.
   * @returns {object[]} Claims with `text`, `source`, `type`,
   *   `extracted_value` and `context`.
   */
  extractClaimsFromText(text, source, patterns) {
    const claimsByAnchor = new Map();

    patterns.forEach((pattern, i) => {
      // Create fresh regex object for each pattern
      const regex = new RegExp(pattern, 'gi');
      let patternMatches = 0;

      for (const match of text.matchAll(regex)) {
        patternMatches++;

        const claimText = match[0];
        const token = this.findNumericToken(claimText);
        // Claims without a number ("dozens of ...") are anchored at their start.
        const anchor = match.index + (token ? token.offset : 0);

        const existing = claimsByAnchor.get(anchor);
        if (existing && existing.text.length >= claimText.length) {
          continue; // Same statement already recorded with at least as much text.
        }

        claimsByAnchor.set(anchor, {
          text: claimText,
          source,
          type: this.categorizeClaimType(claimText),
          extracted_value: token ? token.value : null,
          context: this.getClaimContext(text, match.index, 50)
        });
      }

      if (patternMatches > 0) {
        console.log(` Pattern ${i + 1} matched ${patternMatches} times: ${pattern}`);
      }
    });

    return [...claimsByAnchor.entries()]
      .sort(([anchorA], [anchorB]) => anchorA - anchorB)
      .map(([, claim]) => claim);
  }

  /**
   * Categorize claim type.
   *
   * The percentage check comes first so that claims such as "99.9% system" or
   * "improving system performance by 40%" are treated as performance metrics
   * rather than being compared with the repository count.
   *
   * @param {string} claimText Matched claim text.
   * @returns {string} One of `performance_metric`, `project_count`,
   *   `language_count`, `experience_duration`, `repository_count`,
   *   `general_quantity`.
   */
  categorizeClaimType(claimText) {
    const lowerText = claimText.toLowerCase();

    if (lowerText.includes('%')) {
      return 'performance_metric';
    }
    if (lowerText.includes('project') || lowerText.includes('system') || lowerText.includes('application')) {
      return 'project_count';
    }
    if (lowerText.includes('language')) {
      return 'language_count';
    }
    if (lowerText.includes('year')) {
      return 'experience_duration';
    }
    // 'repo' also covers 'repository' and 'repositories'.
    if (lowerText.includes('repo')) {
      return 'repository_count';
    }
    return 'general_quantity';
  }

  /**
   * Locate the number a claim is about.
   *
   * A number directly attached to a percent sign is preferred (decimals
   * included); otherwise the first integer in the text is used.
   *
   * @param {string} claimText Matched claim text.
   * @returns {{value: number, offset: number}|null} The numeric value and its
   *   offset inside `claimText`, or `null` when the text holds no digits.
   */
  findNumericToken(claimText) {
    const percentMatch = /(\d+(?:\.\d+)?)\s*%/.exec(claimText);
    if (percentMatch) {
      return { value: Number(percentMatch[1]), offset: percentMatch.index };
    }

    const integerMatch = /\d+/.exec(claimText);
    if (integerMatch) {
      return { value: Number.parseInt(integerMatch[0], 10), offset: integerMatch.index };
    }

    return null;
  }

  /**
   * Extract numeric value from claim.
   *
   * @param {string} claimText Matched claim text.
   * @returns {number|null} The claim's number, or `null` if there is none.
   */
  extractNumericValue(claimText) {
    const token = this.findNumericToken(claimText);
    return token ? token.value : null;
  }

  /**
   * Get context around claim.
   *
   * @param {string} text Full text the claim was found in.
   * @param {number} index Offset of the claim inside `text`.
   * @param {number} contextLength Characters to include on each side of `index`.
   * @returns {string} Trimmed excerpt of `text` around `index`.
   */
  getClaimContext(text, index, contextLength) {
    const start = Math.max(0, index - contextLength);
    const end = Math.min(text.length, index + contextLength);
    return text.substring(start, end).trim();
  }

  /**
   * Verify individual claim against GitHub metrics.
   *
   * Only project/repository and language counts are checked automatically;
   * every other claim type is returned unverified with a low fixed confidence.
   *
   * @param {object} claim Claim record from `extractClaimsFromText`.
   * @param {object} githubMetrics Metrics from `extractGitHubMetrics`.
   * @returns {Promise<object>} Verification record with `verified`,
   *   `confidence`, `actual_value`, `discrepancy` and `reasoning`.
   */
  async verifyClaim(claim, githubMetrics) {
    const verification = {
      claim,
      verified: false,
      confidence: 0,
      actual_value: null,
      discrepancy: null,
      reasoning: ''
    };

    switch (claim.type) {
      case 'project_count':
      case 'repository_count':
        verification.actual_value = githubMetrics.repositories.total;
        verification.verified = this.isReasonableRange(claim.extracted_value, verification.actual_value, 0.5);
        verification.confidence = this.calculateConfidence(claim.extracted_value, verification.actual_value);
        verification.reasoning = `Claimed ${claim.extracted_value}, actual ${verification.actual_value}`;
        break;

      case 'language_count':
        verification.actual_value = githubMetrics.repositories.languages.length;
        verification.verified = this.isReasonableRange(claim.extracted_value, verification.actual_value, 0.3);
        verification.confidence = this.calculateConfidence(claim.extracted_value, verification.actual_value);
        verification.reasoning = `Claimed ${claim.extracted_value}, actual ${verification.actual_value}`;
        break;

      case 'performance_metric':
        // Performance metrics are harder to verify without specific context
        verification.verified = false;
        verification.confidence = 0.1;
        verification.reasoning = 'Performance metrics require additional context for verification';
        break;

      default:
        verification.verified = false;
        verification.confidence = 0.2;
        verification.reasoning = 'General claims require manual review';
    }

    // `!= null` also excludes `undefined`, which would otherwise yield NaN.
    if (verification.actual_value != null && claim.extracted_value != null) {
      verification.discrepancy = Math.abs(claim.extracted_value - verification.actual_value);
    }

    return verification;
  }

  /**
   * Check if claimed value is in reasonable range of actual value.
   *
   * @param {number|null} claimed Value stated in the claim.
   * @param {number|null} actual Value taken from the metrics.
   * @param {number} tolerance Allowed deviation as a fraction of `actual`.
   * @returns {boolean} `true` when the difference is within
   *   `max(actual * tolerance, 2)`; `false` when either value is missing.
   */
  isReasonableRange(claimed, actual, tolerance) {
    if (claimed == null || actual == null) return false;

    const difference = Math.abs(claimed - actual);
    const maxAllowed = Math.max(actual * tolerance, 2); // Allow at least 2 units difference

    return difference <= maxAllowed;
  }

  /**
   * Calculate confidence score for claim verification.
   *
   * The score is `1 - |claimed - actual| / mean(claimed, actual)`, clamped at 0.
   *
   * @param {number|null} claimed Value stated in the claim.
   * @param {number|null} actual Value taken from the metrics.
   * @returns {number} Score between 0 and 1; 0 when either value is missing.
   */
  calculateConfidence(claimed, actual) {
    if (claimed == null || actual == null) return 0;

    const difference = Math.abs(claimed - actual);
    // An exact match is fully confident, including the 0-vs-0 case where the
    // relative error below would otherwise be undefined.
    if (difference === 0) return 1;

    const average = (claimed + actual) / 2;
    const relativeError = average > 0 ? difference / average : 1;

    return Math.max(0, 1 - relativeError);
  }

  /**
   * Categorize verification result into the verified, flagged or unverified
   * bucket and record its confidence under the claim text.
   *
   * @param {object} claim Claim record.
   * @param {object} verification Result of `verifyClaim` for that claim.
   */
  categorizeVerification(claim, verification) {
    if (verification.verified && verification.confidence > 0.7) {
      this.verificationResults.verified_claims.push({ claim, verification });
    } else if (verification.confidence < 0.3 || verification.discrepancy > 5) {
      this.verificationResults.flagged_discrepancies.push({ claim, verification });
    } else {
      this.verificationResults.unverified_claims.push({ claim, verification });
    }

    // NOTE: keyed by claim text, so identical texts share one entry.
    this.verificationResults.confidence_scores[claim.text] = verification.confidence;
  }

  /**
   * Generate comprehensive verification report from the collected results.
   *
   * @returns {Promise<object>} Report with `verification_timestamp`,
   *   `summary`, `detailed_results` and `recommendations`.
   */
  async generateVerificationReport() {
    const {
      verified_claims: verified,
      unverified_claims: unverified,
      flagged_discrepancies: flagged
    } = this.verificationResults;

    return {
      verification_timestamp: new Date().toISOString(),
      summary: {
        total_claims_analyzed: verified.length + unverified.length + flagged.length,
        verified_claims: verified.length,
        unverified_claims: unverified.length,
        flagged_discrepancies: flagged.length,
        overall_confidence: this.calculateOverallConfidence()
      },
      detailed_results: this.verificationResults,
      recommendations: this.generateRecommendations()
    };
  }

  /**
   * Calculate overall confidence score.
   *
   * @returns {number} Mean of the recorded confidence scores, or 0 when none
   *   have been recorded.
   */
  calculateOverallConfidence() {
    const confidenceValues = Object.values(this.verificationResults.confidence_scores);
    if (confidenceValues.length === 0) return 0;

    return confidenceValues.reduce((sum, conf) => sum + conf, 0) / confidenceValues.length;
  }

  /**
   * Generate recommendations based on verification results.
   *
   * @returns {Array<{type: string, message: string, action: string}>}
   */
  generateRecommendations() {
    const recommendations = [];

    if (this.verificationResults.flagged_discrepancies.length > 0) {
      recommendations.push({
        type: 'critical',
        message: `${this.verificationResults.flagged_discrepancies.length} claims have significant discrepancies and should be reviewed`,
        action: 'Review and adjust flagged claims to match actual GitHub data'
      });
    }

    if (this.verificationResults.unverified_claims.length > 0) {
      recommendations.push({
        type: 'warning',
        message: `${this.verificationResults.unverified_claims.length} claims could not be verified automatically`,
        action: 'Consider providing additional context or documentation for these claims'
      });
    }

    const overallConfidence = this.calculateOverallConfidence();
    if (overallConfidence < 0.5) {
      recommendations.push({
        type: 'critical',
        message: `Overall confidence score is low (${(overallConfidence * 100).toFixed(1)}%)`,
        action: 'Revise AI prompts to focus on verifiable achievements and metrics'
      });
    }

    return recommendations;
  }

  /**
   * Save verification results: the full report plus a short summary file.
   *
   * @param {object} report Report from `generateVerificationReport`.
   * @throws Re-throws any file-system error after logging it.
   */
  async saveVerificationResults(report) {
    try {
      // The input files are optional, so the data directory may not exist yet.
      await fs.mkdir(this.dataDir, { recursive: true });

      const outputPath = path.join(this.dataDir, 'claim-verification-report.json');
      await fs.writeFile(outputPath, JSON.stringify(report, null, 2), 'utf8');

      console.log(`📄 Verification report saved: ${outputPath}`);

      // Also save a summary for quick review
      const summaryPath = path.join(this.dataDir, 'verification-summary.json');
      await fs.writeFile(summaryPath, JSON.stringify({
        last_verification: report.verification_timestamp,
        summary: report.summary,
        urgent_recommendations: report.recommendations.filter(r => r.type === 'critical')
      }, null, 2), 'utf8');
    } catch (error) {
      console.error('❌ Failed to save verification results:', error.message);
      throw error;
    }
  }
}

/**
 * Main execution function: runs the verifier and prints a summary.
 * Exits the process with status 1 when verification fails.
 */
async function main() {
  try {
    const verifier = new ClaimVerifier();
    const report = await verifier.verifyAllClaims();

    console.log('');
    console.log('🎯 **CLAIM VERIFICATION SUMMARY**');
    console.log(`📊 Total claims analyzed: ${report.summary.total_claims_analyzed}`);
    console.log(`✅ Verified claims: ${report.summary.verified_claims}`);
    console.log(`⚠️ Unverified claims: ${report.summary.unverified_claims}`);
    console.log(`🚨 Flagged discrepancies: ${report.summary.flagged_discrepancies}`);
    console.log(`🎯 Overall confidence: ${(report.summary.overall_confidence * 100).toFixed(1)}%`);

    if (report.recommendations.length > 0) {
      console.log('');
      console.log('📋 **RECOMMENDATIONS:**');
      report.recommendations.forEach((rec, index) => {
        console.log(`${index + 1}. [${rec.type.toUpperCase()}] ${rec.message}`);
        console.log(` Action: ${rec.action}`);
      });
    }
  } catch (error) {
    console.error('❌ Claim verification failed:', error.message);
    process.exit(1);
  }
}

// Execute if called directly
if (require.main === module) {
  main().catch(console.error);
}

module.exports = { ClaimVerifier };