consistency.test.ts
import { beforeAll, describe, it, expect } from 'vitest';
import { allPersonas } from '../fixtures/personas';
import { generateAllForPersona, type PersonaPipelineResult } from '../mocks/response-generators';

/**
 * Arithmetic mean of `values`.
 *
 * Returns NaN for an empty array (0 / 0); callers in this file guard against
 * empty input before calling.
 */
function mean(values: readonly number[]): number {
  return values.reduce((sum, v) => sum + v, 0) / values.length;
}

/**
 * Sample standard deviation (divides by n - 1).
 *
 * @returns 0 when there are fewer than two values, since spread is undefined
 *          for such input and "no observed variation" is the useful answer
 *          for the assertions below.
 */
function stddev(values: readonly number[]): number {
  if (values.length <= 1) return 0;
  const avg = mean(values);
  const sumSquaredDiffs = values.reduce((sum, v) => sum + (v - avg) ** 2, 0);
  return Math.sqrt(sumSquaredDiffs / (values.length - 1));
}

/**
 * Coefficient of variation: stddev / |mean|, as a fraction (not a percentage).
 *
 * @returns 0 when the input is empty or has no spread; `Infinity` when the
 *          values differ but their mean is exactly 0 (relative spread is
 *          unbounded, and reporting 0 would hide real variance).
 */
function coeffOfVariation(values: readonly number[]): number {
  if (values.length === 0) return 0;
  const spread = stddev(values);
  if (spread === 0) return 0;
  const avg = mean(values);
  if (avg === 0) return Number.POSITIVE_INFINITY;
  return spread / Math.abs(avg);
}

/** Difference between the largest and smallest entry of `counts`. */
function countRange(counts: readonly number[]): number {
  return Math.max(...counts) - Math.min(...counts);
}

// Pick 5 representative personas: A01, A15, B01, C01, D01
const REPRESENTATIVE_IDS = ['A01', 'A15', 'B01', 'C01', 'D01'];

describe('Consistency & Reproducibility', () => {
  // Number of times the pipeline is run per persona.
  const RUNS = 3;

  for (const personaId of REPRESENTATIVE_IDS) {
    const persona = allPersonas.find(p => p.id === personaId);

    if (!persona) {
      // Fail loudly instead of silently skipping: a missing fixture would
      // otherwise reduce coverage without anyone noticing.
      it(`Persona ${personaId} exists in fixtures`, () => {
        throw new Error(`Persona ${personaId} not found in allPersonas`);
      });
      continue;
    }

    describe(`Persona ${personaId}: ${persona.description}`, () => {
      // Populated in beforeAll so that a generator failure is reported
      // against this suite rather than aborting collection of the whole file.
      const results: PersonaPipelineResult[] = [];

      beforeAll(() => {
        for (let i = 0; i < RUNS; i++) {
          results.push(generateAllForPersona(persona));
        }
      });

      it('fitScore standard deviation < 0.5', () => {
        const scores = results.map(r => r.gapAnalysis.fitScore.score);
        expect(stddev(scores)).toBeLessThan(0.5);
      });

      it('gap count range ≤ 1', () => {
        const counts = results.map(r => r.gapAnalysis.gaps.length);
        expect(countRange(counts)).toBeLessThanOrEqual(1);
      });

      it('strength count range ≤ 1', () => {
        const counts = results.map(r => r.gapAnalysis.strengths.length);
        expect(countRange(counts)).toBeLessThanOrEqual(1);
      });

      it('same critical gaps across runs', () => {
        // Reduce each run to a canonical, order-independent signature of its
        // critical gap skills (case-insensitive).
        const criticalSets = results.map(r =>
          r.gapAnalysis.gaps
            .filter(g => g.severity === 'critical')
            .map(g => g.skill.toLowerCase())
            .sort()
            .join(',')
        );
        // All runs should produce the same critical gaps
        const unique = new Set(criticalSets);
        expect(unique.size).toBeLessThanOrEqual(1);
      });

      it('salary mid variance < 5%', () => {
        const targetMids = results.map(r => r.careerPlan.salaryAnalysis.targetRoleMarket.mid);
        expect(coeffOfVariation(targetMids)).toBeLessThan(0.05);

        const currentMids = results.map(r => r.careerPlan.salaryAnalysis.currentRoleMarket.mid);
        expect(coeffOfVariation(currentMids)).toBeLessThan(0.05);
      });
    });
  }
});