Cradicle Explorer

/ __tests__ / integration / consistency.test.ts
consistency.test.ts
 1  import { describe, it, expect } from 'vitest';
 2  import { allPersonas } from '../fixtures/personas';
 3  import { generateAllForPersona, type PersonaPipelineResult } from '../mocks/response-generators';
 4  
 5  // Standard deviation helper
 6  function stddev(values: number[]): number {
 7    if (values.length <= 1) return 0;
 8    const mean = values.reduce((a, b) => a + b, 0) / values.length;
 9    const squaredDiffs = values.map(v => (v - mean) ** 2);
10    return Math.sqrt(squaredDiffs.reduce((a, b) => a + b, 0) / (values.length - 1));
11  }
12  
/**
 * Coefficient of variation: sample standard deviation divided by the absolute
 * mean, expressed as a fraction (0.05 === 5%), not a percentage.
 *
 * @param values - Numbers to measure the relative spread of.
 * @returns
 *   - 0 for an empty list (same "no data, no spread" convention as `stddev`);
 *   - 0 when the mean is 0 and every value is identical;
 *   - `Infinity` when the mean is 0 but the values differ, so that a
 *     "less than threshold" check fails instead of reading real spread as
 *     perfect consistency;
 *   - `stddev(values) / |mean|` otherwise.
 */
function coeffOfVariation(values: number[]): number {
  // Avoid the 0 / 0 = NaN mean on empty input.
  if (values.length === 0) return 0;

  const mean = values.reduce((a, b) => a + b, 0) / values.length;
  const spread = stddev(values);

  if (mean === 0) {
    // The ratio is undefined here; only report "no variation" when there truly is none.
    return spread === 0 ? 0 : Infinity;
  }
  return spread / Math.abs(mean);
}
19  
// Five representative personas, looked up by fixture id: A01, A15, B01, C01, D01.
const REPRESENTATIVE_IDS = ['A01', 'A15', 'B01', 'C01', 'D01'];

/**
 * Consistency suite: for each representative persona, runs
 * `generateAllForPersona` several times and checks that the fit score, gap and
 * strength counts, critical gaps, and salary midpoints stay within tight
 * bounds across runs.
 */
describe('Consistency & Reproducibility', () => {
  // Number of runs compared per persona.
  const RUNS = 3;

  // Difference between the largest and smallest count across runs.
  const countRange = (counts: number[]): number =>
    Math.max(...counts) - Math.min(...counts);

  for (const personaId of REPRESENTATIVE_IDS) {
    const persona = allPersonas.find(p => p.id === personaId);
    if (!persona) {
      // A missing fixture must fail loudly; silently skipping it would let the
      // suite pass without checking anything for this persona.
      it(`Persona ${personaId} exists in fixtures`, () => {
        throw new Error(`Persona ${personaId} not found in allPersonas`);
      });
      continue;
    }

    describe(`Persona ${personaId}: ${persona.description}`, () => {
      // Run the persona through the generator RUNS times (at collection time)
      // so every test below compares the same set of results.
      const results: PersonaPipelineResult[] = [];
      for (let i = 0; i < RUNS; i++) {
        results.push(generateAllForPersona(persona));
      }

      it('fitScore standard deviation < 0.5', () => {
        const scores = results.map(r => r.gapAnalysis.fitScore.score);
        expect(stddev(scores)).toBeLessThan(0.5);
      });

      it('gap count range ≤ 1', () => {
        const counts = results.map(r => r.gapAnalysis.gaps.length);
        expect(countRange(counts)).toBeLessThanOrEqual(1);
      });

      it('strength count range ≤ 1', () => {
        const counts = results.map(r => r.gapAnalysis.strengths.length);
        expect(countRange(counts)).toBeLessThanOrEqual(1);
      });

      it('same critical gaps across runs', () => {
        // Canonical signature per run: lower-cased critical skill names,
        // sorted and comma-joined, so ordering and casing do not matter.
        const criticalSets = results.map(r =>
          r.gapAnalysis.gaps
            .filter(g => g.severity === 'critical')
            .map(g => g.skill.toLowerCase())
            .sort()
            .join(',')
        );
        // All runs should produce the same critical gaps
        const unique = new Set(criticalSets);
        expect(unique.size).toBeLessThanOrEqual(1);
      });

      it('salary mid variance < 5%', () => {
        const targetMids = results.map(r => r.careerPlan.salaryAnalysis.targetRoleMarket.mid);
        expect(coeffOfVariation(targetMids)).toBeLessThan(0.05);

        const currentMids = results.map(r => r.careerPlan.salaryAnalysis.currentRoleMarket.mid);
        expect(coeffOfVariation(currentMids)).toBeLessThan(0.05);
      });
    });
  }
});