eval-suite.ts
/**
 * Evaluation Framework Example
 *
 * Demonstrates accuracy, performance, and reliability evaluation using the
 * `accuracyEval`, `performanceEval`, `reliabilityEval` helpers and the
 * `EvalSuite` class exported by `praisonai`. Each section prints selected
 * fields of the returned result to stdout.
 */

import { accuracyEval, performanceEval, reliabilityEval, EvalSuite } from 'praisonai';

/**
 * Runs the four demo sections one after another and logs their results.
 *
 * The sections are awaited sequentially so the console output stays grouped
 * under its own heading.
 *
 * @returns A promise that resolves once every section has been logged. It
 *   rejects if any of the awaited evaluation calls rejects.
 */
async function main(): Promise<void> {
  // --- Accuracy: an expected answer vs. an actual answer that embeds it in a sentence.
  console.log('=== Accuracy Evaluation ===');
  const accuracyResult = await accuracyEval({
    input: 'What is 2 + 2?',
    expectedOutput: '4',
    actualOutput: 'The answer is 4',
  });
  console.log('Passed:', accuracyResult.passed);
  console.log('Score:', accuracyResult.score);
  console.log('Duration:', accuracyResult.duration, 'ms');

  // --- Performance: time an async function that waits roughly 100 ms per call.
  console.log('\n=== Performance Evaluation ===');
  const perfResult = await performanceEval({
    func: async () => {
      // Simulate some work
      await new Promise<void>((resolve) => setTimeout(resolve, 100));
      return 'done';
    },
    iterations: 5,
    warmupRuns: 1,
  });
  // The number of recorded timings is reported under the "Iterations" label.
  console.log('Iterations:', perfResult.times.length);
  console.log('Avg time:', perfResult.avgTime.toFixed(2), 'ms');
  console.log('Min time:', perfResult.minTime.toFixed(2), 'ms');
  console.log('Max time:', perfResult.maxTime.toFixed(2), 'ms');

  // --- Reliability: expected and actual tool-call lists are identical here.
  console.log('\n=== Reliability Evaluation ===');
  const reliabilityResult = await reliabilityEval({
    expectedToolCalls: ['search', 'calculate'],
    actualToolCalls: ['search', 'calculate'],
  });
  console.log('Passed:', reliabilityResult.passed);
  console.log('Score:', reliabilityResult.score);

  // --- Suite: run two named accuracy checks, then print the aggregate summary.
  console.log('\n=== Eval Suite ===');
  const suite = new EvalSuite();

  await suite.runAccuracy('test1', {
    input: 'Hello',
    expectedOutput: 'Hi',
    actualOutput: 'Hi there!',
  });

  await suite.runAccuracy('test2', {
    input: 'Goodbye',
    expectedOutput: 'Bye',
    actualOutput: 'Bye!',
  });

  const summary = suite.getSummary();
  console.log('Total tests:', summary.total);
  console.log('Passed:', summary.passed);
  console.log('Failed:', summary.failed);
  // NOTE(review): passRate is scaled by 100 for display, so it is presumably a 0-1 fraction - confirm.
  console.log('Pass rate:', `${(summary.passRate * 100).toFixed(1)}%`);
}

// Log any failure and mark the run as failed so shells and CI see a non-zero
// exit status instead of a silent success.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});