# batch_example.py
"""
Batch Evaluation Example

This example demonstrates how to run batch evaluations
from a JSON test file.

The script writes a small set of test cases to a temporary JSON file,
loads them back from that file, and evaluates each one. When the
OPENAI_API_KEY environment variable is set, every case is scored with
AccuracyEvaluator; otherwise a mock evaluation is run so the example
still works without credentials.
"""

import json
import os
import tempfile

# Test cases that get written to the JSON test file.
test_cases = [
    {
        "input": "What is 2 + 2?",
        "expected": "4",
    },
    {
        "input": "What is the capital of France?",
        "expected": "Paris",
    },
    {
        "input": "What color is the sky?",
        "expected": "Blue",
    },
]


def write_test_file(cases: list) -> str:
    """Write *cases* to a temporary ``.json`` file and return its path.

    The file is created with ``delete=False`` so it survives the ``with``
    block and can be reopened by name; the caller must remove it.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    ) as f:
        json.dump(cases, f)
        return f.name


def load_test_cases(path: str) -> list:
    """Return the list of test cases stored in the JSON file at *path*."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def run_real_batch(cases: list) -> list:
    """Evaluate every case with AccuracyEvaluator and return the results.

    Each result is a dict with the keys ``input``, ``expected``, ``score``
    and ``passed``.
    """
    # You can use the CLI for batch evaluation:
    # praisonai eval batch --agent agents.yaml --test-file tests.json --batch-type accuracy

    # Or programmatically. The imports are local so that the mock path
    # does not need them.
    from praisonaiagents import Agent
    from praisonaiagents.eval import AccuracyEvaluator

    agent = Agent(
        instructions="You are a helpful assistant. Answer questions concisely."
    )

    results = []
    for number, test_case in enumerate(cases, start=1):
        print(f"\nTest {number}: {test_case['input']}")

        evaluator = AccuracyEvaluator(
            agent=agent,
            input_text=test_case["input"],
            expected_output=test_case["expected"],
            num_iterations=1,
        )

        result = evaluator.run()
        results.append({
            "input": test_case["input"],
            "expected": test_case["expected"],
            "score": result.avg_score,
            "passed": result.passed,
        })

        print(f" Score: {result.avg_score}/10")
        print(f" Passed: {result.passed}")

    return results


def run_mock_batch(cases: list) -> list:
    """Return a failing mock result for every case (no API key needed).

    The result dicts have the same keys as those from ``run_real_batch``.
    """
    results = []
    for number, test_case in enumerate(cases, start=1):
        print(f"\nTest {number}: {test_case['input']}")

        # Mock result (all would fail without API key)
        mock_score = 0.0
        mock_passed = False

        results.append({
            "input": test_case["input"],
            "expected": test_case["expected"],
            "score": mock_score,
            "passed": mock_passed,
        })

        print(f" Score: {mock_score}/10 (mock)")
        print(f" Passed: {mock_passed} (expected without API key)")

    return results


def print_summary(results: list, label: str = "") -> None:
    """Print the pass count and pass rate for *results*.

    *label* is appended verbatim to the summary line, e.g.
    ``" (mock evaluation)"``.
    """
    passed = sum(1 for r in results if r["passed"])
    total = len(results)
    # Guard the division so an empty test file reports 0.0% instead of
    # raising ZeroDivisionError.
    pass_rate = passed / total if total else 0.0

    print(f"\n{'=' * 50}")
    print(f"Summary: {passed}/{total} tests passed{label}")
    print(f"Pass Rate: {pass_rate:.1%}")


def main() -> list:
    """Run the batch example end to end and return the per-test results."""
    # Check if we have an API key
    has_api_key = os.getenv("OPENAI_API_KEY") is not None

    # Save test cases to a temporary file
    test_file = write_test_file(test_cases)
    try:
        print(f"Test file created: {test_file}")

        # Read the cases back so the batch really runs from the JSON file.
        cases = load_test_cases(test_file)
        print(f"Running {len(cases)} test cases...")

        if has_api_key:
            results = run_real_batch(cases)
            print_summary(results)
        else:
            print("⚠️ No OPENAI_API_KEY found. Running mock batch evaluation...")
            results = run_mock_batch(cases)
            print_summary(results, " (mock evaluation)")
            print("\nTo run real batch evaluation, set OPENAI_API_KEY environment variable")
    finally:
        # delete=False means nothing else removes the file; clean it up
        # here so repeated runs do not leave temp files behind.
        os.remove(test_file)

    return results


if __name__ == "__main__":
    main()