# examples/eval/batch_example.py
  1  """
  2  Batch Evaluation Example
  3  
  4  This example demonstrates how to run batch evaluations
  5  from a JSON test file.
  6  """

import os
import json
import tempfile

from praisonaiagents.eval import AccuracyEvaluator

# Check if we have an API key
has_api_key = os.getenv("OPENAI_API_KEY") is not None

# Define multiple test cases, each pairing an input with its expected output
test_cases = [
    {
        "input": "What is 2 + 2?",
        "expected": "4"
    },
    {
        "input": "What is the capital of France?",
        "expected": "Paris"
    },
    {
        "input": "What color is the sky?",
        "expected": "Blue"
    }
]

# Save the test cases to a temporary JSON file
with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
    json.dump(test_cases, f)
    test_file = f.name
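
# Sanity check: load the cases back from the JSON file. A batch runner
# (like the CLI mentioned below) would presumably consume the file the same way.
with open(test_file) as f:
    loaded_cases = json.load(f)
assert loaded_cases == test_cases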

print(f"Test file created: {test_file}")
print(f"Running {len(test_cases)} test cases...")

if has_api_key:
    # You can use the CLI for batch evaluation:
    # praisonai eval batch --agent agents.yaml --test-file tests.json --batch-type accuracy

    # Or programmatically:
    from praisonaiagents import Agent

    agent = Agent(
        instructions="You are a helpful assistant. Answer questions concisely."
    )

    results = []
    for i, test_case in enumerate(test_cases):
        print(f"\nTest {i + 1}: {test_case['input']}")

        evaluator = AccuracyEvaluator(
            agent=agent,
            input_text=test_case["input"],
            expected_output=test_case["expected"],
            num_iterations=1
        )

        result = evaluator.run()
        results.append({
            "input": test_case["input"],
            "expected": test_case["expected"],
            "score": result.avg_score,
            "passed": result.passed
        })

        print(f"  Score: {result.avg_score}/10")
        print(f"  Passed: {result.passed}")

    # Summary
    passed = sum(1 for r in results if r["passed"])
    print(f"\n{'='*50}")
    print(f"Summary: {passed}/{len(results)} tests passed")
    print(f"Pass Rate: {passed/len(results):.1%}")
else:
    print("⚠️  No OPENAI_API_KEY found. Running mock batch evaluation...")

    # Mock evaluation without API key
    results = []
    for i, test_case in enumerate(test_cases):
        print(f"\nTest {i + 1}: {test_case['input']}")

        # Mock result: without an API key, every case scores 0 and fails
        mock_score = 0.0
        mock_passed = False

        results.append({
            "input": test_case["input"],
            "expected": test_case["expected"],
            "score": mock_score,
            "passed": mock_passed
        })

        print(f"  Score: {mock_score}/10 (mock)")
        print(f"  Passed: {mock_passed} (expected without API key)")

    # Summary
    passed = sum(1 for r in results if r["passed"])
    print(f"\n{'='*50}")
    print(f"Summary: {passed}/{len(results)} tests passed (mock evaluation)")
    print(f"Pass Rate: {passed/len(results):.1%}")
    print("\nTo run a real batch evaluation, set the OPENAI_API_KEY environment variable")
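
# Clean up the temporary test file now that the run is finished
os.unlink(test_file)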