test_evaluation.py
from unittest.mock import patch
import time
import pytest
import os
import dotenv
dotenv.load_dotenv()  # load API credentials from .env before the fixtures read them
import pandas as pd
from datetime import datetime
from typing import Dict, List
from ragaai_catalyst import Evaluation, RagaAICatalyst

# Simplified model configurations
MODEL_CONFIGS = [
    {"provider": "openai", "model": "gpt-4"},            # Only one OpenAI model
    {"provider": "gemini", "model": "gemini-1.5-flash"}  # Only one Gemini model
]

# Common metrics to test
CORE_METRICS = [
    'Hallucination',
    'Faithfulness',
    'Response Correctness',
    'Context Relevancy'
]

CHAT_METRICS = [
    'Agent Quality',
    'User Chat Quality'
]

@pytest.fixture
def base_url():
    return os.getenv("RAGAAI_CATALYST_BASE_URL")

@pytest.fixture
def access_keys():
    return {
        "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
        "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")
    }

@pytest.fixture
def evaluation(base_url, access_keys):
    """Create an evaluation instance for a specific project and dataset."""
    os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
    # Instantiating RagaAICatalyst authenticates the session; the instance
    # itself is not referenced again.
    catalyst = RagaAICatalyst(
        access_key=access_keys["access_key"],
        secret_key=access_keys["secret_key"]
    )
    return Evaluation(
        project_name="prompt_metric_dataset_sk",
        dataset_name="dataset_19feb_1"
    )

@pytest.fixture
def chat_evaluation(base_url, access_keys):
    """Create an evaluation instance for chat metrics (currently points at
    the same project and dataset as the `evaluation` fixture)."""
    os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
    catalyst = RagaAICatalyst(
        access_key=access_keys["access_key"],
        secret_key=access_keys["secret_key"]
    )
    return Evaluation(
        project_name="prompt_metric_dataset_sk",
        dataset_name="dataset_19feb_1"
    )

# Basic initialization tests
def test_evaluation_initialization(evaluation):
    """Test that the evaluation instance is initialized correctly."""
    assert evaluation.project_name == "prompt_metric_dataset_sk"
    assert evaluation.dataset_name == "dataset_19feb_1"

def test_project_does_not_exist():
    """Test initialization with a non-existent project."""
    with pytest.raises(ValueError, match="Project not found"):
        Evaluation(project_name="non_existent_project", dataset_name="dataset")

# Parameterized validation tests
@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
def test_metric_validation_checks(evaluation, provider_config):
    """Test all add_metrics validation checks in one parameterized test."""
    schema_mapping = {
        'Query': 'Prompt',
        'Response': 'Response',
        'Context': 'Context',
    }

    # Test missing schema_mapping
    with pytest.raises(ValueError):
        evaluation.add_metrics([{
            "name": "Hallucination",
            "config": provider_config,
            "column_name": "test_column"
        }])

    # Test missing column_name
    with pytest.raises(ValueError):
        evaluation.add_metrics([{
            "name": "Hallucination",
            "config": provider_config,
            "schema_mapping": schema_mapping
        }])

    # Test missing metric name
    with pytest.raises(ValueError):
        evaluation.add_metrics([{
            "config": provider_config,
            "column_name": "test_column",
            "schema_mapping": schema_mapping
        }])
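
# ---------------------------------------------------------------------------
# Happy-path sketches, added for contrast with the validation failures above.
# These are illustrative, not confirmed library behaviour. Assumptions are
# flagged inline: the generated column name is arbitrary, and the
# get_status()/get_results() calls follow the usage shown in the
# ragaai_catalyst README (status report plus a pandas DataFrame of results).
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
def test_add_metrics_with_complete_payload(evaluation, provider_config):
    """Sketch: a payload carrying all four required keys should not raise."""
    schema_mapping = {
        'Query': 'Prompt',
        'Response': 'Response',
        'Context': 'Context',
    }
    # Unique column name per provider and run (assumption: the backend
    # rejects duplicate result-column names on the same dataset).
    column_name = f"hallucination_{provider_config['provider']}_{int(time.time())}"
    evaluation.add_metrics([{
        "name": "Hallucination",            # metric name from CORE_METRICS
        "config": provider_config,          # provider/model pair under test
        "column_name": column_name,
        "schema_mapping": schema_mapping    # maps dataset columns to metric inputs
    }])

def test_results_returned_as_dataframe(evaluation):
    """Sketch: fetch evaluation results once the job has had time to run."""
    # Assumptions: get_status() surfaces the job state and get_results()
    # returns a pandas DataFrame, as in the library README; a real test
    # would poll for completion rather than sleep.
    time.sleep(5)
    evaluation.get_status()
    results = evaluation.get_results()
    assert isinstance(results, pd.DataFrame)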