test_synthetic_data_generation.py
"""Error-path tests for ``SyntheticDataGeneration`` (document loading and Q&A generation)."""

import pytest
from ragaai_catalyst import SyntheticDataGeneration
import os

import dotenv

# Pull variables from a local .env file into the process environment
# before the API keys below are read.
dotenv.load_dotenv()

GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# All fixture files live in the "test_data" directory next to this module.
_TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "test_data")

doc_path = os.path.join(_TEST_DATA_DIR, "util_synthetic_data_doc.csv")
valid_csv_path = os.path.join(_TEST_DATA_DIR, "util_synthetic_data_valid.csv")
invalid_csv_path = os.path.join(_TEST_DATA_DIR, "util_synthetic_data_invalid.csv")


@pytest.fixture
def synthetic_gen():
    """Provide a fresh ``SyntheticDataGeneration`` instance to each test."""
    return SyntheticDataGeneration()


@pytest.fixture
def sample_text(synthetic_gen):
    """Return whatever ``process_document`` produces for the file at ``doc_path``."""
    # Point ``doc_path`` at a different file if the sample document moves.
    return synthetic_gen.process_document(input_data=doc_path)


def test_special_chars_csv_processing(synthetic_gen):
    """Expect ``process_document`` to raise for the CSV at ``valid_csv_path``."""
    # NOTE(review): this test feeds the "valid" fixture and expects a failure,
    # while ``invalid_csv_path`` is not referenced by any test in this module.
    # Confirm which fixture file is actually intended here.
    with pytest.raises(Exception):
        synthetic_gen.process_document(input_data=valid_csv_path)


def test_invalid_llm_proxy(synthetic_gen, sample_text):
    """Expect ``generate_qna`` to raise when the proxy URL has a malformed scheme."""
    request_kwargs = {
        "text": sample_text,
        "question_type": 'mcq',
        "model_config": {"provider": "openai", "model": "gpt-4o-mini"},
        "n": 1,
        # "tp://" is deliberately not a usable URL scheme.
        "internal_llm_proxy": "tp://invalid.url",
        "user_id": "1",
    }
    with pytest.raises(Exception, match="No connection adapters were found for"):
        synthetic_gen.generate_qna(**request_kwargs)


def test_missing_model_config(synthetic_gen, sample_text):
    """Expect ``generate_qna`` to raise ``ValueError`` when ``model_config`` is omitted."""
    # ``model_config`` is intentionally left out of the call.
    # NOTE(review): the proxy address below is a hard-coded IP; presumably the
    # ValueError is raised before any request is sent -- confirm.
    request_kwargs = {
        "text": sample_text,
        "question_type": 'mcq',
        "n": 1,
        "internal_llm_proxy": "http://20.244.126.4:4000/chat/completions",
        "user_id": "1",
    }
    expected_message = "Model configuration must be provided with a valid provider and model"
    with pytest.raises(ValueError, match=expected_message):
        synthetic_gen.generate_qna(**request_kwargs)