/ tests / test_catalyst / test_synthetic_data_generation.py
test_synthetic_data_generation.py
 1  # import sys
 2  # sys.path.append('/Users/ritikagoel/workspace/synthetic-catalyst-internal-api2/ragaai-catalyst')
 3  
 4  import pytest
 5  from ragaai_catalyst import SyntheticDataGeneration
 6  import os
 7  
 8  import dotenv
 9  dotenv.load_dotenv()
10  
# API keys are looked up in the process environment after dotenv.load_dotenv()
# has run; os.environ.get returns None for a key that is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Directory containing the CSV inputs, resolved relative to this module so the
# paths do not depend on the current working directory.
_TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "test_data")

# Input files used by the fixtures and tests in this module.
doc_path = os.path.join(_TEST_DATA_DIR, "util_synthetic_data_doc.csv")
valid_csv_path = os.path.join(_TEST_DATA_DIR, "util_synthetic_data_valid.csv")
invalid_csv_path = os.path.join(_TEST_DATA_DIR, "util_synthetic_data_invalid.csv")
17  
@pytest.fixture
def synthetic_gen():
    """Build a ``SyntheticDataGeneration`` object for the requesting test."""
    generator = SyntheticDataGeneration()
    return generator
21  
@pytest.fixture
def sample_text(synthetic_gen):
    """Return whatever ``process_document`` produces for the sample document.

    The input is the file at the module-level ``doc_path``; change that
    constant to point the fixture at a different document.
    """
    return synthetic_gen.process_document(input_data=doc_path)
26  
def test_special_chars_csv_processing(synthetic_gen):
    """Expect ``process_document`` to raise for the CSV at ``valid_csv_path``.

    NOTE(review): the test name refers to special characters, yet the input is
    ``valid_csv_path`` and any ``Exception`` satisfies the check. Confirm that
    this file (rather than ``invalid_csv_path``) is the intended input and
    whether a narrower exception type can be asserted.
    """
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        synthetic_gen.process_document(input_data=valid_csv_path)
31      
32  
def test_invalid_llm_proxy(synthetic_gen, sample_text):
    """Check that ``generate_qna`` fails when given a malformed proxy URL."""
    # The proxy URL uses a deliberately bogus scheme ("tp://").
    request_kwargs = {
        "text": sample_text,
        "question_type": 'mcq',
        "model_config": {"provider": "openai", "model": "gpt-4o-mini"},
        "n": 1,
        "internal_llm_proxy": "tp://invalid.url",
        "user_id": "1",
    }
    # pytest.raises treats ``match`` as a regex searched in the error text.
    expected_message = "No connection adapters were found for"

    with pytest.raises(Exception, match=expected_message):
        synthetic_gen.generate_qna(**request_kwargs)
44  
def test_missing_model_config(synthetic_gen, sample_text):
    """Check that omitting ``model_config`` makes ``generate_qna`` raise ``ValueError``."""
    # ``model_config`` is intentionally left out of the call arguments.
    request_kwargs = {
        "text": sample_text,
        "question_type": 'mcq',
        "n": 1,
        "internal_llm_proxy": "http://20.244.126.4:4000/chat/completions",
        "user_id": "1",
    }
    expected_message = "Model configuration must be provided with a valid provider and model"

    with pytest.raises(ValueError, match=expected_message):
        synthetic_gen.generate_qna(**request_kwargs)