trace_management.md
## Trace Management

Record and analyse traces using the `ragaai_catalyst` library. This guide provides steps to initialize the tracer with a project and dataset name (LangChain and LlamaIndex), run the tracer and add context, stop the tracer, list datasets, add rows and columns, and run evaluation on tracer datasets efficiently.

#### Initialize Tracer Management

To start managing datasets for a specific project, initialize the `Tracer` class with your project name.

##### 1. LangChain example

```python
from ragaai_catalyst import Tracer
tracer_dataset_name = "tracer_dataset_name"

tracer = Tracer(
    project_name=project_name,
    dataset_name=tracer_dataset_name,
    metadata={"key1": "value1", "key2": "value2"},
    tracer_type="langchain",
    pipeline={
        "llm_model": "gpt-4o-mini",
        "vector_store": "faiss",
        "embed_model": "text-embedding-ada-002",
    }
)
```

##### - User code

```python
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

chat = ChatLiteLLM(model="gpt-4o-mini")

messages = [
    HumanMessage(
        content="Translate this sentence from English to German. I love you."
    )
]
with tracer:
    response = chat(messages)
```

##### 2. LlamaIndex example

```python
from ragaai_catalyst import Tracer
tracer_dataset_name = "tracer_dataset_name"

tracer = Tracer(
    project_name=project_name,
    dataset_name=tracer_dataset_name,
    metadata={"key1": "value1", "key2": "value2"},
    tracer_type="llamaindex",
    pipeline={
        "llm_model": "gpt-4o-mini",
        "vector_store": "faiss",
        "embed_model": "text-embedding-ada-002",
    }
)
```

##### - User code

```python
from llama_index.core import VectorStoreIndex, Settings, Document
from llama_index.readers.file import PDFReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize necessary variables
retriever = None
loaded_doc = None
index = None

def load_document(source_doc_path):
    """
    Load and index the document using LlamaIndex
    """
    try:
        # Initialize LLM and embedding model
        Settings.llm = OpenAI(model="gpt-4o-mini")
        Settings.embed_model = OpenAIEmbedding()

        # Load PDF document
        reader = PDFReader()
        docs = reader.load_data(source_doc_path)

        # Create documents with metadata
        documents = [
            Document(text=doc.text, metadata={"source": source_doc_path})
            for doc in docs
        ]

        # Create vector store index
        global index
        index = VectorStoreIndex.from_documents(documents)

        # Create retriever (to maintain similar interface)
        retriever = index.as_retriever(similarity_top_k=5)

        logger.info("Document loaded and processed.")
        return retriever

    except Exception as e:
        logger.error(f"An error occurred while loading the document: {e}")
        return None

def generate_response(retriever, query):
    """
    Generate response for the given query using LlamaIndex
    """
    try:
        if index is None:
            logger.error("Index not initialized. Please load document first.")
            return None

        # Create query engine
        query_engine = index.as_query_engine(
            response_mode="compact"
        )

        # Generate response
        response = query_engine.query(query)

        logger.info("Response generated successfully")
        return str(response)

    except Exception as e:
        logger.error(f"An error occurred while generating the response: {e}")
        return None

def process_document(source_doc_path, loaded_doc, query):
    """
    Process document and generate response using LlamaIndex
    """
    try:
        # Check if we need to load a new document
        if loaded_doc != source_doc_path:
            retriever = load_document(source_doc_path)
            if retriever is None:
                return "Failed to load document."
            loaded_doc = source_doc_path
        else:
            logger.info("Using cached document retriever.")

        # Generate response
        response = generate_response(retriever, query)
        if response is None:
            return "Failed to generate response."

        return response

    except Exception as e:
        logger.error(f"An overall error occurred: {e}")
        return "An error occurred during the document processing."


source_doc_path = "/content/2404.02798v1.pdf"

questions = [
    "What is this paper about?",
    "Give 10 words summary of the paper?",
    "What is the main topic of the paper?",
    "What is the aim of the paper, in 10 words?"
]
```

```python
with tracer:
    for question in questions:
        response = process_document(source_doc_path, None, question)
        print(f"Question: {question}\nResponse: {response}\n")
```

#### Run tracer and add context

You can add context using `tracer.add_context(context)`. Context needs to be of type `str`.

```python
with tracer:
    response = chat(messages)
    tracer.add_context(context)


with tracer:
    for question in questions:
        response = process_document(source_doc_path, None, question)
        tracer.add_context(context)
```

#### Add rows to the uploaded tracer dataset

```python
from ragaai_catalyst import Dataset
dataset_manager = Dataset(project_name=project_name)
add_rows_csv_path = "path to dataset"
dataset_manager.add_rows(csv_path=add_rows_csv_path, dataset_name=dataset_name)
```

#### Add column to the uploaded tracer dataset

```python
text_fields = [
    {
        "role": "system",
        "content": "you are an evaluator, which answers only in yes or no."
    },
    {
        "role": "user",
        "content": "are any of the {{asdf}} {{abcd}} related to broken hand"
    }
]
column_name = "from_colab_v1"
provider = "openai"
model = "gpt-4o-mini"

variables={
    "asdf": "context",
    "abcd": "feedback"
}
```

```python
dataset_manager.add_columns(
    text_fields=text_fields,
    dataset_name=dataset_name,
    column_name=column_name,
    provider=provider,
    model=model,
    variables=variables
)
```

#### Evaluate metrics

Evaluate metrics on the uploaded tracer dataset.

```python
from ragaai_catalyst import Evaluation
evaluation = Evaluation(project_name=project_name,
                        dataset_name=tracer_dataset_name)
```

```python
schema_mapping={
    'prompt': 'prompt',
    'response': 'response',
    'context': 'context',
}
metrics = [
    {"name": "Faithfulness", "config": {"model": "gpt-4o-mini", "provider": "openai", "threshold": {"gte": 0.323}}, "column_name": "Faithfulness_v1_gte", "schema_mapping": schema_mapping},
    {"name": "Hallucination", "config": {"model": "gpt-4o-mini", "provider": "openai", "threshold": {"lte": 0.323}}, "column_name": "Hallucination_v1_lte", "schema_mapping": schema_mapping},
    {"name": "Hallucination", "config": {"model": "gpt-4o-mini", "provider": "openai", "threshold": {"eq": 0.323}}, "column_name": "Hallucination_v1_eq", "schema_mapping": schema_mapping},
    {"name": "Context Relevancy", "config": {"model": "gemini-1.5-flash", "provider": "gemini", "threshold": {"eq": 0.323}}, "column_name": "Context_Relevancy_v1_eq", "schema_mapping": schema_mapping},
]
```

```python
evaluation.add_metrics(metrics=metrics)
evaluation.get_status()
```

#### Appending Metrics for New Data

If you've added new rows to your dataset, you can calculate metrics just for the new data:

```python
evaluation.append_metrics(display_name="Faithfulness_v1")
```