# reranking.py
"""
Reranking: Improving Retrieval Precision

This example demonstrates reranking - a second-pass scoring mechanism
that improves the quality of retrieved documents.

RAG Concept: Initial retrieval (first stage) prioritizes recall - finding
all potentially relevant documents. Reranking (second stage) prioritizes
precision - ordering results by true relevance to the query.
"""

from praisonaiagents import Agent

# Sample knowledge base: Research paper abstracts
RESEARCH_PAPERS = [
    {
        "id": "paper_001",
        "content": """
Title: Attention Mechanisms in Neural Machine Translation

Abstract: This paper introduces a novel attention mechanism for sequence-to-sequence
models in machine translation. Unlike previous approaches that use a fixed-length
context vector, our method allows the model to automatically search for parts of
the source sentence relevant to predicting the target word. Experiments on
English-to-French translation show significant improvements over baseline systems.
""",
    },
    {
        "id": "paper_002",
        "content": """
Title: Deep Residual Learning for Image Recognition

Abstract: We present a residual learning framework to ease the training of very deep
neural networks. By reformulating layers as learning residual functions with
reference to layer inputs, we show that these networks are easier to optimize
and can gain accuracy from considerably increased depth. Our 152-layer residual
network achieves state-of-the-art results on ImageNet classification.
""",
    },
    {
        "id": "paper_003",
        "content": """
Title: BERT: Pre-training of Deep Bidirectional Transformers

Abstract: We introduce BERT, a new language representation model designed to
pre-train deep bidirectional representations from unlabeled text. Unlike previous
models, BERT is designed to jointly condition on both left and right context.
The pre-trained BERT model can be fine-tuned with just one additional output
layer for a wide range of NLP tasks.
""",
    },
    {
        "id": "paper_004",
        "content": """
Title: Generative Adversarial Networks

Abstract: We propose a new framework for estimating generative models via an
adversarial process. We simultaneously train two models: a generative model G
that captures the data distribution, and a discriminative model D that estimates
the probability that a sample came from the training data rather than G.
This framework can generate realistic images from random noise.
""",
    },
    {
        "id": "paper_005",
        "content": """
Title: Transformer Architecture for Language Understanding

Abstract: We propose the Transformer, a model architecture eschewing recurrence
and instead relying entirely on an attention mechanism to draw global dependencies
between input and output. The Transformer allows for significantly more
parallelization and achieves new state of the art in translation quality.
""",
    },
    {
        "id": "paper_006",
        "content": """
Title: Word Embeddings and Semantic Similarity

Abstract: This paper explores methods for learning distributed representations
of words that capture semantic relationships. We show that word vectors trained
on large corpora encode meaningful syntactic and semantic regularities. These
representations enable analogical reasoning: king - man + woman = queen.
""",
    },
]


def _build_context() -> str:
    """Join every paper into one labelled context string for agent prompts."""
    return "\n\n".join(f"[{p['id']}]\n{p['content']}" for p in RESEARCH_PAPERS)


def explain_reranking():
    """Print a conceptual overview of two-stage retrieval with reranking."""
    print("=" * 60)
    print("RERANKING EXPLAINED")
    print("=" * 60)

    # NOTE(review): the banner/tree characters below were mojibake in the
    # original source; reconstructed here with standard emoji/box-drawing.
    print("""
    📊 Two-Stage Retrieval Pipeline:

    Stage 1: Initial Retrieval (Bi-encoder)
    ├── Fast approximate matching
    ├── Retrieves top-K candidates (e.g., 20-100)
    ├── Uses pre-computed embeddings
    └── Optimizes for RECALL (find all relevant docs)

    Stage 2: Reranking (Cross-encoder)
    ├── Slower but more accurate scoring
    ├── Reorders the top-K candidates
    ├── Computes query-document interaction
    └── Optimizes for PRECISION (best docs first)

    Why Reranking Matters:
    - Bi-encoders encode query and docs separately (fast but approximate)
    - Cross-encoders process query+doc together (slow but accurate)
    - Reranking gives you the best of both worlds
    """)


def rag_with_reranking():
    """Demonstrate RAG with reranking enabled.

    Builds an agent whose instructions embed the full paper context, then
    asks several queries and prints truncated answers.
    """
    context = _build_context()

    # Create agent with context baked into its instructions.
    agent = Agent(
        name="Research Assistant",
        instructions=f"""You are a research paper expert.
Answer questions about machine learning research.
Reference specific papers when relevant.
Be precise about technical details.

RESEARCH PAPERS:
{context}""",
        output="silent",
    )

    queries = [
        "Which paper introduced the attention mechanism for translation?",
        "Tell me about pre-training language models",
        "What architecture avoids recurrence entirely?",
    ]

    print("\n" + "=" * 60)
    print("RAG WITH RERANKING")
    print("=" * 60)

    for query in queries:
        print(f"\n🔍 Query: {query}")
        response = agent.chat(query)
        # BUG FIX: the original sliced the raw response (`response[:300]`)
        # while measuring `len(str(response))` -- inconsistent, and it breaks
        # when chat() returns a non-string response object. Normalize once.
        text = str(response)
        if len(text) > 300:
            print(f"💡 Answer: {text[:300]}...")
        else:
            print(f"💡 Answer: {text}")
        print("-" * 40)


def compare_with_without_reranking():
    """Compare retrieval quality with and without reranking."""
    print("\n" + "=" * 60)
    print("RERANKING IMPACT COMPARISON")
    print("=" * 60)

    context = _build_context()
    query = "How do transformers handle long-range dependencies?"

    # Agent with context
    agent = Agent(
        name="Research Agent",
        instructions=f"""Answer based on the research papers provided.

RESEARCH PAPERS:
{context}""",
        output="silent",
    )

    print(f"\n🔍 Query: {query}")

    print("\n📝 RAG Response:")
    response = agent.chat(query)
    # Normalize to str before slicing (same fix as rag_with_reranking).
    print(f"   {str(response)[:250]}...")

    print("\n💡 Note: Reranking uses cross-encoders to improve precision after initial retrieval.")


def reranking_configuration():
    """Show reranking configuration options."""
    print("\n" + "=" * 60)
    print("RERANKING CONFIGURATION")
    print("=" * 60)

    # NOTE(review): the original example passed the ``knowledge`` keyword
    # twice, which is a SyntaxError if copied. Shown here as a separate
    # ``knowledge_config`` dict -- confirm against the PraisonAI Agent API.
    print("""
    Configuration options for reranking in PraisonAI:

    ```python
    agent = Agent(
        name="My Agent",
        knowledge=documents,
        knowledge_config={
            # Enable reranking
            "rerank": True,

            # Number of candidates for initial retrieval
            "top_k": 10,

            # Number of results after reranking
            "rerank_top_k": 3,

            # Reranker model (optional)
            "rerank_model": "cross-encoder/ms-marco-MiniLM-L-6-v2"
        }
    )
    ```

    Performance Considerations:
    ┌──────────────────┬──────────────┬─────────────┐
    │ Configuration    │ Latency      │ Quality     │
    ├──────────────────┼──────────────┼─────────────┤
    │ No reranking     │ ~50ms        │ Good        │
    │ Light reranker   │ ~100ms       │ Better      │
    │ Heavy reranker   │ ~200ms       │ Best        │
    └──────────────────┴──────────────┴─────────────┘

    Tip: Use reranking for precision-critical applications
    where quality matters more than latency.
    """)


def main():
    """Run all reranking examples."""
    print("\n🚀 PraisonAI Reranking Examples\n")

    # Example 1: Explain reranking
    explain_reranking()

    # Example 2: RAG with reranking
    rag_with_reranking()

    # Example 3: Compare with/without reranking
    compare_with_without_reranking()

    # Example 4: Configuration options
    reranking_configuration()

    print("\n✅ Reranking examples completed!")


if __name__ == "__main__":
    main()