"""
Reranking: Improving Retrieval Precision

This example demonstrates reranking - a second-pass scoring mechanism
that improves the quality of retrieved documents.

RAG Concept: Initial retrieval (first stage) prioritizes recall - finding
all potentially relevant documents. Reranking (second stage) prioritizes
precision - ordering results by true relevance to the query.
"""

from praisonaiagents import Agent

# Sample knowledge base: Research paper abstracts
RESEARCH_PAPERS = [
    {
        "id": "paper_001",
        "content": """
        Title: Attention Mechanisms in Neural Machine Translation

        Abstract: This paper introduces a novel attention mechanism for sequence-to-sequence
        models in machine translation. Unlike previous approaches that use a fixed-length
        context vector, our method allows the model to automatically search for parts of
        the source sentence relevant to predicting the target word. Experiments on
        English-to-French translation show significant improvements over baseline systems.
        """
    },
    {
        "id": "paper_002",
        "content": """
        Title: Deep Residual Learning for Image Recognition

        Abstract: We present a residual learning framework to ease the training of very deep
        neural networks. By reformulating layers as learning residual functions with
        reference to layer inputs, we show that these networks are easier to optimize
        and can gain accuracy from considerably increased depth. Our 152-layer residual
        network achieves state-of-the-art results on ImageNet classification.
        """
    },
    {
        "id": "paper_003",
        "content": """
        Title: BERT: Pre-training of Deep Bidirectional Transformers

        Abstract: We introduce BERT, a new language representation model designed to
        pre-train deep bidirectional representations from unlabeled text. Unlike previous
        models, BERT is designed to jointly condition on both left and right context.
        The pre-trained BERT model can be fine-tuned with just one additional output
        layer for a wide range of NLP tasks.
        """
    },
    {
        "id": "paper_004",
        "content": """
        Title: Generative Adversarial Networks

        Abstract: We propose a new framework for estimating generative models via an
        adversarial process. We simultaneously train two models: a generative model G
        that captures the data distribution, and a discriminative model D that estimates
        the probability that a sample came from the training data rather than G.
        This framework can generate realistic images from random noise.
        """
    },
    {
        "id": "paper_005",
        "content": """
        Title: Transformer Architecture for Language Understanding

        Abstract: We propose the Transformer, a model architecture eschewing recurrence
        and instead relying entirely on an attention mechanism to draw global dependencies
        between input and output. The Transformer allows for significantly more
        parallelization and achieves new state of the art in translation quality.
        """
    },
    {
        "id": "paper_006",
        "content": """
        Title: Word Embeddings and Semantic Similarity

        Abstract: This paper explores methods for learning distributed representations
        of words that capture semantic relationships. We show that word vectors trained
        on large corpora encode meaningful syntactic and semantic regularities. These
        representations enable analogical reasoning: king - man + woman = queen.
        """
    }
]


def explain_reranking():
    """Explain the reranking concept."""

    print("=" * 60)
    print("RERANKING EXPLAINED")
    print("=" * 60)

    print("""
    šŸ”„ Two-Stage Retrieval Pipeline:

    Stage 1: Initial Retrieval (Bi-encoder)
    ā”œā”€ā”€ Fast approximate matching
    ā”œā”€ā”€ Retrieves top-K candidates (e.g., 20-100)
    ā”œā”€ā”€ Uses pre-computed embeddings
    ā””ā”€ā”€ Optimizes for RECALL (find all relevant docs)

    Stage 2: Reranking (Cross-encoder)
    ā”œā”€ā”€ Slower but more accurate scoring
    ā”œā”€ā”€ Reorders the top-K candidates
    ā”œā”€ā”€ Computes query-document interaction
    ā””ā”€ā”€ Optimizes for PRECISION (best docs first)

    Why Reranking Matters:
    - Bi-encoders encode query and docs separately (fast but approximate)
    - Cross-encoders process query+doc together (slow but accurate)
    - Reranking gives you the best of both worlds
    """)


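# Stage 1 of the pipeline described above can be sketched directly. This helper is an
# illustrative addition, not part of the original example: it assumes the optional
# `sentence-transformers` package, embeds the query and each paper independently
# (bi-encoder), and ranks by cosine similarity. It is not wired into main(), so the
# core example still only requires praisonaiagents.
def retrieve_first_stage(query, papers, top_k=5):
    """Stage 1 sketch: embed query and papers separately, rank by cosine similarity."""
    from sentence_transformers import SentenceTransformer, util  # optional dependency

    model = SentenceTransformer("all-MiniLM-L6-v2")
    doc_embeddings = model.encode([p["content"] for p in papers], convert_to_tensor=True)
    query_embedding = model.encode(query, convert_to_tensor=True)
    scores = util.cos_sim(query_embedding, doc_embeddings)[0].tolist()
    ranked = sorted(zip(papers, scores), key=lambda pair: pair[1], reverse=True)
    # Return the top-k candidate papers in bi-encoder order (optimizing recall).
    return [paper for paper, _ in ranked[:top_k]]

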
def rag_with_reranking():
    """Demonstrate RAG with reranking enabled."""

    # Build context from research papers
    context = "\n\n".join([f"[{p['id']}]\n{p['content']}" for p in RESEARCH_PAPERS])

    # Create agent with context
    agent = Agent(
        name="Research Assistant",
        instructions=f"""You are a research paper expert.
        Answer questions about machine learning research.
        Reference specific papers when relevant.
        Be precise about technical details.

        RESEARCH PAPERS:
        {context}""",
        output="silent"
    )

    queries = [
        "Which paper introduced the attention mechanism for translation?",
        "Tell me about pre-training language models",
        "What architecture avoids recurrence entirely?"
    ]

    print("\n" + "=" * 60)
    print("RAG WITH RERANKING")
    print("=" * 60)

    for query in queries:
        print(f"\nšŸ“ Query: {query}")
        response = agent.chat(query)
        answer = str(response)
        print(f"šŸ’” Answer: {answer[:300]}..." if len(answer) > 300 else f"šŸ’” Answer: {answer}")
        print("-" * 40)


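# Stage 2, the reranking step this example is about, sketched with a cross-encoder.
# This is also an illustrative addition assuming the optional `sentence-transformers`
# package; the checkpoint name is a commonly used public reranker model. Each
# (query, document) pair is scored jointly, which is slower than the bi-encoder above
# but captures query-document interaction.
def rerank_with_cross_encoder(query, candidates, top_n=3):
    """Stage 2 sketch: jointly score each (query, document) pair and reorder."""
    from sentence_transformers import CrossEncoder  # optional dependency

    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    scores = model.predict([(query, c["content"]) for c in candidates])
    ranked = sorted(zip(candidates, scores), key=lambda pair: pair[1], reverse=True)
    # Return the top-n candidates in cross-encoder order (optimizing precision).
    return [candidate for candidate, _ in ranked[:top_n]]

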
def compare_with_without_reranking():
    """Compare retrieval quality with and without reranking."""

    print("\n" + "=" * 60)
    print("RERANKING IMPACT COMPARISON")
    print("=" * 60)

    # Build context
    context = "\n\n".join([f"[{p['id']}]\n{p['content']}" for p in RESEARCH_PAPERS])

    query = "How do transformers handle long-range dependencies?"

    # Agent with context
    agent = Agent(
        name="Research Agent",
        instructions=f"""Answer based on the research papers provided.

        RESEARCH PAPERS:
        {context}""",
        output="silent"
    )

    print(f"\nšŸ“ Query: {query}")

    print("\nšŸ” RAG Response:")
    response = agent.chat(query)
    print(f"   {response[:250]}...")

    print("\nšŸ’” Note: Reranking uses cross-encoders to improve precision after initial retrieval.")


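# To make the comparison concrete at the retrieval level (independent of the agent),
# this optional helper chains the two sketches defined earlier: it prints the paper
# order produced by the bi-encoder alone, then the order after the cross-encoder
# pass. Illustrative only; it requires the optional `sentence-transformers` package.
def compare_orderings(query="How do transformers handle long-range dependencies?"):
    """Show candidate order before and after the cross-encoder reranking pass."""
    candidates = retrieve_first_stage(query, RESEARCH_PAPERS, top_k=5)
    print("Bi-encoder order (no reranking):", [p["id"] for p in candidates[:3]])
    reranked = rerank_with_cross_encoder(query, candidates, top_n=3)
    print("Cross-encoder order (reranked): ", [p["id"] for p in reranked])

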
def reranking_configuration():
    """Show reranking configuration options."""

    print("\n" + "=" * 60)
    print("RERANKING CONFIGURATION")
    print("=" * 60)

    print("""
    Configuration options for reranking in PraisonAI:

    ```python
    agent = Agent(
        name="My Agent",
        knowledge=documents,
        knowledge_config={
            # Enable reranking
            "rerank": True,

            # Number of candidates for initial retrieval
            "top_k": 10,

            # Number of results after reranking
            "rerank_top_k": 3,

            # Reranker model (optional)
            "rerank_model": "cross-encoder/ms-marco-MiniLM-L-6-v2"
        }
    )
    ```

    Performance Considerations:
    ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”
    │ Configuration   │ Latency      │ Quality     │
    ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
    │ No reranking    │ ~50ms        │ Good        │
    │ Light reranker  │ ~100ms       │ Better      │
    │ Heavy reranker  │ ~200ms       │ Best        │
    ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜

    Tip: Use reranking for precision-critical applications
    where quality matters more than latency.
    """)


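# The latency figures printed by reranking_configuration() are indicative only; real
# numbers depend on hardware, batch size, and model choice. This optional helper (an
# illustrative addition assuming `sentence-transformers`; both checkpoint names are
# public cross-encoder models) times a lighter and a heavier reranker on the same
# pairs so the trade-off can be measured locally.
def time_rerankers(query="How do transformers handle long-range dependencies?"):
    """Time two cross-encoder checkpoints on the sample papers."""
    import time
    from sentence_transformers import CrossEncoder  # optional dependency

    pairs = [(query, p["content"]) for p in RESEARCH_PAPERS]
    for model_name in ("cross-encoder/ms-marco-MiniLM-L-6-v2",
                       "cross-encoder/ms-marco-MiniLM-L-12-v2"):
        model = CrossEncoder(model_name)  # model loading kept outside the timed span
        start = time.perf_counter()
        model.predict(pairs)
        elapsed_ms = (time.perf_counter() - start) * 1000
        print(f"{model_name}: {elapsed_ms:.0f} ms for {len(pairs)} pairs")

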
def main():
    """Run all reranking examples."""
    print("\nšŸš€ PraisonAI Reranking Examples\n")

    # Example 1: Explain reranking
    explain_reranking()

    # Example 2: RAG with reranking
    rag_with_reranking()

    # Example 3: Compare with/without reranking
    compare_with_without_reranking()

    # Example 4: Configuration options
    reranking_configuration()

    print("\nāœ… Reranking examples completed!")


if __name__ == "__main__":
    main()