/ examples / rag / citations.py
citations.py
  1  """
  2  RAG with Citations: Source Attribution and Verification
  3  
  4  This example demonstrates how to include citations and source references
  5  in RAG responses for transparency and verification.
  6  
  7  RAG Concept: Trust in AI-generated answers requires verification. Citations
  8  link answers back to source documents, enabling fact-checking and building
  9  user confidence.
 10  """
 11  
 12  from praisonaiagents import Agent
 13  
# Sample knowledge base: HR policy documents with explicit source metadata.
#
# Each entry is a dict with four string fields:
#   id      - identifier for the entry (not referenced by the example
#             functions visible in this file)
#   source  - title of the originating document
#   section - location within that document
#   content - the policy text itself, kept verbatim (including indentation)
#
# The example functions below render each entry as "[source, section]"
# followed by its content when building the agent prompt.
LEGAL_DOCUMENTS = [
    {
        "id": "policy_001",
        "source": "Employee Handbook v2024",
        "section": "Section 3.2 - Leave Policies",
        "content": """
        Annual Leave Entitlement:
        - Full-time employees: 20 days per year
        - Part-time employees: Pro-rated based on hours
        - Unused leave: Up to 5 days may carry over to next year
        - Leave requests: Submit at least 2 weeks in advance
        - Approval: Manager approval required for leave > 3 consecutive days
        """
    },
    {
        "id": "policy_002",
        "source": "Employee Handbook v2024",
        "section": "Section 4.1 - Remote Work",
        "content": """
        Remote Work Policy:
        - Eligibility: Employees in approved roles after 6 months tenure
        - Maximum: 3 days per week remote work allowed
        - Requirements: Reliable internet, dedicated workspace
        - Core hours: Must be available 10am-3pm local time
        - Equipment: Company provides laptop; employee provides internet
        """
    },
    {
        "id": "policy_003",
        "source": "Benefits Guide 2024",
        "section": "Chapter 2 - Health Insurance",
        "content": """
        Health Insurance Coverage:
        - Provider: BlueCross BlueShield
        - Coverage: Employee + dependents eligible
        - Premium: Company pays 80%, employee pays 20%
        - Deductible: $500 individual, $1000 family
        - Enrollment: During annual open enrollment or qualifying life event
        """
    },
    {
        "id": "policy_004",
        "source": "Benefits Guide 2024",
        "section": "Chapter 5 - Retirement",
        "content": """
        401(k) Retirement Plan:
        - Eligibility: After 90 days of employment
        - Company match: 100% up to 4% of salary
        - Vesting: Immediate vesting for employee contributions
        - Company match vesting: 3-year graded schedule
        - Investment options: 15 fund choices available
        """
    },
    {
        "id": "policy_005",
        "source": "Code of Conduct 2024",
        "section": "Article 7 - Conflicts of Interest",
        "content": """
        Conflict of Interest Policy:
        - Disclosure: All potential conflicts must be disclosed to HR
        - Outside employment: Requires written approval
        - Financial interests: Cannot hold >5% in competitors
        - Gifts: Cannot accept gifts valued over $50
        - Relationships: Must disclose family relationships with vendors
        """
    }
]
 82  
 83  
 84  def basic_citations():
 85      """Demonstrate basic citation inclusion in RAG responses."""
 86      
 87      print("=" * 60)
 88      print("BASIC CITATIONS IN RAG")
 89      print("=" * 60)
 90      
 91      # Build context
 92      context = "\n\n".join([f"[{d['source']}, {d['section']}]\n{d['content']}" for d in LEGAL_DOCUMENTS])
 93      
 94      agent = Agent(
 95          name="HR Policy Expert",
 96          instructions=f"""You are an HR policy expert who answers employee questions.
 97          
 98          IMPORTANT: Always cite your sources using this format:
 99          [Source: Document Name, Section]
100          
101          Include the citation immediately after the relevant information.
102          If information comes from multiple sources, cite each one.
103          
104          POLICY DOCUMENTS:
105          {context}""",
106          output="silent"
107      )
108      
109      queries = [
110          "How many vacation days do I get?",
111          "Can I work from home?",
112          "What's the company match for 401k?"
113      ]
114      
115      for query in queries:
116          print(f"\n๐Ÿ“ Query: {query}")
117          response = agent.chat(query)
118          print(f"๐Ÿ’ก Answer with Citations:\n{response}")
119          print("-" * 40)
120  
121  
def structured_citations():
    """Demonstrate structured citation format.

    Builds the prompt context from every entry in ``LEGAL_DOCUMENTS``,
    creates an agent instructed to answer in an ``ANSWER:`` / ``SOURCES:``
    layout with at least one source, asks one fixed remote-work question
    through ``agent.chat`` and prints the reply.

    Returns:
        None. All results are written to stdout.
    """

    print("\n" + "=" * 60)
    print("STRUCTURED CITATIONS")
    print("=" * 60)

    # Build context: one "[source, section]" header per document.
    context = "\n\n".join(
        f"[{d['source']}, {d['section']}]\n{d['content']}"
        for d in LEGAL_DOCUMENTS
    )

    agent = Agent(
        name="Policy Researcher",
        instructions=f"""You provide policy information with structured citations.
        
        Format your response as:
        
        ANSWER:
        [Your answer here]
        
        SOURCES:
        1. [Document] - [Section] - [Relevant quote]
        2. [Document] - [Section] - [Relevant quote]
        
        Always include at least one source citation.
        
        POLICY DOCUMENTS:
        {context}""",
        # NOTE(review): presumably suppresses the agent's own console output
        # - confirm against the library.
        output="silent"
    )

    query = "What are the requirements for remote work eligibility?"

    # The emoji prefixes were mis-encoded (mojibake) in the original
    # literals; they are restored to the intended characters here.
    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Structured Response:\n{response}")
157  
158  
def inline_citations():
    """Demonstrate inline citation style.

    Builds the prompt context from every entry in ``LEGAL_DOCUMENTS``,
    creates an agent instructed to use numbered markers (``[1]``, ``[2]``)
    with a reference list at the end, asks one fixed summary question
    through ``agent.chat`` and prints the reply.

    Returns:
        None. All results are written to stdout.
    """

    print("\n" + "=" * 60)
    print("INLINE CITATIONS (Academic Style)")
    print("=" * 60)

    # Build context: one "[source, section]" header per document.
    context = "\n\n".join(
        f"[{d['source']}, {d['section']}]\n{d['content']}"
        for d in LEGAL_DOCUMENTS
    )

    agent = Agent(
        name="Academic Researcher",
        instructions=f"""You write responses with inline citations like academic papers.
        
        Use numbered citations [1], [2], etc. in the text.
        List full references at the end.
        
        Example:
        "Employees receive 20 days of leave [1]. Remote work is allowed
        for up to 3 days per week [2]."
        
        References:
        [1] Employee Handbook v2024, Section 3.2
        [2] Employee Handbook v2024, Section 4.1
        
        POLICY DOCUMENTS:
        {context}""",
        # NOTE(review): presumably suppresses the agent's own console output
        # - confirm against the library.
        output="silent"
    )

    query = "Summarize the key employee benefits."

    # The emoji prefixes were mis-encoded (mojibake) in the original
    # literals; they are restored to the intended characters here.
    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Academic-Style Response:\n{response}")
194  
195  
def citation_verification():
    """Demonstrate citation verification concept.

    Prints a four-step description of a verification process, then a
    hard-coded worked example (claim, citation, source text, verdict), and
    finally an illustrative code snippet. Nothing is actually verified:
    the example values and the "SUPPORTED" verdict are fixed literals, and
    the snippet is printed as text, not executed.

    Returns:
        None. All results are written to stdout.
    """

    print("\n" + "=" * 60)
    print("CITATION VERIFICATION")
    print("=" * 60)

    # The leading emoji was mis-encoded (mojibake) in the original literal;
    # it is restored to the intended character here.
    print("""
    🔍 Citation Verification Process:
    
    1. **Extract Citations**
       Parse the agent's response to find citation markers
       
    2. **Locate Source Documents**
       Match citations to documents in knowledge base
       
    3. **Verify Claims**
       Check if the cited text supports the claim
       
    4. **Flag Discrepancies**
       Highlight any mismatches or unsupported claims
    
    Example Verification:
    """)

    # Simulated verification: fixed sample values, no lookup is performed.
    claim = "Employees get 20 days of annual leave"
    cited_source = "Employee Handbook v2024, Section 3.2"
    source_text = "Full-time employees: 20 days per year"

    print(f"   Claim: \"{claim}\"")
    print(f"   Citation: {cited_source}")
    print(f"   Source Text: \"{source_text}\"")
    # Check mark restored from its mis-encoded form.
    print("   Verification: ✅ SUPPORTED")

    # Illustrative snippet only; the helper names inside it are not defined
    # in this file.
    print("""
    
    Implementation:
    ```python
    def verify_citation(claim: str, source_id: str, knowledge) -> bool:
        # Get source document
        source = knowledge.get(source_id)
        
        # Check if claim is supported by source
        # (In practice, use semantic similarity)
        return claim_supported_by_source(claim, source)
    ```
    """)
244  
245  
def multi_source_synthesis():
    """Demonstrate synthesizing from multiple sources with citations.

    Builds the prompt context from every entry in ``LEGAL_DOCUMENTS``,
    creates an agent instructed to combine information across documents
    and tag each piece with ``[Source: X]``, asks one fixed overview
    question through ``agent.chat`` and prints the reply.

    Returns:
        None. All results are written to stdout.
    """

    print("\n" + "=" * 60)
    print("MULTI-SOURCE SYNTHESIS WITH CITATIONS")
    print("=" * 60)

    # Build context: one "[source, section]" header per document.
    context = "\n\n".join(
        f"[{d['source']}, {d['section']}]\n{d['content']}"
        for d in LEGAL_DOCUMENTS
    )

    agent = Agent(
        name="Benefits Advisor",
        instructions=f"""You synthesize information from multiple policy documents.
        
        When answering:
        1. Gather relevant information from all sources
        2. Synthesize into a coherent answer
        3. Cite each source for the specific information it provides
        4. Note if sources have different or complementary information
        
        Format: Include [Source: X] after each piece of information.
        
        POLICY DOCUMENTS:
        {context}""",
        # NOTE(review): presumably suppresses the agent's own console output
        # - confirm against the library.
        output="silent"
    )

    query = "Give me a complete overview of employee benefits and policies."

    # The emoji prefixes were mis-encoded (mojibake) in the original
    # literals; they are restored to the intended characters here.
    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Multi-Source Synthesis:\n{response}")
278  
279  
def citation_best_practices():
    """Share best practices for citations in RAG.

    Prints a banner followed by a static, six-point checklist and a sample
    set of agent instructions. The code fences inside the text are printed
    verbatim; nothing in them is executed.

    Returns:
        None. All results are written to stdout.
    """

    print("\n" + "=" * 60)
    print("CITATION BEST PRACTICES")
    print("=" * 60)

    # The heading emoji and the footnote superscripts were mis-encoded
    # (mojibake) in the original literal; they are restored here.
    print("""
    📚 Best Practices for RAG Citations:
    
    1. **Include Metadata in Knowledge Base**
       ```python
       documents = [
           {
               "id": "doc_001",
               "source": "Policy Manual",
               "section": "Chapter 3",
               "date": "2024-01-15",
               "author": "HR Department",
               "content": "..."
           }
       ]
       ```
    
    2. **Instruct Agent to Cite**
       - Be explicit in instructions about citation format
       - Provide examples of expected citation style
       - Require citations for factual claims
    
    3. **Use Consistent Citation Format**
       - Inline: [1], [2] with reference list
       - Parenthetical: (Source, Section)
       - Footnote style: ¹, ² with notes
    
    4. **Enable Verification**
       - Include document IDs for programmatic lookup
       - Store enough metadata to locate original
       - Consider including page numbers or paragraphs
    
    5. **Handle Missing Sources**
       - Instruct agent to acknowledge when info isn't in sources
       - Distinguish between sourced and general knowledge
       - Use phrases like "Based on the provided documents..."
    
    6. **Citation Granularity**
       - Document level: Good for general attribution
       - Section level: Better for verification
       - Quote level: Best for accuracy-critical applications
    
    Example Agent Instructions:
    ```python
    instructions = '''
    Answer questions using the provided documents.
    
    Rules:
    - Cite sources for all factual claims
    - Use format: [Source: Document Name, Section]
    - If information isn't in the documents, say so
    - Never make claims without citation
    '''
    ```
    """)
342  
343  
def main():
    """Run all citation examples.

    Prints an opening banner, calls the six example functions in a fixed
    order, then prints a completion message.

    Returns:
        None. All results are written to stdout.
    """
    # The banner emoji was mis-encoded (mojibake) in the original literal;
    # it is restored to the intended character here.
    print("\n🚀 PraisonAI RAG Citations Examples\n")

    # Example 1: Basic citations
    basic_citations()

    # Example 2: Structured citations
    structured_citations()

    # Example 3: Inline citations
    inline_citations()

    # Example 4: Citation verification
    citation_verification()

    # Example 5: Multi-source synthesis
    multi_source_synthesis()

    # Example 6: Best practices
    citation_best_practices()

    # Check mark restored from its mis-encoded form.
    print("\n✅ Citation examples completed!")
367  
368  
# Run the examples only when this file is executed as a script, so that
# importing the module does not trigger any agent calls or output.
if __name__ == "__main__":
    main()