# citations.py
"""
RAG with Citations: Source Attribution and Verification

This example demonstrates how to include citations and source references
in RAG responses for transparency and verification.

RAG Concept: Trust in AI-generated answers requires verification. Citations
link answers back to source documents, enabling fact-checking and building
user confidence.
"""

from praisonaiagents import Agent

# Sample knowledge base: policy documents with clear sources.
# Every entry carries the metadata (source + section) that the agents are
# asked to cite, plus a stable id for programmatic lookup.
LEGAL_DOCUMENTS = [
    {
        "id": "policy_001",
        "source": "Employee Handbook v2024",
        "section": "Section 3.2 - Leave Policies",
        "content": """
        Annual Leave Entitlement:
        - Full-time employees: 20 days per year
        - Part-time employees: Pro-rated based on hours
        - Unused leave: Up to 5 days may carry over to next year
        - Leave requests: Submit at least 2 weeks in advance
        - Approval: Manager approval required for leave > 3 consecutive days
        """,
    },
    {
        "id": "policy_002",
        "source": "Employee Handbook v2024",
        "section": "Section 4.1 - Remote Work",
        "content": """
        Remote Work Policy:
        - Eligibility: Employees in approved roles after 6 months tenure
        - Maximum: 3 days per week remote work allowed
        - Requirements: Reliable internet, dedicated workspace
        - Core hours: Must be available 10am-3pm local time
        - Equipment: Company provides laptop; employee provides internet
        """,
    },
    {
        "id": "policy_003",
        "source": "Benefits Guide 2024",
        "section": "Chapter 2 - Health Insurance",
        "content": """
        Health Insurance Coverage:
        - Provider: BlueCross BlueShield
        - Coverage: Employee + dependents eligible
        - Premium: Company pays 80%, employee pays 20%
        - Deductible: $500 individual, $1000 family
        - Enrollment: During annual open enrollment or qualifying life event
        """,
    },
    {
        "id": "policy_004",
        "source": "Benefits Guide 2024",
        "section": "Chapter 5 - Retirement",
        "content": """
        401(k) Retirement Plan:
        - Eligibility: After 90 days of employment
        - Company match: 100% up to 4% of salary
        - Vesting: Immediate vesting for employee contributions
        - Company match vesting: 3-year graded schedule
        - Investment options: 15 fund choices available
        """,
    },
    {
        "id": "policy_005",
        "source": "Code of Conduct 2024",
        "section": "Article 7 - Conflicts of Interest",
        "content": """
        Conflict of Interest Policy:
        - Disclosure: All potential conflicts must be disclosed to HR
        - Outside employment: Requires written approval
        - Financial interests: Cannot hold >5% in competitors
        - Gifts: Cannot accept gifts valued over $50
        - Relationships: Must disclose family relationships with vendors
        """,
    },
]

# Width of the "=" rule printed above and below each section title.
_BANNER_WIDTH = 60


def _build_context(documents=None) -> str:
    """Render policy documents as a single prompt-ready string.

    Each document becomes a ``[source, section]`` header line followed by its
    content; consecutive documents are separated by one blank line.

    Args:
        documents: Iterable of dicts with ``source``, ``section`` and
            ``content`` keys. Defaults to ``LEGAL_DOCUMENTS``.

    Returns:
        The concatenated context block (an empty string when there are no
        documents).

    Raises:
        KeyError: If a document lacks one of the required keys.
    """
    if documents is None:
        documents = LEGAL_DOCUMENTS
    return "\n\n".join(
        f"[{doc['source']}, {doc['section']}]\n{doc['content']}"
        for doc in documents
    )


def _make_agent(name: str, guidance: str) -> Agent:
    """Create an agent whose instructions end with the policy documents.

    Args:
        name: Name passed through to ``Agent``.
        guidance: Role description and citation-format rules; the
            ``POLICY DOCUMENTS:`` section is appended after a blank line.

    Returns:
        The configured ``Agent``.
    """
    instructions = f"{guidance}\n\nPOLICY DOCUMENTS:\n{_build_context()}"
    # NOTE(review): output="silent" presumably suppresses the agent's own
    # console rendering so only the prints below are shown - confirm against
    # the praisonaiagents documentation.
    return Agent(name=name, instructions=instructions, output="silent")


def _print_banner(title: str, *, leading_blank: bool = True) -> None:
    """Print a section title framed by two ``=`` rules.

    Args:
        title: Text shown between the rules.
        leading_blank: When true, emit an empty line before the first rule.
    """
    rule = "=" * _BANNER_WIDTH
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def basic_citations() -> None:
    """Demonstrate basic citation inclusion in RAG responses."""
    # First section of the run, so no blank line before the banner.
    _print_banner("BASIC CITATIONS IN RAG", leading_blank=False)

    agent = _make_agent(
        "HR Policy Expert",
        """You are an HR policy expert who answers employee questions.

IMPORTANT: Always cite your sources using this format:
[Source: Document Name, Section]

Include the citation immediately after the relevant information.
If information comes from multiple sources, cite each one.""",
    )

    queries = [
        "How many vacation days do I get?",
        "Can I work from home?",
        "What's the company match for 401k?",
    ]

    for query in queries:
        print(f"\n📝 Query: {query}")
        response = agent.chat(query)
        print(f"💡 Answer with Citations:\n{response}")
        print("-" * 40)


def structured_citations() -> None:
    """Demonstrate structured citation format."""
    _print_banner("STRUCTURED CITATIONS")

    agent = _make_agent(
        "Policy Researcher",
        """You provide policy information with structured citations.

Format your response as:

ANSWER:
[Your answer here]

SOURCES:
1. [Document] - [Section] - [Relevant quote]
2. [Document] - [Section] - [Relevant quote]

Always include at least one source citation.""",
    )

    query = "What are the requirements for remote work eligibility?"

    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Structured Response:\n{response}")


def inline_citations() -> None:
    """Demonstrate inline citation style."""
    _print_banner("INLINE CITATIONS (Academic Style)")

    agent = _make_agent(
        "Academic Researcher",
        """You write responses with inline citations like academic papers.

Use numbered citations [1], [2], etc. in the text.
List full references at the end.

Example:
"Employees receive 20 days of leave [1]. Remote work is allowed
for up to 3 days per week [2]."

References:
[1] Employee Handbook v2024, Section 3.2
[2] Employee Handbook v2024, Section 4.1""",
    )

    query = "Summarize the key employee benefits."

    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Academic-Style Response:\n{response}")


def citation_verification() -> None:
    """Demonstrate citation verification concept.

    Prints a description of the verification process and a hard-coded
    example; no agent is called.
    """
    _print_banner("CITATION VERIFICATION")

    print("""
📋 Citation Verification Process:

1. **Extract Citations**
   Parse the agent's response to find citation markers

2. **Locate Source Documents**
   Match citations to documents in knowledge base

3. **Verify Claims**
   Check if the cited text supports the claim

4. **Flag Discrepancies**
   Highlight any mismatches or unsupported claims

Example Verification:
""")

    # Simulated verification: the values are fixed, nothing is computed.
    claim = "Employees get 20 days of annual leave"
    cited_source = "Employee Handbook v2024, Section 3.2"
    source_text = "Full-time employees: 20 days per year"

    print(f'   Claim: "{claim}"')
    print(f"   Citation: {cited_source}")
    print(f'   Source Text: "{source_text}"')
    print("   Verification: ✓ SUPPORTED")

    print("""

Implementation:
```python
def verify_citation(claim: str, source_id: str, knowledge) -> bool:
    # Get source document
    source = knowledge.get(source_id)

    # Check if claim is supported by source
    # (In practice, use semantic similarity)
    return claim_supported_by_source(claim, source)
```
""")


def multi_source_synthesis() -> None:
    """Demonstrate synthesizing from multiple sources with citations."""
    _print_banner("MULTI-SOURCE SYNTHESIS WITH CITATIONS")

    agent = _make_agent(
        "Benefits Advisor",
        """You synthesize information from multiple policy documents.

When answering:
1. Gather relevant information from all sources
2. Synthesize into a coherent answer
3. Cite each source for the specific information it provides
4. Note if sources have different or complementary information

Format: Include [Source: X] after each piece of information.""",
    )

    query = "Give me a complete overview of employee benefits and policies."

    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Multi-Source Synthesis:\n{response}")


def citation_best_practices() -> None:
    """Share best practices for citations in RAG.

    Prints static guidance only; no agent is called.
    """
    _print_banner("CITATION BEST PRACTICES")

    print("""
📚 Best Practices for RAG Citations:

1. **Include Metadata in Knowledge Base**
   ```python
   documents = [
       {
           "id": "doc_001",
           "source": "Policy Manual",
           "section": "Chapter 3",
           "date": "2024-01-15",
           "author": "HR Department",
           "content": "..."
       }
   ]
   ```

2. **Instruct Agent to Cite**
   - Be explicit in instructions about citation format
   - Provide examples of expected citation style
   - Require citations for factual claims

3. **Use Consistent Citation Format**
   - Inline: [1], [2] with reference list
   - Parenthetical: (Source, Section)
   - Footnote style: ¹, ² with notes

4. **Enable Verification**
   - Include document IDs for programmatic lookup
   - Store enough metadata to locate original
   - Consider including page numbers or paragraphs

5. **Handle Missing Sources**
   - Instruct agent to acknowledge when info isn't in sources
   - Distinguish between sourced and general knowledge
   - Use phrases like "Based on the provided documents..."

6. **Citation Granularity**
   - Document level: Good for general attribution
   - Section level: Better for verification
   - Quote level: Best for accuracy-critical applications

Example Agent Instructions:
```python
instructions = '''
Answer questions using the provided documents.

Rules:
- Cite sources for all factual claims
- Use format: [Source: Document Name, Section]
- If information isn't in the documents, say so
- Never make claims without citation
'''
```
""")


def main() -> None:
    """Run all citation examples."""
    print("\n📚 PraisonAI RAG Citations Examples\n")

    # Example 1: Basic citations
    basic_citations()

    # Example 2: Structured citations
    structured_citations()

    # Example 3: Inline citations
    inline_citations()

    # Example 4: Citation verification
    citation_verification()

    # Example 5: Multi-source synthesis
    multi_source_synthesis()

    # Example 6: Best practices
    citation_best_practices()

    print("\n✅ Citation examples completed!")


if __name__ == "__main__":
    main()