structured_output.py
"""
Structured Output RAG: Guided and Constrained Generation

This example demonstrates how to guide RAG output into specific formats
and structures using Pydantic models and output constraints.

RAG Concept: Sometimes you need RAG answers in a specific format - JSON,
tables, or structured data. Guided generation ensures the LLM output
conforms to your schema.
"""

import json
from typing import List

from praisonaiagents import Agent
from pydantic import BaseModel, Field

# NOTE(review): the leading glyphs in the printed labels below (e.g. "š",
# "ā") look like mis-decoded emoji. They are reproduced unchanged here;
# confirm the intended characters against the upstream file.

# Sample knowledge base: Product catalog
PRODUCT_CATALOG = [
    {
        "id": "prod_001",
        "content": """
        Product: UltraWidget Pro
        Category: Electronics
        Price: $299.99
        Rating: 4.5/5 (1,250 reviews)
        Features: Wireless connectivity, 12-hour battery, water-resistant
        Availability: In stock
        SKU: UW-PRO-2024
        """
    },
    {
        "id": "prod_002",
        "content": """
        Product: SmartHome Hub
        Category: Home Automation
        Price: $149.99
        Rating: 4.2/5 (890 reviews)
        Features: Voice control, 100+ device compatibility, energy monitoring
        Availability: In stock
        SKU: SH-HUB-2024
        """
    },
    {
        "id": "prod_003",
        "content": """
        Product: ErgoDesk Standing Desk
        Category: Furniture
        Price: $599.99
        Rating: 4.8/5 (2,100 reviews)
        Features: Electric height adjustment, memory presets, cable management
        Availability: Ships in 3-5 days
        SKU: ED-STAND-2024
        """
    },
    {
        "id": "prod_004",
        "content": """
        Product: CloudSync Backup Drive
        Category: Storage
        Price: $199.99
        Rating: 4.6/5 (1,800 reviews)
        Features: 4TB capacity, automatic backup, encrypted storage
        Availability: In stock
        SKU: CS-4TB-2024
        """
    }
]


def _build_context(products: List[dict]) -> str:
    """Render catalog entries as one prompt-ready text block.

    Each entry becomes ``[<id>]`` on its own line followed by the entry's
    ``content``; entries are separated by a blank line.

    Args:
        products: Dicts that each carry an ``"id"`` and a ``"content"`` key.

    Returns:
        The joined text, or an empty string when ``products`` is empty.
    """
    return "\n\n".join(f"[{p['id']}]\n{p['content']}" for p in products)


def _preview(response: object, limit: int) -> str:
    """Return ``response`` as text, cut to ``limit`` characters plus ``...``.

    The value is converted with ``str()`` once so that the length check and
    the slice operate on the same object; slicing the raw response would
    fail for anything that is not a string.

    Args:
        response: Whatever the agent returned.
        limit: Maximum number of characters kept before the ellipsis.

    Returns:
        The full text when it fits within ``limit``, otherwise the first
        ``limit`` characters followed by ``"..."``.
    """
    text = str(response)
    return f"{text[:limit]}..." if len(text) > limit else text


# Pydantic models for structured output
class ProductInfo(BaseModel):
    """Structured product information.

    Not instantiated in this file; it is only named in the tutorial text
    printed by ``constrained_generation``.
    """
    name: str = Field(description="Product name")
    price: float = Field(description="Price in USD")
    rating: float = Field(description="Rating out of 5")
    in_stock: bool = Field(description="Whether product is in stock")
    key_features: List[str] = Field(description="Top 3 features")


class ProductComparison(BaseModel):
    """Structured comparison of products.

    Its JSON schema is embedded in the prompt built by
    ``pydantic_guided_rag``.
    """
    products: List[str] = Field(description="Names of compared products")
    winner: str = Field(description="Recommended product")
    reason: str = Field(description="Why this product is recommended")
    price_range: str = Field(description="Price range of compared products")


class SearchResult(BaseModel):
    """Structured search result (declared for illustration; unused here)."""
    query: str = Field(description="Original search query")
    matches: List[str] = Field(description="Matching product names")
    total_found: int = Field(description="Number of matches")
    summary: str = Field(description="Brief summary of results")


def json_output_rag() -> None:
    """Demonstrate RAG with JSON-structured output.

    Builds an agent whose instructions contain the whole product catalog,
    asks about one product with an inline JSON template in the query, and
    prints the raw response.
    """
    print("=" * 60)
    print("JSON-STRUCTURED RAG OUTPUT")
    print("=" * 60)

    # Build context
    context = _build_context(PRODUCT_CATALOG)

    agent = Agent(
        name="Product Assistant",
        instructions=f"""You are a product assistant that provides structured information.
        Always respond with valid JSON matching the requested format.
        Extract accurate information from the product catalog.

        PRODUCT CATALOG:
        {context}""",
        output="silent"
    )

    # Request structured product info. The template is a prompt hint for the
    # model, not a literal JSON document ("true/false" is a placeholder).
    query = """
    Tell me about the UltraWidget Pro. Respond in this JSON format:
    {
        "name": "product name",
        "price": 0.00,
        "rating": 0.0,
        "in_stock": true/false,
        "key_features": ["feature1", "feature2", "feature3"]
    }
    """

    print("\nš Query: Get structured product info")
    response = agent.chat(query)
    print("š” Structured Response:\n" + str(response))
    print("-" * 40)


def pydantic_guided_rag() -> None:
    """Demonstrate RAG with Pydantic model guidance.

    Embeds the JSON schema of ``ProductComparison`` in the agent's
    instructions, asks for a comparison of two products and prints the
    response. The response is printed as returned; it is not validated
    against the model here.
    """
    print("\n" + "=" * 60)
    print("PYDANTIC-GUIDED RAG")
    print("=" * 60)

    # Serialise the schema as real JSON. Interpolating the dict directly
    # would put its Python repr (single quotes, True/False) into a prompt
    # that asks for JSON.
    schema = json.dumps(ProductComparison.model_json_schema(), indent=2)

    # Build context
    context = _build_context(PRODUCT_CATALOG)

    agent = Agent(
        name="Product Comparator",
        instructions=f"""You compare products and provide structured recommendations.
        Your response MUST be valid JSON matching this schema:
        {schema}

        Be objective and base recommendations on the product data.

        PRODUCT CATALOG:
        {context}""",
        output="silent"
    )

    query = "Compare the UltraWidget Pro and SmartHome Hub. Which should I buy?"

    print(f"\nš Query: {query}")
    print("š Expected Schema: ProductComparison")

    response = agent.chat(query)
    print(f"\nš” Structured Comparison:\n{response}")


def table_format_rag() -> None:
    """Demonstrate RAG with table-formatted output.

    Instructs the agent to answer with markdown tables, asks for a
    name/price/rating table of all products and prints the response.
    """
    print("\n" + "=" * 60)
    print("TABLE-FORMATTED RAG")
    print("=" * 60)

    # Build context
    context = _build_context(PRODUCT_CATALOG)

    agent = Agent(
        name="Catalog Browser",
        instructions=f"""You present product information in clean table format.
        Use markdown tables for structured data.
        Include relevant columns based on the query.

        PRODUCT CATALOG:
        {context}""",
        output="silent"
    )

    query = "Show me all products in a table with name, price, and rating."

    print(f"\nš Query: {query}")
    response = agent.chat(query)
    print(f"\nš” Table Output:\n{response}")


def list_format_rag() -> None:
    """Demonstrate RAG with list-formatted output.

    Sends two list-oriented queries to a single agent and prints each
    response, truncated to 400 characters.
    """
    print("\n" + "=" * 60)
    print("LIST-FORMATTED RAG")
    print("=" * 60)

    # Build context
    context = _build_context(PRODUCT_CATALOG)

    agent = Agent(
        name="Feature Lister",
        instructions=f"""You extract and present information as organized lists.
        Use bullet points and numbered lists appropriately.
        Group related items together.

        PRODUCT CATALOG:
        {context}""",
        output="silent"
    )

    queries = [
        "List all product features across the catalog",
        "What are the top-rated products? List them in order."
    ]

    for query in queries:
        print(f"\nš Query: {query}")
        response = agent.chat(query)
        print(f"š” List Output:\n{_preview(response, 400)}")
        print("-" * 40)


def constrained_generation() -> None:
    """Demonstrate constrained/guided generation patterns.

    Prints a static overview of five techniques; no agent is created and
    the text is a plain (non-f) string, so ``{schema}`` is printed verbatim.
    """
    print("\n" + "=" * 60)
    print("CONSTRAINED GENERATION PATTERNS")
    print("=" * 60)

    print("""
    šÆ Constrained Generation Techniques:

    1. **Schema Enforcement**
       - Provide JSON schema in instructions
       - Agent outputs valid JSON matching schema
       ```python
       instructions = f"Respond with JSON matching: {schema}"
       ```

    2. **Format Templates**
       - Give explicit output templates
       - Agent fills in the blanks
       ```python
       instructions = '''
       Respond in this format:
       PRODUCT: [name]
       PRICE: $[amount]
       VERDICT: [recommendation]
       '''
       ```

    3. **Pydantic Validation**
       - Parse agent output with Pydantic
       - Retry if validation fails
       ```python
       response = agent.chat(query)
       try:
           result = ProductInfo.model_validate_json(response)
       except ValidationError:
           # Retry with clarification
       ```

    4. **Output Parsers**
       - Post-process agent output
       - Extract structured data from text
       ```python
       def parse_product(text: str) -> dict:
           # Extract price, rating, etc.
           return structured_data
       ```

    5. **Few-Shot Examples**
       - Show examples of desired output format
       - Agent learns pattern from examples
       ```python
       instructions = '''
       Example:
       Q: Tell me about Product X
       A: {"name": "Product X", "price": 99.99}

       Now answer the user's question in the same format.
       '''
       ```
    """)


def multi_format_rag() -> None:
    """Demonstrate switching between output formats.

    Asks the same question four times, each time through a fresh agent
    whose instructions request a different output format, and prints each
    response truncated to 250 characters.
    """
    print("\n" + "=" * 60)
    print("MULTI-FORMAT RAG")
    print("=" * 60)

    query = "Tell me about the ErgoDesk Standing Desk"

    formats = [
        ("JSON", "Respond with a JSON object containing name, price, and features."),
        ("Markdown", "Respond with a markdown-formatted product card with headers."),
        ("Plain Text", "Respond with a brief, conversational product description."),
        ("Bullet Points", "Respond with bullet points covering key product details.")
    ]

    print(f"\nš Base Query: {query}")
    print("\nš Same query, different output formats:\n")

    # Build context once; it is identical for every format.
    context = _build_context(PRODUCT_CATALOG)

    for format_name, format_instruction in formats:
        # A new agent per format, because the format lives in the instructions.
        agent = Agent(
            name=f"{format_name} Agent",
            instructions=f"""You are a product expert. {format_instruction}

            PRODUCT CATALOG:
            {context}""",
            output="silent"
        )

        response = agent.chat(query)
        print(f"š {format_name} Format:")
        print(_preview(response, 250))
        print("-" * 40)


def main() -> None:
    """Run all structured output RAG examples in order."""
    print("\nš PraisonAI Structured Output RAG Examples\n")

    # Example 1: JSON output
    json_output_rag()

    # Example 2: Pydantic-guided
    pydantic_guided_rag()

    # Example 3: Table format
    table_format_rag()

    # Example 4: List format
    list_format_rag()

    # Example 5: Constrained generation patterns
    constrained_generation()

    # Example 6: Multi-format
    multi_format_rag()

    print("\nā Structured output RAG examples completed!")


if __name__ == "__main__":
    main()