/ examples / rag / structured_output.py
structured_output.py
  1  """
  2  Structured Output RAG: Guided and Constrained Generation
  3  
  4  This example demonstrates how to guide RAG output into specific formats
  5  and structures using Pydantic models and output constraints.
  6  
  7  RAG Concept: Sometimes you need RAG answers in a specific format - JSON,
  8  tables, or structured data. Guided generation ensures the LLM output
  9  conforms to your schema.
 10  """
 11  
 12  from typing import List
 13  from pydantic import BaseModel, Field
 14  from praisonaiagents import Agent
 15  
 16  # Sample knowledge base: Product catalog
 17  PRODUCT_CATALOG = [
 18      {
 19          "id": "prod_001",
 20          "content": """
 21          Product: UltraWidget Pro
 22          Category: Electronics
 23          Price: $299.99
 24          Rating: 4.5/5 (1,250 reviews)
 25          Features: Wireless connectivity, 12-hour battery, water-resistant
 26          Availability: In stock
 27          SKU: UW-PRO-2024
 28          """
 29      },
 30      {
 31          "id": "prod_002",
 32          "content": """
 33          Product: SmartHome Hub
 34          Category: Home Automation
 35          Price: $149.99
 36          Rating: 4.2/5 (890 reviews)
 37          Features: Voice control, 100+ device compatibility, energy monitoring
 38          Availability: In stock
 39          SKU: SH-HUB-2024
 40          """
 41      },
 42      {
 43          "id": "prod_003",
 44          "content": """
 45          Product: ErgoDesk Standing Desk
 46          Category: Furniture
 47          Price: $599.99
 48          Rating: 4.8/5 (2,100 reviews)
 49          Features: Electric height adjustment, memory presets, cable management
 50          Availability: Ships in 3-5 days
 51          SKU: ED-STAND-2024
 52          """
 53      },
 54      {
 55          "id": "prod_004",
 56          "content": """
 57          Product: CloudSync Backup Drive
 58          Category: Storage
 59          Price: $199.99
 60          Rating: 4.6/5 (1,800 reviews)
 61          Features: 4TB capacity, automatic backup, encrypted storage
 62          Availability: In stock
 63          SKU: CS-4TB-2024
 64          """
 65      }
 66  ]
 67  
 68  
 69  # Pydantic models for structured output
 70  class ProductInfo(BaseModel):
 71      """Structured product information."""
 72      name: str = Field(description="Product name")
 73      price: float = Field(description="Price in USD")
 74      rating: float = Field(description="Rating out of 5")
 75      in_stock: bool = Field(description="Whether product is in stock")
 76      key_features: List[str] = Field(description="Top 3 features")
 77  
 78  
 79  class ProductComparison(BaseModel):
 80      """Structured comparison of products."""
 81      products: List[str] = Field(description="Names of compared products")
 82      winner: str = Field(description="Recommended product")
 83      reason: str = Field(description="Why this product is recommended")
 84      price_range: str = Field(description="Price range of compared products")
 85  
 86  
 87  class SearchResult(BaseModel):
 88      """Structured search result."""
 89      query: str = Field(description="Original search query")
 90      matches: List[str] = Field(description="Matching product names")
 91      total_found: int = Field(description="Number of matches")
 92      summary: str = Field(description="Brief summary of results")
 93  
 94  
 95  def json_output_rag():
 96      """Demonstrate RAG with JSON-structured output."""
 97      
 98      print("=" * 60)
 99      print("JSON-STRUCTURED RAG OUTPUT")
100      print("=" * 60)
101      
102      # Build context
103      context = "\n\n".join([f"[{p['id']}]\n{p['content']}" for p in PRODUCT_CATALOG])
104      
105      agent = Agent(
106          name="Product Assistant",
107          instructions=f"""You are a product assistant that provides structured information.
108          Always respond with valid JSON matching the requested format.
109          Extract accurate information from the product catalog.
110          
111          PRODUCT CATALOG:
112          {context}""",
113          output="silent"
114      )
115      
116      # Request structured product info
117      query = """
118      Tell me about the UltraWidget Pro. Respond in this JSON format:
119      {
120          "name": "product name",
121          "price": 0.00,
122          "rating": 0.0,
123          "in_stock": true/false,
124          "key_features": ["feature1", "feature2", "feature3"]
125      }
126      """
127      
128      print("\nšŸ“ Query: Get structured product info")
129      response = agent.chat(query)
130      print("šŸ’” Structured Response:\n" + str(response))
131      print("-" * 40)
132  
133  
def pydantic_guided_rag() -> None:
    """Demonstrate RAG guided by a JSON schema derived from a Pydantic model.

    Generates the JSON schema of ``ProductComparison``, embeds it together
    with the joined ``PRODUCT_CATALOG`` text in the agent instructions, asks
    for a comparison of two products, and prints the reply. The reply is not
    validated against the model here.
    """
    # Local import: only this example needs JSON serialization.
    import json

    print("\n" + "=" * 60)
    print("PYDANTIC-GUIDED RAG")
    print("=" * 60)

    # Serialize the schema as real JSON. Interpolating the dict directly would
    # embed its Python repr (single-quoted keys), which is not the JSON the
    # instructions ask the model to follow.
    schema = json.dumps(ProductComparison.model_json_schema(), indent=2)

    # Build context: a "[id]" header line followed by each product's text.
    context = "\n\n".join(f"[{p['id']}]\n{p['content']}" for p in PRODUCT_CATALOG)

    agent = Agent(
        name="Product Comparator",
        instructions=f"""You compare products and provide structured recommendations.
        Your response MUST be valid JSON matching this schema:
        {schema}
        
        Be objective and base recommendations on the product data.
        
        PRODUCT CATALOG:
        {context}""",
        # NOTE(review): presumably suppresses the agent's own console output;
        # confirm against the praisonaiagents documentation.
        output="silent",
    )

    query = "Compare the UltraWidget Pro and SmartHome Hub. Which should I buy?"

    print(f"\n📝 Query: {query}")
    print("📋 Expected Schema: ProductComparison")

    response = agent.chat(query)
    print(f"\n💡 Structured Comparison:\n{response}")
167  
168  
def table_format_rag() -> None:
    """Demonstrate RAG with table-formatted output.

    Embeds the joined ``PRODUCT_CATALOG`` text in the agent instructions,
    which ask for markdown tables, then requests a name/price/rating table
    and prints the reply as returned by ``agent.chat``.
    """
    print("\n" + "=" * 60)
    print("TABLE-FORMATTED RAG")
    print("=" * 60)

    # Build context: a "[id]" header line followed by each product's text.
    context = "\n\n".join(f"[{p['id']}]\n{p['content']}" for p in PRODUCT_CATALOG)

    agent = Agent(
        name="Catalog Browser",
        instructions=f"""You present product information in clean table format.
        Use markdown tables for structured data.
        Include relevant columns based on the query.
        
        PRODUCT CATALOG:
        {context}""",
        # NOTE(review): presumably suppresses the agent's own console output;
        # confirm against the praisonaiagents documentation.
        output="silent",
    )

    query = "Show me all products in a table with name, price, and rating."

    print(f"\n📝 Query: {query}")
    response = agent.chat(query)
    print(f"\n💡 Table Output:\n{response}")
195  
196  
def list_format_rag() -> None:
    """Demonstrate RAG with list-formatted output.

    Embeds the joined ``PRODUCT_CATALOG`` text in the agent instructions,
    which ask for bullet and numbered lists, then runs two queries. Each
    reply is printed, truncated to 400 characters followed by "..." when it
    is longer than that.
    """
    print("\n" + "=" * 60)
    print("LIST-FORMATTED RAG")
    print("=" * 60)

    # Build context: a "[id]" header line followed by each product's text.
    context = "\n\n".join(f"[{p['id']}]\n{p['content']}" for p in PRODUCT_CATALOG)

    agent = Agent(
        name="Feature Lister",
        instructions=f"""You extract and present information as organized lists.
        Use bullet points and numbered lists appropriately.
        Group related items together.
        
        PRODUCT CATALOG:
        {context}""",
        # NOTE(review): presumably suppresses the agent's own console output;
        # confirm against the praisonaiagents documentation.
        output="silent",
    )

    queries = [
        "List all product features across the catalog",
        "What are the top-rated products? List them in order.",
    ]

    for query in queries:
        print(f"\n📝 Query: {query}")
        # Normalize to text once so the length check and the slice operate on
        # the same value; slicing the raw reply fails if it is not a string.
        text = str(agent.chat(query))
        if len(text) > 400:
            text = text[:400] + "..."
        print(f"💡 List Output:\n{text}")
        print("-" * 40)
227          print("-" * 40)
228  
229  
def constrained_generation() -> None:
    """Print a static overview of constrained/guided generation patterns.

    No agent is created and nothing is computed: the function writes a banner
    and a fixed, markdown-style cheat sheet of five techniques to stdout.
    The code fences inside the text are illustrative snippets only; because
    the text is a plain (non-f) string, ``{schema}`` is printed literally.
    """
    print("\n" + "=" * 60)
    print("CONSTRAINED GENERATION PATTERNS")
    print("=" * 60)

    print("""
    🎯 Constrained Generation Techniques:
    
    1. **Schema Enforcement**
       - Provide JSON schema in instructions
       - Agent outputs valid JSON matching schema
       ```python
       instructions = f"Respond with JSON matching: {schema}"
       ```
    
    2. **Format Templates**
       - Give explicit output templates
       - Agent fills in the blanks
       ```python
       instructions = '''
       Respond in this format:
       PRODUCT: [name]
       PRICE: $[amount]
       VERDICT: [recommendation]
       '''
       ```
    
    3. **Pydantic Validation**
       - Parse agent output with Pydantic
       - Retry if validation fails
       ```python
       response = agent.chat(query)
       try:
           result = ProductInfo.model_validate_json(response)
       except ValidationError:
           # Retry with clarification
           response = agent.chat("Return ONLY valid JSON. " + query)
       ```
    
    4. **Output Parsers**
       - Post-process agent output
       - Extract structured data from text
       ```python
       def parse_product(text: str) -> dict:
           # Extract price, rating, etc.
           return structured_data
       ```
    
    5. **Few-Shot Examples**
       - Show examples of desired output format
       - Agent learns pattern from examples
       ```python
       instructions = '''
       Example:
       Q: Tell me about Product X
       A: {"name": "Product X", "price": 99.99}
       
       Now answer the user's question in the same format.
       '''
       ```
    """)
292  
293  
def multi_format_rag() -> None:
    """Demonstrate answering one query in several output formats.

    For each (format name, format instruction) pair a fresh agent is created
    whose instructions combine that format instruction with the joined
    ``PRODUCT_CATALOG`` text. The same query is sent to every agent and each
    reply is printed, truncated to 250 characters followed by "..." when it
    is longer than that.
    """
    print("\n" + "=" * 60)
    print("MULTI-FORMAT RAG")
    print("=" * 60)

    base_knowledge = PRODUCT_CATALOG
    query = "Tell me about the ErgoDesk Standing Desk"

    formats = [
        ("JSON", "Respond with a JSON object containing name, price, and features."),
        ("Markdown", "Respond with a markdown-formatted product card with headers."),
        ("Plain Text", "Respond with a brief, conversational product description."),
        ("Bullet Points", "Respond with bullet points covering key product details."),
    ]

    print(f"\n📝 Base Query: {query}")
    print("\n🔄 Same query, different output formats:\n")

    # Build context once; it is identical for every format.
    context = "\n\n".join(f"[{p['id']}]\n{p['content']}" for p in base_knowledge)

    for format_name, format_instruction in formats:
        # A new agent per format, because the instructions differ each time.
        agent = Agent(
            name=f"{format_name} Agent",
            instructions=f"""You are a product expert. {format_instruction}
            
            PRODUCT CATALOG:
            {context}""",
            # NOTE(review): presumably suppresses the agent's own console
            # output; confirm against the praisonaiagents documentation.
            output="silent",
        )

        # Normalize to text once so the length check and the slice operate on
        # the same value; slicing the raw reply fails if it is not a string.
        text = str(agent.chat(query))
        if len(text) > 250:
            text = text[:250] + "..."
        print(f"📋 {format_name} Format:")
        print(text)
        print("-" * 40)
331  
332  
def main() -> None:
    """Run all structured output RAG examples.

    Prints a banner, calls the six example functions in a fixed order, and
    prints a completion message. Exceptions raised by an example are not
    caught here.
    """
    print("\n🚀 PraisonAI Structured Output RAG Examples\n")

    # Example 1: JSON output
    json_output_rag()

    # Example 2: Pydantic-guided
    pydantic_guided_rag()

    # Example 3: Table format
    table_format_rag()

    # Example 4: List format
    list_format_rag()

    # Example 5: Constrained generation patterns
    constrained_generation()

    # Example 6: Multi-format
    multi_format_rag()

    print("\n✅ Structured output RAG examples completed!")
356  
357  
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()