# multi_agent_example.py
"""
Multi-Agent PraisonAI Integration for Terminal-Bench 2.0

This example shows how to use PraisonAI's AgentTeam and AgentFlow
capabilities with Terminal-Bench 2.0 for complex multi-step tasks.

The multi-agent approach is useful for Terminal-Bench tasks that benefit from:
- Task planning and decomposition
- Specialized roles (planner, executor, verifier)
- Error correction and refinement
- Complex workflows with multiple phases

Usage:
    harbor run -d terminal-bench/terminal-bench-2 \
        --agent-import-path examples.terminal_bench.multi_agent_example:MultiAgentPraisonAI \
        --model openai/gpt-4o
"""

import asyncio
import contextlib
import re
from collections.abc import Iterable, Iterator
from typing import Any, Dict, Optional

try:
    from harbor.agents.base import BaseAgent
    from harbor.environments.base import BaseEnvironment
    from harbor.models.agent.context import AgentContext
except ImportError as e:
    raise ImportError(f"Harbor framework required: {e}") from e

try:
    from praisonaiagents import Agent, AgentTeam, Task
    from praisonaiagents.approval import get_approval_registry, AutoApproveBackend
except ImportError as e:
    raise ImportError(f"PraisonAI required: {e}") from e


# Model used when the Harbor run does not supply one.
DEFAULT_MODEL = "openai/gpt-4o"

# Per-command timeout (seconds) passed to ``environment.exec``.
BASH_TIMEOUT_SEC = 30

# Matches the start of a numbered-list item such as "1. " or "2) ".
_NUMBERED_STEP = re.compile(r"^\s*\d+[.)]\s", re.MULTILINE)


def _praisonai_version() -> str | None:
    """Return ``praisonaiagents.__version__``, or None when unavailable."""
    try:
        import praisonaiagents
        return getattr(praisonaiagents, "__version__", None)
    except ImportError:
        return None


def _format_exec_result(result: Any) -> str:
    """Render an ``environment.exec`` result as text for the LLM.

    Reads ``stdout``, ``stderr`` and ``return_code`` from *result*; stderr and
    a non-zero exit code are labelled so the model can tell them apart.
    """
    parts = []
    if result.stdout:
        parts.append(result.stdout.strip())
    if result.stderr:
        parts.append(f"[stderr]: {result.stderr.strip()}")
    if result.return_code != 0:
        parts.append(f"[exit_code]: {result.return_code}")
    return "\n".join(parts) if parts else "(no output)"


def _make_bash_tool(environment: BaseEnvironment, timeout_sec: int = BASH_TIMEOUT_SEC):
    """Build the ``bash_tool`` coroutine bound to *environment*.

    The inner function keeps the name ``bash_tool`` because the agent
    instructions refer to the tool by that name.
    """

    async def bash_tool(command: str) -> str:
        """Execute bash command in Harbor environment."""
        if not command or not command.strip():
            return "Error: Empty command"

        try:
            result = await environment.exec(command=command, timeout_sec=timeout_sec)
            return _format_exec_result(result)
        except Exception as e:
            # Deliberately broad: failures are reported back to the agent as
            # text so it can try another approach instead of aborting the run.
            return f"Error: {str(e)}"

    return bash_tool


@contextlib.contextmanager
def _auto_approve(agent_name: str) -> Iterator[None]:
    """Register an ``AutoApproveBackend`` under *agent_name* for the block.

    On exit the scoped registration is always removed, and the backend that
    ``registry.get_backend()`` returned on entry is re-installed if truthy.
    """
    registry = get_approval_registry()
    original_backend = registry.get_backend()
    registry.set_backend(AutoApproveBackend(), agent_name=agent_name)
    try:
        yield
    finally:
        # Drop the scoped registration unconditionally so it cannot outlive
        # the run, then restore the previously captured backend.
        registry.remove_backend(agent_name=agent_name)
        if original_backend:
            registry.set_backend(original_backend)


def _aggregate_usage(agents: Iterable[Any]) -> tuple[int, int, float]:
    """Sum input tokens, output tokens and cost over *agents*.

    Missing or ``None`` metrics count as zero so one agent without metrics
    does not discard the totals of the others.
    """
    tokens_in = 0
    tokens_out = 0
    cost = 0.0
    for agent in agents:
        tokens_in += getattr(agent, "_total_tokens_in", 0) or 0
        tokens_out += getattr(agent, "_total_tokens_out", 0) or 0
        cost += getattr(agent, "total_cost", None) or 0.0
    return tokens_in, tokens_out, cost


def _record_usage(context: AgentContext, agents: Iterable[Any]) -> None:
    """Write aggregated usage to *context*; zero totals are stored as None."""
    tokens_in, tokens_out, cost = _aggregate_usage(agents)
    context.n_input_tokens = tokens_in if tokens_in > 0 else None
    context.n_output_tokens = tokens_out if tokens_out > 0 else None
    context.cost_usd = cost if cost > 0 else None


def _count_plan_steps(plan: Any) -> int:
    """Estimate the number of steps in *plan* for progress logging.

    Counts numbered-list lines ("1. ", "2) ", ...); if there are none, falls
    back to the number of non-empty lines. Returns 0 for an empty plan.
    """
    if not plan:
        return 0
    text = str(plan)
    numbered = len(_NUMBERED_STEP.findall(text))
    if numbered:
        return numbered
    return sum(1 for line in text.splitlines() if line.strip())


class MultiAgentPraisonAI(BaseAgent):
    """Multi-agent PraisonAI system for complex Terminal-Bench tasks."""

    @staticmethod
    def name() -> str:
        return "praisonai-multi"

    def version(self) -> str | None:
        """Return the installed praisonaiagents version, if known."""
        return _praisonai_version()

    async def setup(self, environment: BaseEnvironment) -> None:
        """Setup phase - no container setup needed for external agent."""
        pass

    async def run(
        self,
        instruction: str,
        environment: BaseEnvironment,
        context: AgentContext,
    ) -> None:
        """
        Run multi-agent PraisonAI team on the Terminal-Bench task.

        Uses a three-agent approach:
        1. Planner: Analyzes task and creates execution plan
        2. Executor: Implements the plan using shell commands
        3. Verifier: Tests and validates the solution

        On success the context is populated with usage metrics and metadata.
        On failure ``context.metadata`` is set to ``{"error": ...}`` and the
        exception is re-raised.
        """
        model = self.model_name or DEFAULT_MODEL

        # Set auto-approval for container safety
        with _auto_approve("multi-agent-planner"):
            try:
                # Create bash tool that wraps Harbor environment
                bash_tool = _make_bash_tool(environment)

                # Create specialized agents
                planner = Agent(
                    name="planner",
                    instructions=(
                        "You are an expert task planner for terminal/coding tasks. "
                        "Analyze the given instruction and create a detailed, step-by-step plan. "
                        "Break down complex tasks into smaller, manageable steps. "
                        "Consider potential issues and edge cases. "
                        "Output your plan as a numbered list of specific actions."
                    ),
                    llm=model,
                )

                executor = Agent(
                    name="executor",
                    instructions=(
                        "You are an expert terminal executor. "
                        "Follow the provided plan step-by-step using bash commands. "
                        "Use the bash_tool to execute commands safely. "
                        "Be precise and check each step before proceeding. "
                        "If a step fails, try alternative approaches."
                    ),
                    tools=[bash_tool],
                    llm=model,
                )

                verifier = Agent(
                    name="verifier",
                    instructions=(
                        "You are a solution verifier and tester. "
                        "Test the completed solution to ensure it works correctly. "
                        "Run appropriate tests and checks using bash commands. "
                        "Report whether the solution meets the requirements. "
                        "If issues are found, suggest specific fixes."
                    ),
                    tools=[bash_tool],
                    llm=model,
                )

                print(f"🚀 Multi-Agent PraisonAI starting: {instruction[:100]}...")

                # Phase 1: Planning
                print("📋 Phase 1: Task Planning")
                plan = await planner.astart(f"Create a detailed plan for: {instruction}")
                print(f"Plan created: {_count_plan_steps(plan)} steps")

                # Phase 2: Execution
                print("⚡ Phase 2: Task Execution")
                execution_prompt = f"Execute this plan step by step:\n\nOriginal task: {instruction}\n\nPlan:\n{plan}"
                execution_result = await executor.astart(execution_prompt)
                print("Execution completed")

                # Phase 3: Verification
                print("✅ Phase 3: Solution Verification")
                verification_prompt = f"Verify this solution works correctly:\n\nOriginal task: {instruction}\n\nSolution: {execution_result}\n\nRun tests to confirm it works."
                verification_result = await verifier.astart(verification_prompt)
                print("Verification completed")

                # Combine results
                final_result = {
                    "plan": plan,
                    "execution": execution_result,
                    "verification": verification_result,
                }

                print("✅ Multi-Agent PraisonAI completed task")

                # Populate context
                self._populate_context([planner, executor, verifier], context, final_result)

            except Exception as e:
                print(f"❌ Multi-Agent PraisonAI failed: {str(e)}")
                context.metadata = {"error": str(e)}
                raise

    def _populate_context(self, agents: list, context: AgentContext, result: Dict[str, Any]) -> None:
        """Populate Harbor context with multi-agent metrics.

        Sets token counts and cost (None when the total is zero) and a
        metadata dict describing the run. Any failure while building these is
        recorded as ``{"context_error": ...}`` instead of being raised.
        """
        try:
            # Aggregate token usage from all agents
            _record_usage(context, agents)

            context.metadata = {
                "framework": "praisonai-multi",
                "agent_type": "multi-agent-team",
                "agents": [agent.name for agent in agents],
                "model": self.model_name or DEFAULT_MODEL,
                "phases": ["planning", "execution", "verification"],
                "tools_used": ["bash_tool"],
                "result_summary": str(result.get("verification", ""))[:200],
                "version": self.version(),
            }

        except Exception as e:
            # Best effort: metrics must never fail an otherwise successful run.
            context.metadata = {"context_error": str(e)}


# Alternative implementation using AgentTeam (more structured)
class AgentTeamPraisonAI(BaseAgent):
    """AgentTeam-based implementation for Terminal-Bench tasks."""

    @staticmethod
    def name() -> str:
        return "praisonai-team"

    def version(self) -> str | None:
        """Return the installed praisonaiagents version, if known."""
        return _praisonai_version()

    async def setup(self, environment: BaseEnvironment) -> None:
        """Setup phase - nothing to prepare."""
        pass

    async def run(
        self,
        instruction: str,
        environment: BaseEnvironment,
        context: AgentContext,
    ) -> None:
        """Run structured AgentTeam workflow.

        Builds a planner and an executor, wires them into two sequential
        tasks and runs them through ``AgentTeam``. On success the context
        receives usage metrics and metadata; on failure ``context.metadata``
        is set to ``{"error": ...}`` and the exception is re-raised.
        """
        model = self.model_name or DEFAULT_MODEL

        with _auto_approve("agent-team"):
            try:
                # Create bash tool
                bash_tool = _make_bash_tool(environment)

                # Create agents
                planner = Agent(
                    name="planner",
                    instructions="Create detailed execution plans for terminal tasks",
                    llm=model,
                )

                executor = Agent(
                    name="executor",
                    instructions="Execute terminal commands based on plans",
                    tools=[bash_tool],
                    llm=model,
                )

                # Create tasks
                plan_task = Task(
                    name="plan",
                    description=f"Create plan for: {instruction}",
                    agent=planner,
                )

                execute_task = Task(
                    name="execute",
                    description=f"Execute plan for: {instruction}",
                    agent=executor,
                    dependencies=[plan_task],  # Execute after planning
                )

                # Create and run team
                team = AgentTeam(
                    agents=[planner, executor],
                    tasks=[plan_task, execute_task],
                    process="sequential",
                )

                print(f"🚀 AgentTeam starting: {instruction[:100]}...")
                result = await team.astart(instruction)
                print("✅ AgentTeam completed")

                # Populate context
                _record_usage(context, [planner, executor])
                context.metadata = {
                    "framework": "praisonai-team",
                    "workflow": "sequential",
                    "agents_count": len(team.agents),
                    "tasks_count": len(team.tasks),
                    "result": str(result)[:200] if result else None,
                }

            except Exception as e:
                print(f"❌ AgentTeam failed: {str(e)}")
                context.metadata = {"error": str(e)}
                raise


def _print_usage() -> None:
    """Print the available agent implementations and example commands."""
    print("Multi-Agent PraisonAI for Terminal-Bench 2.0")
    print("=" * 50)
    print()
    print("Available agent implementations:")
    print("1. MultiAgentPraisonAI - Custom multi-agent workflow")
    print("2. AgentTeamPraisonAI - Structured AgentTeam workflow")
    print()
    print("Usage examples:")
    print()
    print("# Multi-agent custom workflow")
    print("harbor run -d terminal-bench/terminal-bench-2 \\")
    print(" --agent-import-path examples.terminal_bench.multi_agent_example:MultiAgentPraisonAI \\")
    print(" --model openai/gpt-4o")
    print()
    print("# AgentTeam structured workflow")
    print("harbor run -d terminal-bench/terminal-bench-2 \\")
    print(" --agent-import-path examples.terminal_bench.multi_agent_example:AgentTeamPraisonAI \\")
    print(" --model openai/gpt-4o")
    print()
    print("Benefits of multi-agent approach:")
    print("- Task decomposition and planning")
    print("- Specialized roles and expertise")
    print("- Error detection and correction")
    print("- Higher success rates on complex tasks")


if __name__ == "__main__":
    _print_usage()