# praisonai_external_agent.py
r"""
PraisonAI External Agent for Terminal-Bench 2.0 (Harbor)

An external agent that bridges PraisonAI's Agent class with Harbor's
BaseEnvironment interface. The agent uses Harbor's exec() as a bash tool.

Usage:
    harbor run -d terminal-bench/terminal-bench-2 \
        --agent-import-path examples.terminal_bench.praisonai_external_agent:PraisonAIExternalAgent \
        --model openai/gpt-4o \
        --ae OPENAI_API_KEY=$OPENAI_API_KEY

Architecture:
    Harbor Container ←→ BaseEnvironment.exec() ←→ bash_tool ←→ PraisonAI Agent

Dependencies:
    pip install harbor praisonaiagents
"""

import os
from typing import Any

try:
    from harbor.agents.base import BaseAgent
    from harbor.environments.base import BaseEnvironment
    from harbor.models.agent.context import AgentContext
except ImportError as e:
    raise ImportError(
        f"Harbor framework not installed: {e}\n"
        "Install with: pip install harbor"
    ) from e

try:
    from praisonaiagents import Agent
    from praisonaiagents.approval import get_approval_registry, AutoApproveBackend
except ImportError as e:
    raise ImportError(
        f"PraisonAI not installed: {e}\n"
        "Install with: pip install praisonaiagents"
    ) from e


# Name given to the PraisonAI agent; also the key under which the
# auto-approve backend is registered (and later removed) in run().
_AGENT_NAME = "terminal-agent"

# Model used when Harbor does not supply one via --model.
_DEFAULT_MODEL = "openai/gpt-4o"

# Timeout, in seconds, passed to environment.exec() for each shell command.
_COMMAND_TIMEOUT_SEC = 300

# Maximum number of "keep working" follow-up prompts sent after the first
# agent turn (so at most 1 + _MAX_FOLLOW_UPS chat turns per task).
_MAX_FOLLOW_UPS = 19

# Substrings (compared against the lower-cased agent reply) that are taken
# to mean the task's test passed. This is a plain substring heuristic.
_SUCCESS_SIGNALS = (
    " passed", "passed ", "test passed", "all tests pass", "1 passed",
)

# System instructions for the agent. They refer to the tool by the name
# "bash_tool", so the tool function defined in run() must keep that name.
_AGENT_INSTRUCTIONS = (
    "You are an expert terminal agent. You solve coding, debugging, data analysis, security, "
    "and system administration tasks using the bash_tool to run shell commands."
    "\n\nCRITICAL RULES:"
    "\n1. START by exploring: ls /app/ && cat /app/*.py /app/*.sh /app/*.txt 2>/dev/null | head -200"
    " Read ALL task files to understand what is needed before doing anything."
    "\n2. Find and run the test IMMEDIATELY to see what is expected:"
    " find /app -name 'test_*.py' | head -3 && cd /app && python3 -m pytest -v 2>&1 | tail -40 || true"
    " The test error trace tells you EXACTLY what output/behavior is required."
    "\n3. ITERATE: implement a solution, run the test, read the error, refine. Repeat until it passes."
    " Never give up after one attempt — try multiple approaches if needed."
    "\n4. For writing files with special characters use Python:"
    " python3 -c \"with open('/app/file','w') as f: f.write('content')\""
    " or heredoc: cat > /app/file << 'EOF'\\n...content...\\nEOF"
    "\n5. Read ALL error messages — they tell you exactly what to fix next."
    "\n6. NEVER just describe your plan. ALWAYS immediately run bash_tool commands."
    " Keep calling bash_tool until the test PASSES or you have exhausted all approaches."
)

# Follow-up prompt sent whenever a reply contains no success signal.
_CONTINUE_PROMPT = (
    "The task is NOT complete yet — keep working. Run bash_tool commands now. "
    "If you haven't already: (1) read all files in /app/, "
    "(2) run the test to see the exact failure, "
    "(3) implement a fix, (4) run the test again. "
    "Repeat until the test passes. What is your next bash_tool command?"
)


def _inject_agent_env(context: AgentContext) -> None:
    """Copy variables from ``context.env`` into ``os.environ``.

    Only keys that are not already present in the process environment and
    whose value is truthy are copied; existing variables are never
    overwritten. Values are converted with ``str()`` because ``os.environ``
    rejects non-string values with ``TypeError``.
    """
    agent_env = getattr(context, "env", {}) or {}
    for key, val in agent_env.items():
        if key not in os.environ and val:
            os.environ[key] = str(val)
            print(f"[ENV] Set {key} from Harbor agent env")


def _format_exec_result(result: Any) -> str:
    """Render an ``environment.exec()`` result as one string for the LLM.

    Reads ``result.stdout``, ``result.stderr`` and ``result.return_code``.
    Stdout comes first, stderr is tagged ``[stderr]:``, and a non-zero
    return code is appended as ``[exit_code]:``. Returns ``"(no output)"``
    when none of the three contributes anything.
    """
    parts = []
    if result.stdout:
        parts.append(result.stdout.strip())
    if result.stderr:
        parts.append(f"[stderr]: {result.stderr.strip()}")
    if result.return_code != 0:
        parts.append(f"[exit_code]: {result.return_code}")
    return "\n".join(parts) if parts else "(no output)"


def _signals_success(response: Any) -> bool:
    """Return True if the stringified reply contains any success signal."""
    text = str(response).lower()
    return any(signal in text for signal in _SUCCESS_SIGNALS)


class PraisonAIExternalAgent(BaseAgent):
    """External PraisonAI agent that drives a Harbor container environment."""

    @staticmethod
    def name() -> str:
        """Return the agent identifier, ``"praisonai"``."""
        return "praisonai"

    def version(self) -> str | None:
        """Return the installed praisonaiagents version.

        Returns:
            ``praisonaiagents.__version__`` or ``praisonaiagents.version``
            when either is set and truthy, ``"unknown"`` when neither is,
            and ``None`` if the package cannot be imported.
        """
        try:
            import praisonaiagents
        except ImportError:
            return None
        return (
            getattr(praisonaiagents, "__version__", None)
            or getattr(praisonaiagents, "version", None)
            or "unknown"
        )

    async def setup(self, environment: BaseEnvironment) -> None:
        """Setup phase - external agent needs no container setup."""
        pass

    async def run(
        self,
        instruction: str,
        environment: BaseEnvironment,
        context: AgentContext,
    ) -> None:
        """
        Run the PraisonAI agent on the given instruction using Harbor's environment.

        This method bridges Harbor's BaseEnvironment.exec() to PraisonAI's tool system.

        Args:
            instruction: Task description; sent as the first chat message.
            environment: Harbor environment whose ``exec()`` runs every
                shell command the agent issues.
            context: Harbor context. ``context.env`` (if present) is copied
                into ``os.environ``; token/cost fields and ``metadata`` are
                written on completion, and ``metadata`` is set to
                ``{"error": ...}`` on failure.

        Raises:
            Exception: Any exception raised while building or running the
                agent is re-raised after being recorded on ``context``.
        """
        # Inject API keys from Harbor's --ae env vars into host os.environ
        # so litellm can pick them up (--ae only sets them inside Docker, not the host)
        _inject_agent_env(context)

        # Set auto-approval for container-isolated execution
        # Harbor's container provides isolation, so we can safely auto-approve shell commands
        registry = get_approval_registry()
        original_backend = registry.get_backend()
        registry.set_backend(AutoApproveBackend(), agent_name=_AGENT_NAME)

        try:
            # NOTE: the function name and docstring below are kept verbatim;
            # the prompts refer to the tool as "bash_tool", and the docstring
            # is presumably used by PraisonAI as the tool description — confirm.
            async def bash_tool(command: str) -> str:
                """Execute a bash command in the Harbor sandboxed environment."""
                if not command.strip():
                    return "Error: Empty command provided"

                print(f"[CMD] {command[:200]}")
                try:
                    # Execute command in Harbor's container
                    result = await environment.exec(
                        command=command, timeout_sec=_COMMAND_TIMEOUT_SEC
                    )
                    # Format output similar to PraisonAI's execute_command tool
                    output = _format_exec_result(result)
                    print(f"[OUT] {output[:300]}")
                    return output
                except Exception as e:
                    # Deliberately broad: the failure is handed back to the
                    # LLM as text so it can react, instead of aborting the run.
                    print(f"[ERR] {e}")
                    return f"Error executing command: {e}"

            # Create PraisonAI agent with the bash tool
            agent = Agent(
                name=_AGENT_NAME,
                instructions=_AGENT_INSTRUCTIONS,
                tools=[bash_tool],
                llm=self.model_name or _DEFAULT_MODEL,
            )

            # Execute the agent with outer loop to handle premature stopping
            print(f"🚀 PraisonAI Agent starting task: {instruction[:100]}...")
            result = await agent.achat(instruction)
            for _ in range(_MAX_FOLLOW_UPS):
                # Stop if test passed
                if _signals_success(result):
                    break
                result = await agent.achat(_CONTINUE_PROMPT)
            print("✅ PraisonAI Agent completed task")

            # Populate Harbor context with metadata
            self._populate_context(agent, context, result)

        except Exception as e:
            print(f"❌ PraisonAI Agent failed: {e}")
            context.metadata = {"error": str(e)}
            raise
        finally:
            # The auto-approve backend was registered under agent_name, so it
            # has to be removed under agent_name. Previously this removal was
            # skipped whenever a global backend existed, which left
            # auto-approval active for this agent name after the run.
            registry.remove_backend(agent_name=_AGENT_NAME)
            # original_backend was read without an agent name, so it is
            # written back the same way (unchanged from the earlier behavior).
            if original_backend:
                registry.set_backend(original_backend)

    def _populate_context(self, agent: Agent, context: AgentContext, result: Any) -> None:
        """
        Populate Harbor's AgentContext with metrics from PraisonAI agent execution.

        Harbor tracks: n_input_tokens, n_output_tokens, cost_usd, metadata

        Args:
            agent: The agent that was run; read for usage metrics, ``name``
                and ``llm``.
            context: Harbor context to write to.
            result: Final agent reply; its string form, truncated to 500
                characters, is stored as ``final_response``.

        Never raises: metric extraction is best-effort, and a failure while
        building ``metadata`` is recorded as ``{"context_error": ...}``.
        """
        try:
            # Extract token usage and cost from agent
            try:
                # cost_summary is accepted both as a method and as a plain
                # attribute/property holding a dict (which of the two the
                # framework exposes is not confirmed here).
                summary = getattr(agent, "cost_summary", None)
                if callable(summary):
                    summary = summary()
                if isinstance(summary, dict):
                    context.n_input_tokens = summary.get("tokens_in")
                    context.n_output_tokens = summary.get("tokens_out")
                    context.cost_usd = summary.get("cost")
                else:
                    context.n_input_tokens = getattr(agent, "_total_tokens_in", 0)
                    context.n_output_tokens = getattr(agent, "_total_tokens_out", 0)
                    context.cost_usd = getattr(agent, "total_cost", None)
            except Exception as e:
                # Best-effort: missing metrics must not fail the run, but the
                # reason is reported instead of being dropped silently.
                print(f"[WARN] Could not extract usage metrics: {e}")

            # Store result summary and agent info
            context.metadata = {
                "agent_name": agent.name,
                "model": getattr(agent, "llm", "unknown"),
                "final_response": str(result)[:500] if result else None,
                "tools_used": ["bash_tool"],
                "framework": "praisonai",
                "version": self.version(),
            }

        except Exception as e:
            # Don't fail the whole run if context population fails
            context.metadata = {"context_error": str(e)}


# Example usage for testing
if __name__ == "__main__":
    print("PraisonAI External Agent for Terminal-Bench 2.0")
    print("Usage: harbor run -d terminal-bench/terminal-bench-2 \\")
    print(" --agent-import-path examples.terminal_bench.praisonai_external_agent:PraisonAIExternalAgent \\")
    print(" --model openai/gpt-4o")
    print()
    print("Dependencies:")
    print(" pip install harbor praisonaiagents")