Cradicle Explorer

/ examples / terminal_bench / praisonai_external_agent.py
praisonai_external_agent.py
  1  """
  2  PraisonAI External Agent for Terminal-Bench 2.0 (Harbor)
  3  
  4  An external agent that bridges PraisonAI's Agent class with Harbor's
  5  BaseEnvironment interface. The agent uses Harbor's exec() as a bash tool.
  6  
  7  Usage:
  8      harbor run -d terminal-bench/terminal-bench-2 \
  9          --agent-import-path examples.terminal_bench.praisonai_external_agent:PraisonAIExternalAgent \
 10          --model openai/gpt-4o \
 11          --ae OPENAI_API_KEY=$OPENAI_API_KEY
 12  
 13  Architecture:
 14      Harbor Container ←→ BaseEnvironment.exec() ←→ bash_tool ←→ PraisonAI Agent
 15  
 16  Dependencies:
 17      pip install harbor praisonaiagents
 18  """
 19  
 20  import os
 21  from typing import Any
 22  
 23  try:
 24      from harbor.agents.base import BaseAgent
 25      from harbor.environments.base import BaseEnvironment  
 26      from harbor.models.agent.context import AgentContext
 27  except ImportError as e:
 28      raise ImportError(
 29          f"Harbor framework not installed: {e}\n"
 30          "Install with: pip install harbor"
 31      ) from e
 32  
 33  try:
 34      from praisonaiagents import Agent
 35      from praisonaiagents.approval import get_approval_registry, AutoApproveBackend
 36  except ImportError as e:
 37      raise ImportError(
 38          f"PraisonAI not installed: {e}\n"
 39          "Install with: pip install praisonaiagents"
 40      ) from e
 41  
 42  
 43  class PraisonAIExternalAgent(BaseAgent):
 44      """External PraisonAI agent that drives a Harbor container environment."""
 45  
 46      @staticmethod
 47      def name() -> str:
 48          return "praisonai"
 49  
 50      def version(self) -> str | None:
 51          try:
 52              import praisonaiagents
 53              return (
 54                  getattr(praisonaiagents, "__version__", None)
 55                  or getattr(praisonaiagents, "version", None)
 56                  or "unknown"
 57              )
 58          except ImportError:
 59              return None
 60  
 61      async def setup(self, environment: BaseEnvironment) -> None:
 62          """Setup phase - external agent needs no container setup."""
 63          pass
 64  
 65      async def run(
 66          self,
 67          instruction: str,
 68          environment: BaseEnvironment,
 69          context: AgentContext,
 70      ) -> None:
 71          """
 72          Run the PraisonAI agent on the given instruction using Harbor's environment.
 73          
 74          This method bridges Harbor's BaseEnvironment.exec() to PraisonAI's tool system.
 75          """
 76          
 77          # Inject API keys from Harbor's --ae env vars into host os.environ
 78          # so litellm can pick them up (--ae only sets them inside Docker, not the host)
 79          agent_env = getattr(context, 'env', {}) or {}
 80          for key, val in agent_env.items():
 81              if key not in os.environ and val:
 82                  os.environ[key] = val
 83                  print(f"[ENV] Set {key} from Harbor agent env")
 84  
 85          # Set auto-approval for container-isolated execution  
 86          # Harbor's container provides isolation, so we can safely auto-approve shell commands
 87          registry = get_approval_registry()
 88          original_backend = registry.get_backend()
 89          registry.set_backend(AutoApproveBackend(), agent_name="terminal-agent")
 90          
 91          try:
 92              # Create bash tool that wraps Harbor's environment.exec()
 93              async def bash_tool(command: str) -> str:
 94                  """Execute a bash command in the Harbor sandboxed environment."""
 95                  if not command.strip():
 96                      return "Error: Empty command provided"
 97                  
 98                  print(f"[CMD] {command[:200]}")
 99                  try:
100                      # Execute command in Harbor's container
101                      result = await environment.exec(command=command, timeout_sec=300)
102                      
103                      # Format output similar to PraisonAI's execute_command tool
104                      output_parts = []
105                      if result.stdout:
106                          output_parts.append(result.stdout.strip())
107                      if result.stderr:
108                          output_parts.append(f"[stderr]: {result.stderr.strip()}")
109                      if result.return_code != 0:
110                          output_parts.append(f"[exit_code]: {result.return_code}")
111                          
112                      output = "\n".join(output_parts) if output_parts else "(no output)"
113                      print(f"[OUT] {output[:300]}")
114                      return output
115                      
116                  except Exception as e:
117                      print(f"[ERR] {str(e)}")
118                      return f"Error executing command: {str(e)}"
119  
120              # Create PraisonAI agent with the bash tool
121              agent = Agent(
122                  name="terminal-agent",
123                  instructions=(
124                      "You are an expert terminal agent. You solve coding, debugging, data analysis, security, "
125                      "and system administration tasks using the bash_tool to run shell commands."
126                      "\n\nCRITICAL RULES:"
127                      "\n1. START by exploring: ls /app/ && cat /app/*.py /app/*.sh /app/*.txt 2>/dev/null | head -200"
128                      "   Read ALL task files to understand what is needed before doing anything."
129                      "\n2. Find and run the test IMMEDIATELY to see what is expected:"
130                      "   find /app -name 'test_*.py' | head -3 && cd /app && python3 -m pytest -v 2>&1 | tail -40 || true"
131                      "   The test error trace tells you EXACTLY what output/behavior is required."
132                      "\n3. ITERATE: implement a solution, run the test, read the error, refine. Repeat until it passes."
133                      "   Never give up after one attempt — try multiple approaches if needed."
134                      "\n4. For writing files with special characters use Python:"
135                      "   python3 -c \"with open('/app/file','w') as f: f.write('content')\""
136                      "   or heredoc: cat > /app/file << 'EOF'\\n...content...\\nEOF"
137                      "\n5. Read ALL error messages — they tell you exactly what to fix next."
138                      "\n6. NEVER just describe your plan. ALWAYS immediately run bash_tool commands."
139                      "   Keep calling bash_tool until the test PASSES or you have exhausted all approaches."
140                  ),
141                  tools=[bash_tool],
142                  llm=self.model_name or "openai/gpt-4o",
143              )
144  
145              # Execute the agent with outer loop to handle premature stopping
146              print(f"🚀 PraisonAI Agent starting task: {instruction[:100]}...")
147              result = await agent.achat(instruction)
148              for _iter in range(19):
149                  result_str = str(result)
150                  # Stop if test passed
151                  if any(sig in result_str.lower() for sig in [
152                      " passed", "passed ", "test passed", "all tests pass", "1 passed"
153                  ]):
154                      break
155                  result = await agent.achat(
156                      "The task is NOT complete yet — keep working. Run bash_tool commands now. "
157                      "If you haven't already: (1) read all files in /app/, "
158                      "(2) run the test to see the exact failure, "
159                      "(3) implement a fix, (4) run the test again. "
160                      "Repeat until the test passes. What is your next bash_tool command?"
161                  )
162              print(f"✅ PraisonAI Agent completed task")
163              
164              # Populate Harbor context with metadata
165              self._populate_context(agent, context, result)
166              
167          except Exception as e:
168              print(f"❌ PraisonAI Agent failed: {str(e)}")
169              context.metadata = {"error": str(e)}
170              raise
171          finally:
172              # Restore original approval backend to avoid global state pollution
173              if original_backend:
174                  registry.set_backend(original_backend)
175              else:
176                  registry.remove_backend(agent_name="terminal-agent")
177  
178      def _populate_context(self, agent: Agent, context: AgentContext, result: Any) -> None:
179          """
180          Populate Harbor's AgentContext with metrics from PraisonAI agent execution.
181          
182          Harbor tracks: n_input_tokens, n_output_tokens, cost_usd, metadata
183          """
184          try:
185              # Extract token usage and cost from agent
186              try:
187                  summary = agent.cost_summary() if callable(getattr(agent, 'cost_summary', None)) else None
188                  if isinstance(summary, dict):
189                      context.n_input_tokens = summary.get('tokens_in')
190                      context.n_output_tokens = summary.get('tokens_out')
191                      context.cost_usd = summary.get('cost')
192                  else:
193                      context.n_input_tokens = getattr(agent, '_total_tokens_in', 0)
194                      context.n_output_tokens = getattr(agent, '_total_tokens_out', 0)
195                      context.cost_usd = getattr(agent, 'total_cost', None)
196              except Exception:
197                  pass
198                  
199              # Store result summary and agent info
200              context.metadata = {
201                  "agent_name": agent.name,
202                  "model": getattr(agent, 'llm', 'unknown'),
203                  "final_response": str(result)[:500] if result else None,
204                  "tools_used": ["bash_tool"],
205                  "framework": "praisonai",
206                  "version": self.version(),
207              }
208              
209          except Exception as e:
210              # Don't fail the whole run if context population fails
211              context.metadata = {"context_error": str(e)}
212  
213  
# When executed directly, print a short usage banner instead of running anything.
if __name__ == "__main__":
    _usage_lines = (
        "PraisonAI External Agent for Terminal-Bench 2.0",
        "Usage: harbor run -d terminal-bench/terminal-bench-2 \\",
        "  --agent-import-path examples.terminal_bench.praisonai_external_agent:PraisonAIExternalAgent \\",
        "  --model openai/gpt-4o",
        "",  # blank separator line
        "Dependencies:",
        "  pip install harbor praisonaiagents",
    )
    print("\n".join(_usage_lines))