# test_integration.py
"""
Tests for PraisonAI Terminal-Bench 2.0 Integration

This module contains both unit tests and integration tests for the
PraisonAI Harbor integration.

Run tests with:
    python -m pytest examples/terminal_bench/test_integration.py -v

Requirements:
    pip install pytest praisonaiagents
    # Harbor is optional for unit tests, required for integration tests
"""

import asyncio
from pathlib import Path
from unittest.mock import AsyncMock, Mock, patch

import pytest


def _load_external_agent():
    """Return the ``PraisonAIExternalAgent`` class or skip the calling test.

    The import is done lazily so that collecting this module never fails when
    the agent module (or one of its dependencies) cannot be imported. The
    underlying ``ImportError`` is included in the skip reason so that a
    packaging or relative-import problem is not misreported as a missing
    Harbor installation.
    """
    try:
        from .praisonai_external_agent import PraisonAIExternalAgent
    except ImportError as exc:
        pytest.skip(f"Harbor not installed - skipping Harbor-specific tests ({exc})")
    return PraisonAIExternalAgent


def _load_installed_agent():
    """Return the ``PraisonAIInstalledAgent`` class or skip the calling test.

    Mirrors ``_load_external_agent``: the import error, if any, is surfaced in
    the skip reason instead of being silently discarded.
    """
    try:
        from .praisonai_installed_agent import PraisonAIInstalledAgent
    except ImportError as exc:
        pytest.skip(f"Harbor not installed ({exc})")
    return PraisonAIInstalledAgent


class TestPraisonAIExternalAgent:
    """Test the external agent implementation."""

    def test_agent_metadata(self):
        """Test agent name and version reporting."""
        agent = _load_external_agent()()
        assert agent.name() == "praisonai"

        # Version should be None if praisonaiagents not installed, or actual version
        version = agent.version()
        assert version is None or isinstance(version, str)

    @pytest.mark.asyncio
    async def test_setup(self):
        """Test agent setup phase."""
        agent = _load_external_agent()()
        mock_env = Mock()

        # Setup should complete without error (external agent needs no setup)
        await agent.setup(mock_env)

    @pytest.mark.asyncio
    async def test_bash_tool_execution(self):
        """Test the bash tool wrapper around Harbor's exec().

        The PraisonAI ``Agent`` class and the approval backend setter are
        patched, so no LLM call is made; the test checks the keyword arguments
        the agent implementation passes when it constructs the ``Agent``.
        """
        agent_cls = _load_external_agent()

        # Mock Harbor environment whose async exec() returns a canned result
        exec_result = Mock()
        exec_result.stdout = "Hello, World!"
        exec_result.stderr = ""
        exec_result.return_code = 0
        mock_env = Mock()
        mock_env.exec = AsyncMock(return_value=exec_result)

        # Mock agent context
        mock_context = Mock()
        mock_context.metadata = {}

        agent = agent_cls()

        # Mock PraisonAI Agent to avoid LLM calls in tests
        with patch('praisonaiagents.Agent') as mock_agent_class:
            mock_agent_instance = Mock()
            mock_agent_instance.start.return_value = "Task completed successfully"
            mock_agent_instance.name = "terminal-agent"
            mock_agent_class.return_value = mock_agent_instance

            # Mock approval backend
            with patch('praisonaiagents.approval.set_approval_backend'):
                await agent.run("echo 'Hello, World!'", mock_env, mock_context)

            # Verify agent was created with correct parameters
            mock_agent_class.assert_called_once()
            kwargs = mock_agent_class.call_args.kwargs

            assert kwargs['name'] == 'terminal-agent'
            assert 'tools' in kwargs
            assert len(kwargs['tools']) == 1  # bash_tool
            assert kwargs['verbose'] is False
            assert kwargs['memory'] is False

    def test_context_population(self):
        """Test that agent context is properly populated."""
        agent_impl = _load_external_agent()()

        # Mock agent with usage data
        mock_agent = Mock()
        mock_agent.name = "test-agent"
        mock_agent.llm = "gpt-4o"
        mock_agent._usage = Mock()
        mock_agent._usage.input_tokens = 100
        mock_agent._usage.output_tokens = 50
        mock_agent._cost = 0.01

        mock_context = Mock()
        mock_context.metadata = {}

        # Test context population
        agent_impl._populate_context(mock_agent, mock_context, "Test result")

        assert mock_context.n_input_tokens == 100
        assert mock_context.n_output_tokens == 50
        assert mock_context.cost_usd == 0.01
        assert mock_context.metadata['agent_name'] == 'test-agent'
        assert mock_context.metadata['model'] == 'gpt-4o'
        assert mock_context.metadata['framework'] == 'praisonai'


class TestPraisonAIInstalledAgent:
    """Test the installed agent implementation."""

    def test_agent_configuration(self):
        """Test agent CLI flags and configuration."""
        agent = _load_installed_agent()()
        assert agent.name() == "praisonai"
        assert agent.SUPPORTS_ATIF is False  # Until trajectory format is implemented

        # Check CLI flags: use the flag's ``name`` attribute when it has one,
        # otherwise fall back to its string form.
        flag_names = [
            flag.name if hasattr(flag, 'name') else str(flag)
            for flag in agent.CLI_FLAGS
        ]
        expected_flags = ['max_turns', 'verbose', 'memory', 'auto_approval']
        for expected in expected_flags:
            assert any(expected in flag_name for flag_name in flag_names), (
                f"CLI flag {expected!r} not found in {flag_names!r}"
            )

    def test_version_command(self):
        """Test version detection command."""
        agent = _load_installed_agent()()
        version_cmd = agent.get_version_command()

        assert version_cmd is not None
        assert "praisonaiagents" in version_cmd
        assert "__version__" in version_cmd

    def test_runner_script_generation(self):
        """Test that the headless runner script is properly generated."""
        agent = _load_installed_agent()()
        script = agent._build_runner_script()

        assert "import praisonaiagents" in script
        assert "Agent" in script
        assert "execute_command" in script
        assert "AutoApproveBackend" in script
        assert "json.dumps" in script


class TestIntegration:
    """Integration tests that require both PraisonAI and Harbor."""

    @pytest.mark.integration
    def test_praisonai_agent_real(self):
        """
        Real agentic test - agent must call LLM end-to-end.

        This is a MANDATORY test per AGENTS.md §9.4.
        Agent MUST call agent.start() with real prompt and produce LLM output.
        """
        try:
            from praisonaiagents import Agent
        except ImportError as exc:
            pytest.skip(f"PraisonAI not installed ({exc})")

        # Create real agent that will call LLM
        agent = Agent(
            name="test-terminal-agent",
            instructions="You are a helpful terminal assistant"
        )

        # Real agentic test - agent must call LLM and produce text response
        result = agent.start("Say hello in one sentence and mention you can help with terminal tasks")

        # Verify we got actual LLM output
        assert result is not None
        assert isinstance(result, str)
        assert len(result) > 0

        # Print output for manual verification
        print("✅ Real agentic test result:")
        print(result)

    @pytest.mark.integration
    @pytest.mark.asyncio
    async def test_bash_tool_real_execution(self):
        """Exercise the exec() calling convention against a stand-in coroutine.

        Despite the name, no real command is executed: a local ``mock_exec``
        coroutine simulates Harbor's BaseEnvironment.exec() so that the test
        makes no system changes.
        """
        mock_result = Mock()
        mock_result.stdout = "PraisonAI Terminal Test\n"
        mock_result.stderr = ""
        mock_result.return_code = 0

        # Test that our bash tool wrapper works correctly
        async def mock_exec(command, timeout_sec=30):
            assert "echo" in command  # Ensure we're testing echo command
            return mock_result

        # This simulates Harbor's BaseEnvironment.exec()
        result = await mock_exec("echo 'PraisonAI Terminal Test'")

        assert result.stdout.strip() == "PraisonAI Terminal Test"
        assert result.return_code == 0

    @pytest.mark.integration
    def test_auto_approval_setup(self):
        """Test that auto-approval backend works correctly."""
        try:
            from praisonaiagents.approval import set_approval_backend, AutoApproveBackend
        except ImportError as exc:
            pytest.skip(f"PraisonAI approval system not available ({exc})")

        # Test setting and restoring approval backend.
        # NOTE(review): relies on set_approval_backend() returning the backend
        # that was active before the call - confirm against praisonaiagents.
        original = set_approval_backend(AutoApproveBackend())
        new_backend = set_approval_backend(original)

        assert isinstance(new_backend, AutoApproveBackend)


if __name__ == "__main__":
    # Running this file as a script only reports dependency status and prints
    # the pytest commands to use; it does not execute the tests itself.
    import sys

    print("PraisonAI Terminal-Bench 2.0 Integration Tests")
    print("=" * 50)

    # Check dependencies
    try:
        import praisonaiagents
        print(f"✅ PraisonAI version: {praisonaiagents.__version__}")
    except ImportError:
        print("❌ PraisonAI not installed: pip install praisonaiagents")
        sys.exit(1)

    try:
        import harbor  # noqa: F401 - imported only to probe availability
        print("✅ Harbor framework available")
    except ImportError:
        print("⚠️ Harbor not installed: pip install harbor")
        print(" (Some tests will be skipped)")

    print()
    print("Run tests with: python -m pytest examples/terminal_bench/test_integration.py -v")
    print("Run real agentic test: python -m pytest examples/terminal_bench/test_integration.py::TestIntegration::test_praisonai_agent_real -v -s")