test_agent_core.py
#!/usr/bin/env python3
"""
Core integration tests for the Ag3ntum agent.

Tests that the agent can execute a simple task successfully
and records a valid completion entry in the session's agent.jsonl.

NOTE(review): output.yaml is mentioned in earlier revisions of this
docstring but is not inspected by any test in this module.
"""
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Dict, Optional

import pytest
import yaml


# Path configuration
TESTS_DIR: Path = Path(__file__).parent
INPUT_DIR: Path = TESTS_DIR / "input"
AGENT_DIR: Path = TESTS_DIR.parent.parent  # Root directory where src/ is located
SESSIONS_DIR: Path = AGENT_DIR / "sessions"

# Environment variable the agent subprocess is given the key through, and the
# alternative variable that may carry the key instead.
PRIMARY_KEY_ENV_VAR: str = "ANTHROPIC_API_KEY"
ALTERNATIVE_KEY_ENV_VAR: str = "CLOUDLINUX_ANTHROPIC_API_KEY"


def _is_real_api_key(key: object) -> bool:
    """
    Return True only if the key looks like a real Anthropic API key.

    Args:
        key: Candidate key. Anything that is not a string (for example a
            number or list loaded from YAML, or None) is rejected instead
            of raising.

    Returns:
        True when the key is a string of at least 50 characters, starts
        with "sk-ant-", and contains none of the known placeholder markers.
    """
    # Values loaded from secrets.yaml are not guaranteed to be strings.
    if not isinstance(key, str):
        return False
    if not key or len(key) < 50:
        return False
    if "placeholder" in key or "REPLACE" in key or "000000" in key:
        return False
    return key.startswith("sk-ant-")


def _check_api_key_available() -> bool:
    """
    Check if a real ANTHROPIC_API_KEY is available from any source.

    Looks at the environment first, then at secrets.yaml files.
    Rejects placeholder/dummy keys used in CI environments.

    Returns:
        True if any source yields a key accepted by _is_real_api_key.
    """
    for env_var in (PRIMARY_KEY_ENV_VAR, ALTERNATIVE_KEY_ENV_VAR):
        if _is_real_api_key(os.environ.get(env_var, "")):
            return True

    # Check secrets.yaml (both in Docker /config and local config/)
    secrets_paths = [
        Path("/config/secrets.yaml"),  # Docker mount
        AGENT_DIR / "config" / "secrets.yaml",  # Local development
    ]

    for secrets_path in secrets_paths:
        if not secrets_path.exists():
            continue
        try:
            # Binary mode lets PyYAML detect the encoding itself, so the
            # result does not depend on the locale's default encoding and
            # undecodable bytes surface as yaml.YAMLError.
            with open(secrets_path, "rb") as f:
                secrets = yaml.safe_load(f)
        except (yaml.YAMLError, OSError):
            # Best effort: an unreadable or malformed file means "no key here".
            continue
        # The document may be empty or something other than a mapping.
        if isinstance(secrets, dict) and _is_real_api_key(
            secrets.get("anthropic_api_key", "")
        ):
            return True

    return False


def _build_agent_env() -> Dict[str, str]:
    """
    Build the environment for the agent subprocess.

    Copies the current environment and makes sure ANTHROPIC_API_KEY carries
    the key the skip check accepted: the alternative variable is used when
    the primary one is absent, or when the primary one is a placeholder and
    the alternative one is a real key.

    Returns:
        A new environment mapping; os.environ itself is not modified.
    """
    env = os.environ.copy()
    primary = env.get(PRIMARY_KEY_ENV_VAR)
    alt_key = env.get(ALTERNATIVE_KEY_ENV_VAR)
    if not alt_key:
        return env

    primary_is_placeholder = primary is not None and not _is_real_api_key(primary)
    if primary is None or (primary_is_placeholder and _is_real_api_key(alt_key)):
        env[PRIMARY_KEY_ENV_VAR] = alt_key
    return env


# Check if API key is available for E2E tests
HAS_API_KEY = _check_api_key_available()


@pytest.fixture
def task_file() -> Path:
    """Path to the test task file."""
    return INPUT_DIR / "task.md"


@pytest.fixture
def user_profile() -> Path:
    """Path to the permissive user profile for testing."""
    return INPUT_DIR / "permissions.user.permissive.yaml"


def find_latest_session_dir() -> Optional[Path]:
    """
    Find the latest session directory.

    "Latest" means the greatest directory name in SESSIONS_DIR; hidden
    entries (names starting with ".") and non-directories are ignored.

    Returns:
        Path to the latest session directory, or None if not found.
    """
    if not SESSIONS_DIR.exists():
        return None

    session_dirs = [
        d for d in SESSIONS_DIR.iterdir()
        if d.is_dir() and not d.name.startswith(".")
    ]

    # Names are compared as strings (they contain a timestamp).
    # NOTE(review): this assumes the timestamp format sorts lexicographically
    # and that no stale session outranks the one just created - confirm.
    return max(session_dirs, key=lambda d: d.name, default=None)


class TestAgentCore:
    """Core agent integration tests."""

    @pytest.mark.integration
    @pytest.mark.skipif(not HAS_API_KEY, reason="ANTHROPIC_API_KEY not set - skipping E2E test")
    def test_agent_executes_task_successfully(
        self,
        task_file: Path,
        user_profile: Path,
    ) -> None:
        """
        Test that the agent can execute a task and produce valid output.

        This test:
        1. Runs the agent with the test task and permissive profile
        2. Verifies the agent process exits with code 0
        3. Verifies agent.jsonl last record has valid subtype, is_error=false, non-empty result
        """
        # Verify test files exist
        assert task_file.exists(), f"Task file not found: {task_file}"
        assert user_profile.exists(), f"User profile not found: {user_profile}"

        # Run the agent as a module (python -m src.core)
        cmd = [
            sys.executable,
            "-m", "src.core",
            "--task-file", str(task_file),
            "--profile", str(user_profile),
            "--timeout", "120",
            "--max-turns", "20",
        ]

        # Ensure the subprocess sees the same key the skip check accepted.
        env = _build_agent_env()

        result = subprocess.run(
            cmd,
            cwd=str(AGENT_DIR),
            env=env,
            capture_output=True,
            text=True,
            timeout=180,  # 3 minute timeout for the test
        )

        # Log output for debugging
        print(f"STDOUT:\n{result.stdout}")
        print(f"STDERR:\n{result.stderr}")

        # Agent should exit with code 0 for COMPLETE status
        assert result.returncode == 0, (
            f"Agent exited with code {result.returncode}\n"
            f"STDOUT: {result.stdout}\n"
            f"STDERR: {result.stderr}"
        )

        # Find the session directory
        session_dir = find_latest_session_dir()
        assert session_dir is not None, "No session directory found"

        # Verify agent.jsonl exists and has valid completion record
        agent_jsonl_path = session_dir / "agent.jsonl"
        assert agent_jsonl_path.exists(), (
            f"agent.jsonl not found in session: {agent_jsonl_path}"
        )

        # Read all non-blank lines; the last one is the completion record.
        with open(agent_jsonl_path, "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f if line.strip()]

        assert len(lines) > 0, "agent.jsonl is empty"

        # Parse the last record
        last_record = json.loads(lines[-1])
        assert isinstance(last_record, dict), (
            f"Last record is not a JSON object: {last_record!r}"
        )

        # Verify last record has valid subtype
        assert "subtype" in last_record, (
            f"Last record missing 'subtype' field: {last_record}"
        )
        subtype = last_record.get("subtype")
        # Valid subtypes for final record include "success", "result", etc.
        assert subtype is not None and subtype != "", (
            f"Expected non-empty subtype, got: {subtype!r}"
        )

        # Verify is_error is false
        is_error = last_record.get("is_error")
        assert is_error is False, (
            f"Expected is_error=False, got: {is_error!r}"
        )

        # Verify result is non-empty
        result_field = last_record.get("result")
        assert result_field is not None and result_field != "", (
            f"Expected non-empty result, got: {result_field!r}"
        )

        print(f"\n✓ Session completed successfully: {session_dir.name}")
        print(f" Subtype: {subtype}")
        # str() keeps the summary line safe when the result is not a string.
        print(f" Result: {str(result_field)[:100]}...")


if __name__ == "__main__":
    # Propagate pytest's exit status so callers and CI see failures.
    sys.exit(pytest.main([__file__, "-v", "-s"]))