Cradicle Explorer

/ tests / core-tests / test_agent_core.py
test_agent_core.py
  1  #!/usr/bin/env python3
  2  """
  3  Core integration tests for the Ag3ntum agent.
  4  
  5  Tests that the agent can execute a simple task successfully
  6  and produces valid output.yaml and agent.jsonl files.
  7  """
  8  import json
  9  import os
 10  import subprocess
 11  import sys
 12  from pathlib import Path
 13  from typing import Optional
 14  
 15  import pytest
 16  import yaml
 17  
 18  
# Path configuration
# Directory that contains this test module.
TESTS_DIR: Path = Path(__file__).parent
# Test inputs (task file and permission profile) are read from here by the fixtures.
INPUT_DIR: Path = TESTS_DIR / "input"
AGENT_DIR: Path = TESTS_DIR.parent.parent  # Root directory where src/ is located
# Directory scanned for session folders after the agent has run.
SESSIONS_DIR: Path = AGENT_DIR / "sessions"
 24  
 25  
 26  def _is_real_api_key(key: str) -> bool:
 27      """Return True only if the key looks like a real Anthropic API key."""
 28      if not key or len(key) < 50:
 29          return False
 30      if "placeholder" in key or "REPLACE" in key or "000000" in key:
 31          return False
 32      return key.startswith("sk-ant-")
 33  
 34  
 35  def _check_api_key_available() -> bool:
 36      """
 37      Check if a real ANTHROPIC_API_KEY is available from any source.
 38  
 39      Rejects placeholder/dummy keys used in CI environments.
 40      """
 41      for env_var in ("ANTHROPIC_API_KEY", "CLOUDLINUX_ANTHROPIC_API_KEY"):
 42          key = os.environ.get(env_var, "")
 43          if _is_real_api_key(key):
 44              return True
 45  
 46      # Check secrets.yaml (both in Docker /config and local config/)
 47      secrets_paths = [
 48          Path("/config/secrets.yaml"),  # Docker mount
 49          AGENT_DIR / "config" / "secrets.yaml",  # Local development
 50      ]
 51  
 52      for secrets_path in secrets_paths:
 53          if secrets_path.exists():
 54              try:
 55                  with open(secrets_path) as f:
 56                      secrets = yaml.safe_load(f) or {}
 57                  key = secrets.get("anthropic_api_key", "")
 58                  if _is_real_api_key(key):
 59                      return True
 60              except (yaml.YAMLError, OSError):
 61                  pass
 62  
 63      return False
 64  
 65  
# Check if API key is available for E2E tests.
# Evaluated once at import time so the skipif decorator on the E2E test can use it.
HAS_API_KEY: bool = _check_api_key_available()
 68  
 69  
 70  @pytest.fixture
 71  def task_file() -> Path:
 72      """Path to the test task file."""
 73      return INPUT_DIR / "task.md"
 74  
 75  
 76  @pytest.fixture
 77  def user_profile() -> Path:
 78      """Path to the permissive user profile for testing."""
 79      return INPUT_DIR / "permissions.user.permissive.yaml"
 80  
 81  
 82  def find_latest_session_dir() -> Optional[Path]:
 83      """
 84      Find the most recently created session directory.
 85  
 86      Returns:
 87          Path to the latest session directory, or None if not found.
 88      """
 89      if not SESSIONS_DIR.exists():
 90          return None
 91  
 92      session_dirs = [
 93          d for d in SESSIONS_DIR.iterdir()
 94          if d.is_dir() and not d.name.startswith(".")
 95      ]
 96  
 97      if not session_dirs:
 98          return None
 99  
100      # Sort by directory name (contains timestamp)
101      session_dirs.sort(key=lambda d: d.name, reverse=True)
102      return session_dirs[0]
103  
104  
class TestAgentCore:
    """Core agent integration tests."""

    @pytest.mark.integration
    @pytest.mark.skipif(not HAS_API_KEY, reason="ANTHROPIC_API_KEY not set - skipping E2E test")
    def test_agent_executes_task_successfully(
        self,
        task_file: Path,
        user_profile: Path,
    ) -> None:
        """
        Test that the agent can execute a task and produce valid output.

        This test:
        1. Runs the agent (``python -m src.core``) with the test task and
           permissive profile, from the repository root
        2. Verifies the process exits with code 0
        3. Verifies a new session directory was created by this run
        4. Verifies agent.jsonl last record has valid subtype, is_error=false, non-empty result

        NOTE: output.yaml is not inspected here; only the exit code and
        agent.jsonl are checked.
        """
        # Verify test files exist
        assert task_file.exists(), f"Task file not found: {task_file}"
        assert user_profile.exists(), f"User profile not found: {user_profile}"

        # Run the agent as a module (python -m src.core)
        cmd = [
            sys.executable,
            "-m", "src.core",
            "--task-file", str(task_file),
            "--profile", str(user_profile),
            "--timeout", "120",
            "--max-turns", "20",
        ]

        env = os.environ.copy()
        # Mirror the HAS_API_KEY logic: a placeholder ANTHROPIC_API_KEY (as
        # used in CI) must not block a real key supplied under the
        # alternative variable name.
        if not _is_real_api_key(env.get("ANTHROPIC_API_KEY", "")):
            alt_key = env.get("CLOUDLINUX_ANTHROPIC_API_KEY", "")
            if _is_real_api_key(alt_key):
                env["ANTHROPIC_API_KEY"] = alt_key

        # Remember which sessions already exist so that a leftover directory
        # from an earlier run cannot be mistaken for this run's output.
        sessions_before = (
            {entry.name for entry in SESSIONS_DIR.iterdir()}
            if SESSIONS_DIR.is_dir()
            else set()
        )

        result = subprocess.run(
            cmd,
            cwd=str(AGENT_DIR),
            env=env,
            capture_output=True,
            text=True,
            timeout=180,  # 3 minute timeout for the test
        )

        # Log output for debugging
        print(f"STDOUT:\n{result.stdout}")
        print(f"STDERR:\n{result.stderr}")

        # Agent should exit with code 0 for COMPLETE status
        assert result.returncode == 0, (
            f"Agent exited with code {result.returncode}\n"
            f"STDOUT: {result.stdout}\n"
            f"STDERR: {result.stderr}"
        )

        # Find the session directory
        session_dir = find_latest_session_dir()
        assert session_dir is not None, "No session directory found"
        assert session_dir.name not in sessions_before, (
            f"Latest session directory predates this run: {session_dir}"
        )

        # Verify agent.jsonl exists and has valid completion record
        agent_jsonl_path = session_dir / "agent.jsonl"
        assert agent_jsonl_path.exists(), (
            f"agent.jsonl not found in session: {agent_jsonl_path}"
        )

        # Read all non-blank lines; JSON text is UTF-8 regardless of locale.
        with open(agent_jsonl_path, "r", encoding="utf-8") as f:
            lines = [line.strip() for line in f if line.strip()]

        assert len(lines) > 0, "agent.jsonl is empty"

        # Parse the last record
        last_record = json.loads(lines[-1])
        assert isinstance(last_record, dict), (
            f"Last record is not a JSON object: {last_record!r}"
        )

        # Verify last record has valid subtype
        assert "subtype" in last_record, (
            f"Last record missing 'subtype' field: {last_record}"
        )
        subtype = last_record.get("subtype")
        # Valid subtypes for final record include "success", "result", etc.
        assert subtype is not None and subtype != "", (
            f"Expected non-empty subtype, got: {subtype!r}"
        )

        # Verify is_error is false
        is_error = last_record.get("is_error")
        assert is_error is False, (
            f"Expected is_error=False, got: {is_error!r}"
        )

        # Verify result is non-empty
        result_field = last_record.get("result")
        assert result_field is not None and result_field != "", (
            f"Expected non-empty result, got: {result_field!r}"
        )

        # The result may be a non-string JSON value; stringify before slicing.
        result_preview = str(result_field)[:100]

        print(f"\n✓ Session completed successfully: {session_dir.name}")
        print(f"  Subtype: {subtype}")
        print(f"  Result: {result_preview}...")
209  
210  
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly
    # reports failures to the shell instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))