# test_client.py
"""Unit tests for ArgusClient SDK."""

from __future__ import annotations

import argus_ai
from argus_ai.monitoring.thresholds import ThresholdConfig
from argus_ai.scoring.garvis import GarvisWeights
from argus_ai.sdk.client import ArgusClient
from argus_ai.types import AgenticEvalRequest, EvalRequest


class TestInit:
    """Construction paths of argus_ai.init(): defaults, profiles, custom config."""

    def test_default_init(self):
        client = argus_ai.init()
        assert isinstance(client, ArgusClient)

    def test_init_with_profile(self):
        # Every supported profile name should yield a working client.
        for profile in ["enterprise", "healthcare", "finance", "consumer", "agentic"]:
            client = argus_ai.init(profile=profile)
            assert isinstance(client, ArgusClient)

    def test_init_with_custom_weights(self):
        w = GarvisWeights(safety=0.50, groundedness=0.10)
        client = argus_ai.init(weights=w)
        assert client.weights.safety == 0.50

    def test_init_with_thresholds(self):
        config = ThresholdConfig(composite_min=0.85, safety_min=0.95)
        client = argus_ai.init(thresholds=config)
        assert client.thresholds.composite_min == 0.85

    def test_init_with_alert_callback(self):
        # A composite_min of 0.99 is expected to be breached by a trivial
        # evaluation, so the on_alert callback must fire at least once.
        alerts_fired = []
        client = argus_ai.init(
            thresholds=ThresholdConfig(composite_min=0.99),
            on_alert=lambda msg, res: alerts_fired.append(msg),
        )
        client.evaluate(
            prompt="test", response="test response"
        )
        assert len(alerts_fired) >= 1


class TestEvaluate:
    """Full evaluation API: result shape, optional fields, alerts, request objects."""

    def test_basic_evaluate(self):
        client = argus_ai.init()
        result = client.evaluate(
            prompt="What is Python?",
            response="Python is a programming language.",
        )
        assert 0.0 <= result.garvis_composite <= 1.0
        assert result.request_id is not None
        assert result.evaluation_ms > 0

    def test_evaluate_with_all_fields(self):
        client = argus_ai.init()
        result = client.evaluate(
            prompt="What is Python?",
            response="Python is a programming language.",
            context="Python was created by Guido van Rossum.",
            ground_truth="Python is a high-level programming language.",
            model_name="claude-sonnet-4",
            latency_ms=500.0,
            input_tokens=20,
            output_tokens=15,
            cost_usd=0.001,
            metadata={"env": "test"},
        )
        assert 0.0 <= result.garvis_composite <= 1.0
        assert len(result.metric_details) == 6

    def test_evaluate_with_pii_triggers_safety_alert(self):
        # Response contains an email address and phone number, which should
        # depress the safety score below the configured minimum and raise alerts.
        client = argus_ai.init(
            thresholds=ThresholdConfig(safety_min=0.95)
        )
        result = client.evaluate(
            prompt="Give me contact info",
            response="Email john@example.com or call 555-123-4567.",
        )
        assert result.safety < 0.95
        assert len(result.alerts) >= 1

    def test_evaluate_request_method(self):
        client = argus_ai.init()
        req = EvalRequest(
            prompt="test prompt",
            response="test response",
        )
        result = client.evaluate_request(req)
        assert 0.0 <= result.garvis_composite <= 1.0


class TestScore:
    """Lightweight score() API: composite bounds and context grounding."""

    def test_quick_score(self):
        client = argus_ai.init()
        score = client.score(
            prompt="What is AI?",
            response="AI is artificial intelligence.",
        )
        assert 0.0 <= score.composite <= 1.0
        assert score.weights_used is not None

    def test_quick_score_with_context(self):
        client = argus_ai.init()
        score = client.score(
            prompt="What is AI?",
            response=(
                "Based on the context, AI refers to artificial "
                "intelligence systems."
            ),
            context="AI stands for artificial intelligence.",
        )
        assert score.groundedness > 0.3  # grounded in short context


class TestBatchEvaluate:
    """batch_evaluate() returns one bounded result per request, in order."""

    def test_batch_evaluate(self):
        client = argus_ai.init()
        requests = [
            EvalRequest(prompt=f"Q{i}", response=f"Answer {i}")
            for i in range(5)
        ]
        results = client.batch_evaluate(requests)
        assert len(results) == 5
        for r in results:
            assert 0.0 <= r.garvis_composite <= 1.0


class TestAgenticEvaluate:
    """evaluate_agentic(): extra agent metrics alongside the garvis result."""

    def test_agentic_evaluate(self):
        client = argus_ai.init(profile="agentic")
        req = AgenticEvalRequest(
            prompt="Run pipeline",
            response="Pipeline done.",
            steps_planned=5,
            steps_completed=4,
            steps_failed=1,
            steps_recovered=1,
            retries=2,
            total_cost_usd=0.30,
        )
        result, agentic_metrics = client.evaluate_agentic(req)
        assert 0.0 <= result.garvis_composite <= 1.0
        assert len(agentic_metrics) == 3

        metric_names = {m.name for m in agentic_metrics}
        assert "AgentStabilityFactor" in metric_names
        assert "ErrorRecoveryRate" in metric_names
        assert "CostPerCompletedStep" in metric_names

        # Agentic metrics should also be in result.metric_details
        assert len(result.metric_details) == 9  # 6 garvis + 3 agentic


class TestExporters:
    """Exporter configuration: available exporters work, missing ones degrade gracefully."""

    def test_console_exporter_default(self):
        client = argus_ai.init(exporters=["console"])
        result = client.evaluate(
            prompt="test", response="test response"
        )
        assert result is not None

    def test_unavailable_exporter_warns(self):
        # Should not raise, just warn
        client = argus_ai.init(exporters=["prometheus", "opentelemetry"])
        result = client.evaluate(
            prompt="test", response="test response"
        )
        assert result is not None