# test_client.py
"""Unit tests for ArgusClient SDK."""

from __future__ import annotations

import argus_ai
from argus_ai.monitoring.thresholds import ThresholdConfig
from argus_ai.scoring.garvis import GarvisWeights
from argus_ai.sdk.client import ArgusClient
from argus_ai.types import AgenticEvalRequest, EvalRequest


class TestInit:
    """Construction paths of argus_ai.init(): defaults, profiles, custom config."""

    def test_default_init(self):
        client = argus_ai.init()
        assert isinstance(client, ArgusClient)

    def test_init_with_profile(self):
        # Every supported profile name should yield a working client.
        for profile in ["enterprise", "healthcare", "finance", "consumer", "agentic"]:
            client = argus_ai.init(profile=profile)
            assert isinstance(client, ArgusClient)

    def test_init_with_custom_weights(self):
        w = GarvisWeights(safety=0.50, groundedness=0.10)
        client = argus_ai.init(weights=w)
        assert client.weights.safety == 0.50

    def test_init_with_thresholds(self):
        config = ThresholdConfig(composite_min=0.85, safety_min=0.95)
        client = argus_ai.init(thresholds=config)
        assert client.thresholds.composite_min == 0.85

    def test_init_with_alert_callback(self):
        # A composite_min of 0.99 is expected to be breached by a trivial
        # evaluation, so the on_alert callback must fire at least once.
        alerts_fired = []
        client = argus_ai.init(
            thresholds=ThresholdConfig(composite_min=0.99),
            on_alert=lambda msg, res: alerts_fired.append(msg),
        )
        client.evaluate(
            prompt="test", response="test response"
        )
        assert len(alerts_fired) >= 1


class TestEvaluate:
    """Full evaluation API: result shape, optional fields, alerts, request objects."""

    def test_basic_evaluate(self):
        client = argus_ai.init()
        result = client.evaluate(
            prompt="What is Python?",
            response="Python is a programming language.",
        )
        assert 0.0 <= result.garvis_composite <= 1.0
        assert result.request_id is not None
        assert result.evaluation_ms > 0

    def test_evaluate_with_all_fields(self):
        client = argus_ai.init()
        result = client.evaluate(
            prompt="What is Python?",
            response="Python is a programming language.",
            context="Python was created by Guido van Rossum.",
            ground_truth="Python is a high-level programming language.",
            model_name="claude-sonnet-4",
            latency_ms=500.0,
            input_tokens=20,
            output_tokens=15,
            cost_usd=0.001,
            metadata={"env": "test"},
        )
        assert 0.0 <= result.garvis_composite <= 1.0
        assert len(result.metric_details) == 6

    def test_evaluate_with_pii_triggers_safety_alert(self):
        # Response contains an email address and phone number, which should
        # depress the safety score below the configured minimum and raise alerts.
        client = argus_ai.init(
            thresholds=ThresholdConfig(safety_min=0.95)
        )
        result = client.evaluate(
            prompt="Give me contact info",
            response="Email john@example.com or call 555-123-4567.",
        )
        assert result.safety < 0.95
        assert len(result.alerts) >= 1

    def test_evaluate_request_method(self):
        client = argus_ai.init()
        req = EvalRequest(
            prompt="test prompt",
            response="test response",
        )
        result = client.evaluate_request(req)
        assert 0.0 <= result.garvis_composite <= 1.0


class TestScore:
    """Lightweight score() API: composite bounds and context grounding."""

    def test_quick_score(self):
        client = argus_ai.init()
        score = client.score(
            prompt="What is AI?",
            response="AI is artificial intelligence.",
        )
        assert 0.0 <= score.composite <= 1.0
        assert score.weights_used is not None

    def test_quick_score_with_context(self):
        client = argus_ai.init()
        score = client.score(
            prompt="What is AI?",
            response=(
                "Based on the context, AI refers to artificial "
                "intelligence systems."
            ),
            context="AI stands for artificial intelligence.",
        )
        assert score.groundedness > 0.3  # grounded in short context


class TestBatchEvaluate:
    """batch_evaluate() returns one bounded result per request, in order."""

    def test_batch_evaluate(self):
        client = argus_ai.init()
        requests = [
            EvalRequest(prompt=f"Q{i}", response=f"Answer {i}")
            for i in range(5)
        ]
        results = client.batch_evaluate(requests)
        assert len(results) == 5
        for r in results:
            assert 0.0 <= r.garvis_composite <= 1.0


class TestAgenticEvaluate:
    """evaluate_agentic(): extra agent metrics alongside the garvis result."""

    def test_agentic_evaluate(self):
        client = argus_ai.init(profile="agentic")
        req = AgenticEvalRequest(
            prompt="Run pipeline",
            response="Pipeline done.",
            steps_planned=5,
            steps_completed=4,
            steps_failed=1,
            steps_recovered=1,
            retries=2,
            total_cost_usd=0.30,
        )
        result, agentic_metrics = client.evaluate_agentic(req)
        assert 0.0 <= result.garvis_composite <= 1.0
        assert len(agentic_metrics) == 3

        metric_names = {m.name for m in agentic_metrics}
        assert "AgentStabilityFactor" in metric_names
        assert "ErrorRecoveryRate" in metric_names
        assert "CostPerCompletedStep" in metric_names

        # Agentic metrics should also be in result.metric_details
        assert len(result.metric_details) == 9  # 6 garvis + 3 agentic


class TestExporters:
    """Exporter configuration: available exporters work, missing ones degrade gracefully."""

    def test_console_exporter_default(self):
        client = argus_ai.init(exporters=["console"])
        result = client.evaluate(
            prompt="test", response="test response"
        )
        assert result is not None

    def test_unavailable_exporter_warns(self):
        # Should not raise, just warn
        client = argus_ai.init(exporters=["prometheus", "opentelemetry"])
        result = client.evaluate(
            prompt="test", response="test response"
        )
        assert result is not None