test_providers_live.py
1 """Live provider integration tests — skipped by default, run with --run-live. 2 3 All tests make real API calls and require valid API keys. 4 5 Usage: 6 uv run pytest tests/test_providers_live.py --run-live -v 7 uv run pytest tests/test_providers_live.py --run-live -v -k anthropic 8 uv run pytest tests/test_providers_live.py --run-live -v -k BrightData 9 """ 10 11 __all__: list[str] = [] 12 13 import asyncio 14 import json 15 import os 16 import sys 17 from pathlib import Path 18 from typing import Any 19 20 import pytest 21 from jinja2 import Template 22 23 from exceptions import APIError 24 from integrations.anthropic import AnthropicClient 25 from integrations.brightdata import BrightDataClient 26 from integrations.deepseek import DeepSeekClient 27 from integrations.gemini import GeminiClient 28 from integrations.grok import GrokClient 29 from integrations.mistral import MistralClient 30 from integrations.openai import OpenAIClient 31 from models.job import FitCategory, JobListing, LLMEvaluation 32 from services.job_importer import JobImporterService 33 34 # --------------------------------------------------------------------------- 35 # Shared constants 36 # --------------------------------------------------------------------------- 37 38 _PROVIDER_MAP: dict[str, tuple[type[Any], str, str]] = { 39 # client class model key env var 40 "anthropic": (AnthropicClient, "claude-haiku-4-5", "ANTHROPIC_API_KEY"), 41 "openai": (OpenAIClient, "gpt-4o-mini", "OPENAI_API_KEY"), 42 "gemini": (GeminiClient, "gemini-3.1-flash-lite-preview", "GEMINI_API_KEY"), 43 "grok": (GrokClient, "grok-3-mini-fast", "XAI_API_KEY"), 44 "deepseek": (DeepSeekClient, "deepseek-chat", "DEEPSEEK_API_KEY"), 45 "mistral": (MistralClient, "mistral-small-latest", "MISTRAL_API_KEY"), 46 } 47 48 # Unpack once here; every test class reads from these names. 49 # To change a model or rotate a key var, edit _PROVIDER_MAP only. 50 _, _ANTHROPIC_MODEL, _ANTHROPIC_KEY = _PROVIDER_MAP["anthropic"] 51 _, _OPENAI_MODEL, _OPENAI_KEY = _PROVIDER_MAP["openai"] 52 _, _GEMINI_MODEL, _GEMINI_KEY = _PROVIDER_MAP["gemini"] 53 _, _GROK_MODEL, _GROK_KEY = _PROVIDER_MAP["grok"] 54 _, _DEEPSEEK_MODEL, _DEEPSEEK_KEY = _PROVIDER_MAP["deepseek"] 55 _, _MISTRAL_MODEL, _MISTRAL_KEY = _PROVIDER_MAP["mistral"] 56 57 _FIXTURES_DIR = Path(__file__).parent / "fixtures" 58 _LIVE_OUTPUT_DIR = Path(__file__).parent / "live_output" 59 60 _TINY_SYSTEM = "You are a helpful assistant." 61 _TINY_USER = "Respond with exactly: OK" 62 63 # Frozen snapshots of the production prompts, copied at spec-implementation time. 64 # These are intentionally NOT symlinked to prompts/ — if the production persona or 65 # system prompt is updated, these tests continue to evaluate the same scenario. 66 # Refresh these files manually when you want the live tests to track a new prompt version. 67 _STRUCTURED_SYSTEM = (_FIXTURES_DIR / "score_jobs_system.txt").read_text(encoding="utf-8") 68 _PERSONA = (_FIXTURES_DIR / "score_jobs_persona.txt").read_text(encoding="utf-8") 69 _USER_TEMPLATE = Template( 70 (_FIXTURES_DIR / "score_jobs_user.jinja2").read_text(encoding="utf-8") 71 ) 72 73 # JobListing fixtures rendered through the same template path as JobScorerService.score_job(). 74 _TEST_JOB = JobListing.model_validate({ 75 "job_title": "Staff NLP / LLM Engineer", 76 "job_summary": ( 77 "Own the end-to-end LLM pipeline for a document understanding product: " 78 "fine-tuning, RAG architecture, and low-latency inference with vLLM. " 79 "Lead a team of 4 ML engineers. Requirements: PyTorch, HuggingFace, " 80 "distributed training, 7+ years NLP/ML experience." 81 ), 82 "company_name": "Acme Language AI", 83 }) 84 85 _TEST_JOB_UNICODE = JobListing.model_validate({ 86 "job_title": "高级 NLP Tech Lead — Soci\u00e9t\u00e9 Texte Inc.", 87 "job_summary": ( 88 "Build 大规模语言模型 inference systems for document AI. Own model lifecycle from " 89 "研究 to production (vLLM, DeepSpeed). Lead team of ML engineers. " 90 "Requirements: PyTorch, 7+ ans d\u2019exp\u00e9rience NLP, distributed training. " 91 "Contact: r\u00e9sum\u00e9@soci\u00e9t\u00e9texte.example.com" 92 ), 93 "company_name": "Soci\u00e9t\u00e9 Texte Inc.", 94 }) 95 96 _STRUCTURED_USER = _USER_TEMPLATE.render( 97 persona=_PERSONA, 98 job_title=_TEST_JOB.job_title, 99 job_description=_TEST_JOB.job_summary, 100 ) 101 102 _STRUCTURED_USER_UNICODE = _USER_TEMPLATE.render( 103 persona=_PERSONA, 104 job_title=_TEST_JOB_UNICODE.job_title, 105 job_description=_TEST_JOB_UNICODE.job_summary, 106 ) 107 108 _BRIGHTDATA_TEST_URL = ( 109 "https://www.linkedin.com/jobs/search/?f_TPR=r86400&geoId=101165590&keywords=machine%20learning" 110 ) 111 112 113 # --------------------------------------------------------------------------- 114 # Skip helper 115 # --------------------------------------------------------------------------- 116 117 118 def _skip_unless_live(provider: str, key_env: str) -> pytest.MarkDecorator: 119 """Return a combined skip marker for --run-live + env var presence. 120 121 A test is skipped when --run-live was not passed, or the provider's API 122 key env var is not set. This lets partial key sets run a subset of providers. 123 124 Args: 125 provider: Human-readable provider name (used in skip reason). 126 key_env: Name of the environment variable holding the provider's API key. 127 128 Returns: 129 A pytest.MarkDecorator that skips the test when either condition is unmet. 130 """ 131 missing_flag = "--run-live" not in sys.argv 132 missing_key = not os.environ.get(key_env) 133 134 reasons: list[str] = [] 135 if missing_flag: 136 reasons.append("pass --run-live to enable") 137 if missing_key: 138 reasons.append(f"{key_env} not set") 139 140 return pytest.mark.skipif( 141 missing_flag or missing_key, 142 reason=f"{provider}: " + "; ".join(reasons) if reasons else "live test skipped", 143 ) 144 145 146 # --------------------------------------------------------------------------- 147 # Anthropic 148 # --------------------------------------------------------------------------- 149 150 151 @_skip_unless_live("anthropic", _ANTHROPIC_KEY) 152 class TestAnthropicLive: 153 """Live integration tests for AnthropicClient — happy path and error handling.""" 154 155 def test_ping(self) -> None: 156 # Validates API key and model name; a wrong model or expired key fails here. 157 client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL) 158 client.ping(temperature=0.0) 159 160 def test_plain_text_completion(self) -> None: 161 # Verifies response extraction from Anthropic's content[0].text field. 162 client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL) 163 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 164 assert isinstance(result, str) 165 assert len(result) > 0 166 assert "ok" in result.lower() 167 168 def test_structured_response_parses_to_llm_evaluation(self) -> None: 169 # Verifies tool-use wiring: Anthropic returns structured output via tool_use blocks. 170 client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL) 171 result = client.complete( 172 system=_STRUCTURED_SYSTEM, 173 user=_STRUCTURED_USER, 174 temperature=0.0, 175 response_schema=LLMEvaluation, 176 ) 177 json.loads(result) 178 evaluation = LLMEvaluation.model_validate_json(result) 179 assert evaluation.fit_category != FitCategory.ERROR 180 assert 0 <= evaluation.score <= 100 181 assert 0 <= evaluation.networking_opportunity <= 3 182 183 def test_last_usage_returns_nonzero_after_complete(self) -> None: 184 # Verifies real SDK response populates _tls token counts via last_usage. 185 client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL) 186 client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 187 in_tok, out_tok = client.last_usage 188 assert in_tok > 0 189 assert out_tok > 0 190 191 def test_temperature_and_seed_accepted(self) -> None: 192 # Anthropic ignores seed but must not raise; validates signature compatibility. 193 client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL) 194 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42) 195 assert isinstance(result, str) 196 assert len(result) > 0 197 198 @pytest.mark.timeout(5) 199 def test_invalid_model_raises_api_error(self) -> None: 200 # 4xx from bad model name must be classified as APIError and NOT retried. 201 # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried. 202 client = AnthropicClient( 203 api_key=os.environ[_ANTHROPIC_KEY], model="nonexistent-model-xyz" 204 ) 205 with pytest.raises(APIError) as exc_info: 206 client.ping() 207 assert exc_info.value.status_code in {400, 401, 403, 404, 422} 208 assert str(exc_info.value) 209 210 def test_structured_response_with_unicode_input(self) -> None: 211 # Encoding robustness: CJK and accented characters in input must not 212 # garble the JSON round-trip (tool_use input dict -> json.dumps). 213 client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL) 214 result = client.complete( 215 system=_STRUCTURED_SYSTEM, 216 user=_STRUCTURED_USER_UNICODE, 217 temperature=0.0, 218 response_schema=LLMEvaluation, 219 ) 220 json.loads(result) 221 evaluation = LLMEvaluation.model_validate_json(result) 222 assert any(c.isalpha() for c in evaluation.reasoning) 223 224 225 # --------------------------------------------------------------------------- 226 # OpenAI 227 # --------------------------------------------------------------------------- 228 229 230 @_skip_unless_live("openai", _OPENAI_KEY) 231 class TestOpenAILive: 232 """Live integration tests for OpenAIClient — happy path and error handling.""" 233 234 def test_ping(self) -> None: 235 # Validates API key and model name; a wrong model or expired key fails here. 236 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL) 237 client.ping(temperature=0.0) 238 239 def test_plain_text_completion(self) -> None: 240 # Verifies response extraction from choices[0].message.content. 241 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL) 242 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 243 assert isinstance(result, str) 244 assert len(result) > 0 245 assert "ok" in result.lower() 246 247 def test_structured_response_parses_to_llm_evaluation(self) -> None: 248 # Verifies beta.chat.completions.parse wiring and parsed.model_dump_json() path. 249 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL) 250 result = client.complete( 251 system=_STRUCTURED_SYSTEM, 252 user=_STRUCTURED_USER, 253 temperature=0.0, 254 response_schema=LLMEvaluation, 255 ) 256 json.loads(result) 257 evaluation = LLMEvaluation.model_validate_json(result) 258 assert evaluation.fit_category != FitCategory.ERROR 259 assert 0 <= evaluation.score <= 100 260 assert 0 <= evaluation.networking_opportunity <= 3 261 262 def test_last_usage_returns_nonzero_after_complete(self) -> None: 263 # Verifies real SDK response populates _tls token counts via last_usage. 264 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL) 265 client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 266 in_tok, out_tok = client.last_usage 267 assert in_tok > 0 268 assert out_tok > 0 269 270 def test_temperature_and_seed_accepted(self) -> None: 271 # OpenAI passes seed to the API; verifies no unexpected kwarg rejection. 272 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL) 273 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42) 274 assert isinstance(result, str) 275 assert len(result) > 0 276 277 @pytest.mark.timeout(5) 278 def test_invalid_model_raises_api_error(self) -> None: 279 # 4xx from bad model name must be classified as APIError and NOT retried. 280 # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried. 281 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model="nonexistent-model-xyz") 282 with pytest.raises(APIError) as exc_info: 283 client.ping() 284 assert exc_info.value.status_code in {400, 401, 403, 404, 422} 285 assert str(exc_info.value) 286 287 def test_structured_response_with_unicode_input(self) -> None: 288 # Encoding robustness: CJK and accented input must survive the beta parse path. 289 client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL) 290 result = client.complete( 291 system=_STRUCTURED_SYSTEM, 292 user=_STRUCTURED_USER_UNICODE, 293 temperature=0.0, 294 response_schema=LLMEvaluation, 295 ) 296 json.loads(result) 297 evaluation = LLMEvaluation.model_validate_json(result) 298 assert any(c.isalpha() for c in evaluation.reasoning) 299 300 301 # --------------------------------------------------------------------------- 302 # Gemini 303 # --------------------------------------------------------------------------- 304 305 306 @_skip_unless_live("gemini", _GEMINI_KEY) 307 class TestGeminiLive: 308 """Live integration tests for GeminiClient — happy path and error handling.""" 309 310 def test_ping(self) -> None: 311 # Validates API key and model name; a wrong model or expired key fails here. 312 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL) 313 client.ping(temperature=0.0) 314 315 def test_plain_text_completion(self) -> None: 316 # Verifies response extraction from response.text. 317 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL) 318 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 319 assert isinstance(result, str) 320 assert len(result) > 0 321 assert "ok" in result.lower() 322 323 def test_structured_response_parses_to_llm_evaluation(self) -> None: 324 # Verifies response_mime_type=application/json + response_schema param wiring. 325 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL) 326 result = client.complete( 327 system=_STRUCTURED_SYSTEM, 328 user=_STRUCTURED_USER, 329 temperature=0.0, 330 response_schema=LLMEvaluation, 331 ) 332 json.loads(result) 333 evaluation = LLMEvaluation.model_validate_json(result) 334 assert evaluation.fit_category != FitCategory.ERROR 335 assert 0 <= evaluation.score <= 100 336 assert 0 <= evaluation.networking_opportunity <= 3 337 338 def test_last_usage_returns_nonzero_after_complete(self) -> None: 339 # Verifies real SDK response populates _tls token counts via last_usage. 340 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL) 341 client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 342 in_tok, out_tok = client.last_usage 343 assert in_tok > 0 344 assert out_tok > 0 345 346 def test_temperature_and_seed_accepted(self) -> None: 347 # Gemini passes seed via GenerateContentConfig; verifies no kwarg rejection. 348 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL) 349 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42) 350 assert isinstance(result, str) 351 assert len(result) > 0 352 353 @pytest.mark.timeout(5) 354 def test_invalid_model_raises_api_error(self) -> None: 355 # 4xx from bad model name must be classified as APIError and NOT retried. 356 # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried. 357 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model="nonexistent-model-xyz") 358 with pytest.raises(APIError) as exc_info: 359 client.ping() 360 assert exc_info.value.status_code in {400, 401, 403, 404, 422} 361 assert str(exc_info.value) 362 363 def test_structured_response_with_unicode_input(self) -> None: 364 # Encoding robustness: CJK and accented input must survive the JSON response path. 365 client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL) 366 result = client.complete( 367 system=_STRUCTURED_SYSTEM, 368 user=_STRUCTURED_USER_UNICODE, 369 temperature=0.0, 370 response_schema=LLMEvaluation, 371 ) 372 json.loads(result) 373 evaluation = LLMEvaluation.model_validate_json(result) 374 assert any(c.isalpha() for c in evaluation.reasoning) 375 376 377 # --------------------------------------------------------------------------- 378 # Grok 379 # --------------------------------------------------------------------------- 380 381 382 @_skip_unless_live("grok", _GROK_KEY) 383 class TestGrokLive: 384 """Live integration tests for GrokClient — happy path and error handling.""" 385 386 def test_ping(self) -> None: 387 # Validates API key and model name; a wrong model or expired key fails here. 388 client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL) 389 client.ping(temperature=0.0) 390 391 def test_plain_text_completion(self) -> None: 392 # Verifies response extraction from choices[0].message.content via xAI base URL. 393 client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL) 394 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 395 assert isinstance(result, str) 396 assert len(result) > 0 397 assert "ok" in result.lower() 398 399 def test_structured_response_parses_to_llm_evaluation(self) -> None: 400 # Verifies beta.chat.completions.parse wiring via the xAI (Grok) base URL. 401 client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL) 402 result = client.complete( 403 system=_STRUCTURED_SYSTEM, 404 user=_STRUCTURED_USER, 405 temperature=0.0, 406 response_schema=LLMEvaluation, 407 ) 408 json.loads(result) 409 evaluation = LLMEvaluation.model_validate_json(result) 410 assert evaluation.fit_category != FitCategory.ERROR 411 assert 0 <= evaluation.score <= 100 412 assert 0 <= evaluation.networking_opportunity <= 3 413 414 def test_last_usage_returns_nonzero_after_complete(self) -> None: 415 # Verifies real SDK response populates _tls token counts via last_usage. 416 client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL) 417 client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 418 in_tok, out_tok = client.last_usage 419 assert in_tok > 0 420 assert out_tok > 0 421 422 def test_temperature_and_seed_accepted(self) -> None: 423 # Grok passes seed to the xAI API; verifies no unexpected kwarg rejection. 424 client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL) 425 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42) 426 assert isinstance(result, str) 427 assert len(result) > 0 428 429 @pytest.mark.timeout(5) 430 def test_invalid_model_raises_api_error(self) -> None: 431 # 4xx from bad model name must be classified as APIError and NOT retried. 432 # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried. 433 client = GrokClient(api_key=os.environ[_GROK_KEY], model="nonexistent-model-xyz") 434 with pytest.raises(APIError) as exc_info: 435 client.ping() 436 assert exc_info.value.status_code in {400, 401, 403, 404, 422} 437 assert str(exc_info.value) 438 439 def test_structured_response_with_unicode_input(self) -> None: 440 # Encoding robustness: CJK and accented input must survive the beta parse path. 441 client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL) 442 result = client.complete( 443 system=_STRUCTURED_SYSTEM, 444 user=_STRUCTURED_USER_UNICODE, 445 temperature=0.0, 446 response_schema=LLMEvaluation, 447 ) 448 json.loads(result) 449 evaluation = LLMEvaluation.model_validate_json(result) 450 assert any(c.isalpha() for c in evaluation.reasoning) 451 452 453 # --------------------------------------------------------------------------- 454 # DeepSeek 455 # --------------------------------------------------------------------------- 456 457 458 @_skip_unless_live("deepseek", _DEEPSEEK_KEY) 459 class TestDeepSeekLive: 460 """Live integration tests for DeepSeekClient — happy path and error handling.""" 461 462 def test_ping(self) -> None: 463 # Validates API key and model name; a wrong model or expired key fails here. 464 client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL) 465 client.ping(temperature=0.0) 466 467 def test_plain_text_completion(self) -> None: 468 # Verifies response extraction from choices[0].message.content via DeepSeek base URL. 469 client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL) 470 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 471 assert isinstance(result, str) 472 assert len(result) > 0 473 assert "ok" in result.lower() 474 475 def test_structured_response_parses_to_llm_evaluation(self) -> None: 476 # Verifies json_object mode + schema injection into system prompt. 477 # DeepSeek may not strictly comply with the schema; json.loads is the primary guard. 478 client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL) 479 result = client.complete( 480 system=_STRUCTURED_SYSTEM, 481 user=_STRUCTURED_USER, 482 temperature=0.0, 483 response_schema=LLMEvaluation, 484 ) 485 json.loads(result) 486 evaluation = LLMEvaluation.model_validate_json(result) 487 assert evaluation.fit_category != FitCategory.ERROR 488 assert 0 <= evaluation.score <= 100 489 assert 0 <= evaluation.networking_opportunity <= 3 490 491 def test_last_usage_returns_nonzero_after_complete(self) -> None: 492 # Verifies real SDK response populates _tls token counts via last_usage. 493 client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL) 494 client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 495 in_tok, out_tok = client.last_usage 496 assert in_tok > 0 497 assert out_tok > 0 498 499 def test_temperature_and_seed_accepted(self) -> None: 500 # DeepSeek passes seed to the API; verifies no unexpected kwarg rejection. 501 client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL) 502 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42) 503 assert isinstance(result, str) 504 assert len(result) > 0 505 506 @pytest.mark.timeout(5) 507 def test_invalid_model_raises_api_error(self) -> None: 508 # 4xx from bad model name must be classified as APIError and NOT retried. 509 # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried. 510 client = DeepSeekClient( 511 api_key=os.environ[_DEEPSEEK_KEY], model="nonexistent-model-xyz" 512 ) 513 with pytest.raises(APIError) as exc_info: 514 client.ping() 515 assert exc_info.value.status_code in {400, 401, 403, 404, 422} 516 assert str(exc_info.value) 517 518 def test_structured_response_with_unicode_input(self) -> None: 519 # Encoding robustness: schema is injected as JSON string into system prompt, 520 # so CJK/accented input must survive json.dumps -> API -> json.loads round-trip. 521 client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL) 522 result = client.complete( 523 system=_STRUCTURED_SYSTEM, 524 user=_STRUCTURED_USER_UNICODE, 525 temperature=0.0, 526 response_schema=LLMEvaluation, 527 ) 528 json.loads(result) 529 evaluation = LLMEvaluation.model_validate_json(result) 530 assert any(c.isalpha() for c in evaluation.reasoning) 531 532 533 # --------------------------------------------------------------------------- 534 # Mistral 535 # --------------------------------------------------------------------------- 536 537 538 @_skip_unless_live("mistral", _MISTRAL_KEY) 539 class TestMistralLive: 540 """Live integration tests for MistralClient — happy path and error handling.""" 541 542 def test_ping(self) -> None: 543 # Validates API key and model name; a wrong model or expired key fails here. 544 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL) 545 client.ping(temperature=0.0) 546 547 def test_plain_text_completion(self) -> None: 548 # Verifies response extraction from choices[0].message.content via the native SDK. 549 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL) 550 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 551 assert isinstance(result, str) 552 assert len(result) > 0 553 assert "ok" in result.lower() 554 555 def test_structured_response_parses_to_llm_evaluation(self) -> None: 556 # Verifies structured output via the native SDK's chat.parse() with a Pydantic model. 557 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL) 558 result = client.complete( 559 system=_STRUCTURED_SYSTEM, 560 user=_STRUCTURED_USER, 561 temperature=0.0, 562 response_schema=LLMEvaluation, 563 ) 564 json.loads(result) 565 evaluation = LLMEvaluation.model_validate_json(result) 566 assert evaluation.fit_category != FitCategory.ERROR 567 assert 0 <= evaluation.score <= 100 568 assert 0 <= evaluation.networking_opportunity <= 3 569 570 def test_last_usage_returns_nonzero_after_complete(self) -> None: 571 # Verifies real SDK response populates _tls token counts via last_usage. 572 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL) 573 client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0) 574 in_tok, out_tok = client.last_usage 575 assert in_tok > 0 576 assert out_tok > 0 577 578 def test_temperature_and_seed_accepted(self) -> None: 579 # Mistral passes seed to the API; verifies no unexpected kwarg rejection. 580 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL) 581 result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42) 582 assert isinstance(result, str) 583 assert len(result) > 0 584 585 @pytest.mark.timeout(5) 586 def test_invalid_model_raises_api_error(self) -> None: 587 # 4xx from bad model name must be classified as APIError and NOT retried. 588 # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried. 589 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model="nonexistent-model-xyz") 590 with pytest.raises(APIError) as exc_info: 591 client.ping() 592 assert exc_info.value.status_code in {400, 401, 403, 404, 422} 593 assert str(exc_info.value) 594 595 def test_structured_response_with_unicode_input(self) -> None: 596 # Encoding robustness: CJK and accented input must survive chat.parse() round-trip. 597 client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL) 598 result = client.complete( 599 system=_STRUCTURED_SYSTEM, 600 user=_STRUCTURED_USER_UNICODE, 601 temperature=0.0, 602 response_schema=LLMEvaluation, 603 ) 604 json.loads(result) 605 evaluation = LLMEvaluation.model_validate_json(result) 606 assert any(c.isalpha() for c in evaluation.reasoning) 607 608 609 # --------------------------------------------------------------------------- 610 # BrightData — trigger / check_status / fetch live tests 611 # --------------------------------------------------------------------------- 612 613 614 @pytest.fixture(scope="module") 615 def brightdata_snapshot_id() -> str: 616 """Trigger a small scrape and return the snapshot ID. 617 618 This is the onboarding gate test — fast, cheap (limit_per_input=1), 619 proves auth and API connectivity without waiting for results. 620 """ 621 client = BrightDataClient(token=os.environ["BRIGHTDATA_TOKEN"]) 622 snapshot_id = asyncio.run(client.trigger([_BRIGHTDATA_TEST_URL], limit_per_input=1)) 623 _LIVE_OUTPUT_DIR.mkdir(exist_ok=True) 624 (_LIVE_OUTPUT_DIR / "brightdata_snapshot_id.txt").write_text(snapshot_id, encoding="utf-8") 625 return snapshot_id 626 627 628 class _ReplayClient: 629 """Test double that replays pre-fetched data as a BrightDataProtocol.""" 630 631 def __init__(self, jobs: list[dict[str, Any]]) -> None: 632 self._jobs = jobs 633 634 async def trigger( 635 self, urls: list[str], *, limit_per_input: int | None = None 636 ) -> str: 637 return "sd_replay" 638 639 async def check_status(self, snapshot_id: str) -> str | None: 640 return "ready" 641 642 async def fetch(self, snapshot_id: str) -> list[dict[str, Any]]: 643 return self._jobs 644 645 646 @_skip_unless_live("brightdata", "BRIGHTDATA_TOKEN") 647 class TestBrightDataLive: 648 """Live integration tests for BrightDataClient — trigger, check_status, fetch.""" 649 650 @pytest.mark.timeout(30) 651 def test_trigger_and_check_status( 652 self, brightdata_snapshot_id: str 653 ) -> None: 654 # Lightweight onboarding gate: trigger returns a snapshot ID, and 655 # check_status returns a valid status string (not an error). 656 assert isinstance(brightdata_snapshot_id, str) 657 assert brightdata_snapshot_id.startswith("s") # BrightData IDs start with "s" 658 659 client = BrightDataClient(token=os.environ["BRIGHTDATA_TOKEN"]) 660 status = asyncio.run(client.check_status(brightdata_snapshot_id)) 661 # Status should be a known string or None — not an exception. 662 assert status is None or isinstance(status, str) 663 664 @pytest.mark.timeout(30) 665 def test_check_status_nonexistent_snapshot(self) -> None: 666 # A made-up snapshot ID should return None (not found), not crash. 667 client = BrightDataClient(token=os.environ["BRIGHTDATA_TOKEN"]) 668 status = asyncio.run(client.check_status("sd_nonexistent_000000")) 669 assert status is None 670 671 @pytest.mark.timeout(30) 672 def test_dedup_on_real_data( 673 self, brightdata_snapshot_id: str 674 ) -> None: 675 # Catches dedup logic breaking on real data, e.g. job_posting_id returned 676 # as int not str (a real past bug where int vs str dedup keys diverged). 677 # Uses pre-fetched data via _ReplayClient since snapshot may not be ready. 678 sample_jobs = [ 679 {"job_posting_id": "111", "url": "https://example.com/111", "job_title": "A"}, 680 {"job_posting_id": "111", "url": "https://example.com/111-dup", "job_title": "A dup"}, 681 {"job_posting_id": "222", "url": "https://example.com/222", "job_title": "B"}, 682 ] 683 replay = _ReplayClient(sample_jobs) 684 service = JobImporterService(replay) 685 result, total_fetched = service.deduplicate(sample_jobs) 686 ids = [str(j.get("job_posting_id") or j.get("url") or "") for j in result] 687 assert len(ids) == len(set(ids)) 688 assert len(result) <= total_fetched 689 assert total_fetched == len(sample_jobs)