Cradicle Explorer

/ tests / test_providers_live.py
test_providers_live.py
  1  """Live provider integration tests — skipped by default, run with --run-live.
  2  
  3  All tests make real API calls and require valid API keys.
  4  
  5  Usage:
  6      uv run pytest tests/test_providers_live.py --run-live -v
  7      uv run pytest tests/test_providers_live.py --run-live -v -k anthropic
  8      uv run pytest tests/test_providers_live.py --run-live -v -k BrightData
  9  """
 10  
 11  __all__: list[str] = []
 12  
 13  import asyncio
 14  import json
 15  import os
 16  import sys
 17  from pathlib import Path
 18  from typing import Any
 19  
 20  import pytest
 21  from jinja2 import Template
 22  
 23  from exceptions import APIError
 24  from integrations.anthropic import AnthropicClient
 25  from integrations.brightdata import BrightDataClient
 26  from integrations.deepseek import DeepSeekClient
 27  from integrations.gemini import GeminiClient
 28  from integrations.grok import GrokClient
 29  from integrations.mistral import MistralClient
 30  from integrations.openai import OpenAIClient
 31  from models.job import FitCategory, JobListing, LLMEvaluation
 32  from services.job_importer import JobImporterService
 33  
 34  # ---------------------------------------------------------------------------
 35  # Shared constants
 36  # ---------------------------------------------------------------------------
 37  
 38  _PROVIDER_MAP: dict[str, tuple[type[Any], str, str]] = {
 39      #            client class      model                          key env var
 40      "anthropic": (AnthropicClient, "claude-haiku-4-5",           "ANTHROPIC_API_KEY"),
 41      "openai":    (OpenAIClient,    "gpt-4o-mini",                "OPENAI_API_KEY"),
 42      "gemini":    (GeminiClient,    "gemini-3.1-flash-lite-preview",      "GEMINI_API_KEY"),
 43      "grok":      (GrokClient,      "grok-3-mini-fast",           "XAI_API_KEY"),
 44      "deepseek":  (DeepSeekClient,  "deepseek-chat",              "DEEPSEEK_API_KEY"),
 45      "mistral":   (MistralClient,   "mistral-small-latest",       "MISTRAL_API_KEY"),
 46  }
 47  
 48  # Unpack once here; every test class reads from these names.
 49  # To change a model or rotate a key var, edit _PROVIDER_MAP only.
 50  _, _ANTHROPIC_MODEL, _ANTHROPIC_KEY = _PROVIDER_MAP["anthropic"]
 51  _, _OPENAI_MODEL, _OPENAI_KEY = _PROVIDER_MAP["openai"]
 52  _, _GEMINI_MODEL, _GEMINI_KEY = _PROVIDER_MAP["gemini"]
 53  _, _GROK_MODEL, _GROK_KEY = _PROVIDER_MAP["grok"]
 54  _, _DEEPSEEK_MODEL, _DEEPSEEK_KEY = _PROVIDER_MAP["deepseek"]
 55  _, _MISTRAL_MODEL, _MISTRAL_KEY = _PROVIDER_MAP["mistral"]
 56  
 57  _FIXTURES_DIR = Path(__file__).parent / "fixtures"
 58  _LIVE_OUTPUT_DIR = Path(__file__).parent / "live_output"
 59  
 60  _TINY_SYSTEM = "You are a helpful assistant."
 61  _TINY_USER = "Respond with exactly: OK"
 62  
 63  # Frozen snapshots of the production prompts, copied at spec-implementation time.
 64  # These are intentionally NOT symlinked to prompts/ — if the production persona or
 65  # system prompt is updated, these tests continue to evaluate the same scenario.
 66  # Refresh these files manually when you want the live tests to track a new prompt version.
 67  _STRUCTURED_SYSTEM = (_FIXTURES_DIR / "score_jobs_system.txt").read_text(encoding="utf-8")
 68  _PERSONA = (_FIXTURES_DIR / "score_jobs_persona.txt").read_text(encoding="utf-8")
 69  _USER_TEMPLATE = Template(
 70      (_FIXTURES_DIR / "score_jobs_user.jinja2").read_text(encoding="utf-8")
 71  )
 72  
 73  # JobListing fixtures rendered through the same template path as JobScorerService.score_job().
 74  _TEST_JOB = JobListing.model_validate({
 75      "job_title": "Staff NLP / LLM Engineer",
 76      "job_summary": (
 77          "Own the end-to-end LLM pipeline for a document understanding product: "
 78          "fine-tuning, RAG architecture, and low-latency inference with vLLM. "
 79          "Lead a team of 4 ML engineers. Requirements: PyTorch, HuggingFace, "
 80          "distributed training, 7+ years NLP/ML experience."
 81      ),
 82      "company_name": "Acme Language AI",
 83  })
 84  
 85  _TEST_JOB_UNICODE = JobListing.model_validate({
 86      "job_title": "高级 NLP Tech Lead — Soci\u00e9t\u00e9 Texte Inc.",
 87      "job_summary": (
 88          "Build 大规模语言模型 inference systems for document AI. Own model lifecycle from "
 89          "研究 to production (vLLM, DeepSpeed). Lead team of ML engineers. "
 90          "Requirements: PyTorch, 7+ ans d\u2019exp\u00e9rience NLP, distributed training. "
 91          "Contact: r\u00e9sum\u00e9@soci\u00e9t\u00e9texte.example.com"
 92      ),
 93      "company_name": "Soci\u00e9t\u00e9 Texte Inc.",
 94  })
 95  
 96  _STRUCTURED_USER = _USER_TEMPLATE.render(
 97      persona=_PERSONA,
 98      job_title=_TEST_JOB.job_title,
 99      job_description=_TEST_JOB.job_summary,
100  )
101  
102  _STRUCTURED_USER_UNICODE = _USER_TEMPLATE.render(
103      persona=_PERSONA,
104      job_title=_TEST_JOB_UNICODE.job_title,
105      job_description=_TEST_JOB_UNICODE.job_summary,
106  )
107  
108  _BRIGHTDATA_TEST_URL = (
109      "https://www.linkedin.com/jobs/search/?f_TPR=r86400&geoId=101165590&keywords=machine%20learning"
110  )
111  
112  
113  # ---------------------------------------------------------------------------
114  # Skip helper
115  # ---------------------------------------------------------------------------
116  
117  
118  def _skip_unless_live(provider: str, key_env: str) -> pytest.MarkDecorator:
119      """Return a combined skip marker for --run-live + env var presence.
120  
121      A test is skipped when --run-live was not passed, or the provider's API
122      key env var is not set. This lets partial key sets run a subset of providers.
123  
124      Args:
125          provider: Human-readable provider name (used in skip reason).
126          key_env: Name of the environment variable holding the provider's API key.
127  
128      Returns:
129          A pytest.MarkDecorator that skips the test when either condition is unmet.
130      """
131      missing_flag = "--run-live" not in sys.argv
132      missing_key = not os.environ.get(key_env)
133  
134      reasons: list[str] = []
135      if missing_flag:
136          reasons.append("pass --run-live to enable")
137      if missing_key:
138          reasons.append(f"{key_env} not set")
139  
140      return pytest.mark.skipif(
141          missing_flag or missing_key,
142          reason=f"{provider}: " + "; ".join(reasons) if reasons else "live test skipped",
143      )
144  
145  
146  # ---------------------------------------------------------------------------
147  # Anthropic
148  # ---------------------------------------------------------------------------
149  
150  
151  @_skip_unless_live("anthropic", _ANTHROPIC_KEY)
152  class TestAnthropicLive:
153      """Live integration tests for AnthropicClient — happy path and error handling."""
154  
155      def test_ping(self) -> None:
156          # Validates API key and model name; a wrong model or expired key fails here.
157          client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL)
158          client.ping(temperature=0.0)
159  
160      def test_plain_text_completion(self) -> None:
161          # Verifies response extraction from Anthropic's content[0].text field.
162          client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL)
163          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
164          assert isinstance(result, str)
165          assert len(result) > 0
166          assert "ok" in result.lower()
167  
168      def test_structured_response_parses_to_llm_evaluation(self) -> None:
169          # Verifies tool-use wiring: Anthropic returns structured output via tool_use blocks.
170          client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL)
171          result = client.complete(
172              system=_STRUCTURED_SYSTEM,
173              user=_STRUCTURED_USER,
174              temperature=0.0,
175              response_schema=LLMEvaluation,
176          )
177          json.loads(result)
178          evaluation = LLMEvaluation.model_validate_json(result)
179          assert evaluation.fit_category != FitCategory.ERROR
180          assert 0 <= evaluation.score <= 100
181          assert 0 <= evaluation.networking_opportunity <= 3
182  
183      def test_last_usage_returns_nonzero_after_complete(self) -> None:
184          # Verifies real SDK response populates _tls token counts via last_usage.
185          client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL)
186          client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
187          in_tok, out_tok = client.last_usage
188          assert in_tok > 0
189          assert out_tok > 0
190  
191      def test_temperature_and_seed_accepted(self) -> None:
192          # Anthropic ignores seed but must not raise; validates signature compatibility.
193          client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL)
194          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42)
195          assert isinstance(result, str)
196          assert len(result) > 0
197  
198      @pytest.mark.timeout(5)
199      def test_invalid_model_raises_api_error(self) -> None:
200          # 4xx from bad model name must be classified as APIError and NOT retried.
201          # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried.
202          client = AnthropicClient(
203              api_key=os.environ[_ANTHROPIC_KEY], model="nonexistent-model-xyz"
204          )
205          with pytest.raises(APIError) as exc_info:
206              client.ping()
207          assert exc_info.value.status_code in {400, 401, 403, 404, 422}
208          assert str(exc_info.value)
209  
210      def test_structured_response_with_unicode_input(self) -> None:
211          # Encoding robustness: CJK and accented characters in input must not
212          # garble the JSON round-trip (tool_use input dict -> json.dumps).
213          client = AnthropicClient(api_key=os.environ[_ANTHROPIC_KEY], model=_ANTHROPIC_MODEL)
214          result = client.complete(
215              system=_STRUCTURED_SYSTEM,
216              user=_STRUCTURED_USER_UNICODE,
217              temperature=0.0,
218              response_schema=LLMEvaluation,
219          )
220          json.loads(result)
221          evaluation = LLMEvaluation.model_validate_json(result)
222          assert any(c.isalpha() for c in evaluation.reasoning)
223  
224  
225  # ---------------------------------------------------------------------------
226  # OpenAI
227  # ---------------------------------------------------------------------------
228  
229  
230  @_skip_unless_live("openai", _OPENAI_KEY)
231  class TestOpenAILive:
232      """Live integration tests for OpenAIClient — happy path and error handling."""
233  
234      def test_ping(self) -> None:
235          # Validates API key and model name; a wrong model or expired key fails here.
236          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL)
237          client.ping(temperature=0.0)
238  
239      def test_plain_text_completion(self) -> None:
240          # Verifies response extraction from choices[0].message.content.
241          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL)
242          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
243          assert isinstance(result, str)
244          assert len(result) > 0
245          assert "ok" in result.lower()
246  
247      def test_structured_response_parses_to_llm_evaluation(self) -> None:
248          # Verifies beta.chat.completions.parse wiring and parsed.model_dump_json() path.
249          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL)
250          result = client.complete(
251              system=_STRUCTURED_SYSTEM,
252              user=_STRUCTURED_USER,
253              temperature=0.0,
254              response_schema=LLMEvaluation,
255          )
256          json.loads(result)
257          evaluation = LLMEvaluation.model_validate_json(result)
258          assert evaluation.fit_category != FitCategory.ERROR
259          assert 0 <= evaluation.score <= 100
260          assert 0 <= evaluation.networking_opportunity <= 3
261  
262      def test_last_usage_returns_nonzero_after_complete(self) -> None:
263          # Verifies real SDK response populates _tls token counts via last_usage.
264          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL)
265          client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
266          in_tok, out_tok = client.last_usage
267          assert in_tok > 0
268          assert out_tok > 0
269  
270      def test_temperature_and_seed_accepted(self) -> None:
271          # OpenAI passes seed to the API; verifies no unexpected kwarg rejection.
272          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL)
273          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42)
274          assert isinstance(result, str)
275          assert len(result) > 0
276  
277      @pytest.mark.timeout(5)
278      def test_invalid_model_raises_api_error(self) -> None:
279          # 4xx from bad model name must be classified as APIError and NOT retried.
280          # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried.
281          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model="nonexistent-model-xyz")
282          with pytest.raises(APIError) as exc_info:
283              client.ping()
284          assert exc_info.value.status_code in {400, 401, 403, 404, 422}
285          assert str(exc_info.value)
286  
287      def test_structured_response_with_unicode_input(self) -> None:
288          # Encoding robustness: CJK and accented input must survive the beta parse path.
289          client = OpenAIClient(api_key=os.environ[_OPENAI_KEY], model=_OPENAI_MODEL)
290          result = client.complete(
291              system=_STRUCTURED_SYSTEM,
292              user=_STRUCTURED_USER_UNICODE,
293              temperature=0.0,
294              response_schema=LLMEvaluation,
295          )
296          json.loads(result)
297          evaluation = LLMEvaluation.model_validate_json(result)
298          assert any(c.isalpha() for c in evaluation.reasoning)
299  
300  
301  # ---------------------------------------------------------------------------
302  # Gemini
303  # ---------------------------------------------------------------------------
304  
305  
306  @_skip_unless_live("gemini", _GEMINI_KEY)
307  class TestGeminiLive:
308      """Live integration tests for GeminiClient — happy path and error handling."""
309  
310      def test_ping(self) -> None:
311          # Validates API key and model name; a wrong model or expired key fails here.
312          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL)
313          client.ping(temperature=0.0)
314  
315      def test_plain_text_completion(self) -> None:
316          # Verifies response extraction from response.text.
317          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL)
318          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
319          assert isinstance(result, str)
320          assert len(result) > 0
321          assert "ok" in result.lower()
322  
323      def test_structured_response_parses_to_llm_evaluation(self) -> None:
324          # Verifies response_mime_type=application/json + response_schema param wiring.
325          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL)
326          result = client.complete(
327              system=_STRUCTURED_SYSTEM,
328              user=_STRUCTURED_USER,
329              temperature=0.0,
330              response_schema=LLMEvaluation,
331          )
332          json.loads(result)
333          evaluation = LLMEvaluation.model_validate_json(result)
334          assert evaluation.fit_category != FitCategory.ERROR
335          assert 0 <= evaluation.score <= 100
336          assert 0 <= evaluation.networking_opportunity <= 3
337  
338      def test_last_usage_returns_nonzero_after_complete(self) -> None:
339          # Verifies real SDK response populates _tls token counts via last_usage.
340          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL)
341          client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
342          in_tok, out_tok = client.last_usage
343          assert in_tok > 0
344          assert out_tok > 0
345  
346      def test_temperature_and_seed_accepted(self) -> None:
347          # Gemini passes seed via GenerateContentConfig; verifies no kwarg rejection.
348          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL)
349          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42)
350          assert isinstance(result, str)
351          assert len(result) > 0
352  
353      @pytest.mark.timeout(5)
354      def test_invalid_model_raises_api_error(self) -> None:
355          # 4xx from bad model name must be classified as APIError and NOT retried.
356          # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried.
357          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model="nonexistent-model-xyz")
358          with pytest.raises(APIError) as exc_info:
359              client.ping()
360          assert exc_info.value.status_code in {400, 401, 403, 404, 422}
361          assert str(exc_info.value)
362  
363      def test_structured_response_with_unicode_input(self) -> None:
364          # Encoding robustness: CJK and accented input must survive the JSON response path.
365          client = GeminiClient(api_key=os.environ[_GEMINI_KEY], model=_GEMINI_MODEL)
366          result = client.complete(
367              system=_STRUCTURED_SYSTEM,
368              user=_STRUCTURED_USER_UNICODE,
369              temperature=0.0,
370              response_schema=LLMEvaluation,
371          )
372          json.loads(result)
373          evaluation = LLMEvaluation.model_validate_json(result)
374          assert any(c.isalpha() for c in evaluation.reasoning)
375  
376  
377  # ---------------------------------------------------------------------------
378  # Grok
379  # ---------------------------------------------------------------------------
380  
381  
382  @_skip_unless_live("grok", _GROK_KEY)
383  class TestGrokLive:
384      """Live integration tests for GrokClient — happy path and error handling."""
385  
386      def test_ping(self) -> None:
387          # Validates API key and model name; a wrong model or expired key fails here.
388          client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL)
389          client.ping(temperature=0.0)
390  
391      def test_plain_text_completion(self) -> None:
392          # Verifies response extraction from choices[0].message.content via xAI base URL.
393          client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL)
394          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
395          assert isinstance(result, str)
396          assert len(result) > 0
397          assert "ok" in result.lower()
398  
399      def test_structured_response_parses_to_llm_evaluation(self) -> None:
400          # Verifies beta.chat.completions.parse wiring via the xAI (Grok) base URL.
401          client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL)
402          result = client.complete(
403              system=_STRUCTURED_SYSTEM,
404              user=_STRUCTURED_USER,
405              temperature=0.0,
406              response_schema=LLMEvaluation,
407          )
408          json.loads(result)
409          evaluation = LLMEvaluation.model_validate_json(result)
410          assert evaluation.fit_category != FitCategory.ERROR
411          assert 0 <= evaluation.score <= 100
412          assert 0 <= evaluation.networking_opportunity <= 3
413  
414      def test_last_usage_returns_nonzero_after_complete(self) -> None:
415          # Verifies real SDK response populates _tls token counts via last_usage.
416          client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL)
417          client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
418          in_tok, out_tok = client.last_usage
419          assert in_tok > 0
420          assert out_tok > 0
421  
422      def test_temperature_and_seed_accepted(self) -> None:
423          # Grok passes seed to the xAI API; verifies no unexpected kwarg rejection.
424          client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL)
425          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42)
426          assert isinstance(result, str)
427          assert len(result) > 0
428  
429      @pytest.mark.timeout(5)
430      def test_invalid_model_raises_api_error(self) -> None:
431          # 4xx from bad model name must be classified as APIError and NOT retried.
432          # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried.
433          client = GrokClient(api_key=os.environ[_GROK_KEY], model="nonexistent-model-xyz")
434          with pytest.raises(APIError) as exc_info:
435              client.ping()
436          assert exc_info.value.status_code in {400, 401, 403, 404, 422}
437          assert str(exc_info.value)
438  
439      def test_structured_response_with_unicode_input(self) -> None:
440          # Encoding robustness: CJK and accented input must survive the beta parse path.
441          client = GrokClient(api_key=os.environ[_GROK_KEY], model=_GROK_MODEL)
442          result = client.complete(
443              system=_STRUCTURED_SYSTEM,
444              user=_STRUCTURED_USER_UNICODE,
445              temperature=0.0,
446              response_schema=LLMEvaluation,
447          )
448          json.loads(result)
449          evaluation = LLMEvaluation.model_validate_json(result)
450          assert any(c.isalpha() for c in evaluation.reasoning)
451  
452  
453  # ---------------------------------------------------------------------------
454  # DeepSeek
455  # ---------------------------------------------------------------------------
456  
457  
458  @_skip_unless_live("deepseek", _DEEPSEEK_KEY)
459  class TestDeepSeekLive:
460      """Live integration tests for DeepSeekClient — happy path and error handling."""
461  
462      def test_ping(self) -> None:
463          # Validates API key and model name; a wrong model or expired key fails here.
464          client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL)
465          client.ping(temperature=0.0)
466  
467      def test_plain_text_completion(self) -> None:
468          # Verifies response extraction from choices[0].message.content via DeepSeek base URL.
469          client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL)
470          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
471          assert isinstance(result, str)
472          assert len(result) > 0
473          assert "ok" in result.lower()
474  
475      def test_structured_response_parses_to_llm_evaluation(self) -> None:
476          # Verifies json_object mode + schema injection into system prompt.
477          # DeepSeek may not strictly comply with the schema; json.loads is the primary guard.
478          client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL)
479          result = client.complete(
480              system=_STRUCTURED_SYSTEM,
481              user=_STRUCTURED_USER,
482              temperature=0.0,
483              response_schema=LLMEvaluation,
484          )
485          json.loads(result)
486          evaluation = LLMEvaluation.model_validate_json(result)
487          assert evaluation.fit_category != FitCategory.ERROR
488          assert 0 <= evaluation.score <= 100
489          assert 0 <= evaluation.networking_opportunity <= 3
490  
491      def test_last_usage_returns_nonzero_after_complete(self) -> None:
492          # Verifies real SDK response populates _tls token counts via last_usage.
493          client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL)
494          client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
495          in_tok, out_tok = client.last_usage
496          assert in_tok > 0
497          assert out_tok > 0
498  
499      def test_temperature_and_seed_accepted(self) -> None:
500          # DeepSeek passes seed to the API; verifies no unexpected kwarg rejection.
501          client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL)
502          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42)
503          assert isinstance(result, str)
504          assert len(result) > 0
505  
506      @pytest.mark.timeout(5)
507      def test_invalid_model_raises_api_error(self) -> None:
508          # 4xx from bad model name must be classified as APIError and NOT retried.
509          # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried.
510          client = DeepSeekClient(
511              api_key=os.environ[_DEEPSEEK_KEY], model="nonexistent-model-xyz"
512          )
513          with pytest.raises(APIError) as exc_info:
514              client.ping()
515          assert exc_info.value.status_code in {400, 401, 403, 404, 422}
516          assert str(exc_info.value)
517  
518      def test_structured_response_with_unicode_input(self) -> None:
519          # Encoding robustness: schema is injected as JSON string into system prompt,
520          # so CJK/accented input must survive json.dumps -> API -> json.loads round-trip.
521          client = DeepSeekClient(api_key=os.environ[_DEEPSEEK_KEY], model=_DEEPSEEK_MODEL)
522          result = client.complete(
523              system=_STRUCTURED_SYSTEM,
524              user=_STRUCTURED_USER_UNICODE,
525              temperature=0.0,
526              response_schema=LLMEvaluation,
527          )
528          json.loads(result)
529          evaluation = LLMEvaluation.model_validate_json(result)
530          assert any(c.isalpha() for c in evaluation.reasoning)
531  
532  
533  # ---------------------------------------------------------------------------
534  # Mistral
535  # ---------------------------------------------------------------------------
536  
537  
538  @_skip_unless_live("mistral", _MISTRAL_KEY)
539  class TestMistralLive:
540      """Live integration tests for MistralClient — happy path and error handling."""
541  
542      def test_ping(self) -> None:
543          # Validates API key and model name; a wrong model or expired key fails here.
544          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL)
545          client.ping(temperature=0.0)
546  
547      def test_plain_text_completion(self) -> None:
548          # Verifies response extraction from choices[0].message.content via the native SDK.
549          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL)
550          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
551          assert isinstance(result, str)
552          assert len(result) > 0
553          assert "ok" in result.lower()
554  
555      def test_structured_response_parses_to_llm_evaluation(self) -> None:
556          # Verifies structured output via the native SDK's chat.parse() with a Pydantic model.
557          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL)
558          result = client.complete(
559              system=_STRUCTURED_SYSTEM,
560              user=_STRUCTURED_USER,
561              temperature=0.0,
562              response_schema=LLMEvaluation,
563          )
564          json.loads(result)
565          evaluation = LLMEvaluation.model_validate_json(result)
566          assert evaluation.fit_category != FitCategory.ERROR
567          assert 0 <= evaluation.score <= 100
568          assert 0 <= evaluation.networking_opportunity <= 3
569  
570      def test_last_usage_returns_nonzero_after_complete(self) -> None:
571          # Verifies real SDK response populates _tls token counts via last_usage.
572          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL)
573          client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.0)
574          in_tok, out_tok = client.last_usage
575          assert in_tok > 0
576          assert out_tok > 0
577  
578      def test_temperature_and_seed_accepted(self) -> None:
579          # Mistral passes seed to the API; verifies no unexpected kwarg rejection.
580          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL)
581          result = client.complete(system=_TINY_SYSTEM, user=_TINY_USER, temperature=0.5, seed=42)
582          assert isinstance(result, str)
583          assert len(result) > 0
584  
585      @pytest.mark.timeout(5)
586      def test_invalid_model_raises_api_error(self) -> None:
587          # 4xx from bad model name must be classified as APIError and NOT retried.
588          # Tenacity backoff (min=2s, 5 attempts) would exceed 5s if the error were retried.
589          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model="nonexistent-model-xyz")
590          with pytest.raises(APIError) as exc_info:
591              client.ping()
592          assert exc_info.value.status_code in {400, 401, 403, 404, 422}
593          assert str(exc_info.value)
594  
595      def test_structured_response_with_unicode_input(self) -> None:
596          # Encoding robustness: CJK and accented input must survive chat.parse() round-trip.
597          client = MistralClient(api_key=os.environ[_MISTRAL_KEY], model=_MISTRAL_MODEL)
598          result = client.complete(
599              system=_STRUCTURED_SYSTEM,
600              user=_STRUCTURED_USER_UNICODE,
601              temperature=0.0,
602              response_schema=LLMEvaluation,
603          )
604          json.loads(result)
605          evaluation = LLMEvaluation.model_validate_json(result)
606          assert any(c.isalpha() for c in evaluation.reasoning)
607  
608  
609  # ---------------------------------------------------------------------------
610  # BrightData — trigger / check_status / fetch live tests
611  # ---------------------------------------------------------------------------
612  
613  
614  @pytest.fixture(scope="module")
615  def brightdata_snapshot_id() -> str:
616      """Trigger a small scrape and return the snapshot ID.
617  
618      This is the onboarding gate test — fast, cheap (limit_per_input=1),
619      proves auth and API connectivity without waiting for results.
620      """
621      client = BrightDataClient(token=os.environ["BRIGHTDATA_TOKEN"])
622      snapshot_id = asyncio.run(client.trigger([_BRIGHTDATA_TEST_URL], limit_per_input=1))
623      _LIVE_OUTPUT_DIR.mkdir(exist_ok=True)
624      (_LIVE_OUTPUT_DIR / "brightdata_snapshot_id.txt").write_text(snapshot_id, encoding="utf-8")
625      return snapshot_id
626  
627  
628  class _ReplayClient:
629      """Test double that replays pre-fetched data as a BrightDataProtocol."""
630  
631      def __init__(self, jobs: list[dict[str, Any]]) -> None:
632          self._jobs = jobs
633  
634      async def trigger(
635          self, urls: list[str], *, limit_per_input: int | None = None
636      ) -> str:
637          return "sd_replay"
638  
639      async def check_status(self, snapshot_id: str) -> str | None:
640          return "ready"
641  
642      async def fetch(self, snapshot_id: str) -> list[dict[str, Any]]:
643          return self._jobs
644  
645  
646  @_skip_unless_live("brightdata", "BRIGHTDATA_TOKEN")
647  class TestBrightDataLive:
648      """Live integration tests for BrightDataClient — trigger, check_status, fetch."""
649  
650      @pytest.mark.timeout(30)
651      def test_trigger_and_check_status(
652          self, brightdata_snapshot_id: str
653      ) -> None:
654          # Lightweight onboarding gate: trigger returns a snapshot ID, and
655          # check_status returns a valid status string (not an error).
656          assert isinstance(brightdata_snapshot_id, str)
657          assert brightdata_snapshot_id.startswith("s")  # BrightData IDs start with "s"
658  
659          client = BrightDataClient(token=os.environ["BRIGHTDATA_TOKEN"])
660          status = asyncio.run(client.check_status(brightdata_snapshot_id))
661          # Status should be a known string or None — not an exception.
662          assert status is None or isinstance(status, str)
663  
664      @pytest.mark.timeout(30)
665      def test_check_status_nonexistent_snapshot(self) -> None:
666          # A made-up snapshot ID should return None (not found), not crash.
667          client = BrightDataClient(token=os.environ["BRIGHTDATA_TOKEN"])
668          status = asyncio.run(client.check_status("sd_nonexistent_000000"))
669          assert status is None
670  
671      @pytest.mark.timeout(30)
672      def test_dedup_on_real_data(
673          self, brightdata_snapshot_id: str
674      ) -> None:
675          # Catches dedup logic breaking on real data, e.g. job_posting_id returned
676          # as int not str (a real past bug where int vs str dedup keys diverged).
677          # Uses pre-fetched data via _ReplayClient since snapshot may not be ready.
678          sample_jobs = [
679              {"job_posting_id": "111", "url": "https://example.com/111", "job_title": "A"},
680              {"job_posting_id": "111", "url": "https://example.com/111-dup", "job_title": "A dup"},
681              {"job_posting_id": "222", "url": "https://example.com/222", "job_title": "B"},
682          ]
683          replay = _ReplayClient(sample_jobs)
684          service = JobImporterService(replay)
685          result, total_fetched = service.deduplicate(sample_jobs)
686          ids = [str(j.get("job_posting_id") or j.get("url") or "") for j in result]
687          assert len(ids) == len(set(ids))
688          assert len(result) <= total_fetched
689          assert total_fetched == len(sample_jobs)