test_model_normalize.py
"""Tests for hermes_cli.model_normalize — provider-aware model name normalization.

Covers issue #5211: opencode-go model names with dots (e.g. minimax-m2.7)
must NOT be mangled to hyphens (minimax-m2-7).
"""
import pytest

from hermes_cli.model_normalize import (
    normalize_model_for_provider,
    _DOT_TO_HYPHEN_PROVIDERS,
    _AGGREGATOR_PROVIDERS,
    _normalize_for_deepseek,
    detect_vendor,
)


# ── Regression: issue #5211 ────────────────────────────────────────────

class TestIssue5211OpenCodeGoDotPreservation:
    """Dotted model names sent to ``opencode-go`` must come back untouched."""

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("minimax-m2.7", "minimax-m2.7"),
            ("minimax-m2.5", "minimax-m2.5"),
            ("glm-4.5", "glm-4.5"),
            ("kimi-k2.5", "kimi-k2.5"),
            ("some-model-1.0.3", "some-model-1.0.3"),
        ],
    )
    def test_opencode_go_preserves_dots(self, model, expected):
        """Each dotted name is expected back verbatim for ``opencode-go``."""
        result = normalize_model_for_provider(model, "opencode-go")
        assert result == expected, f"Expected {expected!r}, got {result!r}"

    def test_opencode_go_not_in_dot_to_hyphen_set(self):
        """``opencode-go`` must be absent from the dot-to-hyphen provider set."""
        assert "opencode-go" not in _DOT_TO_HYPHEN_PROVIDERS


# ── Anthropic dot-to-hyphen conversion (regression) ────────────────────

class TestAnthropicDotToHyphen:
    """For the ``anthropic`` provider, dots are expected to become hyphens."""

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("claude-sonnet-4.6", "claude-sonnet-4-6"),
            ("claude-opus-4.5", "claude-opus-4-5"),
        ],
    )
    def test_anthropic_converts_dots(self, model, expected):
        assert normalize_model_for_provider(model, "anthropic") == expected

    def test_anthropic_strips_vendor_prefix(self):
        # The ``anthropic/`` prefix goes away and the dot is hyphenated.
        normalized = normalize_model_for_provider(
            "anthropic/claude-sonnet-4.6", "anthropic"
        )
        assert normalized == "claude-sonnet-4-6"


# ── OpenCode Zen regression ────────────────────────────────────────────

class TestOpenCodeZenModelNormalization:
    """On ``opencode-zen`` most names keep their dots; Claude names are hyphenated."""

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("claude-sonnet-4.6", "claude-sonnet-4-6"),
            ("opencode-zen/claude-opus-4.5", "claude-opus-4-5"),
            ("glm-4.5", "glm-4.5"),
            ("glm-5.1", "glm-5.1"),
            ("gpt-5.4", "gpt-5.4"),
            ("minimax-m2.5-free", "minimax-m2.5-free"),
            ("kimi-k2.5", "kimi-k2.5"),
        ],
    )
    def test_zen_normalizes_models(self, model, expected):
        assert normalize_model_for_provider(model, "opencode-zen") == expected

    def test_zen_strips_vendor_prefix(self):
        normalized = normalize_model_for_provider(
            "opencode-zen/claude-sonnet-4.6", "opencode-zen"
        )
        assert normalized == "claude-sonnet-4-6"

    def test_zen_strips_vendor_prefix_for_non_claude(self):
        normalized = normalize_model_for_provider(
            "opencode-zen/glm-5.1", "opencode-zen"
        )
        assert normalized == "glm-5.1"


# ── Copilot dot preservation (regression) ──────────────────────────────

class TestCopilotDotPreservation:
    """Dotted model names are expected to survive unchanged on ``copilot``."""

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("claude-sonnet-4.6", "claude-sonnet-4.6"),
            ("gpt-5.4", "gpt-5.4"),
        ],
    )
    def test_copilot_preserves_dots(self, model, expected):
        assert normalize_model_for_provider(model, "copilot") == expected


# ── Copilot model-name normalization (issue #6879 regression) ──────────

class TestCopilotModelNormalization:
    """Copilot expects bare, dot-notation model IDs.

    Regression coverage for issue #6879. The previously broken Copilot
    branch left vendor-prefixed Anthropic IDs (e.g.
    ``anthropic/claude-sonnet-4.6``) and dash-notation Claude IDs (e.g.
    ``claude-sonnet-4-6``) untouched, and the Copilot API then rejected
    the request with HTTP 400 "model_not_supported".
    """

    @pytest.mark.parametrize(
        "model,expected",
        [
            # Vendor-prefixed Anthropic IDs — prefix must be stripped.
            ("anthropic/claude-opus-4.6", "claude-opus-4.6"),
            ("anthropic/claude-sonnet-4.6", "claude-sonnet-4.6"),
            ("anthropic/claude-sonnet-4.5", "claude-sonnet-4.5"),
            ("anthropic/claude-haiku-4.5", "claude-haiku-4.5"),
            # Vendor-prefixed OpenAI IDs — prefix must be stripped.
            ("openai/gpt-5.4", "gpt-5.4"),
            ("openai/gpt-4o", "gpt-4o"),
            ("openai/gpt-4o-mini", "gpt-4o-mini"),
            # Dash-notation Claude IDs — must be converted to dot-notation.
            ("claude-opus-4-6", "claude-opus-4.6"),
            ("claude-sonnet-4-6", "claude-sonnet-4.6"),
            ("claude-sonnet-4-5", "claude-sonnet-4.5"),
            ("claude-haiku-4-5", "claude-haiku-4.5"),
            # Combined: vendor-prefixed + dash-notation.
            ("anthropic/claude-opus-4-6", "claude-opus-4.6"),
            ("anthropic/claude-sonnet-4-6", "claude-sonnet-4.6"),
            # Already-canonical inputs pass through unchanged.
            ("claude-sonnet-4.6", "claude-sonnet-4.6"),
            ("gpt-5.4", "gpt-5.4"),
            ("gpt-5-mini", "gpt-5-mini"),
        ],
    )
    def test_copilot_normalization(self, model, expected):
        normalized = normalize_model_for_provider(model, "copilot")
        assert normalized == expected

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("anthropic/claude-sonnet-4.6", "claude-sonnet-4.6"),
            ("claude-sonnet-4-6", "claude-sonnet-4.6"),
            ("claude-opus-4-6", "claude-opus-4.6"),
            ("openai/gpt-5.4", "gpt-5.4"),
        ],
    )
    def test_copilot_acp_normalization(self, model, expected):
        """``copilot-acp`` is held to the same expectations as ``copilot``."""
        normalized = normalize_model_for_provider(model, "copilot-acp")
        assert normalized == expected

    def test_openai_codex_still_strips_openai_prefix(self):
        """Regression: ``openai-codex`` must keep stripping the ``openai/`` prefix."""
        normalized = normalize_model_for_provider("openai/gpt-5.4", "openai-codex")
        assert normalized == "gpt-5.4"


# ── Aggregator providers (regression) ──────────────────────────────────
class TestAggregatorProviders:
    """Aggregator providers are expected to receive ``vendor/model`` slugs."""

    def test_openrouter_prepends_vendor(self):
        slug = normalize_model_for_provider("claude-sonnet-4.6", "openrouter")
        assert slug == "anthropic/claude-sonnet-4.6"

    def test_nous_prepends_vendor(self):
        slug = normalize_model_for_provider("gpt-5.4", "nous")
        assert slug == "openai/gpt-5.4"

    def test_vendor_already_present(self):
        # A slug that already carries its vendor must not be altered.
        slug = normalize_model_for_provider(
            "anthropic/claude-sonnet-4.6", "openrouter"
        )
        assert slug == "anthropic/claude-sonnet-4.6"


class TestIssue6211NativeProviderPrefixNormalization:
    """Pin, per (model, provider) pair, whether a leading prefix is stripped."""

    @pytest.mark.parametrize(
        "model,target_provider,expected",
        [
            ("zai/glm-5.1", "zai", "glm-5.1"),
            ("google/gemini-2.5-pro", "gemini", "google/gemini-2.5-pro"),
            ("moonshot/kimi-k2.5", "kimi-coding", "kimi-k2.5"),
            ("anthropic/claude-sonnet-4.6", "openrouter", "anthropic/claude-sonnet-4.6"),
            ("Qwen/Qwen3.5-397B-A17B", "huggingface", "Qwen/Qwen3.5-397B-A17B"),
            ("modal/zai-org/GLM-5-FP8", "custom", "modal/zai-org/GLM-5-FP8"),
        ],
    )
    def test_native_provider_prefixes_are_only_stripped_on_matching_provider(
        self, model, target_provider, expected
    ):
        normalized = normalize_model_for_provider(model, target_provider)
        assert normalized == expected


# ── detect_vendor ──────────────────────────────────────────────────────

class TestDetectVendor:
    """``detect_vendor`` maps a bare model name to its vendor slug."""

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("claude-sonnet-4.6", "anthropic"),
            ("gpt-5.4-mini", "openai"),
            ("minimax-m2.7", "minimax"),
            ("glm-4.5", "z-ai"),
            ("kimi-k2.5", "moonshotai"),
        ],
    )
    def test_detects_known_vendors(self, model, expected):
        vendor = detect_vendor(model)
        assert vendor == expected


# ── DeepSeek V-series pass-through (bug: V4 models silently folded to V3) ──

class TestDeepseekVSeriesPassThrough:
    """DeepSeek V-series IDs must not be folded into ``deepseek-chat``.

    ``deepseek-v4-pro``, ``deepseek-v4-flash`` and future
    ``deepseek-v<N>-*`` variants are first-class model IDs accepted
    directly by DeepSeek's Chat Completions API. Earlier code folded
    every non-reasoner name into ``deepseek-chat``, which on aggregators
    (Nous portal, OpenRouter via DeepInfra) routes to V3 — silently
    downgrading users who picked V4.
    """

    @pytest.mark.parametrize(
        "model",
        [
            "deepseek-v4-pro",
            "deepseek-v4-flash",
            "deepseek/deepseek-v4-pro",  # vendor-prefixed
            "deepseek/deepseek-v4-flash",
            "DeepSeek-V4-Pro",  # case-insensitive
            "deepseek-v4-flash-20260423",  # dated variant
            "deepseek-v5-pro",  # future V-series
            "deepseek-v10-ultra",  # double-digit future
        ],
    )
    def test_v_series_passes_through(self, model):
        # Expected value: whatever follows the first "/" (or the whole
        # name when there is no "/"), lower-cased.
        _, slash, remainder = model.partition("/")
        bare_name = remainder if slash else model
        assert _normalize_for_deepseek(model) == bare_name.lower()

    def test_deepseek_provider_preserves_v4_pro(self):
        """End-to-end through ``normalize_model_for_provider``: a user who
        selects V4 Pro must reach DeepSeek's API as V4 Pro, not a V3 alias."""
        assert (
            normalize_model_for_provider("deepseek-v4-pro", "deepseek")
            == "deepseek-v4-pro"
        )

    def test_deepseek_provider_preserves_v4_flash(self):
        assert (
            normalize_model_for_provider("deepseek-v4-flash", "deepseek")
            == "deepseek-v4-flash"
        )


# ── DeepSeek regressions (existing behaviour still holds) ──────────────

class TestDeepseekCanonicalAndReasonerMapping:
    """Canonical names pass through; reasoner keywords and unknowns still fold."""

    @pytest.mark.parametrize(
        "model,expected",
        [
            ("deepseek-chat", "deepseek-chat"),
            ("deepseek-reasoner", "deepseek-reasoner"),
            ("DEEPSEEK-CHAT", "deepseek-chat"),
        ],
    )
    def test_canonical_models_pass_through(self, model, expected):
        normalized = _normalize_for_deepseek(model)
        assert normalized == expected

    @pytest.mark.parametrize(
        "model",
        [
            "deepseek-r1",
            "deepseek-r1-0528",
            "deepseek-think-v3",
            "deepseek-reasoning-preview",
            "deepseek-cot-experimental",
        ],
    )
    def test_reasoner_keywords_map_to_reasoner(self, model):
        normalized = _normalize_for_deepseek(model)
        assert normalized == "deepseek-reasoner"

    @pytest.mark.parametrize(
        "model",
        [
            "deepseek-chat-v3.1",  # 'chat' prefix, not V-series pattern
            "unknown-model",
            "something-random",
            "gpt-5",  # non-DeepSeek names still fall through
        ],
    )
    def test_unknown_names_fall_back_to_chat(self, model):
        normalized = _normalize_for_deepseek(model)
        assert normalized == "deepseek-chat"