# test_moonshot_schema.py
"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.

Moonshot's tool-parameter validator rejects several shapes that the rest of
the JSON Schema ecosystem accepts:

1. Properties without ``type`` — Moonshot requires ``type`` on every node.
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
   ``anyOf`` children.
3. ``null`` / empty-string values inside ``enum`` arrays (and the
   ``nullable`` keyword) — these tests expect them to be stripped.

These tests cover the repairs applied by ``agent/moonshot_schema.py``.
"""

from __future__ import annotations

import pytest

from agent.moonshot_schema import (
    is_moonshot_model,
    sanitize_moonshot_tool_parameters,
    sanitize_moonshot_tools,
)


class TestMoonshotModelDetection:
    """is_moonshot_model() must match across aggregator prefixes."""

    @pytest.mark.parametrize(
        "model",
        [
            "kimi-k2.6",
            "kimi-k2-thinking",
            "moonshotai/Kimi-K2.6",
            "moonshotai/kimi-k2.6",
            "nous/moonshotai/kimi-k2.6",
            "openrouter/moonshotai/kimi-k2-thinking",
            "MOONSHOTAI/KIMI-K2.6",
        ],
    )
    def test_positive_matches(self, model):
        assert is_moonshot_model(model) is True

    @pytest.mark.parametrize(
        "model",
        [
            "",
            None,
            "anthropic/claude-sonnet-4.6",
            "openai/gpt-5.4",
            "google/gemini-3-flash-preview",
            "deepseek-chat",
        ],
    )
    def test_negative_matches(self, model):
        assert is_moonshot_model(model) is False


class TestMissingTypeFilled:
    """Rule 1: every property must carry a type."""

    def test_property_without_type_gets_string(self):
        params = {
            "type": "object",
            "properties": {"query": {"description": "a bare property"}},
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["query"]["type"] == "string"

    def test_property_with_enum_infers_type_from_first_value(self):
        params = {
            "type": "object",
            "properties": {"flag": {"enum": [True, False]}},
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["flag"]["type"] == "boolean"

    def test_nested_properties_are_repaired(self):
        params = {
            "type": "object",
            "properties": {
                "filter": {
                    "type": "object",
                    "properties": {
                        "field": {"description": "no type"},
                    },
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["filter"]["properties"]["field"]["type"] == "string"

    def test_array_items_without_type_get_repaired(self):
        params = {
            "type": "object",
            "properties": {
                "tags": {
                    "type": "array",
                    "items": {"description": "tag entry"},
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["tags"]["items"]["type"] == "string"

    def test_ref_node_is_not_given_synthetic_type(self):
        """$ref nodes should NOT get a synthetic type — the referenced
        definition supplies it, and Moonshot would reject the conflict."""
        params = {
            "type": "object",
            "properties": {"payload": {"$ref": "#/$defs/Payload"}},
            "$defs": {"Payload": {"type": "object", "properties": {}}},
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert "type" not in out["properties"]["payload"]
        assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload"


class TestAnyOfParentType:
    """Rule 2: type must not appear at the anyOf parent level.

    When an anyOf contains a null-type branch, Moonshot rejects it.
    The sanitizer collapses the anyOf: single non-null branch is promoted,
    multiple non-null branches have null removed from the list.
    """

    def test_anyof_null_branch_collapsed_to_single_type(self):
        """anyOf [string, null] → plain string (anyOf removed)."""
        params = {
            "type": "object",
            "properties": {
                "from_format": {
                    "type": "string",
                    "anyOf": [
                        {"type": "string"},
                        {"type": "null"},
                    ],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        from_format = out["properties"]["from_format"]
        # null branch removed, anyOf collapsed to the single non-null type
        assert "anyOf" not in from_format
        assert from_format["type"] == "string"

    def test_anyof_multiple_non_null_preserved(self):
        """anyOf [string, integer] (no null) → anyOf kept, no parent ``type``.

        The input carries no ``type`` next to ``anyOf``; the check is that
        the sanitizer keeps the ``anyOf`` and does not add a parent-level
        ``type`` to it.
        """
        params = {
            "type": "object",
            "properties": {
                "mode": {
                    "anyOf": [
                        {"type": "string"},
                        {"type": "integer"},
                    ],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        mode = out["properties"]["mode"]
        assert "anyOf" in mode
        assert "type" not in mode  # no type at the anyOf parent level

    def test_anyof_enum_with_null_collapsed(self):
        """anyOf [{enum: [...]}, {type: null}] → enum + string type, no anyOf."""
        params = {
            "type": "object",
            "properties": {
                "db_type": {
                    "anyOf": [
                        {"enum": ["mysql", "postgresql", ""]},
                        {"type": "null"},
                    ],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        db_type = out["properties"]["db_type"]
        assert "anyOf" not in db_type
        assert db_type["type"] == "string"
        assert db_type["enum"] == ["mysql", "postgresql"]  # "" stripped by enum cleanup


class TestTopLevelGuarantees:
    """The returned top-level schema is always a well-formed object."""

    def test_non_dict_input_returns_empty_object(self):
        assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}}
        assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}}
        assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}}

    def test_non_object_top_level_coerced(self):
        params = {"type": "string"}
        out = sanitize_moonshot_tool_parameters(params)
        assert out["type"] == "object"
        assert "properties" in out

    def test_does_not_mutate_input(self):
        """The caller's schema dict must be left exactly as it was passed in."""
        params = {
            "type": "object",
            "properties": {"q": {"description": "no type"}},
        }
        # Independent copy at every nesting level, taken before the call.
        snapshot = {
            "type": params["type"],
            "properties": {"q": dict(params["properties"]["q"])},
        }
        sanitize_moonshot_tool_parameters(params)
        assert params["type"] == snapshot["type"]
        assert "type" not in params["properties"]["q"]
        # Whole-structure comparison: also catches edits to other keys
        # (e.g. ``description``) or keys added anywhere in the input.
        assert params == snapshot


class TestToolListSanitizer:
    """sanitize_moonshot_tools() walks an OpenAI-format tool list."""

    def test_applies_per_tool(self):
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "search",
                    "description": "Search",
                    "parameters": {
                        "type": "object",
                        "properties": {"q": {"description": "query"}},
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "noop",
                    "description": "Does nothing",
                    "parameters": {"type": "object", "properties": {}},
                },
            },
        ]
        out = sanitize_moonshot_tools(tools)
        assert out[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
        # Second tool already clean — should be structurally equivalent
        assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}}

    def test_empty_list_is_passthrough(self):
        """An empty list comes back empty, and ``None`` comes back as ``None``."""
        assert sanitize_moonshot_tools([]) == []
        assert sanitize_moonshot_tools(None) is None

    def test_skips_malformed_entries(self):
        """Entries without a function dict are passed through untouched."""
        tools = [{"type": "function"}, {"not": "a tool"}]
        out = sanitize_moonshot_tools(tools)
        assert out == tools


class TestRealWorldMCPShape:
    """End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""

    def test_combined_rewrites(self):
        # Shape: missing type on a property, anyOf with parent type + null, array
        # items without type — all in one tool.
        params = {
            "type": "object",
            "properties": {
                "query": {"description": "search text"},
                "filter": {
                    "type": "string",
                    "anyOf": [
                        {"type": "string"},
                        {"type": "null"},
                    ],
                },
                "tags": {
                    "type": "array",
                    "items": {"description": "tag"},
                },
            },
            "required": ["query"],
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert out["properties"]["query"]["type"] == "string"
        # anyOf with null collapsed to plain type
        assert "anyOf" not in out["properties"]["filter"]
        assert out["properties"]["filter"]["type"] == "string"
        assert out["properties"]["tags"]["items"]["type"] == "string"
        assert out["required"] == ["query"]


class TestEnumNullStripping:
    """Rule 3: Moonshot rejects null/empty-string inside enum arrays."""

    def test_enum_null_value_stripped(self):
        """enum containing Python None must have it removed for Moonshot."""
        params = {
            "type": "object",
            "properties": {
                "db_type": {
                    "type": "string",
                    "enum": ["mysql", "postgresql", None],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        db_type = out["properties"]["db_type"]
        assert None not in db_type["enum"]
        assert "mysql" in db_type["enum"]
        assert "postgresql" in db_type["enum"]

    def test_enum_empty_string_stripped(self):
        """enum containing empty string '' must have it removed for Moonshot."""
        params = {
            "type": "object",
            "properties": {
                "db_type": {
                    "type": "string",
                    "enum": ["mysql", "postgresql", ""],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        db_type = out["properties"]["db_type"]
        assert "" not in db_type["enum"]
        assert db_type["enum"] == ["mysql", "postgresql"]

    def test_enum_all_null_becomes_no_enum(self):
        """enum that only had null/empty values is dropped entirely."""
        params = {
            "type": "object",
            "properties": {
                "val": {
                    "type": "string",
                    "enum": [None, ""],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        assert "enum" not in out["properties"]["val"]

    def test_dataslayer_db_type_after_mcp_normalize(self):
        """Real-world: dataslayer db_type anyOf+enum after MCP normalization."""
        # This is the exact shape after _normalize_mcp_input_schema runs:
        # anyOf collapsed, but enum still has null + empty string
        params = {
            "type": "object",
            "properties": {
                "datasource": {"type": "string"},
                "db_type": {
                    "enum": ["mysql", "mariadb", "postgresql", "sqlserver", "oracle", "", None],
                    "type": "string",
                    "nullable": True,
                    "default": None,
                },
            },
            "required": ["datasource"],
        }
        out = sanitize_moonshot_tool_parameters(params)
        db_type = out["properties"]["db_type"]
        assert "nullable" not in db_type, "nullable keyword must be stripped"
        assert None not in db_type["enum"]
        assert "" not in db_type["enum"]
        assert db_type["enum"] == ["mysql", "mariadb", "postgresql", "sqlserver", "oracle"]
        assert db_type["type"] == "string"

    def test_enum_on_object_type_not_stripped(self):
        """enum on non-scalar types (object) should NOT be touched."""
        params = {
            "type": "object",
            "properties": {
                "config": {
                    "type": "object",
                    "properties": {},
                    "enum": [{}, None],
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        # object-typed enum must survive sanitization (only the presence of
        # the key is asserted here, not its exact contents)
        assert "enum" in out["properties"]["config"]

    def test_anyof_collapse_still_runs_nullable_and_enum_cleanup(self):
        """After anyOf collapses to a single non-null branch, the merged
        node must still have ``nullable`` stripped and null/empty-string
        values removed from enum — not skipped by the early anyOf return.
        """
        params = {
            "type": "object",
            "properties": {
                "db_type": {
                    "anyOf": [
                        {"enum": ["mysql", "postgresql", "", None]},
                        {"type": "null"},
                    ],
                    "nullable": True,
                },
            },
        }
        out = sanitize_moonshot_tool_parameters(params)
        db_type = out["properties"]["db_type"]
        assert "anyOf" not in db_type
        assert "nullable" not in db_type, "nullable must be stripped after anyOf collapse"
        assert db_type["type"] == "string"
        assert db_type["enum"] == ["mysql", "postgresql"], (
            "null/empty enum values must be stripped after anyOf collapse"
        )