/ tests / agent / test_moonshot_schema.py
test_moonshot_schema.py
  1  """Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
  2  
  3  Moonshot's tool-parameter validator rejects several shapes that the rest of
  4  the JSON Schema ecosystem accepts:
  5  
  6  1. Properties without ``type`` — Moonshot requires ``type`` on every node.
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
   ``anyOf`` children.
3. ``null`` or empty-string values inside ``enum`` — Moonshot rejects them.
  9  
 10  These tests cover the repairs applied by ``agent/moonshot_schema.py``.
 11  """
 12  
 13  from __future__ import annotations
 14  
 15  import pytest
 16  
 17  from agent.moonshot_schema import (
 18      is_moonshot_model,
 19      sanitize_moonshot_tool_parameters,
 20      sanitize_moonshot_tools,
 21  )
 22  
 23  
 24  class TestMoonshotModelDetection:
 25      """is_moonshot_model() must match across aggregator prefixes."""
 26  
 27      @pytest.mark.parametrize(
 28          "model",
 29          [
 30              "kimi-k2.6",
 31              "kimi-k2-thinking",
 32              "moonshotai/Kimi-K2.6",
 33              "moonshotai/kimi-k2.6",
 34              "nous/moonshotai/kimi-k2.6",
 35              "openrouter/moonshotai/kimi-k2-thinking",
 36              "MOONSHOTAI/KIMI-K2.6",
 37          ],
 38      )
 39      def test_positive_matches(self, model):
 40          assert is_moonshot_model(model) is True
 41  
 42      @pytest.mark.parametrize(
 43          "model",
 44          [
 45              "",
 46              None,
 47              "anthropic/claude-sonnet-4.6",
 48              "openai/gpt-5.4",
 49              "google/gemini-3-flash-preview",
 50              "deepseek-chat",
 51          ],
 52      )
 53      def test_negative_matches(self, model):
 54          assert is_moonshot_model(model) is False
 55  
 56  
 57  class TestMissingTypeFilled:
 58      """Rule 1: every property must carry a type."""
 59  
 60      def test_property_without_type_gets_string(self):
 61          params = {
 62              "type": "object",
 63              "properties": {"query": {"description": "a bare property"}},
 64          }
 65          out = sanitize_moonshot_tool_parameters(params)
 66          assert out["properties"]["query"]["type"] == "string"
 67  
 68      def test_property_with_enum_infers_type_from_first_value(self):
 69          params = {
 70              "type": "object",
 71              "properties": {"flag": {"enum": [True, False]}},
 72          }
 73          out = sanitize_moonshot_tool_parameters(params)
 74          assert out["properties"]["flag"]["type"] == "boolean"
 75  
 76      def test_nested_properties_are_repaired(self):
 77          params = {
 78              "type": "object",
 79              "properties": {
 80                  "filter": {
 81                      "type": "object",
 82                      "properties": {
 83                          "field": {"description": "no type"},
 84                      },
 85                  },
 86              },
 87          }
 88          out = sanitize_moonshot_tool_parameters(params)
 89          assert out["properties"]["filter"]["properties"]["field"]["type"] == "string"
 90  
 91      def test_array_items_without_type_get_repaired(self):
 92          params = {
 93              "type": "object",
 94              "properties": {
 95                  "tags": {
 96                      "type": "array",
 97                      "items": {"description": "tag entry"},
 98                  },
 99              },
100          }
101          out = sanitize_moonshot_tool_parameters(params)
102          assert out["properties"]["tags"]["items"]["type"] == "string"
103  
104      def test_ref_node_is_not_given_synthetic_type(self):
105          """$ref nodes should NOT get a synthetic type — the referenced
106          definition supplies it, and Moonshot would reject the conflict."""
107          params = {
108              "type": "object",
109              "properties": {"payload": {"$ref": "#/$defs/Payload"}},
110              "$defs": {"Payload": {"type": "object", "properties": {}}},
111          }
112          out = sanitize_moonshot_tool_parameters(params)
113          assert "type" not in out["properties"]["payload"]
114          assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload"
115  
116  
class TestAnyOfParentType:
    """Rule 2: ``type`` must not sit next to ``anyOf`` on the parent node.

    These cases also cover ``anyOf`` lists containing a null-type branch,
    which Moonshot rejects: when exactly one non-null branch remains, the
    ``anyOf`` is expected to collapse into that branch.
    """

    def test_anyof_null_branch_collapsed_to_single_type(self):
        """anyOf [string, null] plus parent type → plain ``string``, no anyOf."""
        nullable_string = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        schema = {
            "type": "object",
            "properties": {"from_format": nullable_string},
        }

        sanitized = sanitize_moonshot_tool_parameters(schema)

        collapsed = sanitized["properties"]["from_format"]
        # The null branch is gone and the lone remaining branch was promoted.
        assert "anyOf" not in collapsed
        assert collapsed["type"] == "string"

    def test_anyof_multiple_non_null_preserved(self):
        """anyOf [string, integer] (no null) keeps its ``anyOf``.

        NOTE(review): the input carries no parent-level ``type``, so the
        second assertion only shows that none is added to the anyOf parent;
        it does not exercise removal of an existing parent ``type``.
        """
        either_type = {
            "anyOf": [
                {"type": "string"},
                {"type": "integer"},
            ],
        }
        schema = {"type": "object", "properties": {"mode": either_type}}

        sanitized = sanitize_moonshot_tool_parameters(schema)

        kept = sanitized["properties"]["mode"]
        assert "anyOf" in kept
        assert "type" not in kept  # no type at the anyOf parent level

    def test_anyof_enum_with_null_collapsed(self):
        """anyOf [{enum: [...]}, {type: null}] → ``enum`` + ``string`` type.

        The enum branch has no ``type`` in the input; the result is expected
        to be typed ``string`` and to have the empty-string value removed.
        """
        nullable_enum = {
            "anyOf": [
                {"enum": ["mysql", "postgresql", ""]},
                {"type": "null"},
            ],
        }
        schema = {"type": "object", "properties": {"db_type": nullable_enum}}

        sanitized = sanitize_moonshot_tool_parameters(schema)

        collapsed = sanitized["properties"]["db_type"]
        assert "anyOf" not in collapsed
        assert collapsed["type"] == "string"
        # "" is expected to be dropped from the enum values.
        assert collapsed["enum"] == ["mysql", "postgresql"]
182  
class TestTopLevelGuarantees:
    """The returned top-level schema is always a well-formed object."""

    def test_non_dict_input_returns_empty_object(self):
        """``None``, a string and a list all yield an empty object schema."""
        empty_object = {"type": "object", "properties": {}}
        for bad_input in (None, "garbage", []):
            assert sanitize_moonshot_tool_parameters(bad_input) == empty_object, (
                f"unexpected result for input {bad_input!r}"
            )

    def test_non_object_top_level_coerced(self):
        """A non-object top-level type becomes ``object`` with ``properties``."""
        params = {"type": "string"}
        out = sanitize_moonshot_tool_parameters(params)
        assert out["type"] == "object"
        assert "properties" in out

    def test_does_not_mutate_input(self):
        """Sanitizing must leave the caller's schema completely untouched.

        The whole structure is compared against a deep copy taken before the
        call, so any in-place change (added, removed or altered keys at any
        depth) fails the test, not just a ``type`` added to the property.
        """
        import copy

        params = {
            "type": "object",
            "properties": {"q": {"description": "no type"}},
        }
        snapshot = copy.deepcopy(params)

        sanitize_moonshot_tool_parameters(params)

        assert params == snapshot
209  
210  
class TestToolListSanitizer:
    """``sanitize_moonshot_tools()`` applied to an OpenAI-format tool list."""

    def test_applies_per_tool(self):
        """Each tool's ``parameters`` schema is sanitized independently."""
        search_tool = {
            "type": "function",
            "function": {
                "name": "search",
                "description": "Search",
                "parameters": {
                    "type": "object",
                    "properties": {"q": {"description": "query"}},
                },
            },
        }
        noop_tool = {
            "type": "function",
            "function": {
                "name": "noop",
                "description": "Does nothing",
                "parameters": {"type": "object", "properties": {}},
            },
        }

        sanitized = sanitize_moonshot_tools([search_tool, noop_tool])

        # First tool: the untyped property received a type.
        search_params = sanitized[0]["function"]["parameters"]
        assert search_params["properties"]["q"]["type"] == "string"
        # Second tool was already clean, so it must be structurally equivalent.
        noop_params = sanitized[1]["function"]["parameters"]
        assert noop_params == {"type": "object", "properties": {}}

    def test_empty_list_is_passthrough(self):
        """An empty list yields an empty list and ``None`` yields ``None``."""
        from_empty = sanitize_moonshot_tools([])
        assert from_empty == []
        from_none = sanitize_moonshot_tools(None)
        assert from_none is None

    def test_skips_malformed_entries(self):
        """Entries lacking a ``function`` dict come back equal to the input."""
        malformed = [{"type": "function"}, {"not": "a tool"}]
        sanitized = sanitize_moonshot_tools(malformed)
        assert sanitized == malformed
250  
251  
class TestRealWorldMCPShape:
    """End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""

    def test_combined_rewrites(self):
        """All repairs apply together within a single tool schema.

        The fixture combines an untyped property, an ``anyOf`` with a parent
        ``type`` and a null branch, and an array whose ``items`` has no type.
        """
        untyped_query = {"description": "search text"}
        nullable_filter = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        untyped_tag_list = {
            "type": "array",
            "items": {"description": "tag"},
        }
        schema = {
            "type": "object",
            "properties": {
                "query": untyped_query,
                "filter": nullable_filter,
                "tags": untyped_tag_list,
            },
            "required": ["query"],
        }

        sanitized = sanitize_moonshot_tool_parameters(schema)
        props = sanitized["properties"]

        assert props["query"]["type"] == "string"
        # The anyOf with a null branch collapsed to a plain type.
        assert "anyOf" not in props["filter"]
        assert props["filter"]["type"] == "string"
        assert props["tags"]["items"]["type"] == "string"
        assert sanitized["required"] == ["query"]
283  
284  
class TestEnumNullStripping:
    """Rule 3: Moonshot rejects null/empty-string inside enum arrays."""

    def test_enum_null_value_stripped(self):
        """A Python ``None`` inside a string enum is removed."""
        schema = {
            "type": "object",
            "properties": {
                "db_type": {
                    "type": "string",
                    "enum": ["mysql", "postgresql", None],
                },
            },
        }

        cleaned = sanitize_moonshot_tool_parameters(schema)["properties"]["db_type"]

        assert None not in cleaned["enum"]
        assert "mysql" in cleaned["enum"]
        assert "postgresql" in cleaned["enum"]

    def test_enum_empty_string_stripped(self):
        """An empty string ``''`` inside a string enum is removed."""
        schema = {
            "type": "object",
            "properties": {
                "db_type": {
                    "type": "string",
                    "enum": ["mysql", "postgresql", ""],
                },
            },
        }

        cleaned = sanitize_moonshot_tool_parameters(schema)["properties"]["db_type"]

        assert "" not in cleaned["enum"]
        assert cleaned["enum"] == ["mysql", "postgresql"]

    def test_enum_all_null_becomes_no_enum(self):
        """An enum holding only null/empty values is dropped entirely."""
        only_empty_values = {
            "type": "string",
            "enum": [None, ""],
        }
        schema = {"type": "object", "properties": {"val": only_empty_values}}

        sanitized = sanitize_moonshot_tool_parameters(schema)

        assert "enum" not in sanitized["properties"]["val"]

    def test_dataslayer_db_type_after_mcp_normalize(self):
        """Real-world: dataslayer ``db_type`` anyOf+enum after MCP normalization."""
        # Per the original fixture note, this is the shape produced once
        # _normalize_mcp_input_schema has run: the anyOf is already collapsed,
        # but the enum still holds null and the empty string.
        normalized_db_type = {
            "enum": ["mysql", "mariadb", "postgresql", "sqlserver", "oracle", "", None],
            "type": "string",
            "nullable": True,
            "default": None,
        }
        schema = {
            "type": "object",
            "properties": {
                "datasource": {"type": "string"},
                "db_type": normalized_db_type,
            },
            "required": ["datasource"],
        }

        cleaned = sanitize_moonshot_tool_parameters(schema)["properties"]["db_type"]

        assert "nullable" not in cleaned, "nullable keyword must be stripped"
        assert None not in cleaned["enum"]
        assert "" not in cleaned["enum"]
        assert cleaned["enum"] == ["mysql", "mariadb", "postgresql", "sqlserver", "oracle"]
        assert cleaned["type"] == "string"

    def test_enum_on_object_type_not_stripped(self):
        """An enum on a non-scalar (object) node must survive sanitizing.

        Only the presence of the ``enum`` key is asserted here, not its
        contents.
        """
        object_with_enum = {
            "type": "object",
            "properties": {},
            "enum": [{}, None],
        }
        schema = {"type": "object", "properties": {"config": object_with_enum}}

        sanitized = sanitize_moonshot_tool_parameters(schema)

        # The object-typed enum is expected to pass through.
        assert "enum" in sanitized["properties"]["config"]

    def test_anyof_collapse_still_runs_nullable_and_enum_cleanup(self):
        """Cleanup must still run on a node produced by an anyOf collapse.

        After the anyOf collapses to its single non-null branch, the merged
        node must still have ``nullable`` stripped and null/empty-string
        values removed from its enum, rather than being skipped by an early
        return on the anyOf path.
        """
        nullable_enum = {
            "anyOf": [
                {"enum": ["mysql", "postgresql", "", None]},
                {"type": "null"},
            ],
            "nullable": True,
        }
        schema = {"type": "object", "properties": {"db_type": nullable_enum}}

        cleaned = sanitize_moonshot_tool_parameters(schema)["properties"]["db_type"]

        assert "anyOf" not in cleaned
        assert "nullable" not in cleaned, "nullable must be stripped after anyOf collapse"
        assert cleaned["type"] == "string"
        assert cleaned["enum"] == ["mysql", "postgresql"], (
            "null/empty enum values must be stripped after anyOf collapse"
        )