/ tests / features / test_json_schema_match_feature.py
test_json_schema_match_feature.py
  1  from typing import Any
  2  from typing import Dict
  3  
  4  import pandas as pd
  5  import pytest
  6  
  7  from evidently.legacy.features.json_schema_match_feature import JSONSchemaMatch
  8  
  9  
 10  @pytest.mark.parametrize(
 11      ("column_value, expected_schema, validate_types, exact_match, expected_output"),
 12      [
 13          # Invalid JSON
 14          ('{"name": "Invalid json"]', {"name": str, "age": int}, False, False, False),
 15          # Exact Match
 16          ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, True, True),
 17          ('{"name": "Jane", "age": 25}', {"name": str, "age": int, "city": str}, True, True, False),
 18          ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, True, True, False),
 19          ('{"name": "Jane", "age": 25}', {"name": int, "age": int}, True, True, False),
 20          # Minimal Match without type validation
 21          ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, False, False, True),
 22          ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, False, False, True),
 23          ('{"name": "Jane", "age": null, "city": "New York"}', {"name": str, "age": int}, False, False, False),
 24          # Minimal Match with type validation
 25          ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, False, True),
 26          (
 27              '{"name": "Jane", "age": "25"}',
 28              {"name": str, "age": int},
 29              True,
 30              False,
 31              False,
 32          ),  # Fail due to type mismatch (age as string)
 33      ],
 34  )
 35  def test_match_json_schema(
 36      column_value: str, expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: bool
 37  ):
 38      schema_match = JSONSchemaMatch(
 39          expected_schema=expected_schema,
 40          validate_types=validate_types,
 41          exact_match=exact_match,
 42          column_name="TestColumnName",
 43      )
 44      result = schema_match.match_json_schema(json_text=column_value)
 45      assert result == expected_output
 46  
 47  
 48  @pytest.mark.parametrize(
 49      ("json_obj, expected_schema, validate_types, expected_output"),
 50      [
 51          # Minimal Match with type validation
 52          ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True),
 53          ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False),
 54          ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, True),
 55          ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int, "region": str}, True, False),
 56          ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, True, False),
 57          # Minimal Match without type validation
 58          ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, False, True),
 59          ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, False, False),
 60      ],
 61  )
 62  def test_minimal_match(
 63      json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool
 64  ):
 65      schema_match = JSONSchemaMatch(
 66          expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName"
 67      )
 68      result = schema_match._minimal_match(json_obj)
 69      assert result == expected_output
 70  
 71  
 72  @pytest.mark.parametrize(
 73      ("json_obj, expected_schema, validate_types, expected_output"),
 74      [
 75          # Exact Match
 76          ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True),
 77          ({"name": "Jane", "age": 25}, {"name": str, "age": int}, False, True),
 78          ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False),
 79          ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, False),
 80          ({"name": "Jane", "age": 25}, {"name": str, "age": int, "city": str}, True, False),
 81          (
 82              {"name": "Jane", "age": 25, "city": ["New York", "California"]},
 83              {"name": str, "age": int, "city": list},
 84              True,
 85              True,
 86          ),
 87          (
 88              {"name": "Jane", "age": 25, "city": ["New York", "California"]},
 89              {"name": str, "age": int, "city": dict},
 90              True,
 91              False,
 92          ),
 93      ],
 94  )
 95  def test_exact_match(
 96      json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool
 97  ):
 98      schema_match = JSONSchemaMatch(
 99          expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName"
100      )
101      result = schema_match._exact_match(json_obj)
102      assert result == expected_output
103  
104  
# Shared input frame for the generate_feature tests: a single "TestColumnName"
# column holding one JSON document (as text) per row. The rows cover complete
# objects, a null value, syntactically invalid JSON, and an object whose "age"
# is a string and which has "country" instead of "city".
test_data = pd.DataFrame(
    data=[
        '{"name": "John", "age": 30, "city": "New York"}',
        '{"name": "Jane", "age": null, "city": "London"}',
        '{"name": "Mike", "age": 25, "city": "San Francisco"}',
        '{"name": "Invalid json"]',
        '{"name": "Anna", "age": "22", "country": "Canada"}',
    ],
    columns=["TestColumnName"],
)
116  
117  
@pytest.mark.parametrize(
    ("expected_schema", "validate_types", "exact_match", "expected_output"),
    [
        # Minimal match, types ignored: the last row (age given as "22") still passes
        ({"name": str, "age": int}, False, False, [True, False, True, False, True]),
        # Minimal match, types checked: the last row now fails
        ({"name": str, "age": int}, True, False, [True, False, True, False, False]),
        # Exact match against a three-key schema
        ({"name": str, "age": int, "city": str}, True, True, [True, False, True, False, False]),
    ],
)
def test_generate_feature(
    expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: list
):
    """Run ``generate_feature`` over the shared ``test_data`` frame.

    The generated feature column, converted to a list, must equal
    ``expected_output`` — one boolean per row of ``test_data``.
    """
    feature = JSONSchemaMatch(
        column_name="TestColumnName",
        expected_schema=expected_schema,
        validate_types=validate_types,
        exact_match=exact_match,
    )
    generated = feature.generate_feature(test_data, None)
    feature_column = feature._feature_column_name()
    assert generated[feature_column].tolist() == expected_output
140  
141  
def test_generate_feature_column_name_dne():
    """Expect ``KeyError`` when the configured column is not in the frame.

    ``test_data`` only has the "TestColumnName" column, so a feature bound to
    "DNEColumn" has nothing to read.
    """
    feature = JSONSchemaMatch(
        column_name="DNEColumn",
        expected_schema={"test": str},
        validate_types=False,
        exact_match=False,
    )
    with pytest.raises(KeyError):
        feature.generate_feature(test_data, None)