# test_json_schema_match_feature.py
from typing import Any
from typing import Dict

import pandas as pd
import pytest

from evidently.legacy.features.json_schema_match_feature import JSONSchemaMatch


@pytest.mark.parametrize(
    "column_value, expected_schema, validate_types, exact_match, expected_output",
    [
        # Invalid JSON
        ('{"name": "Invalid json"]', {"name": str, "age": int}, False, False, False),
        # Exact Match
        ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, True, True),
        ('{"name": "Jane", "age": 25}', {"name": str, "age": int, "city": str}, True, True, False),
        ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, True, True, False),
        ('{"name": "Jane", "age": 25}', {"name": int, "age": int}, True, True, False),
        # Minimal Match without type validation
        ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, False, False, True),
        ('{"name": "Jane", "age": 25, "city": "New York"}', {"name": str, "age": int}, False, False, True),
        ('{"name": "Jane", "age": null, "city": "New York"}', {"name": str, "age": int}, False, False, False),
        # Minimal Match with type validation
        ('{"name": "Jane", "age": 25}', {"name": str, "age": int}, True, False, True),
        (
            '{"name": "Jane", "age": "25"}',
            {"name": str, "age": int},
            True,
            False,
            False,
        ),  # Fail due to type mismatch (age as string)
    ],
)
def test_match_json_schema(
    column_value: str, expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: bool
):
    """Check ``match_json_schema`` on raw JSON text for each flag combination.

    Covers unparseable JSON, exact matching (missing key, extra key, wrong type)
    and minimal matching with and without type validation.
    """
    schema_match = JSONSchemaMatch(
        expected_schema=expected_schema,
        validate_types=validate_types,
        exact_match=exact_match,
        column_name="TestColumnName",
    )
    result = schema_match.match_json_schema(json_text=column_value)
    assert result == expected_output


@pytest.mark.parametrize(
    "json_obj, expected_schema, validate_types, expected_output",
    [
        # Minimal Match with type validation
        ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True),
        ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False),
        ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, True),
        ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int, "region": str}, True, False),
        ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, True, False),
        # Minimal Match without type validation
        ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, False, True),
        ({"name": "Jane", "age": None, "city": "New York"}, {"name": str, "age": int}, False, False),
    ],
)
def test_minimal_match(
    json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool
):
    """Check ``_minimal_match`` directly on already-parsed JSON objects.

    Extra keys in the object are expected to be accepted, while a missing or
    ``None``-valued schema key is expected to be rejected.
    """
    schema_match = JSONSchemaMatch(
        expected_schema=expected_schema, validate_types=validate_types, exact_match=False, column_name="TestColumnName"
    )
    result = schema_match._minimal_match(json_obj)
    assert result == expected_output


@pytest.mark.parametrize(
    "json_obj, expected_schema, validate_types, expected_output",
    [
        # Exact Match
        ({"name": "Jane", "age": 25}, {"name": str, "age": int}, True, True),
        ({"name": "Jane", "age": 25}, {"name": str, "age": int}, False, True),
        ({"name": "Jane", "age": "25"}, {"name": str, "age": int}, True, False),
        ({"name": "Jane", "age": 25, "city": "New York"}, {"name": str, "age": int}, True, False),
        ({"name": "Jane", "age": 25}, {"name": str, "age": int, "city": str}, True, False),
        (
            {"name": "Jane", "age": 25, "city": ["New York", "California"]},
            {"name": str, "age": int, "city": list},
            True,
            True,
        ),
        (
            {"name": "Jane", "age": 25, "city": ["New York", "California"]},
            {"name": str, "age": int, "city": dict},
            True,
            False,
        ),
    ],
)
def test_exact_match(
    json_obj: Dict[str, Any], expected_schema: Dict[str, type], validate_types: bool, expected_output: bool
):
    """Check ``_exact_match`` directly on already-parsed JSON objects.

    Both extra keys in the object and keys missing from it are expected to be
    rejected, as are values whose type disagrees with the schema.
    """
    # The feature is configured for exact matching so that the fixture agrees
    # with the code path exercised below.
    schema_match = JSONSchemaMatch(
        expected_schema=expected_schema, validate_types=validate_types, exact_match=True, column_name="TestColumnName"
    )
    result = schema_match._exact_match(json_obj)
    assert result == expected_output


# Shared input frame for the ``generate_feature`` tests: one JSON document per row,
# including a null value (row 2), unparseable JSON (row 4) and a string-typed age
# with a "country" key instead of "city" (row 5).
test_data = pd.DataFrame(
    {
        "TestColumnName": [
            '{"name": "John", "age": 30, "city": "New York"}',
            '{"name": "Jane", "age": null, "city": "London"}',
            '{"name": "Mike", "age": 25, "city": "San Francisco"}',
            '{"name": "Invalid json"]',
            '{"name": "Anna", "age": "22", "country": "Canada"}',
        ]
    }
)


@pytest.mark.parametrize(
    "expected_schema, validate_types, exact_match, expected_output",
    [
        # Minimal Match without type validation
        ({"name": str, "age": int}, False, False, [True, False, True, False, True]),
        # Minimal Match with type validation
        ({"name": str, "age": int}, True, False, [True, False, True, False, False]),
        # Exact Match
        ({"name": str, "age": int, "city": str}, True, True, [True, False, True, False, False]),
    ],
)
def test_generate_feature(
    expected_schema: Dict[str, type], validate_types: bool, exact_match: bool, expected_output: list
):
    """Check the per-row results ``generate_feature`` produces for ``test_data``."""
    schema_match = JSONSchemaMatch(
        expected_schema=expected_schema,
        validate_types=validate_types,
        exact_match=exact_match,
        column_name="TestColumnName",
    )
    result = schema_match.generate_feature(test_data, None)
    assert result[schema_match._feature_column_name()].tolist() == expected_output


def test_generate_feature_column_name_dne():
    """A column name that does not exist in the frame must raise ``KeyError``."""
    schema_match = JSONSchemaMatch(
        expected_schema={"test": str}, validate_types=False, exact_match=False, column_name="DNEColumn"
    )
    with pytest.raises(KeyError):
        schema_match.generate_feature(test_data, None)