test_column_summary_metric.py
import json
from typing import Optional

import pandas as pd
import pytest

from evidently.legacy.metrics import ColumnSummaryMetric
from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.report import Report


@pytest.mark.parametrize(
    "current_data, reference_data, column_mapping, metric, expected_json",
    (
        # Categorical column without reference data: the reference block and
        # the current-vs-reference counters are expected to be None.
        (
            pd.DataFrame({"test1": ["a", "a", "c", "d", "e", "f", "g", "h", "i", "j"]}),
            None,
            ColumnMapping(),
            ColumnSummaryMetric(column_name="test1"),
            {
                "column_name": "test1",
                "column_type": "cat",
                "current_characteristics": {
                    "count": 10,
                    "missing": 0,
                    "missing_percentage": 0.0,
                    "most_common": "a",
                    "most_common_percentage": 20.0,
                    "new_in_current_values_count": None,
                    "number_of_rows": 10,
                    "unique": 9,
                    "unique_percentage": 90.0,
                    "unused_in_current_values_count": None,
                },
                "reference_characteristics": None,
            },
        ),
        # Categorical column with reference data whose values do not overlap
        # with the current data: "a" is new in current, the 7 distinct
        # reference values are unused in current.
        (
            pd.DataFrame({"test1": ["a", "a", "a"]}),
            pd.DataFrame({"test1": ["c", "c", "e", "f", "g", "h", "i", "j"]}),
            ColumnMapping(),
            ColumnSummaryMetric(column_name="test1"),
            {
                "column_name": "test1",
                "column_type": "cat",
                "current_characteristics": {
                    "count": 3,
                    "missing": 0,
                    "missing_percentage": 0.0,
                    "most_common": "a",
                    "most_common_percentage": 100.0,
                    "new_in_current_values_count": 1,
                    "number_of_rows": 3,
                    "unique": 1,
                    "unique_percentage": 33.33,
                    "unused_in_current_values_count": 7,
                },
                "reference_characteristics": {
                    "count": 8,
                    "missing": 0,
                    "missing_percentage": 0.0,
                    "most_common": "c",
                    "most_common_percentage": 25.0,
                    "new_in_current_values_count": None,
                    "number_of_rows": 8,
                    "unique": 7,
                    "unique_percentage": 87.5,
                    "unused_in_current_values_count": None,
                },
            },
        ),
        # Column explicitly mapped as numerical, with identical "test1" values
        # in current and reference data, so both summaries are expected to match.
        (
            pd.DataFrame({"test1": [1, 2, 3], "test2": [1, 2, 3], "test3": [1, 1, 1]}),
            pd.DataFrame({"test1": [1, 2, 3], "test2": ["a", "a", "a"], "test3": [1, 1, 1]}),
            ColumnMapping(numerical_features=["test1"]),
            ColumnSummaryMetric(column_name="test1"),
            {
                "column_name": "test1",
                "column_type": "num",
                "current_characteristics": {
                    "count": 3,
                    "infinite_count": 0,
                    "infinite_percentage": 0.0,
                    "max": 3,
                    "mean": 2.0,
                    "min": 1,
                    "missing": 0,
                    "missing_percentage": 0.0,
                    "most_common": 1,
                    "most_common_percentage": 33.33,
                    "number_of_rows": 3,
                    "p25": 1.5,
                    "p50": 2.0,
                    "p75": 2.5,
                    "std": 1.0,
                    "unique": 3,
                    "unique_percentage": 100.0,
                },
                "reference_characteristics": {
                    "count": 3,
                    "infinite_count": 0,
                    "infinite_percentage": 0.0,
                    "max": 3,
                    "mean": 2.0,
                    "min": 1,
                    "missing": 0,
                    "missing_percentage": 0.0,
                    "most_common": 1,
                    "most_common_percentage": 33.33,
                    "number_of_rows": 3,
                    "p25": 1.5,
                    "p50": 2.0,
                    "p75": 2.5,
                    "std": 1.0,
                    "unique": 3,
                    "unique_percentage": 100.0,
                },
            },
        ),
    ),
)
def test_column_summary_metric_with_report(
    current_data: pd.DataFrame,
    reference_data: Optional[pd.DataFrame],
    column_mapping: ColumnMapping,
    metric: ColumnSummaryMetric,
    expected_json: dict,
) -> None:
    """Run a single-metric report and compare its JSON result to the expectation.

    Args:
        current_data: Data frame passed to the report as current data.
        reference_data: Data frame passed as reference data, or ``None`` when
            the case runs without a reference.
        column_mapping: Column mapping passed to ``Report.run``.
        metric: The ``ColumnSummaryMetric`` instance under test.
        expected_json: Expected value of the metric's ``"result"`` entry in
            the parsed JSON output.
    """
    report = Report(metrics=[metric])
    report.run(current_data=current_data, reference_data=reference_data, column_mapping=column_mapping)

    # Rendering must produce a truthy value for the report to count as shown.
    assert report.show()

    json_result = report.json()
    assert json_result

    result = json.loads(json_result)
    # Only one metric was registered, so it is the first entry in the output.
    assert result["metrics"][0]["metric"] == "ColumnSummaryMetric"
    assert result["metrics"][0]["result"] == expected_json